from typing import Any, Optional, Sequence

from llama_index.evaluation.base import BaseEvaluator, EvaluationResult
from llama_index.prompts.mixin import PromptDictType, PromptMixinType


class AugmentationPrecisionEvaluator(BaseEvaluator):
    """Tonic Validate's augmentation precision metric.

    The output score is a float between 0.0 and 1.0.

    See https://docs.tonic.ai/validate/ for more details.

    Args:
        openai_service (OpenAIService): The OpenAI service to use. Specifies
            the chat completion model to use as the LLM evaluator. Defaults
            to "gpt-4".
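
    Example:
        An illustrative sketch (assumes `tonic-validate` is installed and an
        OpenAI API key is configured in the environment)::

            from tonic_validate.services.openai_service import OpenAIService

            # Pass your own OpenAIService to choose the evaluator model;
            # omitting it falls back to OpenAIService("gpt-4").
            evaluator = AugmentationPrecisionEvaluator(
                openai_service=OpenAIService("gpt-4")
            )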
    """

    def __init__(self, openai_service: Optional[Any] = None):
        import_err_msg = (
            "`tonic-validate` package not found, please run `pip install "
            "tonic-validate`"
        )
        # Import lazily so llama_index does not hard-depend on tonic-validate.
        try:
            from tonic_validate.metrics.augmentation_precision_metric import (
                AugmentationPrecisionMetric,
            )
            from tonic_validate.services.openai_service import OpenAIService
        except ImportError:
            raise ImportError(import_err_msg)

        if openai_service is None:
            openai_service = OpenAIService("gpt-4")
        self.openai_service = openai_service
        self.metric = AugmentationPrecisionMetric()

    async def aevaluate(
        self,
        query: Optional[str] = None,
        response: Optional[str] = None,
        contexts: Optional[Sequence[str]] = None,
        **kwargs: Any
    ) -> EvaluationResult:
        from tonic_validate.classes.benchmark import BenchmarkItem
        from tonic_validate.classes.llm_response import LLMResponse

        # Wrap the inputs in tonic-validate's benchmark/response classes so
        # the metric can score them.
        benchmark_item = BenchmarkItem(question=query)

        llm_response = LLMResponse(
            llm_answer=response,
            llm_context_list=contexts,
            benchmark_item=benchmark_item,
        )

        score = self.metric.score(llm_response, self.openai_service)

        return EvaluationResult(
            query=query, contexts=contexts, response=response, score=score
        )

    def _get_prompts(self) -> PromptDictType:
        return {}

    def _get_prompt_modules(self) -> PromptMixinType:
        return {}

    def _update_prompts(self, prompts_dict: PromptDictType) -> None:
        return
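

if __name__ == "__main__":
    # Minimal end-to-end sketch, not part of the library: assumes the
    # `tonic-validate` package is installed and an OpenAI API key is set in
    # the environment; the query/response/contexts are made-up sample data.
    import asyncio

    evaluator = AugmentationPrecisionEvaluator()
    result = asyncio.run(
        evaluator.aevaluate(
            query="What is the capital of France?",
            response="Paris is the capital of France.",
            contexts=["Paris is the capital and largest city of France."],
        )
    )
    print(result.score)  # float between 0.0 and 1.0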