faiss_rag_enterprise/llama_index/evaluation/tonic_validate/augmentation_precision.py

from typing import Any, Optional, Sequence

from llama_index.evaluation.base import BaseEvaluator, EvaluationResult
from llama_index.prompts.mixin import PromptDictType, PromptMixinType


class AugmentationPrecisionEvaluator(BaseEvaluator):
    """Tonic Validate's augmentation precision metric.

    The output score is a float between 0.0 and 1.0.

    See https://docs.tonic.ai/validate/ for more details.

    Args:
        openai_service (OpenAIService): The OpenAI service to use. Specifies the
            chat completion model to use as the LLM evaluator. Defaults to "gpt-4".
    """

    def __init__(self, openai_service: Optional[Any] = None):
        import_err_msg = (
            "`tonic-validate` package not found, please run `pip install "
            "tonic-validate`"
        )
        # Import lazily so this module can be loaded without tonic-validate
        # installed; raise a helpful message if the package is missing.
        try:
            from tonic_validate.metrics.augmentation_precision_metric import (
                AugmentationPrecisionMetric,
            )
            from tonic_validate.services.openai_service import OpenAIService
        except ImportError:
            raise ImportError(import_err_msg)

        # Default to Tonic Validate's OpenAI service backed by "gpt-4".
        if openai_service is None:
            openai_service = OpenAIService("gpt-4")
        self.openai_service = openai_service
        self.metric = AugmentationPrecisionMetric()

    async def aevaluate(
        self,
        query: Optional[str] = None,
        response: Optional[str] = None,
        contexts: Optional[Sequence[str]] = None,
        **kwargs: Any,
    ) -> EvaluationResult:
        from tonic_validate.classes.benchmark import BenchmarkItem
        from tonic_validate.classes.llm_response import LLMResponse

        # Wrap the query and the LLM's answer/contexts in Tonic Validate's data
        # classes, then let the metric score augmentation precision via the
        # configured OpenAI service.
        benchmark_item = BenchmarkItem(question=query)

        llm_response = LLMResponse(
            llm_answer=response,
            llm_context_list=contexts,
            benchmark_item=benchmark_item,
        )

        score = self.metric.score(llm_response, self.openai_service)

        return EvaluationResult(
            query=query, contexts=contexts, response=response, score=score
        )

    def _get_prompts(self) -> PromptDictType:
        """Return prompts (none are used by this evaluator)."""
        return {}

    def _get_prompt_modules(self) -> PromptMixinType:
        """Return prompt modules (none are used by this evaluator)."""
        return {}

    def _update_prompts(self, prompts_dict: PromptDictType) -> None:
        """No prompts to update; scoring is delegated to tonic-validate."""
        return
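

# Usage sketch (assumes `tonic-validate` is installed and an OpenAI API key is
# available in the environment; the query, response, and contexts strings below
# are illustrative placeholders, not values from this module):
#
#     import asyncio
#
#     evaluator = AugmentationPrecisionEvaluator()
#     result = asyncio.run(
#         evaluator.aevaluate(
#             query="What is the capital of France?",
#             response="Paris is the capital of France.",
#             contexts=["Paris is the capital and most populous city of France."],
#         )
#     )
#     print(result.score)  # float between 0.0 and 1.0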