"""Evaluation modules."""

from llama_index.evaluation.answer_relevancy import AnswerRelevancyEvaluator
from llama_index.evaluation.base import (
    BaseEvaluator,
    EvaluationResult,
)
from llama_index.evaluation.batch_runner import BatchEvalRunner
from llama_index.evaluation.context_relevancy import ContextRelevancyEvaluator
from llama_index.evaluation.correctness import CorrectnessEvaluator
from llama_index.evaluation.dataset_generation import (
    DatasetGenerator,
    QueryResponseDataset,
)
from llama_index.evaluation.faithfulness import FaithfulnessEvaluator, ResponseEvaluator
from llama_index.evaluation.guideline import GuidelineEvaluator
from llama_index.evaluation.notebook_utils import get_retrieval_results_df
from llama_index.evaluation.pairwise import PairwiseComparisonEvaluator
from llama_index.evaluation.relevancy import QueryResponseEvaluator, RelevancyEvaluator
from llama_index.evaluation.retrieval.base import (
    BaseRetrievalEvaluator,
    RetrievalEvalResult,
)
from llama_index.evaluation.retrieval.evaluator import (
    MultiModalRetrieverEvaluator,
    RetrieverEvaluator,
)
from llama_index.evaluation.retrieval.metrics import (
    MRR,
    HitRate,
    RetrievalMetricResult,
    resolve_metrics,
)
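
# `resolve_metrics` maps metric-name strings (e.g. "hit_rate", "mrr") to the
# metric classes imported above; HitRate checks whether any expected node id
# appears in the retrieved set, while MRR scores the reciprocal rank of the
# first relevant hit.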

from llama_index.evaluation.semantic_similarity import SemanticSimilarityEvaluator
from llama_index.evaluation.tonic_validate.answer_consistency import (
    AnswerConsistencyEvaluator,
)
from llama_index.evaluation.tonic_validate.answer_consistency_binary import (
    AnswerConsistencyBinaryEvaluator,
)
from llama_index.evaluation.tonic_validate.answer_similarity import (
    AnswerSimilarityEvaluator,
)
from llama_index.evaluation.tonic_validate.augmentation_accuracy import (
    AugmentationAccuracyEvaluator,
)
from llama_index.evaluation.tonic_validate.augmentation_precision import (
    AugmentationPrecisionEvaluator,
)
from llama_index.evaluation.tonic_validate.retrieval_precision import (
    RetrievalPrecisionEvaluator,
)
from llama_index.evaluation.tonic_validate.tonic_validate_evaluator import (
    TonicValidateEvaluator,
)
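
# A minimal retrieval-evaluation sketch (illustrative only; assumes an
# existing `retriever`, e.g. `index.as_retriever()`, and known ground-truth
# node ids for the query):
#
#     retriever_evaluator = RetrieverEvaluator.from_metric_names(
#         ["mrr", "hit_rate"], retriever=retriever
#     )
#     eval_result = retriever_evaluator.evaluate(
#         query="What color is the sky?", expected_ids=["node_id_1"]
#     )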

# Import dataset-generation helpers from the finetuning package as well.
from llama_index.finetuning.embeddings.common import (
    EmbeddingQAFinetuneDataset,
    generate_qa_embedding_pairs,
)

# Backward-compatible aliases for the finetuning helpers above.
generate_question_context_pairs = generate_qa_embedding_pairs
LabelledQADataset = EmbeddingQAFinetuneDataset
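
# A hedged sketch of dataset generation (assumes `nodes` comes from a node
# parser and that a default LLM is configured):
#
#     qa_dataset = generate_qa_embedding_pairs(nodes)
#     qa_dataset.save_json("qa_dataset.json")
#     reloaded = EmbeddingQAFinetuneDataset.from_json("qa_dataset.json")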

__all__ = [
    "BaseEvaluator",
    "AnswerRelevancyEvaluator",
    "ContextRelevancyEvaluator",
    "EvaluationResult",
    "FaithfulnessEvaluator",
    "RelevancyEvaluator",
    "DatasetGenerator",
    "QueryResponseDataset",
    "GuidelineEvaluator",
    "CorrectnessEvaluator",
    "SemanticSimilarityEvaluator",
    "PairwiseComparisonEvaluator",
    "BatchEvalRunner",
    # legacy: kept for backward compatibility
    "QueryResponseEvaluator",
    "ResponseEvaluator",
    # retrieval
    "generate_qa_embedding_pairs",
    "generate_question_context_pairs",
    "EmbeddingQAFinetuneDataset",
    "BaseRetrievalEvaluator",
    "RetrievalEvalResult",
    "RetrieverEvaluator",
    "MultiModalRetrieverEvaluator",
    "RetrievalMetricResult",
    "resolve_metrics",
    "HitRate",
    "MRR",
    "get_retrieval_results_df",
    "LabelledQADataset",
    # tonic_validate evaluators
    "AnswerConsistencyEvaluator",
    "AnswerConsistencyBinaryEvaluator",
    "AnswerSimilarityEvaluator",
    "AugmentationAccuracyEvaluator",
    "AugmentationPrecisionEvaluator",
    "RetrievalPrecisionEvaluator",
    "TonicValidateEvaluator",
]