faiss_rag_enterprise/llama_index/callbacks/schema.py

"""Base schema for callback managers."""
import uuid
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Any, Dict, Optional

# timestamp for callback events
TIMESTAMP_FORMAT = "%m/%d/%Y, %H:%M:%S.%f"

# base trace_id for the tracemap in callback_manager
BASE_TRACE_EVENT = "root"


class CBEventType(str, Enum):
    """Callback manager event types.

    Attributes:
        CHUNKING: Logs for the before and after of text splitting.
        NODE_PARSING: Logs for the documents and the nodes that they are parsed into.
        EMBEDDING: Logs for the number of texts embedded.
        LLM: Logs for the template and response of LLM calls.
        QUERY: Keeps track of the start and end of each query.
        RETRIEVE: Logs for the nodes retrieved for a query.
        SYNTHESIZE: Logs for the result of synthesize calls.
        TREE: Logs for the summary and level of summaries generated.
        SUB_QUESTION: Logs for a generated sub question and answer.
        TEMPLATING: Logs for the template and variables used before an LLM call.
        FUNCTION_CALL: Logs for function/tool calls made by the LLM.
        RERANKING: Logs for reranking calls.
        EXCEPTION: Logs for exceptions raised during an event.
        AGENT_STEP: Logs for steps taken by an agent.
    """

    CHUNKING = "chunking"
    NODE_PARSING = "node_parsing"
    EMBEDDING = "embedding"
    LLM = "llm"
    QUERY = "query"
    RETRIEVE = "retrieve"
    SYNTHESIZE = "synthesize"
    TREE = "tree"
    SUB_QUESTION = "sub_question"
    TEMPLATING = "templating"
    FUNCTION_CALL = "function_call"
    RERANKING = "reranking"
    EXCEPTION = "exception"
    AGENT_STEP = "agent_step"
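
# Illustrative note (not from the original file): because CBEventType subclasses
# str, members compare equal to their raw string values, which is convenient when
# matching event types loaded from a serialized trace, e.g.:
#
#     CBEventType("llm") is CBEventType.LLM   # True (value lookup returns the member)
#     CBEventType.LLM == "llm"                # True (str equality)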


class EventPayload(str, Enum):
    DOCUMENTS = "documents"  # list of documents before parsing
    CHUNKS = "chunks"  # list of text chunks
    NODES = "nodes"  # list of nodes
    PROMPT = "formatted_prompt"  # formatted prompt sent to LLM
    MESSAGES = "messages"  # list of messages sent to LLM
    COMPLETION = "completion"  # completion from LLM
    RESPONSE = "response"  # message response from LLM
    QUERY_STR = "query_str"  # query used for query engine
    SUB_QUESTION = "sub_question"  # a sub question & answer + sources
    EMBEDDINGS = "embeddings"  # list of embeddings
    TOP_K = "top_k"  # top k nodes retrieved
    ADDITIONAL_KWARGS = "additional_kwargs"  # additional kwargs for event call
    SERIALIZED = "serialized"  # serialized object for event caller
    FUNCTION_CALL = "function_call"  # function call for the LLM
    FUNCTION_OUTPUT = "function_call_response"  # function call output
    TOOL = "tool"  # tool used in LLM call
    MODEL_NAME = "model_name"  # model name used in an event
    TEMPLATE = "template"  # template used in LLM call
    TEMPLATE_VARS = "template_vars"  # template variables used in LLM call
    SYSTEM_PROMPT = "system_prompt"  # system prompt used in LLM call
    QUERY_WRAPPER_PROMPT = "query_wrapper_prompt"  # query wrapper prompt used in LLM
    EXCEPTION = "exception"  # exception raised in an event


# events that will never have children events
LEAF_EVENTS = (CBEventType.CHUNKING, CBEventType.LLM, CBEventType.EMBEDDING)
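
# Illustrative example (not from the original file): event payloads are plain
# dicts keyed by EventPayload members; a handler for an LLM event might receive
# something roughly like:
#
#     payload = {
#         EventPayload.MESSAGES: messages,   # messages sent to the LLM
#         EventPayload.RESPONSE: response,   # message response from the LLM
#     }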


@dataclass
class CBEvent:
    """Generic class to store event information."""

    event_type: CBEventType
    payload: Optional[Dict[str, Any]] = None
    time: str = ""
    id_: str = ""

    def __post_init__(self) -> None:
        """Init time and id if needed."""
        if not self.time:
            self.time = datetime.now().strftime(TIMESTAMP_FORMAT)
        if not self.id_:
            self.id_ = str(uuid.uuid4())
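
# Illustrative example (not from the original file): omitting `time` and `id_`
# lets __post_init__ fill them in automatically, e.g.:
#
#     event = CBEvent(CBEventType.RETRIEVE, payload={EventPayload.QUERY_STR: "..."})
#     event.time  # timestamp formatted with TIMESTAMP_FORMAT
#     event.id_   # a freshly generated uuid4 string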


@dataclass
class EventStats:
    """Time-based Statistics for events."""

    total_secs: float
    average_secs: float
    total_count: int
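

if __name__ == "__main__":
    # Illustrative usage only, not part of the original module: pair two
    # hypothetical LLM events and derive an EventStats from their duration.
    start = CBEvent(
        event_type=CBEventType.LLM,
        payload={EventPayload.PROMPT: "What is FAISS?"},
    )
    end = CBEvent(
        event_type=CBEventType.LLM,
        payload={EventPayload.RESPONSE: "A library for similarity search."},
        id_=start.id_,  # reuse the start event's id to pair the two events (illustrative)
    )

    # parse the auto-filled timestamps back with the module's TIMESTAMP_FORMAT
    t0 = datetime.strptime(start.time, TIMESTAMP_FORMAT)
    t1 = datetime.strptime(end.time, TIMESTAMP_FORMAT)
    durations = [(t1 - t0).total_seconds()]

    stats = EventStats(
        total_secs=sum(durations),
        average_secs=sum(durations) / len(durations),
        total_count=len(durations),
    )
    print(stats)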