"""Base schema for callback managers.""" import uuid from dataclasses import dataclass from datetime import datetime from enum import Enum from typing import Any, Dict, Optional # timestamp for callback events TIMESTAMP_FORMAT = "%m/%d/%Y, %H:%M:%S.%f" # base trace_id for the tracemap in callback_manager BASE_TRACE_EVENT = "root" class CBEventType(str, Enum): """Callback manager event types. Attributes: CHUNKING: Logs for the before and after of text splitting. NODE_PARSING: Logs for the documents and the nodes that they are parsed into. EMBEDDING: Logs for the number of texts embedded. LLM: Logs for the template and response of LLM calls. QUERY: Keeps track of the start and end of each query. RETRIEVE: Logs for the nodes retrieved for a query. SYNTHESIZE: Logs for the result for synthesize calls. TREE: Logs for the summary and level of summaries generated. SUB_QUESTION: Logs for a generated sub question and answer. """ CHUNKING = "chunking" NODE_PARSING = "node_parsing" EMBEDDING = "embedding" LLM = "llm" QUERY = "query" RETRIEVE = "retrieve" SYNTHESIZE = "synthesize" TREE = "tree" SUB_QUESTION = "sub_question" TEMPLATING = "templating" FUNCTION_CALL = "function_call" RERANKING = "reranking" EXCEPTION = "exception" AGENT_STEP = "agent_step" class EventPayload(str, Enum): DOCUMENTS = "documents" # list of documents before parsing CHUNKS = "chunks" # list of text chunks NODES = "nodes" # list of nodes PROMPT = "formatted_prompt" # formatted prompt sent to LLM MESSAGES = "messages" # list of messages sent to LLM COMPLETION = "completion" # completion from LLM RESPONSE = "response" # message response from LLM QUERY_STR = "query_str" # query used for query engine SUB_QUESTION = "sub_question" # a sub question & answer + sources EMBEDDINGS = "embeddings" # list of embeddings TOP_K = "top_k" # top k nodes retrieved ADDITIONAL_KWARGS = "additional_kwargs" # additional kwargs for event call SERIALIZED = "serialized" # serialized object for event caller FUNCTION_CALL = "function_call" # function call for the LLM FUNCTION_OUTPUT = "function_call_response" # function call output TOOL = "tool" # tool used in LLM call MODEL_NAME = "model_name" # model name used in an event TEMPLATE = "template" # template used in LLM call TEMPLATE_VARS = "template_vars" # template variables used in LLM call SYSTEM_PROMPT = "system_prompt" # system prompt used in LLM call QUERY_WRAPPER_PROMPT = "query_wrapper_prompt" # query wrapper prompt used in LLM EXCEPTION = "exception" # exception raised in an event # events that will never have children events LEAF_EVENTS = (CBEventType.CHUNKING, CBEventType.LLM, CBEventType.EMBEDDING) @dataclass class CBEvent: """Generic class to store event information.""" event_type: CBEventType payload: Optional[Dict[str, Any]] = None time: str = "" id_: str = "" def __post_init__(self) -> None: """Init time and id if needed.""" if not self.time: self.time = datetime.now().strftime(TIMESTAMP_FORMAT) if not self.id_: self.id = str(uuid.uuid4()) @dataclass class EventStats: """Time-based Statistics for events.""" total_secs: float average_secs: float total_count: int