"""
Portkey integration with Llama_index for enhanced monitoring.
"""
from typing import TYPE_CHECKING, Any, Callable, List, Optional, Sequence, Union, cast

from llama_index.bridge.pydantic import Field, PrivateAttr
from llama_index.core.llms.types import (
    ChatMessage,
    ChatResponse,
    ChatResponseGen,
    CompletionResponse,
    CompletionResponseGen,
    LLMMetadata,
)
from llama_index.llms.base import llm_chat_callback, llm_completion_callback
from llama_index.llms.custom import CustomLLM
from llama_index.llms.generic_utils import (
    chat_to_completion_decorator,
    completion_to_chat_decorator,
    stream_chat_to_completion_decorator,
    stream_completion_to_chat_decorator,
)
from llama_index.llms.portkey_utils import (
    IMPORT_ERROR_MESSAGE,
    generate_llm_metadata,
    get_llm,
    is_chat_model,
)
from llama_index.types import BaseOutputParser, PydanticProgramMode

if TYPE_CHECKING:
    from portkey import (
        LLMOptions,
        Modes,
        ModesLiteral,
        PortkeyResponse,
    )

DEFAULT_PORTKEY_MODEL = "gpt-3.5-turbo"


class Portkey(CustomLLM):
    """LLM wrapper that sends chat and completion requests through Portkey.

    The instance keeps a list of ``LLMOptions`` (``llms``) which is wrapped in
    a ``portkey.Config`` for every request. Whether the chat or the completion
    endpoint is used is decided by ``is_chat_model(self.model)``; ``model`` is
    taken from the first entry registered through :meth:`add_llms`.

    Attributes:
        mode: The Portkey mode the integration runs in.
        model: Model name used to choose between chat and completion
            endpoints. Reset to ``None`` by ``__init__`` and filled in by
            :meth:`add_llms`.
        llm: The ``LLMOptions`` resolved by ``get_llm`` for the most recent
            non-streaming chat response.
        llms: All ``LLMOptions`` registered through :meth:`add_llms`.
    """

    mode: Optional[Union["Modes", "ModesLiteral"]] = Field(
        description="The mode for using the Portkey integration"
    )

    model: Optional[str] = Field(default=DEFAULT_PORTKEY_MODEL)
    llm: "LLMOptions" = Field(description="LLM parameter", default_factory=dict)

    llms: List["LLMOptions"] = Field(description="LLM parameters", default_factory=list)

    _client: Any = PrivateAttr()

    def __init__(
        self,
        *,
        mode: Union["Modes", "ModesLiteral"],
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        system_prompt: Optional[str] = None,
        messages_to_prompt: Optional[Callable[[Sequence[ChatMessage]], str]] = None,
        completion_to_prompt: Optional[Callable[[str], str]] = None,
        pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT,
        output_parser: Optional[BaseOutputParser] = None,
    ) -> None:
        """
        Initialize a Portkey instance.

        Args:
            mode (Union[Modes, ModesLiteral]): The mode for using the Portkey
                integration. Required keyword argument.
            api_key (Optional[str]): The API key to authenticate with Portkey.
                Only applied to the ``portkey`` module when not ``None``.
            base_url (Optional[str]): The base URL of the self hosted rubeus
                (the opensource version of portkey) or any other self hosted
                server. Only applied to the ``portkey`` module when not
                ``None``.
            system_prompt, messages_to_prompt, completion_to_prompt,
            pydantic_program_mode, output_parser: Forwarded unchanged to the
                parent class initializer.

        Raises:
            ImportError: If the ``portkey`` package cannot be imported.
        """
        try:
            import portkey
        except ImportError as exc:
            raise ImportError(IMPORT_ERROR_MESSAGE) from exc

        super().__init__(
            base_url=base_url,
            api_key=api_key,
            system_prompt=system_prompt,
            messages_to_prompt=messages_to_prompt,
            completion_to_prompt=completion_to_prompt,
            pydantic_program_mode=pydantic_program_mode,
            output_parser=output_parser,
        )
        # Configuration is written onto the imported ``portkey`` module itself,
        # which is then kept as the client object.
        if api_key is not None:
            portkey.api_key = api_key

        if base_url is not None:
            portkey.base_url = base_url

        portkey.mode = mode

        self._client = portkey
        # ``model`` stays unset until the first LLM is registered via add_llms().
        self.model = None
        self.mode = mode

    @property
    def metadata(self) -> LLMMetadata:
        """LLM metadata, generated from the first registered LLM.

        Raises:
            IndexError: If no LLM has been registered through ``add_llms`` yet.
        """
        # NOTE(review): an empty ``llms`` list surfaces as a bare IndexError
        # here; left as-is so the raised exception type does not change.
        return generate_llm_metadata(self.llms[0])

    def add_llms(
        self, llm_params: Union["LLMOptions", List["LLMOptions"]]
    ) -> "Portkey":
        """
        Adds the specified LLM parameters to the list of LLMs. This may be used for
        fallbacks or load-balancing as specified in the mode.

        Args:
            llm_params (Union[LLMOptions, List[LLMOptions]]): A single LLM
                parameter set or a list of LLM parameter sets. Each set should
                be an instance of LLMOptions with the specified attributes.
                > provider: Optional[ProviderTypes]
                > model: str
                > temperature: float
                > max_tokens: Optional[int]
                > max_retries: int
                > trace_id: Optional[str]
                > cache_status: Optional[CacheType]
                > cache: Optional[bool]
                > metadata: Dict[str, Any]
                > weight: Optional[float]
                > **kwargs : Other additional parameters that are supported by
                    LLMOptions in portkey-ai

            NOTE: User may choose to pass additional params as well.

        Returns:
            self

        Raises:
            ImportError: If the ``portkey`` package cannot be imported.
        """
        try:
            from portkey import LLMOptions
        except ImportError as exc:
            raise ImportError(IMPORT_ERROR_MESSAGE) from exc
        if isinstance(llm_params, LLMOptions):
            llm_params = [llm_params]
        self.llms.extend(llm_params)
        # Guard on a non-empty list: passing ``[]`` before anything has been
        # registered must not raise IndexError.
        if self.model is None and self.llms:
            self.model = self.llms[0].model
        return self

    @llm_completion_callback()
    def complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponse:
        """Completion endpoint for LLM.

        Uses the chat endpoint (adapted to a completion interface) when the
        configured model is a chat model, otherwise the completion endpoint.
        ``formatted`` is accepted but not used.
        """
        if self._is_chat_model:
            complete_fn = chat_to_completion_decorator(self._chat)
        else:
            complete_fn = self._complete
        return complete_fn(prompt, **kwargs)

    @llm_chat_callback()
    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
        """Chat endpoint for LLM.

        Uses the chat endpoint when the configured model is a chat model,
        otherwise the completion endpoint adapted to a chat interface.
        """
        if self._is_chat_model:
            chat_fn = self._chat
        else:
            chat_fn = completion_to_chat_decorator(self._complete)
        return chat_fn(messages, **kwargs)

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseGen:
        """Streaming completion endpoint for LLM.

        Uses the streaming chat endpoint (adapted to a completion interface)
        when the configured model is a chat model, otherwise the streaming
        completion endpoint. ``formatted`` is accepted but not used.
        """
        if self._is_chat_model:
            complete_fn = stream_chat_to_completion_decorator(self._stream_chat)
        else:
            complete_fn = self._stream_complete
        return complete_fn(prompt, **kwargs)

    @llm_chat_callback()
    def stream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponseGen:
        """Streaming chat endpoint for LLM.

        Uses the streaming chat endpoint when the configured model is a chat
        model, otherwise the streaming completion endpoint adapted to a chat
        interface.
        """
        if self._is_chat_model:
            stream_chat_fn = self._stream_chat
        else:
            stream_chat_fn = stream_completion_to_chat_decorator(self._stream_complete)
        return stream_chat_fn(messages, **kwargs)

    def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
        """Send a non-streaming chat request and wrap the first choice.

        Also stores the LLM resolved from the response in ``self.llm``.

        Raises:
            ImportError: If the ``portkey`` package cannot be imported.
        """
        try:
            from portkey import Config, Message
        except ImportError as exc:
            raise ImportError(IMPORT_ERROR_MESSAGE) from exc
        _messages = cast(
            List[Message],
            [{"role": i.role.value, "content": i.content} for i in messages],
        )
        config = Config(llms=self.llms)
        # NOTE(review): ``kwargs`` is not forwarded here, unlike the streaming
        # variants below - confirm whether that is intentional.
        response = self._client.ChatCompletions.create(
            messages=_messages, config=config
        )
        self.llm = self._get_llm(response)

        message = response.choices[0].message
        return ChatResponse(message=message, raw=response)

    def _complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        """Send a non-streaming completion request and wrap the first choice.

        Raises:
            ImportError: If the ``portkey`` package cannot be imported.
        """
        try:
            from portkey import Config
        except ImportError as exc:
            raise ImportError(IMPORT_ERROR_MESSAGE) from exc

        config = Config(llms=self.llms)
        # NOTE(review): ``kwargs`` is not forwarded here, unlike the streaming
        # variants below - confirm whether that is intentional.
        response = self._client.Completions.create(prompt=prompt, config=config)
        text = response.choices[0].text
        return CompletionResponse(text=text, raw=response)

    def _stream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponseGen:
        """Send a streaming chat request and yield cumulative responses.

        Each yielded ``ChatResponse`` carries the content accumulated so far,
        the content delta of the current chunk, and the raw chunk.

        Raises:
            ImportError: If the ``portkey`` package cannot be imported.
        """
        try:
            from portkey import Config, Message
        except ImportError as exc:
            raise ImportError(IMPORT_ERROR_MESSAGE) from exc
        _messages = cast(
            List[Message],
            [{"role": i.role.value, "content": i.content} for i in messages],
        )
        config = Config(llms=self.llms)
        response = self._client.ChatCompletions.create(
            messages=_messages, config=config, stream=True, **kwargs
        )

        def gen() -> ChatResponseGen:
            content = ""
            # Must start as None (not {}): the first function_call delta is
            # adopted as the accumulator, and later deltas append to its
            # "arguments". Starting from {} made the first delta hit the
            # append branch and raise KeyError, and attached an empty
            # function_call to every yielded message.
            function_call: Optional[dict] = None
            for resp in response:
                if resp.choices is None:
                    continue
                delta = resp.choices[0].delta
                role = delta.get("role", "assistant")
                content_delta = delta.get("content", "") or ""
                content += content_delta

                function_call_delta = delta.get("function_call", None)
                if function_call_delta is not None:
                    if function_call is None:
                        function_call = function_call_delta
                        # ensure we do not add a blank function call
                        if (
                            function_call
                            and function_call.get("function_name", "") is None
                        ):
                            del function_call["function_name"]
                    else:
                        function_call["arguments"] += function_call_delta["arguments"]

                additional_kwargs = {}
                if function_call is not None:
                    additional_kwargs["function_call"] = function_call

                yield ChatResponse(
                    message=ChatMessage(
                        role=role,
                        content=content,
                        additional_kwargs=additional_kwargs,
                    ),
                    delta=content_delta,
                    raw=resp,
                )

        return gen()

    def _stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
        """Send a streaming completion request and yield cumulative responses.

        Each yielded ``CompletionResponse`` carries the text accumulated so
        far, the text delta of the current chunk, and the raw chunk.

        Raises:
            ImportError: If the ``portkey`` package cannot be imported.
        """
        try:
            from portkey import Config
        except ImportError as exc:
            raise ImportError(IMPORT_ERROR_MESSAGE) from exc

        config = Config(llms=self.llms)
        response = self._client.Completions.create(
            prompt=prompt, config=config, stream=True, **kwargs
        )

        def gen() -> CompletionResponseGen:
            text = ""
            for resp in response:
                # Same guard as in _stream_chat: skip chunks without choices.
                if resp.choices is None:
                    continue
                delta = resp.choices[0].text or ""
                text += delta
                yield CompletionResponse(
                    delta=delta,
                    text=text,
                    raw=resp,
                )

        return gen()

    @property
    def _is_chat_model(self) -> bool:
        """Check if a given model is a chat-based language model.

        Returns:
            bool: True if the provided model is a chat-based language model,
            False otherwise.
        """
        # ``model`` may still be None before add_llms() is called; fall back
        # to an empty model name in that case.
        return is_chat_model(self.model or "")

    def _get_llm(self, response: "PortkeyResponse") -> "LLMOptions":
        """Resolve the ``LLMOptions`` for ``response`` via ``get_llm``."""
        return get_llm(response, self.llms)