# faiss_rag_enterprise/llama_index/llms/portkey.py
"""
Portkey integration with Llama_index for enhanced monitoring.
"""
from typing import TYPE_CHECKING, Any, Callable, List, Optional, Sequence, Union, cast

from llama_index.bridge.pydantic import Field, PrivateAttr
from llama_index.core.llms.types import (
    ChatMessage,
    ChatResponse,
    ChatResponseGen,
    CompletionResponse,
    CompletionResponseGen,
    LLMMetadata,
)
from llama_index.llms.base import llm_chat_callback, llm_completion_callback
from llama_index.llms.custom import CustomLLM
from llama_index.llms.generic_utils import (
    chat_to_completion_decorator,
    completion_to_chat_decorator,
    stream_chat_to_completion_decorator,
    stream_completion_to_chat_decorator,
)
from llama_index.llms.portkey_utils import (
    IMPORT_ERROR_MESSAGE,
    generate_llm_metadata,
    get_llm,
    is_chat_model,
)
from llama_index.types import BaseOutputParser, PydanticProgramMode
if TYPE_CHECKING:
    from portkey import (
        LLMOptions,
        Modes,
        ModesLiteral,
        PortkeyResponse,
    )

DEFAULT_PORTKEY_MODEL = "gpt-3.5-turbo"
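
# Example usage (a minimal sketch, not part of this module's API; it assumes a
# valid Portkey API key, that the installed `portkey` package exposes `Modes`
# and `LLMOptions` as imported above, and that `Modes.FALLBACK` is a valid
# mode; the `LLMOptions` attributes follow the `add_llms` docstring below):
#
#     from portkey import LLMOptions, Modes
#
#     pk_llm = Portkey(mode=Modes.FALLBACK, api_key="<PORTKEY_API_KEY>")
#     pk_llm.add_llms(
#         [
#             LLMOptions(provider="openai", model="gpt-4"),
#             LLMOptions(provider="openai", model="gpt-3.5-turbo"),
#         ]
#     )
#     print(pk_llm.complete("What does a FAISS index store?").text)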


class Portkey(CustomLLM):
    """Portkey LLM wrapper.

    Routes LlamaIndex completion and chat calls through the Portkey AI
    gateway, enabling fallbacks, load balancing, and request monitoring
    across one or more configured LLMs.
    """

    mode: Optional[Union["Modes", "ModesLiteral"]] = Field(
        description="The mode for using the Portkey integration"
    )
    model: Optional[str] = Field(default=DEFAULT_PORTKEY_MODEL)
    llm: "LLMOptions" = Field(description="LLM parameter", default_factory=dict)
    llms: List["LLMOptions"] = Field(description="LLM parameters", default_factory=list)

    _client: Any = PrivateAttr()

    def __init__(
        self,
        *,
        mode: Union["Modes", "ModesLiteral"],
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        system_prompt: Optional[str] = None,
        messages_to_prompt: Optional[Callable[[Sequence[ChatMessage]], str]] = None,
        completion_to_prompt: Optional[Callable[[str], str]] = None,
        pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT,
        output_parser: Optional[BaseOutputParser] = None,
    ) -> None:
        """
        Initialize a Portkey instance.

        Args:
            mode (Union[Modes, ModesLiteral]): The mode for using the Portkey
                integration (default: Modes.SINGLE).
            api_key (Optional[str]): The API key to authenticate with Portkey.
            base_url (Optional[str]): The base URL of a self-hosted Rubeus
                (the open-source version of Portkey) or any other compatible
                self-hosted server.
        """
        try:
            import portkey
        except ImportError as exc:
            raise ImportError(IMPORT_ERROR_MESSAGE) from exc

        super().__init__(
            base_url=base_url,
            api_key=api_key,
            system_prompt=system_prompt,
            messages_to_prompt=messages_to_prompt,
            completion_to_prompt=completion_to_prompt,
            pydantic_program_mode=pydantic_program_mode,
            output_parser=output_parser,
        )
        if api_key is not None:
            portkey.api_key = api_key
        if base_url is not None:
            portkey.base_url = base_url
        portkey.mode = mode

        self._client = portkey
        self.model = None
        self.mode = mode

    @property
    def metadata(self) -> LLMMetadata:
        """LLM metadata."""
        return generate_llm_metadata(self.llms[0])

    def add_llms(
        self, llm_params: Union["LLMOptions", List["LLMOptions"]]
    ) -> "Portkey":
        """
        Add the specified LLM parameters to the list of LLMs. This may be used
        for fallbacks or load balancing, as specified in the mode.

        Args:
            llm_params (Union[LLMOptions, List[LLMOptions]]): A single LLM
                parameter set or a list of LLM parameter sets. Each set should
                be an instance of LLMOptions with the following attributes:
                > provider: Optional[ProviderTypes]
                > model: str
                > temperature: float
                > max_tokens: Optional[int]
                > max_retries: int
                > trace_id: Optional[str]
                > cache_status: Optional[CacheType]
                > cache: Optional[bool]
                > metadata: Dict[str, Any]
                > weight: Optional[float]
                > **kwargs: Other additional parameters supported by
                  LLMOptions in portkey-ai.

            NOTE: Users may choose to pass additional params as well; see the
            sketch after this method.

        Returns:
            self
        """
        try:
            from portkey import LLMOptions
        except ImportError as exc:
            raise ImportError(IMPORT_ERROR_MESSAGE) from exc

        if isinstance(llm_params, LLMOptions):
            llm_params = [llm_params]
        self.llms.extend(llm_params)
        if self.model is None:
            self.model = self.llms[0].model
        return self
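
    # A minimal sketch of weighted load balancing across two LLMs (assumes
    # `Modes.LOADBALANCE` exists in the installed `portkey` package; the
    # `weight` attribute is listed in the docstring above):
    #
    #     pk_llm = Portkey(mode=Modes.LOADBALANCE, api_key="<PORTKEY_API_KEY>")
    #     pk_llm.add_llms(
    #         [
    #             LLMOptions(provider="openai", model="gpt-4", weight=0.2),
    #             LLMOptions(provider="openai", model="gpt-3.5-turbo", weight=0.8),
    #         ]
    #     )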

    @llm_completion_callback()
    def complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponse:
        """Completion endpoint for the LLM."""
        if self._is_chat_model:
            complete_fn = chat_to_completion_decorator(self._chat)
        else:
            complete_fn = self._complete
        return complete_fn(prompt, **kwargs)

    @llm_chat_callback()
    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
        """Chat endpoint for the LLM."""
        if self._is_chat_model:
            chat_fn = self._chat
        else:
            chat_fn = completion_to_chat_decorator(self._complete)
        return chat_fn(messages, **kwargs)

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseGen:
        """Streaming completion endpoint for the LLM."""
        if self._is_chat_model:
            complete_fn = stream_chat_to_completion_decorator(self._stream_chat)
        else:
            complete_fn = self._stream_complete
        return complete_fn(prompt, **kwargs)

    @llm_chat_callback()
    def stream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponseGen:
        """Streaming chat endpoint for the LLM."""
        if self._is_chat_model:
            stream_chat_fn = self._stream_chat
        else:
            stream_chat_fn = stream_completion_to_chat_decorator(self._stream_complete)
        return stream_chat_fn(messages, **kwargs)

    def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
        try:
            from portkey import Config, Message
        except ImportError as exc:
            raise ImportError(IMPORT_ERROR_MESSAGE) from exc

        # Convert LlamaIndex ChatMessages into Portkey's message format.
        _messages = cast(
            List[Message],
            [{"role": i.role.value, "content": i.content} for i in messages],
        )
        config = Config(llms=self.llms)
        response = self._client.ChatCompletions.create(
            messages=_messages, config=config
        )
        # Record which configured LLM actually served the request.
        self.llm = self._get_llm(response)

        message = response.choices[0].message
        return ChatResponse(message=message, raw=response)

    def _complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        try:
            from portkey import Config
        except ImportError as exc:
            raise ImportError(IMPORT_ERROR_MESSAGE) from exc

        config = Config(llms=self.llms)
        response = self._client.Completions.create(prompt=prompt, config=config)
        text = response.choices[0].text
        return CompletionResponse(text=text, raw=response)

    def _stream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponseGen:
        try:
            from portkey import Config, Message
        except ImportError as exc:
            raise ImportError(IMPORT_ERROR_MESSAGE) from exc

        _messages = cast(
            List[Message],
            [{"role": i.role.value, "content": i.content} for i in messages],
        )
        config = Config(llms=self.llms)
        response = self._client.ChatCompletions.create(
            messages=_messages, config=config, stream=True, **kwargs
        )

        def gen() -> ChatResponseGen:
            content = ""
            # Start as None so the first function_call delta is detected
            # below; initializing to {} would make the `is None` check
            # unreachable and attach an empty function_call to every chunk.
            function_call: Optional[dict] = None
            for resp in response:
                if resp.choices is None:
                    continue
                delta = resp.choices[0].delta
                role = delta.get("role", "assistant")
                content_delta = delta.get("content", "") or ""
                content += content_delta

                function_call_delta = delta.get("function_call", None)
                if function_call_delta is not None:
                    if function_call is None:
                        function_call = function_call_delta
                        # Ensure we do not add a blank function call.
                        if (
                            function_call
                            and function_call.get("function_name", "") is None
                        ):
                            del function_call["function_name"]
                    else:
                        # Accumulate streamed argument fragments.
                        function_call["arguments"] += function_call_delta["arguments"]

                additional_kwargs = {}
                if function_call is not None:
                    additional_kwargs["function_call"] = function_call

                yield ChatResponse(
                    message=ChatMessage(
                        role=role,
                        content=content,
                        additional_kwargs=additional_kwargs,
                    ),
                    delta=content_delta,
                    raw=resp,
                )

        return gen()

    def _stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
        try:
            from portkey import Config
        except ImportError as exc:
            raise ImportError(IMPORT_ERROR_MESSAGE) from exc

        config = Config(llms=self.llms)
        response = self._client.Completions.create(
            prompt=prompt, config=config, stream=True, **kwargs
        )

        def gen() -> CompletionResponseGen:
            text = ""
            for resp in response:
                delta = resp.choices[0].text or ""
                text += delta
                yield CompletionResponse(
                    delta=delta,
                    text=text,
                    raw=resp,
                )

        return gen()

    @property
    def _is_chat_model(self) -> bool:
        """Check if the currently selected model is a chat-based language model.

        Returns:
            bool: True if the model is a chat-based language model,
            False otherwise.
        """
        return is_chat_model(self.model or "")

    def _get_llm(self, response: "PortkeyResponse") -> "LLMOptions":
        return get_llm(response, self.llms)
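
# Streaming usage (a sketch; assumes `pk_llm` is configured as in the example
# above the class definition; `stream_complete` yields CompletionResponse
# chunks whose `delta` holds the newly streamed text):
#
#     for chunk in pk_llm.stream_complete("Summarize FAISS index types:"):
#         print(chunk.delta, end="", flush=True)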