159 lines
5.9 KiB
Python
159 lines
5.9 KiB
Python
from typing import Any, Callable, Dict, Optional, Tuple
|
|
|
|
import httpx
|
|
from openai.lib.azure import AsyncAzureOpenAI
|
|
from openai.lib.azure import AzureOpenAI as SyncAzureOpenAI
|
|
|
|
from llama_index.bridge.pydantic import Field, PrivateAttr
|
|
from llama_index.callbacks import CallbackManager
|
|
from llama_index.constants import (
|
|
DEFAULT_CONTEXT_WINDOW,
|
|
DEFAULT_NUM_OUTPUTS,
|
|
DEFAULT_TEMPERATURE,
|
|
)
|
|
from llama_index.llms.generic_utils import get_from_param_or_env
|
|
from llama_index.llms.openai_utils import (
|
|
refresh_openai_azuread_token,
|
|
resolve_from_aliases,
|
|
)
|
|
from llama_index.multi_modal_llms import MultiModalLLMMetadata, OpenAIMultiModal
|
|
|
|
|
|
class AzureOpenAIMultiModal(OpenAIMultiModal):
    """
    Azure OpenAI multi-modal LLM.

    To use this, you must first deploy a model on Azure OpenAI.
    Unlike OpenAI, you need to specify an `engine` parameter to identify
    your deployment (called "model deployment name" in Azure portal).

    - model: Name of the model (e.g. `gpt-4-vision-preview`)
        This is only used to decide completion vs. chat endpoint.
    - engine: This will correspond to the custom name you chose
        for your deployment when you deployed a model.

    You must have the following environment variables set:
    - `OPENAI_API_VERSION`: set this to `2023-05-15`
        This may change in the future.
    - `AZURE_OPENAI_ENDPOINT`: your endpoint should look like the following
        https://YOUR_RESOURCE_NAME.openai.azure.com/
    - `AZURE_OPENAI_API_KEY`: your API key if the api type is `azure`

    More information can be found here:
        https://learn.microsoft.com/en-us/azure/cognitive-services/openai/quickstart?tabs=command-line&pivots=programming-language-python
    """

    engine: str = Field(description="The name of the deployed azure engine.")
    azure_endpoint: Optional[str] = Field(
        default=None, description="The Azure endpoint to use."
    )
    azure_deployment: Optional[str] = Field(
        default=None, description="The Azure deployment to use."
    )
    use_azure_ad: bool = Field(
        description="Indicates if Microsoft Entra ID (former Azure AD) is used for token authentication"
    )

    # Cached Entra ID token. It needs an explicit default: the first call to
    # `_get_credential_kwargs` reads this attribute before anything assigns it,
    # and a private attribute declared without a default would raise
    # AttributeError on that read.
    _azure_ad_token: Any = PrivateAttr(default=None)

    def __init__(
        self,
        model: str = "gpt-4-vision-preview",
        engine: Optional[str] = None,
        temperature: float = DEFAULT_TEMPERATURE,
        max_new_tokens: Optional[int] = 300,
        additional_kwargs: Optional[Dict[str, Any]] = None,
        context_window: Optional[int] = DEFAULT_CONTEXT_WINDOW,
        max_retries: int = 3,
        timeout: float = 60.0,
        image_detail: str = "low",
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        api_version: Optional[str] = None,
        # azure specific
        azure_endpoint: Optional[str] = None,
        azure_deployment: Optional[str] = None,
        use_azure_ad: bool = False,
        # aliases for engine
        deployment_name: Optional[str] = None,
        deployment_id: Optional[str] = None,
        deployment: Optional[str] = None,
        messages_to_prompt: Optional[Callable] = None,
        completion_to_prompt: Optional[Callable] = None,
        callback_manager: Optional[CallbackManager] = None,
        default_headers: Optional[Dict[str, str]] = None,
        http_client: Optional[httpx.Client] = None,
        **kwargs: Any,
    ) -> None:
        """
        Resolve the Azure deployment settings and initialise the base class.

        Args:
            model: Name of the underlying model.
            engine: Azure deployment name. May instead be supplied through
                one of the aliases `deployment_name`, `deployment_id`,
                `deployment` or `azure_deployment`.
            azure_endpoint: Azure resource endpoint; when not given it is
                looked up from the `AZURE_OPENAI_ENDPOINT` environment
                variable, with an empty string as the fallback.
            use_azure_ad: Authenticate with a Microsoft Entra ID token
                instead of a static API key.
            http_client: Optional synchronous `httpx.Client`.
            **kwargs: Forwarded unchanged to the base class initialiser.

        All remaining parameters are forwarded unchanged to the base class
        initialiser.

        Raises:
            ValueError: If no engine could be resolved from `engine` or any
                of its aliases.
        """
        # `resolve_from_aliases` is defined elsewhere; presumably it picks the
        # single non-None value among the given names -- confirm against helper.
        engine = resolve_from_aliases(
            engine, deployment_name, deployment_id, deployment, azure_deployment
        )

        if engine is None:
            raise ValueError("You must specify an `engine` parameter.")

        azure_endpoint = get_from_param_or_env(
            "azure_endpoint", azure_endpoint, "AZURE_OPENAI_ENDPOINT", ""
        )
        super().__init__(
            engine=engine,
            model=model,
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            additional_kwargs=additional_kwargs,
            context_window=context_window,
            max_retries=max_retries,
            timeout=timeout,
            image_detail=image_detail,
            api_key=api_key,
            api_base=api_base,
            api_version=api_version,
            messages_to_prompt=messages_to_prompt,
            completion_to_prompt=completion_to_prompt,
            callback_manager=callback_manager,
            azure_endpoint=azure_endpoint,
            azure_deployment=azure_deployment,
            use_azure_ad=use_azure_ad,
            default_headers=default_headers,
            http_client=http_client,
            **kwargs,
        )

    def _get_clients(self, **kwargs: Any) -> Tuple[SyncAzureOpenAI, AsyncAzureOpenAI]:
        """
        Build the synchronous and asynchronous Azure OpenAI clients.

        Returns:
            A `(sync_client, async_client)` tuple sharing the same
            credential settings.
        """
        # Compute the credentials once so that the Entra ID token refresh
        # (and the resulting `self.api_key` update) runs a single time.
        credential_kwargs = self._get_credential_kwargs()
        client = SyncAzureOpenAI(**credential_kwargs)

        # The async client needs an `httpx.AsyncClient`; a synchronous
        # `httpx.Client` must not be forwarded to it. Fall back to the
        # library's default async transport in that case.
        async_kwargs = dict(credential_kwargs)
        if not isinstance(async_kwargs.get("http_client"), httpx.AsyncClient):
            async_kwargs["http_client"] = None
        aclient = AsyncAzureOpenAI(**async_kwargs)

        return client, aclient

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier string for this class."""
        return "azure_openai_multi_modal_llm"

    @property
    def metadata(self) -> MultiModalLLMMetadata:
        """Multi Modal LLM metadata."""
        return MultiModalLLMMetadata(
            # Falls back to the library default when max_new_tokens is unset/0.
            num_output=self.max_new_tokens or DEFAULT_NUM_OUTPUTS,
            # The Azure deployment name is reported as the model name.
            model_name=self.engine,
        )

    def _get_credential_kwargs(self, **kwargs: Any) -> Dict[str, Any]:
        """
        Assemble the keyword arguments used to construct the Azure clients.

        When `use_azure_ad` is set, the cached Entra ID token is passed
        through `refresh_openai_azuread_token` and `self.api_key` is
        overwritten with the resulting token string (a side effect on the
        instance).

        Returns:
            A dict with the keys `api_key`, `max_retries`, `azure_endpoint`,
            `azure_deployment`, `api_version`, `default_headers`,
            `http_client` and `timeout`.
        """
        if self.use_azure_ad:
            # `_azure_ad_token` is None on the first call; the helper is
            # presumably expected to fetch a fresh token then -- confirm.
            self._azure_ad_token = refresh_openai_azuread_token(self._azure_ad_token)
            self.api_key = self._azure_ad_token.token

        return {
            # Normalise an empty key to None.
            "api_key": self.api_key or None,
            "max_retries": self.max_retries,
            "azure_endpoint": self.azure_endpoint,
            "azure_deployment": self.azure_deployment,
            "api_version": self.api_version,
            "default_headers": self.default_headers,
            "http_client": self._http_client,
            "timeout": self.timeout,
        }

    def _get_model_kwargs(self, **kwargs: Any) -> Dict[str, Any]:
        """
        Return the base-class model kwargs with `model` set to the engine.

        The "model" entry is replaced with the Azure deployment name
        (`self.engine`).
        """
        model_kwargs = super()._get_model_kwargs(**kwargs)
        model_kwargs["model"] = self.engine
        return model_kwargs
|