faiss_rag_enterprise/llama_index/multi_modal_llms/azure_openai.py

from typing import Any, Callable, Dict, Optional, Tuple

import httpx
from openai.lib.azure import AsyncAzureOpenAI
from openai.lib.azure import AzureOpenAI as SyncAzureOpenAI

from llama_index.bridge.pydantic import Field, PrivateAttr
from llama_index.callbacks import CallbackManager
from llama_index.constants import (
    DEFAULT_CONTEXT_WINDOW,
    DEFAULT_NUM_OUTPUTS,
    DEFAULT_TEMPERATURE,
)
from llama_index.llms.generic_utils import get_from_param_or_env
from llama_index.llms.openai_utils import (
    refresh_openai_azuread_token,
    resolve_from_aliases,
)
from llama_index.multi_modal_llms import MultiModalLLMMetadata, OpenAIMultiModal


class AzureOpenAIMultiModal(OpenAIMultiModal):
    """
    Azure OpenAI multi-modal LLM.

    To use this, you must first deploy a model on Azure OpenAI.
    Unlike OpenAI, you need to specify an `engine` parameter to identify
    your deployment (called "model deployment name" in Azure portal).

    - model: Name of the underlying model (e.g. `gpt-4-vision-preview`).
        It is forwarded to the parent class; the `model` value sent with
        each request is replaced by `engine` (see `_get_model_kwargs`).
    - engine: This will correspond to the custom name you chose
        for your deployment when you deployed a model. The aliases
        `deployment_name`, `deployment_id`, `deployment` and
        `azure_deployment` are accepted as well.

    Environment variables:
    - `OPENAI_API_VERSION`: the Azure API version (e.g. `2023-05-15`).
        This may change in the future.
    - `AZURE_OPENAI_ENDPOINT`: used when `azure_endpoint` is not passed;
        your endpoint should look like the following
        https://YOUR_RESOURCE_NAME.openai.azure.com/
    - `AZURE_OPENAI_API_KEY`: your API key if the api type is `azure`

    More information can be found here:
        https://learn.microsoft.com/en-us/azure/cognitive-services/openai/quickstart?tabs=command-line&pivots=programming-language-python
    """

    engine: str = Field(description="The name of the deployed azure engine.")
    azure_endpoint: Optional[str] = Field(
        default=None, description="The Azure endpoint to use."
    )
    azure_deployment: Optional[str] = Field(
        default=None, description="The Azure deployment to use."
    )
    use_azure_ad: bool = Field(
        description="Indicates if Microsoft Entra ID (former Azure AD) is used for token authentication"
    )

    # Cached Azure AD token. It needs an explicit default: without one the
    # attribute is unset until first assignment, and the first read in
    # `_get_credential_kwargs` would raise AttributeError.
    _azure_ad_token: Any = PrivateAttr(default=None)

    def __init__(
        self,
        model: str = "gpt-4-vision-preview",
        engine: Optional[str] = None,
        temperature: float = DEFAULT_TEMPERATURE,
        max_new_tokens: Optional[int] = 300,
        additional_kwargs: Optional[Dict[str, Any]] = None,
        context_window: Optional[int] = DEFAULT_CONTEXT_WINDOW,
        max_retries: int = 3,
        timeout: float = 60.0,
        image_detail: str = "low",
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        api_version: Optional[str] = None,
        # azure specific
        azure_endpoint: Optional[str] = None,
        azure_deployment: Optional[str] = None,
        use_azure_ad: bool = False,
        # aliases for engine
        deployment_name: Optional[str] = None,
        deployment_id: Optional[str] = None,
        deployment: Optional[str] = None,
        messages_to_prompt: Optional[Callable] = None,
        completion_to_prompt: Optional[Callable] = None,
        callback_manager: Optional[CallbackManager] = None,
        default_headers: Optional[Dict[str, str]] = None,
        http_client: Optional[httpx.Client] = None,
        **kwargs: Any,
    ) -> None:
        """
        Resolve the deployment name and endpoint, then initialise the parent.

        Raises:
            ValueError: if neither `engine` nor any of its aliases is given.
        """
        # Collapse `engine` and its aliases into a single deployment name.
        engine = resolve_from_aliases(
            engine, deployment_name, deployment_id, deployment, azure_deployment
        )

        if engine is None:
            raise ValueError("You must specify an `engine` parameter.")

        # Explicit argument first, then AZURE_OPENAI_ENDPOINT, then "".
        azure_endpoint = get_from_param_or_env(
            "azure_endpoint", azure_endpoint, "AZURE_OPENAI_ENDPOINT", ""
        )
        super().__init__(
            engine=engine,
            model=model,
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            additional_kwargs=additional_kwargs,
            context_window=context_window,
            max_retries=max_retries,
            timeout=timeout,
            image_detail=image_detail,
            api_key=api_key,
            api_base=api_base,
            api_version=api_version,
            messages_to_prompt=messages_to_prompt,
            completion_to_prompt=completion_to_prompt,
            callback_manager=callback_manager,
            azure_endpoint=azure_endpoint,
            azure_deployment=azure_deployment,
            use_azure_ad=use_azure_ad,
            default_headers=default_headers,
            http_client=http_client,
            **kwargs,
        )

    def _get_clients(self, **kwargs: Any) -> Tuple[SyncAzureOpenAI, AsyncAzureOpenAI]:
        """
        Build the synchronous and asynchronous Azure OpenAI clients.

        Credentials are resolved once and shared by both clients, so the
        token refresh hook runs a single time per call.
        """
        sync_kwargs = self._get_credential_kwargs()

        # `http_client` is typed as a synchronous `httpx.Client`, which the
        # async client cannot use. Forward it only when it really is an
        # `httpx.AsyncClient`; otherwise let the async client create its own.
        async_kwargs = dict(sync_kwargs)
        if not isinstance(async_kwargs.get("http_client"), httpx.AsyncClient):
            async_kwargs["http_client"] = None

        client = SyncAzureOpenAI(**sync_kwargs)
        aclient = AsyncAzureOpenAI(**async_kwargs)
        return client, aclient

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier string for this class."""
        return "azure_openai_multi_modal_llm"

    @property
    def metadata(self) -> MultiModalLLMMetadata:
        """Multi Modal LLM metadata."""
        return MultiModalLLMMetadata(
            num_output=self.max_new_tokens or DEFAULT_NUM_OUTPUTS,
            # Report the Azure deployment name rather than the base model.
            model_name=self.engine,
        )

    def _get_credential_kwargs(self, **kwargs: Any) -> Dict[str, Any]:
        """
        Return the keyword arguments used to construct an Azure OpenAI client.

        When `use_azure_ad` is set, the cached token is passed through
        `refresh_openai_azuread_token` and its `.token` value replaces
        `api_key` on this instance.
        """
        if self.use_azure_ad:
            self._azure_ad_token = refresh_openai_azuread_token(self._azure_ad_token)
            self.api_key = self._azure_ad_token.token

        return {
            # Normalise empty/falsy keys to None.
            "api_key": self.api_key or None,
            "max_retries": self.max_retries,
            "azure_endpoint": self.azure_endpoint,
            "azure_deployment": self.azure_deployment,
            "api_version": self.api_version,
            "default_headers": self.default_headers,
            "http_client": self._http_client,
            "timeout": self.timeout,
        }

    def _get_model_kwargs(self, **kwargs: Any) -> Dict[str, Any]:
        """Return the parent's model kwargs with `model` set to the engine."""
        model_kwargs = super()._get_model_kwargs(**kwargs)
        # Requests are addressed to the deployment name (`engine`).
        model_kwargs["model"] = self.engine
        return model_kwargs