79 lines
2.7 KiB
Python
79 lines
2.7 KiB
Python
import logging
|
|
from typing import Any, Dict, Optional, Sequence
|
|
|
|
from llama_index.multi_modal_llms.base import ChatMessage
|
|
from llama_index.multi_modal_llms.generic_utils import encode_image
|
|
from llama_index.schema import ImageDocument
|
|
|
|
# Default OpenAI API flavour and endpoint.
DEFAULT_OPENAI_API_TYPE = "open_ai"
DEFAULT_OPENAI_API_BASE = "https://api.openai.com/v1"

# Known GPT-4V model names mapped to an integer limit
# (presumably the context window size in tokens -- confirm against callers).
GPT4V_MODELS: Dict[str, int] = {"gpt-4-vision-preview": 128000}

# Message text for the case where no OpenAI API key is configured.
MISSING_API_KEY_ERROR_MESSAGE = (
    "No API key found for OpenAI.\n"
    "Please set either the OPENAI_API_KEY environment variable or "
    "openai.api_key prior to initialization.\n"
    "API keys can be found or created at "
    "https://platform.openai.com/account/api-keys\n"
)
|
|
|
|
logger = logging.getLogger(__name__)


def _image_document_to_content(
    image_document: ImageDocument,
    image_detail: Optional[str],
) -> Optional[Dict[str, Any]]:
    """Build one ``image_url`` content part for ``image_document``.

    Image sources are tried in this order, and the first non-empty one wins:
    inline ``image`` data, ``image_url``, ``image_path``, and finally
    ``metadata["file_path"]``.

    Args:
        image_document: Document describing the image to attach.
        image_detail: Value placed under the ``detail`` key of the part.

    Returns:
        A ``{"type": "image_url", "image_url": {...}}`` dict, or ``None``
        when the document carries no usable image source.
    """
    # Fall back to JPEG when the document does not declare a mimetype.
    mimetype = image_document.image_mimetype or "image/jpeg"

    if image_document.image:
        # ``image`` is embedded verbatim in a base64 data URL
        # (assumes it already holds base64 text -- confirm with callers).
        url = f"data:{mimetype};base64,{image_document.image}"
    elif image_document.image_url:
        url = image_document.image_url
    elif image_document.image_path:
        encoded = encode_image(image_document.image_path)
        url = f"data:{mimetype};base64,{encoded}"
    elif image_document.metadata.get("file_path"):
        encoded = encode_image(image_document.metadata["file_path"])
        url = f"data:{mimetype};base64,{encoded}"
    else:
        return None

    # Every source uses the same payload shape so ``image_detail`` is
    # honoured for remote URLs as well as for inline data.
    return {
        "type": "image_url",
        "image_url": {"url": url, "detail": image_detail},
    }


def generate_openai_multi_modal_chat_message(
    prompt: str,
    role: str,
    image_documents: Optional[Sequence[ImageDocument]] = None,
    image_detail: Optional[str] = "low",
) -> ChatMessage:
    """Create a chat message holding ``prompt`` and, optionally, images.

    Args:
        prompt: Text of the message.
        role: Role assigned to the returned message.
        image_documents: Images to attach. ``None`` or an empty sequence
            yields a text-only message whose content is ``prompt`` itself.
        image_detail: ``detail`` value set on every attached image part.

    Returns:
        A ``ChatMessage`` whose content is either the plain prompt string
        (no images supplied) or a list made of one text part followed by
        one ``image_url`` part per usable image document.
    """
    # No images supplied: return a text-only chat message.
    if not image_documents:
        return ChatMessage(role=role, content=prompt)

    # Otherwise return the text part followed by the image parts.
    completion_content: list = [{"type": "text", "text": prompt}]
    for image_document in image_documents:
        image_content = _image_document_to_content(image_document, image_detail)
        if image_content is None:
            # An empty part would make the whole message malformed, so
            # drop the document and report it instead.
            logger.warning(
                "Skipping image document without image data, URL or file path."
            )
            continue
        completion_content.append(image_content)

    return ChatMessage(role=role, content=completion_content)
|