faiss_rag_enterprise/llama_index/multi_modal_llms/generic_utils.py

52 lines
1.7 KiB
Python

import base64
import logging
from typing import List, Sequence
import requests
from llama_index.schema import ImageDocument
logger = logging.getLogger(__name__)
def load_image_urls(image_urls: List[str]) -> List[ImageDocument]:
    """Wrap each remote image URL in an ``ImageDocument``.

    This function performs no download itself; it only passes each URL to
    the ``ImageDocument`` constructor as ``image_url``.

    Args:
        image_urls: URLs of remote images.

    Returns:
        One ``ImageDocument`` per URL, in the same order as ``image_urls``.
        An empty input yields an empty list.
    """
    # Iterate the URLs directly rather than indexing through range(len(...)).
    return [ImageDocument(image_url=url) for url in image_urls]
# Function to encode the image to base64 content
def encode_image(image_path: str) -> str:
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode and read in full; the bytes are
    base64-encoded and the result is decoded to a ``str`` as UTF-8.

    Args:
        image_path: Path of the file to read.

    Returns:
        The base64 encoding of the file's bytes.
    """
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
# Base64 encoding of images for Ollama-like multi-modal models
def image_documents_to_base64(
    image_documents: Sequence[ImageDocument],
    *,
    timeout: float = 30.0,
) -> List[str]:
    """Convert image documents into a list of base64-encoded image strings.

    For each document the first available source is used, in this order:

    1. ``image`` -- appended unchanged (it is not re-encoded here;
       presumably it already holds base64 text -- confirm against
       ``ImageDocument``).
    2. ``image_path`` -- the file is read and encoded with ``encode_image``.
    3. ``metadata["file_path"]``, when present and not ``""`` -- read and
       encoded with ``encode_image``.
    4. ``image_url`` -- downloaded with ``requests.get`` and encoded.

    Documents with none of these sources, and URLs that cannot be fetched,
    contribute no entry, so the result may be shorter than
    ``image_documents``.

    Args:
        image_documents: Documents to convert.
        timeout: Value passed to ``requests.get`` as ``timeout`` (seconds)
            so an unresponsive server cannot block the call indefinitely.

    Returns:
        Base64 strings in input order, one per document that yielded an image.

    Raises:
        OSError: If a local ``image_path`` / ``file_path`` cannot be opened.
    """
    image_encodings = []
    for image_document in image_documents:
        if image_document.image:
            image_encodings.append(image_document.image)
        elif image_document.image_path:
            image_encodings.append(encode_image(image_document.image_path))
        elif (
            "file_path" in image_document.metadata
            and image_document.metadata["file_path"] != ""
        ):
            image_encodings.append(encode_image(image_document.metadata["file_path"]))
        elif image_document.image_url:
            # The download is the step that can actually fail, so it must sit
            # inside the try. raise_for_status() keeps an HTTP error page
            # (404/500 body) from being encoded as if it were image data.
            try:
                response = requests.get(image_document.image_url, timeout=timeout)
                response.raise_for_status()
            except requests.RequestException as e:
                # Best effort: log the failure and skip this document.
                logger.warning(f"Cannot encode the image url-> {e}")
            else:
                image_encodings.append(
                    base64.b64encode(response.content).decode("utf-8")
                )
    return image_encodings