faiss_rag_enterprise/llama_index/readers/base.py

71 lines
2.4 KiB
Python

"""Base reader class."""
from abc import ABC
from typing import TYPE_CHECKING, Any, Dict, Iterable, List
if TYPE_CHECKING:
from llama_index.bridge.langchain import Document as LCDocument
from llama_index.bridge.pydantic import Field
from llama_index.schema import BaseComponent, Document
class BaseReader(ABC):
    """Utilities for loading data from a directory.

    ``load_data`` is implemented on top of ``lazy_load_data``, and
    ``load_langchain_documents`` is implemented on top of ``load_data``.
    The base ``lazy_load_data`` only raises, so a concrete reader has to
    override ``lazy_load_data`` or ``load_data`` to be usable.
    """

    def lazy_load_data(self, *args: Any, **load_kwargs: Any) -> Iterable[Document]:
        """Load data from the input directory lazily.

        Args:
            *args: Positional arguments understood by the concrete reader.
            **load_kwargs: Keyword arguments understood by the concrete reader.

        Returns:
            An iterable of documents.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError(
            f"{self.__class__.__name__} does not provide lazy_load_data method currently"
        )

    def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
        """Load data from the input directory.

        Materializes everything produced by ``lazy_load_data`` into a list,
        forwarding all arguments unchanged.

        Args:
            *args: Positional arguments forwarded to ``lazy_load_data``.
            **load_kwargs: Keyword arguments forwarded to ``lazy_load_data``.

        Returns:
            The loaded documents as a list.
        """
        return list(self.lazy_load_data(*args, **load_kwargs))

    def load_langchain_documents(
        self, *args: Any, **load_kwargs: Any
    ) -> List["LCDocument"]:
        """Load data in LangChain document format.

        Args:
            *args: Positional arguments forwarded to ``load_data``. Accepted
                so this helper mirrors the ``load_data`` signature and works
                with readers that take positional inputs.
            **load_kwargs: Keyword arguments forwarded to ``load_data``.

        Returns:
            One converted document per loaded document, each obtained by
            calling ``to_langchain_format()`` on it.
        """
        docs = self.load_data(*args, **load_kwargs)
        return [d.to_langchain_format() for d in docs]
class BasePydanticReader(BaseReader, BaseComponent):
    """Serializable data loader built on Pydantic."""

    # False (the default) marks the reader as not loading from a remote API.
    is_remote: bool = Field(
        False,
        description="Whether the data is loaded from a remote API or a local file.",
    )

    class Config:
        # Permit field types that Pydantic has no built-in validator for.
        arbitrary_types_allowed = True
class ReaderConfig(BaseComponent):
    """Represents a reader and its input arguments."""

    reader: BasePydanticReader = Field(..., description="Reader to use.")
    reader_args: List[Any] = Field(description="Reader args.", default_factory=list)
    reader_kwargs: Dict[str, Any] = Field(
        description="Reader kwargs.", default_factory=dict
    )

    class Config:
        # Permit field types that Pydantic has no built-in validator for.
        arbitrary_types_allowed = True

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier string for this class."""
        return "ReaderConfig"

    def to_dict(self, **kwargs: Any) -> Dict[str, Any]:
        """Serialize the config, including the wrapped reader, to a dict.

        Args:
            **kwargs: Forwarded unchanged to the reader's own ``to_dict``.

        Returns:
            A dict with the serialized reader, the stored positional and
            keyword arguments, and this class's name.
        """
        # NOTE(review): the serialized reader is stored under "loader" even
        # though the field is named "reader" -- confirm this is what the
        # consumers of this dict expect before changing it.
        serialized: Dict[str, Any] = {"loader": self.reader.to_dict(**kwargs)}
        serialized["reader_args"] = self.reader_args
        serialized["reader_kwargs"] = self.reader_kwargs
        serialized["class_name"] = self.class_name()
        return serialized

    def read(self) -> List[Document]:
        """Run the reader's ``load_data`` with the stored arguments.

        Returns:
            Whatever the reader's ``load_data`` returns for the stored
            ``reader_args`` and ``reader_kwargs``.
        """
        load = self.reader.load_data
        return load(*self.reader_args, **self.reader_kwargs)