# faiss_rag_enterprise/llama_index/storage/storage_context.py
#
# 226 lines
# 8.5 KiB
# Python

import os
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Optional, Union
import fsspec
from llama_index.constants import (
DOC_STORE_KEY,
GRAPH_STORE_KEY,
INDEX_STORE_KEY,
VECTOR_STORE_KEY,
)
from llama_index.graph_stores.simple import DEFAULT_PERSIST_FNAME as GRAPH_STORE_FNAME
from llama_index.graph_stores.simple import SimpleGraphStore
from llama_index.graph_stores.types import GraphStore
from llama_index.storage.docstore.simple_docstore import SimpleDocumentStore
from llama_index.storage.docstore.types import DEFAULT_PERSIST_FNAME as DOCSTORE_FNAME
from llama_index.storage.docstore.types import BaseDocumentStore
from llama_index.storage.index_store.simple_index_store import SimpleIndexStore
from llama_index.storage.index_store.types import (
DEFAULT_PERSIST_FNAME as INDEX_STORE_FNAME,
)
from llama_index.storage.index_store.types import BaseIndexStore
from llama_index.utils import concat_dirs
from llama_index.vector_stores.simple import DEFAULT_PERSIST_FNAME as VECTOR_STORE_FNAME
from llama_index.vector_stores.simple import (
DEFAULT_VECTOR_STORE,
NAMESPACE_SEP,
SimpleVectorStore,
)
from llama_index.vector_stores.types import BasePydanticVectorStore, VectorStore
# Directory used by `StorageContext.persist` when no `persist_dir` is passed.
DEFAULT_PERSIST_DIR = "./storage"
# Default value of the `image_store_fname` argument of `StorageContext.persist`.
IMAGE_STORE_FNAME = "image_store.json"
# Key under which the image store is registered in `StorageContext.vector_stores`.
IMAGE_VECTOR_STORE_NAMESPACE = "image"
@dataclass
class StorageContext:
    """Storage context.

    The storage context container is a utility container for storing nodes,
    indices, and vectors. It contains the following:
    - docstore: BaseDocumentStore
    - index_store: BaseIndexStore
    - vector_stores: Dict[str, VectorStore], keyed by namespace
    - graph_store: GraphStore

    The vector store registered under ``DEFAULT_VECTOR_STORE`` is also exposed
    through the ``vector_store`` property for backwards compatibility.
    """

    docstore: BaseDocumentStore
    index_store: BaseIndexStore
    vector_stores: Dict[str, VectorStore]
    graph_store: GraphStore

    @classmethod
    def from_defaults(
        cls,
        docstore: Optional[BaseDocumentStore] = None,
        index_store: Optional[BaseIndexStore] = None,
        vector_store: Optional[Union[VectorStore, BasePydanticVectorStore]] = None,
        image_store: Optional[VectorStore] = None,
        vector_stores: Optional[
            Dict[str, Union[VectorStore, BasePydanticVectorStore]]
        ] = None,
        graph_store: Optional[GraphStore] = None,
        persist_dir: Optional[str] = None,
        fs: Optional[fsspec.AbstractFileSystem] = None,
    ) -> "StorageContext":
        """Create a StorageContext from defaults.

        Any store that is not supplied is created fresh when ``persist_dir``
        is None, or obtained through the matching ``from_persist_dir`` /
        ``from_namespaced_persist_dir`` constructor otherwise.

        Args:
            docstore (Optional[BaseDocumentStore]): document store
            index_store (Optional[BaseIndexStore]): index store
            vector_store (Optional[VectorStore]): vector store registered
                under ``DEFAULT_VECTOR_STORE``; when given, ``vector_stores``
                is ignored
            image_store (Optional[VectorStore]): image store, registered under
                ``IMAGE_VECTOR_STORE_NAMESPACE``
            vector_stores (Optional[Dict[str, VectorStore]]): namespaced
                vector stores; the mapping is copied, never modified in place
            graph_store (Optional[GraphStore]): graph store
            persist_dir (Optional[str]): directory to load missing stores from
            fs (Optional[fsspec.AbstractFileSystem]): filesystem handed to the
                ``from_persist_dir`` constructors; only used together with
                ``persist_dir``

        Returns:
            StorageContext: context wired with the resolved stores.
        """
        load_persisted = persist_dir is not None

        # Compare against None (not truthiness) so that a store object which
        # happens to evaluate falsy is still honored instead of being replaced.
        if docstore is None:
            docstore = (
                SimpleDocumentStore.from_persist_dir(persist_dir, fs=fs)
                if load_persisted
                else SimpleDocumentStore()
            )
        if index_store is None:
            index_store = (
                SimpleIndexStore.from_persist_dir(persist_dir, fs=fs)
                if load_persisted
                else SimpleIndexStore()
            )
        if graph_store is None:
            graph_store = (
                SimpleGraphStore.from_persist_dir(persist_dir, fs=fs)
                if load_persisted
                else SimpleGraphStore()
            )

        if vector_store is not None:
            # An explicit single vector store takes precedence over the mapping.
            resolved_stores = {DEFAULT_VECTOR_STORE: vector_store}
        elif vector_stores:
            # Shallow copy: registering the image namespace below must not
            # leak into the dict object owned by the caller.
            resolved_stores = dict(vector_stores)
        elif load_persisted:
            resolved_stores = SimpleVectorStore.from_namespaced_persist_dir(
                persist_dir, fs=fs
            )
        else:
            resolved_stores = {DEFAULT_VECTOR_STORE: SimpleVectorStore()}

        if image_store is not None:
            # append image store to vector stores
            resolved_stores[IMAGE_VECTOR_STORE_NAMESPACE] = image_store
        elif (
            not load_persisted
            and IMAGE_VECTOR_STORE_NAMESPACE not in resolved_stores
        ):
            # Without a persist dir an image namespace is always present, but
            # an entry the caller already supplied is kept, not overwritten.
            resolved_stores[IMAGE_VECTOR_STORE_NAMESPACE] = SimpleVectorStore()

        return cls(
            docstore=docstore,
            index_store=index_store,
            vector_stores=resolved_stores,
            graph_store=graph_store,
        )

    def persist(
        self,
        persist_dir: Union[str, os.PathLike] = DEFAULT_PERSIST_DIR,
        docstore_fname: str = DOCSTORE_FNAME,
        index_store_fname: str = INDEX_STORE_FNAME,
        vector_store_fname: str = VECTOR_STORE_FNAME,
        image_store_fname: str = IMAGE_STORE_FNAME,
        graph_store_fname: str = GRAPH_STORE_FNAME,
        fs: Optional[fsspec.AbstractFileSystem] = None,
    ) -> None:
        """Persist the storage context.

        Each store's ``persist`` method is called with a path inside
        ``persist_dir``. Vector stores are written one file per namespace,
        named ``<namespace><NAMESPACE_SEP><vector_store_fname>``.

        Args:
            persist_dir (str): directory to persist the storage context
            docstore_fname (str): file name for the document store
            index_store_fname (str): file name for the index store
            vector_store_fname (str): file name suffix for each vector store
            image_store_fname (str): currently unused; the image store is
                written like every other entry of ``vector_stores``. Kept so
                existing callers passing it keep working.
            graph_store_fname (str): file name for the graph store
            fs (Optional[fsspec.AbstractFileSystem]): filesystem forwarded to
                every store's ``persist``; when given, paths are joined with
                ``concat_dirs`` instead of ``pathlib``
        """

        def _target(fname: str) -> str:
            """Return the path of ``fname`` inside ``persist_dir``."""
            if fs is not None:
                # NOTE: doesn't support Windows here
                return concat_dirs(str(persist_dir), fname)
            return str(Path(persist_dir) / fname)

        docstore_path = _target(docstore_fname)
        index_store_path = _target(index_store_fname)
        graph_store_path = _target(graph_store_fname)

        self.docstore.persist(persist_path=docstore_path, fs=fs)
        self.index_store.persist(persist_path=index_store_path, fs=fs)
        self.graph_store.persist(persist_path=graph_store_path, fs=fs)

        # save each vector store under its namespace
        for namespace, store in self.vector_stores.items():
            store.persist(
                persist_path=_target(
                    f"{namespace}{NAMESPACE_SEP}{vector_store_fname}"
                ),
                fs=fs,
            )

    def to_dict(self) -> dict:
        """Serialize the storage context to a dict.

        Returns:
            dict: the ``to_dict()`` output of each store, keyed by
            ``VECTOR_STORE_KEY`` (itself a dict keyed by namespace),
            ``DOC_STORE_KEY``, ``INDEX_STORE_KEY`` and ``GRAPH_STORE_KEY``.

        Raises:
            ValueError: if any of the stores is not one of the ``Simple*``
                implementations.
        """
        all_simple = (
            isinstance(self.docstore, SimpleDocumentStore)
            and isinstance(self.index_store, SimpleIndexStore)
            and isinstance(self.graph_store, SimpleGraphStore)
            and all(
                isinstance(vs, SimpleVectorStore)
                for vs in self.vector_stores.values()
            )
        )
        if not all_simple:
            raise ValueError(
                "to_dict only available when using simple doc/index/vector stores"
            )

        # Already guaranteed by the check above; the asserts (and the
        # isinstance filter below) only narrow the types for static checkers.
        assert isinstance(self.docstore, SimpleDocumentStore)
        assert isinstance(self.index_store, SimpleIndexStore)
        assert isinstance(self.graph_store, SimpleGraphStore)

        return {
            VECTOR_STORE_KEY: {
                key: vector_store.to_dict()
                for key, vector_store in self.vector_stores.items()
                if isinstance(vector_store, SimpleVectorStore)
            },
            DOC_STORE_KEY: self.docstore.to_dict(),
            INDEX_STORE_KEY: self.index_store.to_dict(),
            GRAPH_STORE_KEY: self.graph_store.to_dict(),
        }

    @classmethod
    def from_dict(cls, save_dict: dict) -> "StorageContext":
        """Create a StorageContext from dict.

        Args:
            save_dict (dict): mapping with the keys ``DOC_STORE_KEY``,
                ``INDEX_STORE_KEY``, ``GRAPH_STORE_KEY`` and
                ``VECTOR_STORE_KEY`` (the layout produced by ``to_dict``).

        Raises:
            KeyError: if one of those keys is missing from ``save_dict``.
        """
        docstore = SimpleDocumentStore.from_dict(save_dict[DOC_STORE_KEY])
        index_store = SimpleIndexStore.from_dict(save_dict[INDEX_STORE_KEY])
        graph_store = SimpleGraphStore.from_dict(save_dict[GRAPH_STORE_KEY])

        vector_stores: Dict[str, VectorStore] = {
            key: SimpleVectorStore.from_dict(vector_store_dict)
            for key, vector_store_dict in save_dict[VECTOR_STORE_KEY].items()
        }

        return cls(
            docstore=docstore,
            index_store=index_store,
            vector_stores=vector_stores,
            graph_store=graph_store,
        )

    @property
    def vector_store(self) -> VectorStore:
        """Backwards compatibility for vector_store property.

        Returns the store registered under ``DEFAULT_VECTOR_STORE``; raises
        ``KeyError`` if no store is registered under that namespace.
        """
        return self.vector_stores[DEFAULT_VECTOR_STORE]

    def add_vector_store(self, vector_store: VectorStore, namespace: str) -> None:
        """Add a vector store to the storage context.

        Any store already registered under ``namespace`` is replaced.
        """
        self.vector_stores[namespace] = vector_store