faiss_rag_enterprise/llama_index/storage/docstore/utils.py

91 lines
2.5 KiB
Python

from llama_index.constants import DATA_KEY, TYPE_KEY
from llama_index.schema import (
BaseNode,
Document,
ImageDocument,
ImageNode,
IndexNode,
NodeRelationship,
RelatedNodeInfo,
TextNode,
)
def doc_to_json(doc: BaseNode) -> dict:
    """Wrap a node in the two-key envelope used for storage.

    Args:
        doc: The node to serialize.

    Returns:
        A dict holding the result of ``doc.dict()`` under ``DATA_KEY`` and
        the result of ``doc.get_type()`` under ``TYPE_KEY``.
    """
    payload = doc.dict()
    type_name = doc.get_type()
    return {DATA_KEY: payload, TYPE_KEY: type_name}
def json_to_doc(doc_dict: dict) -> BaseNode:
    """Rebuild a node from the envelope produced by ``doc_to_json``.

    Args:
        doc_dict: Mapping holding the node type under ``TYPE_KEY`` and the
            serialized fields under ``DATA_KEY``.

    Returns:
        The reconstructed node. Payloads containing an ``"extra_info"`` key
        are delegated to ``legacy_json_to_doc``.

    Raises:
        KeyError: If ``TYPE_KEY`` or ``DATA_KEY`` is missing from ``doc_dict``.
        ValueError: If the stored type matches none of the supported classes.
    """
    type_name = doc_dict[TYPE_KEY]
    payload = doc_dict[DATA_KEY]

    # An "extra_info" key marks the old serialization layout.
    if "extra_info" in payload:
        return legacy_json_to_doc(doc_dict)

    # Candidates are checked in this fixed order; the first whose type tag
    # equals the stored one parses the payload.
    for node_cls in (Document, ImageDocument, TextNode, ImageNode, IndexNode):
        if type_name == node_cls.get_type():
            return node_cls.parse_obj(payload)

    raise ValueError(f"Unknown doc type: {type_name}")
def legacy_json_to_doc(doc_dict: dict) -> BaseNode:
    """Rebuild a node from the deprecated serialized layout.

    TODO: Deprecated legacy support for old node versions.

    The legacy payload is read as follows: text from ``"text"``, metadata
    from ``"extra_info"``, the identifier from ``"doc_id"``, and
    relationships from a mapping of relationship value to related node id.

    Args:
        doc_dict: Mapping holding the node type under ``TYPE_KEY`` and the
            legacy payload under ``DATA_KEY``.

    Returns:
        A ``Document``, ``TextNode``, ``ImageNode`` or ``IndexNode`` built
        from the payload.

    Raises:
        KeyError: If ``TYPE_KEY`` or ``DATA_KEY`` is missing from ``doc_dict``.
        ValueError: If the stored type matches none of the supported classes.
    """
    doc_type = doc_dict[TYPE_KEY]
    data_dict = doc_dict[DATA_KEY]

    # Treat an explicit null the same as a missing key, mirroring how
    # "extra_info" is handled below; otherwise `.items()` fails on None.
    raw_relationships = data_dict.get("relationships") or {}
    relationships = {
        NodeRelationship(k): RelatedNodeInfo(node_id=v)
        for k, v in raw_relationships.items()
    }

    # Keyword arguments shared by every supported node class.
    node_kwargs: dict = {
        "text": data_dict.get("text", ""),
        "metadata": data_dict.get("extra_info") or {},
        "relationships": relationships,
    }

    # NOTE(review): the node models are assumed to name their identifier
    # field `id_` (the former `id=` keyword matched no field, so the stored
    # id was dropped and a new one generated) -- confirm against
    # llama_index.schema. The id is only forwarded when present so the
    # model's own default still applies to payloads without one.
    legacy_id = data_dict.get("doc_id")
    if legacy_id is not None:
        node_kwargs["id_"] = legacy_id

    doc: BaseNode
    if doc_type == Document.get_type():
        doc = Document(**node_kwargs)
    elif doc_type == TextNode.get_type():
        doc = TextNode(**node_kwargs)
    elif doc_type == ImageNode.get_type():
        doc = ImageNode(image=data_dict.get("image"), **node_kwargs)
    elif doc_type == IndexNode.get_type():
        doc = IndexNode(index_id=data_dict.get("index_id"), **node_kwargs)
    else:
        raise ValueError(f"Unknown doc type: {doc_type}")
    return doc