91 lines
2.5 KiB
Python
91 lines
2.5 KiB
Python
from llama_index.constants import DATA_KEY, TYPE_KEY
|
|
from llama_index.schema import (
|
|
BaseNode,
|
|
Document,
|
|
ImageDocument,
|
|
ImageNode,
|
|
IndexNode,
|
|
NodeRelationship,
|
|
RelatedNodeInfo,
|
|
TextNode,
|
|
)
|
|
|
|
|
|
def doc_to_json(doc: BaseNode) -> dict:
|
|
return {
|
|
DATA_KEY: doc.dict(),
|
|
TYPE_KEY: doc.get_type(),
|
|
}
|
|
|
|
|
|
def json_to_doc(doc_dict: dict) -> BaseNode:
|
|
doc_type = doc_dict[TYPE_KEY]
|
|
data_dict = doc_dict[DATA_KEY]
|
|
doc: BaseNode
|
|
|
|
if "extra_info" in data_dict:
|
|
return legacy_json_to_doc(doc_dict)
|
|
else:
|
|
if doc_type == Document.get_type():
|
|
doc = Document.parse_obj(data_dict)
|
|
elif doc_type == ImageDocument.get_type():
|
|
doc = ImageDocument.parse_obj(data_dict)
|
|
elif doc_type == TextNode.get_type():
|
|
doc = TextNode.parse_obj(data_dict)
|
|
elif doc_type == ImageNode.get_type():
|
|
doc = ImageNode.parse_obj(data_dict)
|
|
elif doc_type == IndexNode.get_type():
|
|
doc = IndexNode.parse_obj(data_dict)
|
|
else:
|
|
raise ValueError(f"Unknown doc type: {doc_type}")
|
|
|
|
return doc
|
|
|
|
|
|
def legacy_json_to_doc(doc_dict: dict) -> BaseNode:
|
|
"""Todo: Deprecated legacy support for old node versions."""
|
|
doc_type = doc_dict[TYPE_KEY]
|
|
data_dict = doc_dict[DATA_KEY]
|
|
doc: BaseNode
|
|
|
|
text = data_dict.get("text", "")
|
|
metadata = data_dict.get("extra_info", {}) or {}
|
|
id_ = data_dict.get("doc_id", None)
|
|
|
|
relationships = data_dict.get("relationships", {})
|
|
relationships = {
|
|
NodeRelationship(k): RelatedNodeInfo(node_id=v)
|
|
for k, v in relationships.items()
|
|
}
|
|
|
|
if doc_type == Document.get_type():
|
|
doc = Document(
|
|
text=text, metadata=metadata, id=id_, relationships=relationships
|
|
)
|
|
elif doc_type == TextNode.get_type():
|
|
doc = TextNode(
|
|
text=text, metadata=metadata, id=id_, relationships=relationships
|
|
)
|
|
elif doc_type == ImageNode.get_type():
|
|
image = data_dict.get("image", None)
|
|
doc = ImageNode(
|
|
text=text,
|
|
metadata=metadata,
|
|
id=id_,
|
|
relationships=relationships,
|
|
image=image,
|
|
)
|
|
elif doc_type == IndexNode.get_type():
|
|
index_id = data_dict.get("index_id", None)
|
|
doc = IndexNode(
|
|
text=text,
|
|
metadata=metadata,
|
|
id=id_,
|
|
relationships=relationships,
|
|
index_id=index_id,
|
|
)
|
|
else:
|
|
raise ValueError(f"Unknown doc type: {doc_type}")
|
|
|
|
return doc
|