# faiss_rag_enterprise/llama_index/indices/list/base.py
"""Summary index.
A simple data structure where LlamaIndex iterates through document chunks
in sequence in order to answer a given query.
"""
from enum import Enum
from typing import Any, Dict, Optional, Sequence, Union
from llama_index.core.base_retriever import BaseRetriever
from llama_index.data_structs.data_structs import IndexList
from llama_index.indices.base import BaseIndex
from llama_index.schema import BaseNode, IndexNode
from llama_index.service_context import ServiceContext
from llama_index.storage.docstore.types import RefDocInfo
from llama_index.utils import get_tqdm_iterable
class ListRetrieverMode(str, Enum):
    """Retriever modes accepted by ``SummaryIndex.as_retriever``.

    Subclasses ``str`` so plain strings (e.g. ``"embedding"``) compare equal
    to the enum members.
    """

    # Iterate nodes in stored order.
    DEFAULT = "default"
    # Rank nodes by embedding similarity.
    EMBEDDING = "embedding"
    # Ask an LLM to select relevant nodes.
    LLM = "llm"
class SummaryIndex(BaseIndex[IndexList]):
    """Summary Index.

    The summary index is a simple data structure where nodes are stored in
    a sequence. During index construction, the document texts are
    chunked up, converted to nodes, and stored in a list.

    During query time, the summary index iterates through the nodes
    with some optional filter parameters, and synthesizes an
    answer from all the nodes.

    Args:
        nodes (Optional[Sequence[BaseNode]]): Nodes to build the index from.
        objects (Optional[Sequence[IndexNode]]): Index nodes referencing
            other retrievable objects.
        index_struct (Optional[IndexList]): A pre-built index struct; when
            given, ``nodes`` need not be re-processed.
        service_context (Optional[ServiceContext]): Service context container.
        show_progress (bool): Whether to show tqdm progress bars.
            Defaults to False.
    """

    index_struct_cls = IndexList

    def __init__(
        self,
        nodes: Optional[Sequence[BaseNode]] = None,
        objects: Optional[Sequence[IndexNode]] = None,
        index_struct: Optional[IndexList] = None,
        service_context: Optional[ServiceContext] = None,
        show_progress: bool = False,
        **kwargs: Any,
    ) -> None:
        """Initialize params; all heavy lifting is delegated to BaseIndex."""
        super().__init__(
            nodes=nodes,
            index_struct=index_struct,
            service_context=service_context,
            show_progress=show_progress,
            objects=objects,
            **kwargs,
        )

    def as_retriever(
        self,
        retriever_mode: Union[str, ListRetrieverMode] = ListRetrieverMode.DEFAULT,
        **kwargs: Any,
    ) -> BaseRetriever:
        """Return a retriever over this index.

        Args:
            retriever_mode: One of ``ListRetrieverMode`` ("default",
                "embedding", "llm"). Plain strings are accepted because
                ``ListRetrieverMode`` subclasses ``str``.
            **kwargs: Forwarded to the retriever constructor.

        Raises:
            ValueError: If ``retriever_mode`` is not a recognized mode.
        """
        # Deferred import: the retrievers module imports this index module,
        # so importing at the top of the file would be circular.
        from llama_index.indices.list.retrievers import (
            SummaryIndexEmbeddingRetriever,
            SummaryIndexLLMRetriever,
            SummaryIndexRetriever,
        )

        if retriever_mode == ListRetrieverMode.DEFAULT:
            return SummaryIndexRetriever(self, object_map=self._object_map, **kwargs)
        elif retriever_mode == ListRetrieverMode.EMBEDDING:
            return SummaryIndexEmbeddingRetriever(
                self, object_map=self._object_map, **kwargs
            )
        elif retriever_mode == ListRetrieverMode.LLM:
            return SummaryIndexLLMRetriever(self, object_map=self._object_map, **kwargs)
        else:
            raise ValueError(f"Unknown retriever mode: {retriever_mode}")

    def _build_index_from_nodes(
        self, nodes: Sequence[BaseNode], show_progress: bool = False
    ) -> IndexList:
        """Build the index struct from nodes, preserving input order.

        Args:
            nodes (Sequence[BaseNode]): Nodes to add to the index.
            show_progress (bool): Whether to show a tqdm progress bar.

        Returns:
            IndexList: The created summary index struct.
        """
        index_struct = IndexList()
        nodes_with_progress = get_tqdm_iterable(
            nodes, show_progress, "Processing nodes"
        )
        for n in nodes_with_progress:
            index_struct.add_node(n)
        return index_struct

    def _insert(self, nodes: Sequence[BaseNode], **insert_kwargs: Any) -> None:
        """Append nodes to the index struct in order."""
        for n in nodes:
            self._index_struct.add_node(n)

    def _delete_node(self, node_id: str, **delete_kwargs: Any) -> None:
        """Delete a node from the index struct by id.

        Filters the stored id list directly. The previous implementation
        fetched every node object from the docstore only to read its id
        back out — an unnecessary O(n) docstore round-trip per deletion.
        """
        self._index_struct.nodes = [
            existing_id
            for existing_id in self._index_struct.nodes
            if existing_id != node_id
        ]

    @property
    def ref_doc_info(self) -> Dict[str, RefDocInfo]:
        """Retrieve a dict mapping of ingested documents and their nodes+metadata.

        Nodes without a source document, or whose source document has no
        registered ref-doc info, are skipped.
        """
        node_doc_ids = self._index_struct.nodes
        nodes = self.docstore.get_nodes(node_doc_ids)

        all_ref_doc_info = {}
        for node in nodes:
            ref_node = node.source_node
            if not ref_node:
                continue
            ref_doc_info = self.docstore.get_ref_doc_info(ref_node.node_id)
            if not ref_doc_info:
                continue
            all_ref_doc_info[ref_node.node_id] = ref_doc_info
        return all_ref_doc_info
# Legacy alias retained for backwards compatibility with pre-rename imports.
GPTListIndex = SummaryIndex
# Alias under the older "list index" naming; SummaryIndex is the current name.
ListIndex = SummaryIndex