# faiss_rag_enterprise/llama_index/indices/managed/vectara/retriever.py
# (320 lines, 11 KiB, Python)

"""Vectara index.
An index that is built on top of Vectara.
"""
import json
import logging
from typing import Any, List, Optional, Tuple
from llama_index.callbacks.base import CallbackManager
from llama_index.core.base_retriever import BaseRetriever
from llama_index.indices.managed.types import ManagedIndexQueryMode
from llama_index.indices.managed.vectara.base import VectaraIndex
from llama_index.indices.managed.vectara.prompts import (
DEFAULT_VECTARA_QUERY_PROMPT_TMPL,
)
from llama_index.indices.vector_store.retrievers.auto_retriever.auto_retriever import (
VectorIndexAutoRetriever,
)
from llama_index.schema import NodeWithScore, QueryBundle, TextNode
from llama_index.vector_stores.types import (
FilterCondition,
MetadataFilters,
VectorStoreInfo,
VectorStoreQuerySpec,
)
# Module-level logger; used to report failed Vectara query requests.
_logger = logging.getLogger(__name__)
class VectaraRetriever(BaseRetriever):
    """Vectara Retriever.

    Args:
        index (VectaraIndex): the Vectara Index
        similarity_top_k (int): number of top k results to return, defaults to 5.
        vectara_query_mode (str): vector store query mode
            See reference for vectara_query_mode for full list of supported modes.
        lambda_val (float): for hybrid search.
            0 = neural search only.
            1 = keyword match only.
            In between values are a linear interpolation
        n_sentences_before (int):
            number of sentences before the matched sentence to return in the node
        n_sentences_after (int):
            number of sentences after the matched sentence to return in the node
        filter: metadata filter (if specified)
        mmr_k: number of results to fetch for MMR, defaults to 50
        mmr_diversity_bias: number between 0 and 1 that determines the degree
            of diversity among the results with 0 corresponding
            to minimum diversity and 1 to maximum diversity.
            Defaults to 0.3.
        summary_enabled: whether to generate summaries or not. Defaults to False.
        summary_response_lang: language to use for summary generation.
        summary_num_results: number of results to use for summary generation.
        summary_prompt_name: name of the prompt to use for summary generation.
    """

    def __init__(
        self,
        index: VectaraIndex,
        similarity_top_k: int = 5,
        vectara_query_mode: ManagedIndexQueryMode = ManagedIndexQueryMode.DEFAULT,
        lambda_val: float = 0.025,
        n_sentences_before: int = 2,
        n_sentences_after: int = 2,
        filter: str = "",
        mmr_k: int = 50,
        mmr_diversity_bias: float = 0.3,
        summary_enabled: bool = False,
        summary_response_lang: str = "eng",
        summary_num_results: int = 7,
        summary_prompt_name: str = "vectara-experimental-summary-ext-2023-10-23-small",
        callback_manager: Optional[CallbackManager] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        self._index = index
        self._similarity_top_k = similarity_top_k
        self._lambda_val = lambda_val
        self._n_sentences_before = n_sentences_before
        self._n_sentences_after = n_sentences_after
        self._filter = filter
        # MMR-specific knobs are only stored when MMR mode is requested;
        # the query builder consults self._mmr before reading them.
        self._mmr = vectara_query_mode == ManagedIndexQueryMode.MMR
        if self._mmr:
            self._mmr_k = mmr_k
            self._mmr_diversity_bias = mmr_diversity_bias
        # Likewise, summary knobs are only stored when summaries are enabled.
        self._summary_enabled = summary_enabled
        if summary_enabled:
            self._summary_response_lang = summary_response_lang
            self._summary_num_results = summary_num_results
            self._summary_prompt_name = summary_prompt_name
        super().__init__(callback_manager)

    def _get_post_headers(self) -> dict:
        """Return headers that should be attached to each POST request."""
        return {
            "x-api-key": self._index._vectara_api_key,
            "customer-id": self._index._vectara_customer_id,
            "Content-Type": "application/json",
            "X-Source": "llama_index",
        }

    @property
    def similarity_top_k(self) -> int:
        """Return similarity top k."""
        return self._similarity_top_k

    @similarity_top_k.setter
    def similarity_top_k(self, similarity_top_k: int) -> None:
        """Set similarity top k."""
        self._similarity_top_k = similarity_top_k

    def _retrieve(
        self,
        query_bundle: QueryBundle,
        **kwargs: Any,
    ) -> List[NodeWithScore]:
        """Retrieve top k most similar nodes.

        Args:
            query_bundle: Query Bundle
        """
        # Discard the summary component; retrieval only needs the nodes.
        return self._vectara_query(query_bundle, **kwargs)[0]

    def _vectara_query(
        self,
        query_bundle: QueryBundle,
        **kwargs: Any,
    ) -> Tuple[List[NodeWithScore], Optional[str]]:
        """Query the Vectara index for the top k most similar nodes.

        Args:
            query_bundle: Query Bundle

        Returns:
            A tuple of (top nodes, summary). The summary is None when
            summaries are disabled, and "" when the HTTP request failed.
        """
        corpus_key = {
            "customerId": self._index._vectara_customer_id,
            "corpusId": self._index._vectara_corpus_id,
            "lexicalInterpolationConfig": {"lambda": self._lambda_val},
        }
        if self._filter:
            corpus_key["metadataFilter"] = self._filter

        query_obj: dict = {
            "query": query_bundle.query_str,
            "start": 0,
            # MMR needs a larger candidate pool to diversify from; the final
            # result is trimmed back to similarity_top_k below.
            "numResults": self._mmr_k if self._mmr else self._similarity_top_k,
            "contextConfig": {
                "sentencesBefore": self._n_sentences_before,
                "sentencesAfter": self._n_sentences_after,
            },
            "corpusKey": [corpus_key],
        }
        if self._mmr:
            query_obj["rerankingConfig"] = {
                "rerankerId": 272725718,  # Vectara's MMR reranker id
                "mmrConfig": {"diversityBias": self._mmr_diversity_bias},
            }
        if self._summary_enabled:
            query_obj["summary"] = [
                {
                    "responseLang": self._summary_response_lang,
                    "maxSummarizedResults": self._summary_num_results,
                    "summarizerPromptName": self._summary_prompt_name,
                }
            ]
        data = {"query": [query_obj]}

        response = self._index._session.post(
            headers=self._get_post_headers(),
            url="https://api.vectara.io/v1/query",
            data=json.dumps(data),
            timeout=self._index.vectara_api_timeout,
        )

        if response.status_code != 200:
            _logger.error(
                "Query failed %s",
                f"(code {response.status_code}, reason {response.reason}, details "
                f"{response.text})",
            )
            return [], ""

        result = response.json()
        response_set = result["responseSet"][0]
        responses = response_set["response"]
        documents = response_set["document"]
        summary = (
            response_set["summary"][0]["text"] if self._summary_enabled else None
        )

        top_nodes = []
        for x in responses:
            # Merge snippet-level metadata with its document-level metadata
            # (document fields override snippet fields on key collision).
            md = {m["name"]: m["value"] for m in x["metadata"]}
            doc = documents[x["documentIndex"]]
            md.update({m["name"]: m["value"] for m in doc["metadata"]})
            node = NodeWithScore(
                node=TextNode(text=x["text"], id_=doc["id"], metadata=md),  # type: ignore
                score=x["score"],
            )
            top_nodes.append(node)

        return top_nodes[: self._similarity_top_k], summary

    async def _avectara_query(
        self, query_bundle: QueryBundle
    ) -> Tuple[List[NodeWithScore], Optional[str]]:
        """Asynchronously retrieve nodes given query.

        NOTE: currently delegates to the synchronous implementation.
        """
        return self._vectara_query(query_bundle)
class VectaraAutoRetriever(VectorIndexAutoRetriever):
    """Managed Index auto retriever.

    A retriever for a Vectara index that uses an LLM to automatically set
    filtering query parameters.
    Based on VectorStoreAutoRetriever, and uses some of the vector_store
    types that are associated with auto retrieval.

    Args:
        index (VectaraIndex): Vectara Index instance
        vector_store_info (VectorStoreInfo): additional information about
            vector store content and supported metadata filters. The natural language
            description is used by an LLM to automatically set vector store query
            parameters.
        Other variables are the same as VectorStoreAutoRetriever or VectaraRetriever
    """

    def __init__(
        self,
        index: VectaraIndex,
        vector_store_info: VectorStoreInfo,
        **kwargs: Any,
    ) -> None:
        """Initialize with the index and the LLM prompt used to infer filters."""
        super().__init__(
            index,
            vector_store_info,
            prompt_template_str=DEFAULT_VECTARA_QUERY_PROMPT_TMPL,
            **kwargs,
        )  # type: ignore
        self._index = index  # type: ignore
        self._kwargs = kwargs
        self._verbose = self._kwargs.get("verbose", False)
        # Pop (not get) the explicit filter so it is not forwarded a second
        # time when self._kwargs is splatted into VectaraRetriever below.
        self._explicit_filter = self._kwargs.pop("filter", "")

    def _build_retriever_from_spec(
        self, spec: VectorStoreQuerySpec
    ) -> Tuple[VectaraRetriever, QueryBundle]:
        """Build a VectaraRetriever whose filter string encodes the spec's filters.

        Combines the LLM-inferred (implicit) filters, any extra filters on the
        retriever, and the explicit filter string passed at construction time.
        """
        query_bundle = self._get_query_bundle(spec.query)

        # Note: avoid naming the loop variable `filter` (shadows the builtin).
        filter_list = [(f.key, f.operator.value, f.value) for f in spec.filters]
        if self._verbose:
            print(f"Using query str: {spec.query}")
            print(f"Using implicit filters: {filter_list}")

        # create filter string from implicit filters
        if len(spec.filters) == 0:
            filter_str = ""
        else:
            filters = MetadataFilters(
                filters=[*spec.filters, *self._extra_filters.filters]
            )
            condition = " and " if filters.condition == FilterCondition.AND else " or "
            filter_str = condition.join(
                [
                    f"(doc.{f.key} {f.operator.value} '{f.value}')"
                    for f in filters.filters
                ]
            )

        # add explicit filter if specified
        if self._explicit_filter:
            if filter_str:
                filter_str = f"({filter_str}) and ({self._explicit_filter})"
            else:
                filter_str = self._explicit_filter

        if self._verbose:
            print(f"final filter string: {filter_str}")

        return (
            VectaraRetriever(
                index=self._index,  # type: ignore
                filter=filter_str,
                **self._kwargs,
            ),
            query_bundle,
        )

    def _vectara_query(
        self,
        query_bundle: QueryBundle,
        **kwargs: Any,
    ) -> Tuple[List[NodeWithScore], str]:
        """Generate a retrieval spec with the LLM, then run the Vectara query."""
        spec = self.generate_retrieval_spec(query_bundle)
        vectara_retriever, new_query = self._build_retriever_from_spec(
            VectorStoreQuerySpec(
                query=spec.query, filters=spec.filters, top_k=self._similarity_top_k
            )
        )
        return vectara_retriever._vectara_query(new_query, **kwargs)