"""Vectara index.
|
|
An index that is built on top of Vectara.
|
|
"""
|
|
|
|
import json
import logging
from typing import Any, List, Optional, Tuple

from llama_index.callbacks.base import CallbackManager
from llama_index.core.base_retriever import BaseRetriever
from llama_index.indices.managed.types import ManagedIndexQueryMode
from llama_index.indices.managed.vectara.base import VectaraIndex
from llama_index.indices.managed.vectara.prompts import (
    DEFAULT_VECTARA_QUERY_PROMPT_TMPL,
)
from llama_index.indices.vector_store.retrievers.auto_retriever.auto_retriever import (
    VectorIndexAutoRetriever,
)
from llama_index.schema import NodeWithScore, QueryBundle, TextNode
from llama_index.vector_stores.types import (
    FilterCondition,
    MetadataFilters,
    VectorStoreInfo,
    VectorStoreQuerySpec,
)

_logger = logging.getLogger(__name__)


class VectaraRetriever(BaseRetriever):
    """Vectara Retriever.

    Args:
        index (VectaraIndex): the Vectara Index.
        similarity_top_k (int): number of top k results to return, defaults to 5.
        vectara_query_mode (ManagedIndexQueryMode): vector store query mode.
            See ManagedIndexQueryMode for the full list of supported modes.
        lambda_val (float): weight for hybrid search:
            0 = neural search only,
            1 = keyword match only;
            values in between are a linear interpolation of the two.
        n_sentences_before (int): number of sentences before the matched
            sentence to return in the node.
        n_sentences_after (int): number of sentences after the matched
            sentence to return in the node.
        filter (str): metadata filter (if specified).
        mmr_k (int): number of results to fetch for MMR, defaults to 50.
        mmr_diversity_bias (float): number between 0 and 1 that determines
            the degree of diversity among the results, with 0 corresponding
            to minimum diversity and 1 to maximum diversity. Defaults to 0.3.
        summary_enabled (bool): whether to generate summaries. Defaults to False.
        summary_response_lang (str): language to use for summary generation.
        summary_num_results (int): number of results to use for summary
            generation.
        summary_prompt_name (str): name of the prompt to use for summary
            generation.
    """

    def __init__(
        self,
        index: VectaraIndex,
        similarity_top_k: int = 5,
        vectara_query_mode: ManagedIndexQueryMode = ManagedIndexQueryMode.DEFAULT,
        lambda_val: float = 0.025,
        n_sentences_before: int = 2,
        n_sentences_after: int = 2,
        filter: str = "",
        mmr_k: int = 50,
        mmr_diversity_bias: float = 0.3,
        summary_enabled: bool = False,
        summary_response_lang: str = "eng",
        summary_num_results: int = 7,
        summary_prompt_name: str = "vectara-experimental-summary-ext-2023-10-23-small",
        callback_manager: Optional[CallbackManager] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        self._index = index
        self._similarity_top_k = similarity_top_k
        self._lambda_val = lambda_val
        self._n_sentences_before = n_sentences_before
        self._n_sentences_after = n_sentences_after
        self._filter = filter

        if vectara_query_mode == ManagedIndexQueryMode.MMR:
            self._mmr = True
            self._mmr_k = mmr_k
            self._mmr_diversity_bias = mmr_diversity_bias
        else:
            self._mmr = False

        if summary_enabled:
            self._summary_enabled = True
            self._summary_response_lang = summary_response_lang
            self._summary_num_results = summary_num_results
            self._summary_prompt_name = summary_prompt_name
        else:
            self._summary_enabled = False
        super().__init__(callback_manager)

    def _get_post_headers(self) -> dict:
        """Returns headers that should be attached to each post request."""
        return {
            "x-api-key": self._index._vectara_api_key,
            "customer-id": self._index._vectara_customer_id,
            "Content-Type": "application/json",
            "X-Source": "llama_index",
        }

    @property
    def similarity_top_k(self) -> int:
        """Return similarity top k."""
        return self._similarity_top_k

    @similarity_top_k.setter
    def similarity_top_k(self, similarity_top_k: int) -> None:
        """Set similarity top k."""
        self._similarity_top_k = similarity_top_k

    def _retrieve(
        self,
        query_bundle: QueryBundle,
        **kwargs: Any,
    ) -> List[NodeWithScore]:
        """Retrieve the top k most similar nodes.

        Args:
            query_bundle: Query Bundle
        """
        return self._vectara_query(query_bundle, **kwargs)[0]  # return top_nodes only

    def _vectara_query(
        self,
        query_bundle: QueryBundle,
        **kwargs: Any,
    ) -> Tuple[List[NodeWithScore], Optional[str]]:
        """Query the Vectara index for the top k most similar nodes.

        Args:
            query_bundle: Query Bundle
        """
        corpus_key = {
            "customerId": self._index._vectara_customer_id,
            "corpusId": self._index._vectara_corpus_id,
            "lexicalInterpolationConfig": {"lambda": self._lambda_val},
        }
        if len(self._filter) > 0:
            corpus_key["metadataFilter"] = self._filter

        data = {
            "query": [
                {
                    "query": query_bundle.query_str,
                    "start": 0,
                    "numResults": self._mmr_k if self._mmr else self._similarity_top_k,
                    "contextConfig": {
                        "sentencesBefore": self._n_sentences_before,
                        "sentencesAfter": self._n_sentences_after,
                    },
                    "corpusKey": [corpus_key],
                }
            ]
        }

        if self._mmr:
            data["query"][0]["rerankingConfig"] = {
                # 272725718 is the ID of Vectara's MMR reranker.
                "rerankerId": 272725718,
                "mmrConfig": {"diversityBias": self._mmr_diversity_bias},
            }

        if self._summary_enabled:
            data["query"][0]["summary"] = [
                {
                    "responseLang": self._summary_response_lang,
                    "maxSummarizedResults": self._summary_num_results,
                    "summarizerPromptName": self._summary_prompt_name,
                }
            ]
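
        # For reference, the assembled request body has this overall shape
        # (values illustrative, optional sections noted):
        #
        #   {"query": [{"query": "...", "start": 0, "numResults": 5,
        #               "contextConfig": {"sentencesBefore": 2, "sentencesAfter": 2},
        #               "corpusKey": [{...}],
        #               "rerankingConfig": {...},    # MMR mode only
        #               "summary": [{...}]}]}        # only if summaries enabled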

        response = self._index._session.post(
            headers=self._get_post_headers(),
            url="https://api.vectara.io/v1/query",
            data=json.dumps(data),
            timeout=self._index.vectara_api_timeout,
        )

        if response.status_code != 200:
            _logger.error(
                "Query failed %s",
                f"(code {response.status_code}, reason {response.reason}, details "
                f"{response.text})",
            )
            return [], ""

        result = response.json()

        responses = result["responseSet"][0]["response"]
        documents = result["responseSet"][0]["document"]
        summary = (
            result["responseSet"][0]["summary"][0]["text"]
            if self._summary_enabled
            else None
        )

        # Merge each snippet's metadata with its parent document's metadata.
        metadatas = []
        for x in responses:
            md = {m["name"]: m["value"] for m in x["metadata"]}
            doc_num = x["documentIndex"]
            doc_md = {m["name"]: m["value"] for m in documents[doc_num]["metadata"]}
            md.update(doc_md)
            metadatas.append(md)

        top_nodes = []
        for x, md in zip(responses, metadatas):
            doc_inx = x["documentIndex"]
            doc_id = documents[doc_inx]["id"]
            node = NodeWithScore(
                node=TextNode(text=x["text"], id_=doc_id, metadata=md), score=x["score"]  # type: ignore
            )
            top_nodes.append(node)

        return top_nodes[: self._similarity_top_k], summary

    async def _avectara_query(
        self, query_bundle: QueryBundle
    ) -> Tuple[List[NodeWithScore], Optional[str]]:
        """Asynchronously retrieve nodes for the given query.

        Currently falls back to the synchronous implementation.
        """
        return self._vectara_query(query_bundle)


class VectaraAutoRetriever(VectorIndexAutoRetriever):
    """Managed Index auto retriever.

    A retriever for a Vectara index that uses an LLM to automatically set
    filtering query parameters.
    Based on VectorIndexAutoRetriever, and uses some of the vector_store
    types that are associated with auto retrieval.

    Args:
        index (VectaraIndex): Vectara Index instance.
        vector_store_info (VectorStoreInfo): additional information about
            vector store content and supported metadata filters. The natural
            language description is used by an LLM to automatically set
            vector store query parameters.
        Other arguments are the same as for VectorIndexAutoRetriever or
        VectaraRetriever.
    """

    def __init__(
        self,
        index: VectaraIndex,
        vector_store_info: VectorStoreInfo,
        **kwargs: Any,
    ) -> None:
        super().__init__(index, vector_store_info, prompt_template_str=DEFAULT_VECTARA_QUERY_PROMPT_TMPL, **kwargs)  # type: ignore
        self._index = index  # type: ignore
        self._kwargs = kwargs
        self._verbose = self._kwargs.get("verbose", False)
        self._explicit_filter = self._kwargs.pop("filter", "")

    def _build_retriever_from_spec(
        self, spec: VectorStoreQuerySpec
    ) -> Tuple[VectaraRetriever, QueryBundle]:
        query_bundle = self._get_query_bundle(spec.query)

        filter_list = [
            (filter.key, filter.operator.value, filter.value) for filter in spec.filters
        ]
        if self._verbose:
            print(f"Using query str: {spec.query}")
            print(f"Using implicit filters: {filter_list}")

        # create filter string from implicit filters
        if len(spec.filters) == 0:
            filter_str = ""
        else:
            filters = MetadataFilters(
                filters=[*spec.filters, *self._extra_filters.filters]
            )
            condition = " and " if filters.condition == FilterCondition.AND else " or "
            filter_str = condition.join(
                [
                    f"(doc.{f.key} {f.operator.value} '{f.value}')"
                    for f in filters.filters
                ]
            )
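
        # For example, two implicit filters joined under an AND condition
        # would produce a string like (illustrative):
        #   "(doc.year > '2020') and (doc.rating >= '4')"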

        # add explicit filter if specified
        if self._explicit_filter:
            if len(filter_str) > 0:
                filter_str = f"({filter_str}) and ({self._explicit_filter})"
            else:
                filter_str = self._explicit_filter

        if self._verbose:
            print(f"final filter string: {filter_str}")

        return (
            VectaraRetriever(
                index=self._index,  # type: ignore
                filter=filter_str,
                **self._kwargs,
            ),
            query_bundle,
        )

    def _vectara_query(
        self,
        query_bundle: QueryBundle,
        **kwargs: Any,
    ) -> Tuple[List[NodeWithScore], Optional[str]]:
        spec = self.generate_retrieval_spec(query_bundle)
        vectara_retriever, new_query = self._build_retriever_from_spec(
            VectorStoreQuerySpec(
                query=spec.query, filters=spec.filters, top_k=self._similarity_top_k
            )
        )
        return vectara_retriever._vectara_query(new_query, **kwargs)