"""Set of default prompts."""
from llama_index.prompts.base import PromptTemplate
from llama_index.prompts.prompt_type import PromptType
############################################
# Tree
############################################
DEFAULT_SUMMARY_PROMPT_TMPL = (
"Write a summary of the following. Try to use only the "
"information provided. "
"Try to include as many key details as possible.\n"
"\n"
"\n"
"{context_str}\n"
"\n"
"\n"
'SUMMARY:"""\n'
)
DEFAULT_SUMMARY_PROMPT = PromptTemplate(
DEFAULT_SUMMARY_PROMPT_TMPL, prompt_type=PromptType.SUMMARY
)
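# Usage sketch (illustrative, not part of the library): `PromptTemplate.format`
# substitutes the template variables, so the summary prompt can be rendered
# from any document text. The text below is an assumed example.
def _example_render_summary_prompt() -> str:
    return DEFAULT_SUMMARY_PROMPT.format(
        context_str="LlamaIndex is a data framework for LLM applications."
    )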
# insert prompts
DEFAULT_INSERT_PROMPT_TMPL = (
"Context information is below. It is provided in a numbered list "
"(1 to {num_chunks}), "
"where each item in the list corresponds to a summary.\n"
"---------------------\n"
"{context_list}"
"---------------------\n"
"Given the context information, here is a new piece of "
"information: {new_chunk_text}\n"
"Answer with the number corresponding to the summary that should be updated. "
"The answer should be the number corresponding to the "
"summary that is most relevant to the question.\n"
)
DEFAULT_INSERT_PROMPT = PromptTemplate(
DEFAULT_INSERT_PROMPT_TMPL, prompt_type=PromptType.TREE_INSERT
)
# single choice
DEFAULT_QUERY_PROMPT_TMPL = (
"Some choices are given below. It is provided in a numbered list "
"(1 to {num_chunks}), "
"where each item in the list corresponds to a summary.\n"
"---------------------\n"
"{context_list}"
"\n---------------------\n"
"Using only the choices above and not prior knowledge, return "
"the choice that is most relevant to the question: '{query_str}'\n"
"Provide choice in the following format: 'ANSWER: <number>' and explain why "
"this summary was selected in relation to the question.\n"
)
DEFAULT_QUERY_PROMPT = PromptTemplate(
DEFAULT_QUERY_PROMPT_TMPL, prompt_type=PromptType.TREE_SELECT
)
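# Illustrative helper (assumed, not part of llama_index): parse the
# "ANSWER: <number>" line that the single-choice prompt above requests.
def _example_parse_choice(response: str):
    import re

    match = re.search(r"ANSWER:\s*(\d+)", response)
    return int(match.group(1)) if match else None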
# multiple choice
DEFAULT_QUERY_PROMPT_MULTIPLE_TMPL = (
"Some choices are given below. It is provided in a numbered "
"list (1 to {num_chunks}), "
"where each item in the list corresponds to a summary.\n"
"---------------------\n"
"{context_list}"
"\n---------------------\n"
"Using only the choices above and not prior knowledge, return the top choices "
"(no more than {branching_factor}, ranked by most relevant to least) that "
"are most relevant to the question: '{query_str}'\n"
"Provide choices in the following format: 'ANSWER: <numbers>' and explain why "
"these summaries were selected in relation to the question.\n"
)
DEFAULT_QUERY_PROMPT_MULTIPLE = PromptTemplate(
DEFAULT_QUERY_PROMPT_MULTIPLE_TMPL, prompt_type=PromptType.TREE_SELECT_MULTIPLE
)
DEFAULT_REFINE_PROMPT_TMPL = (
"The original query is as follows: {query_str}\n"
"We have provided an existing answer: {existing_answer}\n"
"We have the opportunity to refine the existing answer "
"(only if needed) with some more context below.\n"
"------------\n"
"{context_msg}\n"
"------------\n"
"Given the new context, refine the original answer to better "
"answer the query. "
"If the context isn't useful, return the original answer.\n"
"Refined Answer: "
)
DEFAULT_REFINE_PROMPT = PromptTemplate(
DEFAULT_REFINE_PROMPT_TMPL, prompt_type=PromptType.REFINE
)
DEFAULT_TEXT_QA_PROMPT_TMPL = (
"Context information is below.\n"
"---------------------\n"
"{context_str}\n"
"---------------------\n"
"Given the context information and not prior knowledge, "
"answer the query.\n"
"Query: {query_str}\n"
"Answer: "
)
DEFAULT_TEXT_QA_PROMPT = PromptTemplate(
DEFAULT_TEXT_QA_PROMPT_TMPL, prompt_type=PromptType.QUESTION_ANSWER
)
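# Sketch of the create-and-refine pattern these two prompts support
# (illustrative; `llm_complete` is an assumed stand-in for any LLM call, and
# `chunks` is assumed non-empty): answer from the first chunk with the QA
# prompt, then fold in each further chunk with the refine prompt.
def _example_create_and_refine(llm_complete, query_str, chunks):
    answer = llm_complete(
        DEFAULT_TEXT_QA_PROMPT.format(context_str=chunks[0], query_str=query_str)
    )
    for chunk in chunks[1:]:
        answer = llm_complete(
            DEFAULT_REFINE_PROMPT.format(
                query_str=query_str, existing_answer=answer, context_msg=chunk
            )
        )
    return answer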
DEFAULT_TREE_SUMMARIZE_TMPL = (
"Context information from multiple sources is below.\n"
"---------------------\n"
"{context_str}\n"
"---------------------\n"
"Given the information from multiple sources and not prior knowledge, "
"answer the query.\n"
"Query: {query_str}\n"
"Answer: "
)
DEFAULT_TREE_SUMMARIZE_PROMPT = PromptTemplate(
DEFAULT_TREE_SUMMARIZE_TMPL, prompt_type=PromptType.SUMMARY
)
############################################
# Keyword Table
############################################
DEFAULT_KEYWORD_EXTRACT_TEMPLATE_TMPL = (
"Some text is provided below. Given the text, extract up to {max_keywords} "
"keywords from the text. Avoid stopwords."
"---------------------\n"
"{text}\n"
"---------------------\n"
"Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'\n"
)
DEFAULT_KEYWORD_EXTRACT_TEMPLATE = PromptTemplate(
DEFAULT_KEYWORD_EXTRACT_TEMPLATE_TMPL, prompt_type=PromptType.KEYWORD_EXTRACT
)
# NOTE: the keyword extraction for queries can be the same as
# the one used to build the index, but here we tune it to see if performance is better.
DEFAULT_QUERY_KEYWORD_EXTRACT_TEMPLATE_TMPL = (
"A question is provided below. Given the question, extract up to {max_keywords} "
"keywords from the text. Focus on extracting the keywords that we can use "
"to best lookup answers to the question. Avoid stopwords.\n"
"---------------------\n"
"{question}\n"
"---------------------\n"
"Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'\n"
)
DEFAULT_QUERY_KEYWORD_EXTRACT_TEMPLATE = PromptTemplate(
DEFAULT_QUERY_KEYWORD_EXTRACT_TEMPLATE_TMPL,
prompt_type=PromptType.QUERY_KEYWORD_EXTRACT,
)
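# Illustrative helper (assumed, not part of llama_index): parse the
# comma-separated "KEYWORDS: <keywords>" line the two templates above request.
def _example_parse_keywords(response: str):
    _, _, rest = response.partition("KEYWORDS:")
    return [kw.strip() for kw in rest.split(",") if kw.strip()]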
############################################
# Structured Store
############################################
DEFAULT_SCHEMA_EXTRACT_TMPL = (
"We wish to extract relevant fields from an unstructured text chunk into "
"a structured schema. We first provide the unstructured text, and then "
"we provide the schema that we wish to extract. "
"-----------text-----------\n"
"{text}\n"
"-----------schema-----------\n"
"{schema}\n"
"---------------------\n"
"Given the text and schema, extract the relevant fields from the text in "
"the following format: "
"field1: <value>\nfield2: <value>\n...\n\n"
"If a field is not present in the text, don't include it in the output."
"If no fields are present in the text, return a blank string.\n"
"Fields: "
)
DEFAULT_SCHEMA_EXTRACT_PROMPT = PromptTemplate(
DEFAULT_SCHEMA_EXTRACT_TMPL, prompt_type=PromptType.SCHEMA_EXTRACT
)
# NOTE: taken from langchain and adapted
# https://github.com/langchain-ai/langchain/blob/v0.0.303/libs/langchain/langchain/chains/sql_database/prompt.py
DEFAULT_TEXT_TO_SQL_TMPL = (
"Given an input question, first create a syntactically correct {dialect} "
"query to run, then look at the results of the query and return the answer. "
"You can order the results by a relevant column to return the most "
"interesting examples in the database.\n\n"
"Never query for all the columns from a specific table, only ask for a "
"few relevant columns given the question.\n\n"
"Pay attention to use only the column names that you can see in the schema "
"description. "
"Be careful to not query for columns that do not exist. "
"Pay attention to which column is in which table. "
"Also, qualify column names with the table name when needed. "
"You are required to use the following format, each taking one line:\n\n"
"Question: Question here\n"
"SQLQuery: SQL Query to run\n"
"SQLResult: Result of the SQLQuery\n"
"Answer: Final answer here\n\n"
"Only use tables listed below.\n"
"{schema}\n\n"
"Question: {query_str}\n"
"SQLQuery: "
)
DEFAULT_TEXT_TO_SQL_PROMPT = PromptTemplate(
DEFAULT_TEXT_TO_SQL_TMPL,
prompt_type=PromptType.TEXT_TO_SQL,
)
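# Illustrative helper (assumed, not part of llama_index): the model continues
# the text after "SQLQuery: ", so the SQL is everything up to the
# "SQLResult:" marker that the format above introduces (if the model emits one).
def _example_extract_sql(completion: str) -> str:
    return completion.split("SQLResult:")[0].strip()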
DEFAULT_TEXT_TO_SQL_PGVECTOR_TMPL = """\
Given an input question, first create a syntactically correct {dialect} \
query to run, then look at the results of the query and return the answer. \
You can order the results by a relevant column to return the most \
interesting examples in the database.
Pay attention to use only the column names that you can see in the schema \
description. Be careful to not query for columns that do not exist. \
Pay attention to which column is in which table. Also, qualify column names \
with the table name when needed.
IMPORTANT NOTE: you can use specialized pgvector syntax (`<->`) to do nearest \
neighbors/semantic search against a given vector from an embeddings column in the table. \
The embeddings value for a given row typically represents the semantic meaning of that row. \
The vector is an embedding of the question, given below. Do NOT fill in the \
vector values directly; instead, use the `[query_vector]` placeholder. \
For instance, some example SELECT statements are given below \
(the name of the embeddings column is `embedding`):
SELECT * FROM items ORDER BY embedding <-> '[query_vector]' LIMIT 5;
SELECT * FROM items WHERE id != 1 ORDER BY embedding <-> (SELECT embedding FROM items WHERE id = 1) LIMIT 5;
SELECT * FROM items WHERE embedding <-> '[query_vector]' < 5;
You are required to use the following format, \
each taking one line:
Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of the SQLQuery
Answer: Final answer here
Only use tables listed below.
{schema}
Question: {query_str}
SQLQuery: \
"""
DEFAULT_TEXT_TO_SQL_PGVECTOR_PROMPT = PromptTemplate(
DEFAULT_TEXT_TO_SQL_PGVECTOR_TMPL,
prompt_type=PromptType.TEXT_TO_SQL,
)
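# Illustrative helper (assumed, not part of llama_index): splice a real query
# embedding into the '[query_vector]' placeholder the prompt above asks the
# model to emit; the quoting in the generated SQL is preserved, yielding a
# pgvector literal such as '[0.1,0.2,...]'.
def _example_fill_query_vector(sql: str, query_embedding) -> str:
    vector_literal = "[" + ",".join(str(x) for x in query_embedding) + "]"
    return sql.replace("[query_vector]", vector_literal)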
# NOTE: by partially filling schema, we can reduce to a QuestionAnswer prompt
# that we can feed to our table
DEFAULT_TABLE_CONTEXT_TMPL = (
"We have provided a table schema below. "
"---------------------\n"
"{schema}\n"
"---------------------\n"
"We have also provided context information below. "
"{context_str}\n"
"---------------------\n"
"Given the context information and the table schema, "
"give a response to the following task: {query_str}"
)
DEFAULT_TABLE_CONTEXT_QUERY = (
"Provide a high-level description of the table, "
"as well as a description of each column in the table. "
"Provide answers in the following format:\n"
"TableDescription: <description>\n"
"Column1Description: <description>\n"
"Column2Description: <description>\n"
"...\n\n"
)
DEFAULT_TABLE_CONTEXT_PROMPT = PromptTemplate(
DEFAULT_TABLE_CONTEXT_TMPL, prompt_type=PromptType.TABLE_CONTEXT
)
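# As the NOTE above says, partially filling {schema} reduces this to a
# question-answer style prompt. A minimal sketch, assuming the
# `PromptTemplate.partial_format` API (the schema string is an example):
def _example_partial_table_context():
    return DEFAULT_TABLE_CONTEXT_PROMPT.partial_format(
        schema="CREATE TABLE city (name TEXT, population INTEGER)"
    )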
# NOTE: by partially filling schema, we can reduce to a refine prompt
# that we can feed to our table
DEFAULT_REFINE_TABLE_CONTEXT_TMPL = (
"We have provided a table schema below. "
"---------------------\n"
"{schema}\n"
"---------------------\n"
"We have also provided some context information below. "
"{context_msg}\n"
"---------------------\n"
"Given the context information and the table schema, "
"give a response to the following task: {query_str}\n"
"We have provided an existing answer: {existing_answer}\n"
"Given the new context, refine the original answer to better "
"answer the question. "
"If the context isn't useful, return the original answer."
)
DEFAULT_REFINE_TABLE_CONTEXT_PROMPT = PromptTemplate(
DEFAULT_REFINE_TABLE_CONTEXT_TMPL, prompt_type=PromptType.TABLE_CONTEXT
)
############################################
# Knowledge-Graph Table
############################################
DEFAULT_KG_TRIPLET_EXTRACT_TMPL = (
"Some text is provided below. Given the text, extract up to "
"{max_knowledge_triplets} "
"knowledge triplets in the form of (subject, predicate, object). Avoid stopwords.\n"
"---------------------\n"
"Example:"
"Text: Alice is Bob's mother."
"Triplets:\n(Alice, is mother of, Bob)\n"
"Text: Philz is a coffee shop founded in Berkeley in 1982.\n"
"Triplets:\n"
"(Philz, is, coffee shop)\n"
"(Philz, founded in, Berkeley)\n"
"(Philz, founded in, 1982)\n"
"---------------------\n"
"Text: {text}\n"
"Triplets:\n"
)
DEFAULT_KG_TRIPLET_EXTRACT_PROMPT = PromptTemplate(
DEFAULT_KG_TRIPLET_EXTRACT_TMPL,
prompt_type=PromptType.KNOWLEDGE_TRIPLET_EXTRACT,
)
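# Illustrative helper (assumed, not part of llama_index): parse the
# "(subject, predicate, object)" lines produced by the triplet prompt above.
# Predicates that themselves contain commas would need a more careful parser.
def _example_parse_triplets(response: str):
    triplets = []
    for line in response.splitlines():
        line = line.strip()
        if line.startswith("(") and line.endswith(")"):
            parts = [part.strip() for part in line[1:-1].split(",")]
            if len(parts) == 3:
                triplets.append(tuple(parts))
    return triplets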
############################################
# HYDE
############################################
HYDE_TMPL = (
"Please write a passage to answer the question\n"
"Try to include as many key details as possible.\n"
"\n"
"\n"
"{context_str}\n"
"\n"
"\n"
'Passage:"""\n'
)
DEFAULT_HYDE_PROMPT = PromptTemplate(HYDE_TMPL, prompt_type=PromptType.SUMMARY)
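# Sketch of the HyDE flow this prompt enables (illustrative; `llm_complete`
# and `embed` are assumed stand-ins for an LLM call and an embedding model):
# generate a hypothetical passage for the question, then embed that passage,
# rather than the raw query, for retrieval. Note the question is passed as
# `context_str`, a quirk of this template.
def _example_hyde_query_embedding(llm_complete, embed, query_str):
    passage = llm_complete(DEFAULT_HYDE_PROMPT.format(context_str=query_str))
    return embed(passage)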
############################################
# Simple Input
############################################
DEFAULT_SIMPLE_INPUT_TMPL = "{query_str}"
DEFAULT_SIMPLE_INPUT_PROMPT = PromptTemplate(
DEFAULT_SIMPLE_INPUT_TMPL, prompt_type=PromptType.SIMPLE_INPUT
)
############################################
# Pandas
############################################
DEFAULT_PANDAS_TMPL = (
"You are working with a pandas dataframe in Python.\n"
"The name of the dataframe is `df`.\n"
"This is the result of `print(df.head())`:\n"
"{df_str}\n\n"
"Follow these instructions:\n"
"{instruction_str}\n"
"Query: {query_str}\n\n"
"Expression:"
)
DEFAULT_PANDAS_PROMPT = PromptTemplate(
DEFAULT_PANDAS_TMPL, prompt_type=PromptType.PANDAS
)
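# Usage sketch (illustrative): render the pandas prompt from a live
# dataframe. In llama_index the query engine supplies `instruction_str`;
# the literal below is an assumed stand-in.
def _example_render_pandas_prompt() -> str:
    import pandas as pd

    df = pd.DataFrame(
        {"city": ["Toronto", "Tokyo"], "population": [2_930_000, 13_960_000]}
    )
    return DEFAULT_PANDAS_PROMPT.format(
        df_str=str(df.head()),
        instruction_str="Return a single pandas expression only.",
        query_str="Which city has the largest population?",
    )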
############################################
# JSON Path
############################################
DEFAULT_JSON_PATH_TMPL = (
"We have provided a JSON schema below:\n"
"{schema}\n"
"Given a task, respond with a JSON Path query that "
"can retrieve data from a JSON value that matches the schema.\n"
"Task: {query_str}\n"
"JSONPath: "
)
DEFAULT_JSON_PATH_PROMPT = PromptTemplate(
DEFAULT_JSON_PATH_TMPL, prompt_type=PromptType.JSON_PATH
)
############################################
# Choice Select
############################################
DEFAULT_CHOICE_SELECT_PROMPT_TMPL = (
"A list of documents is shown below. Each document has a number next to it along "
"with a summary of the document. A question is also provided. \n"
"Respond with the numbers of the documents "
"you should consult to answer the question, in order of relevance, as well \n"
"as the relevance score. The relevance score is a number from 1-10 based on "
"how relevant you think the document is to the question.\n"
"Do not include any documents that are not relevant to the question. \n"
"Example format: \n"
"Document 1:\n<summary of document 1>\n\n"
"Document 2:\n<summary of document 2>\n\n"
"...\n\n"
"Document 10:\n<summary of document 10>\n\n"
"Question: <question>\n"
"Answer:\n"
"Doc: 9, Relevance: 7\n"
"Doc: 3, Relevance: 4\n"
"Doc: 7, Relevance: 3\n\n"
"Let's try this now: \n\n"
"{context_str}\n"
"Question: {query_str}\n"
"Answer:\n"
)
DEFAULT_CHOICE_SELECT_PROMPT = PromptTemplate(
DEFAULT_CHOICE_SELECT_PROMPT_TMPL, prompt_type=PromptType.CHOICE_SELECT
)
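# Illustrative helper (assumed, not part of llama_index): parse the
# "Doc: <number>, Relevance: <score>" answer lines shown in the example above.
def _example_parse_choice_select(response: str):
    import re

    pattern = re.compile(r"Doc:\s*(\d+),\s*Relevance:\s*(\d+)")
    return [(int(doc), int(score)) for doc, score in pattern.findall(response)]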
############################################
# RankGPT Rerank template
############################################
RANKGPT_RERANK_PROMPT_TMPL = (
"Search Query: {query}. \nRank the {num} passages above "
"based on their relevance to the search query. The passages "
"should be listed in descending order using identifiers. "
"The most relevant passages should be listed first. "
"The output format should be [] > [], e.g., [1] > [2]. "
"Only response the ranking results, "
"do not say any word or explain."
)
RANKGPT_RERANK_PROMPT = PromptTemplate(
RANKGPT_RERANK_PROMPT_TMPL, prompt_type=PromptType.RANKGPT_RERANK
)
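# Illustrative helper (assumed, not part of llama_index): parse the
# "[1] > [2]" ranking format requested above into an ordered list of
# passage identifiers.
def _example_parse_rankgpt_ranking(response: str):
    import re

    return [int(num) for num in re.findall(r"\[(\d+)\]", response)]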
############################################
# JSONalyze Query Template
############################################
DEFAULT_JSONALYZE_PROMPT_TMPL = (
"You are given a table named: '{table_name}' with schema, "
"generate SQLite SQL query to answer the given question.\n"
"Table schema:\n"
"{table_schema}\n"
"Question: {question}\n\n"
"SQLQuery: "
)
DEFAULT_JSONALYZE_PROMPT = PromptTemplate(
DEFAULT_JSONALYZE_PROMPT_TMPL, prompt_type=PromptType.TEXT_TO_SQL
)