"""Set of default prompts."""

from llama_index.prompts.base import PromptTemplate
from llama_index.prompts.prompt_type import PromptType

# Conventions used throughout this module:
#   * ``*_TMPL`` constants are raw format strings; names in curly braces are
#     the template variables to be filled in when the prompt is formatted.
#   * Each template is wrapped in a ``PromptTemplate`` tagged with a
#     ``PromptType``.
#   * Templates are built from adjacent string literals, which Python
#     concatenates with no separator. Every fragment must therefore carry its
#     own trailing space or "\n".

############################################
# Tree
############################################

# Variables: {context_str}
DEFAULT_SUMMARY_PROMPT_TMPL = (
    "Write a summary of the following. Try to use only the "
    "information provided. "
    "Try to include as many key details as possible.\n"
    "\n"
    "\n"
    "{context_str}\n"
    "\n"
    "\n"
    'SUMMARY:"""\n'
)

DEFAULT_SUMMARY_PROMPT = PromptTemplate(
    DEFAULT_SUMMARY_PROMPT_TMPL, prompt_type=PromptType.SUMMARY
)

# insert prompts
# Variables: {num_chunks}, {context_list}, {new_chunk_text}
DEFAULT_INSERT_PROMPT_TMPL = (
    "Context information is below. It is provided in a numbered list "
    "(1 to {num_chunks}), "
    "where each item in the list corresponds to a summary.\n"
    "---------------------\n"
    "{context_list}"
    # Leading "\n" keeps the closing separator on its own line, matching the
    # query prompts below.
    "\n---------------------\n"
    "Given the context information, here is a new piece of "
    "information: {new_chunk_text}\n"
    "Answer with the number corresponding to the summary that should be updated. "
    "The answer should be the number corresponding to the "
    "summary that is most relevant to the question.\n"
)
DEFAULT_INSERT_PROMPT = PromptTemplate(
    DEFAULT_INSERT_PROMPT_TMPL, prompt_type=PromptType.TREE_INSERT
)


# single choice
# Variables: {num_chunks}, {context_list}, {query_str}
DEFAULT_QUERY_PROMPT_TMPL = (
    "Some choices are given below. It is provided in a numbered list "
    "(1 to {num_chunks}), "
    "where each item in the list corresponds to a summary.\n"
    "---------------------\n"
    "{context_list}"
    "\n---------------------\n"
    "Using only the choices above and not prior knowledge, return "
    "the choice that is most relevant to the question: '{query_str}'\n"
    "Provide choice in the following format: 'ANSWER: ' and explain why "
    "this summary was selected in relation to the question.\n"
)
DEFAULT_QUERY_PROMPT = PromptTemplate(
    DEFAULT_QUERY_PROMPT_TMPL, prompt_type=PromptType.TREE_SELECT
)

# multiple choice
# Variables: {num_chunks}, {context_list}, {branching_factor}, {query_str}
DEFAULT_QUERY_PROMPT_MULTIPLE_TMPL = (
    "Some choices are given below. It is provided in a numbered "
    "list (1 to {num_chunks}), "
    "where each item in the list corresponds to a summary.\n"
    "---------------------\n"
    "{context_list}"
    "\n---------------------\n"
    "Using only the choices above and not prior knowledge, return the top choices "
    "(no more than {branching_factor}, ranked by most relevant to least) that "
    "are most relevant to the question: '{query_str}'\n"
    "Provide choices in the following format: 'ANSWER: ' and explain why "
    "these summaries were selected in relation to the question.\n"
)
DEFAULT_QUERY_PROMPT_MULTIPLE = PromptTemplate(
    DEFAULT_QUERY_PROMPT_MULTIPLE_TMPL, prompt_type=PromptType.TREE_SELECT_MULTIPLE
)


# Variables: {query_str}, {existing_answer}, {context_msg}
DEFAULT_REFINE_PROMPT_TMPL = (
    "The original query is as follows: {query_str}\n"
    "We have provided an existing answer: {existing_answer}\n"
    "We have the opportunity to refine the existing answer "
    "(only if needed) with some more context below.\n"
    "------------\n"
    "{context_msg}\n"
    "------------\n"
    "Given the new context, refine the original answer to better "
    "answer the query. "
    "If the context isn't useful, return the original answer.\n"
    "Refined Answer: "
)
DEFAULT_REFINE_PROMPT = PromptTemplate(
    DEFAULT_REFINE_PROMPT_TMPL, prompt_type=PromptType.REFINE
)


# Variables: {context_str}, {query_str}
DEFAULT_TEXT_QA_PROMPT_TMPL = (
    "Context information is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge, "
    "answer the query.\n"
    "Query: {query_str}\n"
    "Answer: "
)
DEFAULT_TEXT_QA_PROMPT = PromptTemplate(
    DEFAULT_TEXT_QA_PROMPT_TMPL, prompt_type=PromptType.QUESTION_ANSWER
)

# Variables: {context_str}, {query_str}
DEFAULT_TREE_SUMMARIZE_TMPL = (
    "Context information from multiple sources is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the information from multiple sources and not prior knowledge, "
    "answer the query.\n"
    "Query: {query_str}\n"
    "Answer: "
)
DEFAULT_TREE_SUMMARIZE_PROMPT = PromptTemplate(
    DEFAULT_TREE_SUMMARIZE_TMPL, prompt_type=PromptType.SUMMARY
)


############################################
# Keyword Table
############################################

# Variables: {max_keywords}, {text}
DEFAULT_KEYWORD_EXTRACT_TEMPLATE_TMPL = (
    "Some text is provided below. Given the text, extract up to {max_keywords} "
    # Trailing "\n" so the separator starts on its own line, as in the
    # query-keyword variant below.
    "keywords from the text. Avoid stopwords.\n"
    "---------------------\n"
    "{text}\n"
    "---------------------\n"
    "Provide keywords in the following comma-separated format: 'KEYWORDS: '\n"
)
DEFAULT_KEYWORD_EXTRACT_TEMPLATE = PromptTemplate(
    DEFAULT_KEYWORD_EXTRACT_TEMPLATE_TMPL, prompt_type=PromptType.KEYWORD_EXTRACT
)


# NOTE: the keyword extraction for queries can be the same as
# the one used to build the index, but here we tune it to see if performance is better.
# Variables: {max_keywords}, {question}
DEFAULT_QUERY_KEYWORD_EXTRACT_TEMPLATE_TMPL = (
    "A question is provided below. Given the question, extract up to {max_keywords} "
    "keywords from the text. Focus on extracting the keywords that we can use "
    "to best lookup answers to the question. Avoid stopwords.\n"
    "---------------------\n"
    "{question}\n"
    "---------------------\n"
    "Provide keywords in the following comma-separated format: 'KEYWORDS: '\n"
)
DEFAULT_QUERY_KEYWORD_EXTRACT_TEMPLATE = PromptTemplate(
    DEFAULT_QUERY_KEYWORD_EXTRACT_TEMPLATE_TMPL,
    prompt_type=PromptType.QUERY_KEYWORD_EXTRACT,
)


############################################
# Structured Store
############################################

# Variables: {text}, {schema}
DEFAULT_SCHEMA_EXTRACT_TMPL = (
    "We wish to extract relevant fields from an unstructured text chunk into "
    "a structured schema. We first provide the unstructured text, and then "
    # "\n" (not a space) so the section header sits on its own line.
    "we provide the schema that we wish to extract.\n"
    "-----------text-----------\n"
    "{text}\n"
    "-----------schema-----------\n"
    "{schema}\n"
    "---------------------\n"
    "Given the text and schema, extract the relevant fields from the text in "
    "the following format: "
    "field1: \nfield2: \n...\n\n"
    # Trailing space separates this sentence from the next fragment.
    "If a field is not present in the text, don't include it in the output. "
    "If no fields are present in the text, return a blank string.\n"
    "Fields: "
)
DEFAULT_SCHEMA_EXTRACT_PROMPT = PromptTemplate(
    DEFAULT_SCHEMA_EXTRACT_TMPL, prompt_type=PromptType.SCHEMA_EXTRACT
)

# NOTE: taken from langchain and adapted
# https://github.com/langchain-ai/langchain/blob/v0.0.303/libs/langchain/langchain/chains/sql_database/prompt.py
# Variables: {dialect}, {schema}, {query_str}
DEFAULT_TEXT_TO_SQL_TMPL = (
    "Given an input question, first create a syntactically correct {dialect} "
    "query to run, then look at the results of the query and return the answer. "
    "You can order the results by a relevant column to return the most "
    "interesting examples in the database.\n\n"
    "Never query for all the columns from a specific table, only ask for a "
    "few relevant columns given the question.\n\n"
    "Pay attention to use only the column names that you can see in the schema "
    "description. "
    "Be careful to not query for columns that do not exist. "
    "Pay attention to which column is in which table. "
    "Also, qualify column names with the table name when needed. "
    "You are required to use the following format, each taking one line:\n\n"
    "Question: Question here\n"
    "SQLQuery: SQL Query to run\n"
    "SQLResult: Result of the SQLQuery\n"
    "Answer: Final answer here\n\n"
    "Only use tables listed below.\n"
    "{schema}\n\n"
    "Question: {query_str}\n"
    "SQLQuery: "
)

DEFAULT_TEXT_TO_SQL_PROMPT = PromptTemplate(
    DEFAULT_TEXT_TO_SQL_TMPL,
    prompt_type=PromptType.TEXT_TO_SQL,
)

# Variables: {dialect}, {schema}, {query_str}
# A trailing backslash inside the literal joins the next source line without
# emitting a newline; lines without one end in a real newline.
# NOTE(review): paragraph breaks below mirror DEFAULT_TEXT_TO_SQL_TMPL; confirm
# the exact blank-line layout against the upstream file.
DEFAULT_TEXT_TO_SQL_PGVECTOR_TMPL = """\
Given an input question, first create a syntactically correct {dialect} \
query to run, then look at the results of the query and return the answer. \
You can order the results by a relevant column to return the most \
interesting examples in the database.

Pay attention to use only the column names that you can see in the schema \
description. Be careful to not query for columns that do not exist. \
Pay attention to which column is in which table. Also, qualify column names \
with the table name when needed.

IMPORTANT NOTE: you can use specialized pgvector syntax (`<->`) to do nearest \
neighbors/semantic search to a given vector from an embeddings column in the table. \
The embeddings value for a given row typically represents the semantic meaning of that row. \
The vector represents an embedding representation \
of the question, given below. Do NOT fill in the vector values directly, but rather specify a \
`[query_vector]` placeholder. For instance, some select statement examples below \
(the name of the embeddings column is `embedding`):
SELECT * FROM items ORDER BY embedding <-> '[query_vector]' LIMIT 5;
SELECT * FROM items WHERE id != 1 ORDER BY embedding <-> (SELECT embedding FROM items WHERE id = 1) LIMIT 5;
SELECT * FROM items WHERE embedding <-> '[query_vector]' < 5;

You are required to use the following format, \
each taking one line:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result of the SQLQuery
Answer: Final answer here

Only use tables listed below.
{schema}

Question: {query_str}
SQLQuery: \
"""

DEFAULT_TEXT_TO_SQL_PGVECTOR_PROMPT = PromptTemplate(
    DEFAULT_TEXT_TO_SQL_PGVECTOR_TMPL,
    prompt_type=PromptType.TEXT_TO_SQL,
)


# NOTE: by partially filling schema, we can reduce to a QuestionAnswer prompt
# that we can feed to our table
# Variables: {schema}, {context_str}, {query_str}
DEFAULT_TABLE_CONTEXT_TMPL = (
    # Each "below." sentence ends with "\n" so the separator / payload that
    # follows starts on its own line.
    "We have provided a table schema below.\n"
    "---------------------\n"
    "{schema}\n"
    "---------------------\n"
    "We have also provided context information below.\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the context information and the table schema, "
    "give a response to the following task: {query_str}"
)

# Plain task text (no template variables).
DEFAULT_TABLE_CONTEXT_QUERY = (
    "Provide a high-level description of the table, "
    "as well as a description of each column in the table. "
    "Provide answers in the following format:\n"
    "TableDescription: \n"
    "Column1Description: \n"
    "Column2Description: \n"
    "...\n\n"
)

DEFAULT_TABLE_CONTEXT_PROMPT = PromptTemplate(
    DEFAULT_TABLE_CONTEXT_TMPL, prompt_type=PromptType.TABLE_CONTEXT
)

# NOTE: by partially filling schema, we can reduce to a refine prompt
# that we can feed to our table
# Variables: {schema}, {context_msg}, {query_str}, {existing_answer}
DEFAULT_REFINE_TABLE_CONTEXT_TMPL = (
    "We have provided a table schema below.\n"
    "---------------------\n"
    "{schema}\n"
    "---------------------\n"
    "We have also provided some context information below.\n"
    "{context_msg}\n"
    "---------------------\n"
    "Given the context information and the table schema, "
    "give a response to the following task: {query_str}\n"
    "We have provided an existing answer: {existing_answer}\n"
    "Given the new context, refine the original answer to better "
    "answer the question. "
    "If the context isn't useful, return the original answer."
)
DEFAULT_REFINE_TABLE_CONTEXT_PROMPT = PromptTemplate(
    DEFAULT_REFINE_TABLE_CONTEXT_TMPL, prompt_type=PromptType.TABLE_CONTEXT
)


############################################
# Knowledge-Graph Table
############################################

# Variables: {max_knowledge_triplets}, {text}
DEFAULT_KG_TRIPLET_EXTRACT_TMPL = (
    "Some text is provided below. Given the text, extract up to "
    "{max_knowledge_triplets} "
    "knowledge triplets in the form of (subject, predicate, object). Avoid stopwords.\n"
    "---------------------\n"
    # The first few-shot example is newline-separated like the second one so
    # "Text:" and "Triplets:" each start a line.
    "Example:\n"
    "Text: Alice is Bob's mother.\n"
    "Triplets:\n(Alice, is mother of, Bob)\n"
    "Text: Philz is a coffee shop founded in Berkeley in 1982.\n"
    "Triplets:\n"
    "(Philz, is, coffee shop)\n"
    "(Philz, founded in, Berkeley)\n"
    "(Philz, founded in, 1982)\n"
    "---------------------\n"
    "Text: {text}\n"
    "Triplets:\n"
)
DEFAULT_KG_TRIPLET_EXTRACT_PROMPT = PromptTemplate(
    DEFAULT_KG_TRIPLET_EXTRACT_TMPL,
    prompt_type=PromptType.KNOWLEDGE_TRIPLET_EXTRACT,
)

############################################
# HYDE
############################################

# Variables: {context_str}
HYDE_TMPL = (
    "Please write a passage to answer the question\n"
    "Try to include as many key details as possible.\n"
    "\n"
    "\n"
    "{context_str}\n"
    "\n"
    "\n"
    'Passage:"""\n'
)

DEFAULT_HYDE_PROMPT = PromptTemplate(HYDE_TMPL, prompt_type=PromptType.SUMMARY)


############################################
# Simple Input
############################################

# Pass-through template: the prompt is exactly {query_str}.
DEFAULT_SIMPLE_INPUT_TMPL = "{query_str}"
DEFAULT_SIMPLE_INPUT_PROMPT = PromptTemplate(
    DEFAULT_SIMPLE_INPUT_TMPL, prompt_type=PromptType.SIMPLE_INPUT
)


############################################
# Pandas
############################################

# Variables: {df_str}, {instruction_str}, {query_str}
DEFAULT_PANDAS_TMPL = (
    "You are working with a pandas dataframe in Python.\n"
    "The name of the dataframe is `df`.\n"
    "This is the result of `print(df.head())`:\n"
    "{df_str}\n\n"
    "Follow these instructions:\n"
    "{instruction_str}\n"
    "Query: {query_str}\n\n"
    "Expression:"
)

DEFAULT_PANDAS_PROMPT = PromptTemplate(
    DEFAULT_PANDAS_TMPL, prompt_type=PromptType.PANDAS
)


############################################
# JSON Path
############################################

# Variables: {schema}, {query_str}
DEFAULT_JSON_PATH_TMPL = (
    "We have provided a JSON schema below:\n"
    "{schema}\n"
    "Given a task, respond with a JSON Path query that "
    "can retrieve data from a JSON value that matches the schema.\n"
    "Task: {query_str}\n"
    "JSONPath: "
)

DEFAULT_JSON_PATH_PROMPT = PromptTemplate(
    DEFAULT_JSON_PATH_TMPL, prompt_type=PromptType.JSON_PATH
)


############################################
# Choice Select
############################################

# Variables: {context_str}, {query_str}
DEFAULT_CHOICE_SELECT_PROMPT_TMPL = (
    "A list of documents is shown below. Each document has a number next to it along "
    "with a summary of the document. A question is also provided. \n"
    "Respond with the numbers of the documents "
    "you should consult to answer the question, in order of relevance, as well \n"
    "as the relevance score. The relevance score is a number from 1-10 based on "
    "how relevant you think the document is to the question.\n"
    "Do not include any documents that are not relevant to the question. \n"
    "Example format: \n"
    "Document 1:\n\n\n"
    "Document 2:\n\n\n"
    "...\n\n"
    "Document 10:\n\n\n"
    "Question: \n"
    "Answer:\n"
    "Doc: 9, Relevance: 7\n"
    "Doc: 3, Relevance: 4\n"
    "Doc: 7, Relevance: 3\n\n"
    "Let's try this now: \n\n"
    "{context_str}\n"
    "Question: {query_str}\n"
    "Answer:\n"
)
DEFAULT_CHOICE_SELECT_PROMPT = PromptTemplate(
    DEFAULT_CHOICE_SELECT_PROMPT_TMPL, prompt_type=PromptType.CHOICE_SELECT
)


############################################
# RankGPT Rerank template
############################################

# Variables: {query}, {num}
RANKGPT_RERANK_PROMPT_TMPL = (
    "Search Query: {query}. \nRank the {num} passages above "
    "based on their relevance to the search query. The passages "
    "should be listed in descending order using identifiers. "
    "The most relevant passages should be listed first. "
    "The output format should be [] > [], e.g., [1] > [2]. "
    "Only response the ranking results, "
    "do not say any word or explain."
)
RANKGPT_RERANK_PROMPT = PromptTemplate(
    RANKGPT_RERANK_PROMPT_TMPL, prompt_type=PromptType.RANKGPT_RERANK
)


############################################
# JSONalyze Query Template
############################################

# Variables: {table_name}, {table_schema}, {question}
DEFAULT_JSONALYZE_PROMPT_TMPL = (
    "You are given a table named: '{table_name}' with schema, "
    "generate SQLite SQL query to answer the given question.\n"
    "Table schema:\n"
    "{table_schema}\n"
    "Question: {question}\n\n"
    "SQLQuery: "
)

DEFAULT_JSONALYZE_PROMPT = PromptTemplate(
    DEFAULT_JSONALYZE_PROMPT_TMPL, prompt_type=PromptType.TEXT_TO_SQL
)