# faiss_rag_enterprise/llama_index/question_gen/openai_generator.py
from typing import List, Optional, Sequence, cast
from llama_index.llms.llm import LLM
from llama_index.llms.openai import OpenAI
from llama_index.program.openai_program import OpenAIPydanticProgram
from llama_index.prompts.mixin import PromptDictType
from llama_index.question_gen.prompts import build_tools_text
from llama_index.question_gen.types import (
BaseQuestionGenerator,
SubQuestion,
SubQuestionList,
)
from llama_index.schema import QueryBundle
from llama_index.tools.types import ToolMetadata
# Default OpenAI model; a function-calling-capable snapshot is required because
# the program extracts structured output via the SubQuestionList function call.
DEFAULT_MODEL_NAME = "gpt-3.5-turbo-0613"
# Prompt template for sub-question generation. Placeholders filled at call time:
#   {tools_str}  - JSON dict mapping tool name -> tool description
#   {query_str}  - the user's original question
DEFAULT_OPENAI_SUB_QUESTION_PROMPT_TMPL = """\
You are a world class state of the art agent.
You have access to multiple tools, each representing a different data source or API.
Each of the tools has a name and a description, formatted as a JSON dictionary.
The keys of the dictionary are the names of the tools and the values are the \
descriptions.
Your purpose is to help answer a complex user question by generating a list of sub \
questions that can be answered by the tools.
These are the guidelines you consider when completing your task:
* Be as specific as possible
* The sub questions should be relevant to the user question
* The sub questions should be answerable by the tools provided
* You can generate multiple sub questions for each tool
* Tools must be specified by their name, not their description
* You don't need to use a tool if you don't think it's relevant
Output the list of sub questions by calling the SubQuestionList function.
## Tools
```json
{tools_str}
```
## User Question
{query_str}
"""
class OpenAIQuestionGenerator(BaseQuestionGenerator):
    """Generate sub-questions using OpenAI function calling.

    Wraps an ``OpenAIPydanticProgram`` whose output class is
    ``SubQuestionList``; the LLM is instructed (via the prompt template) to
    emit sub-questions by calling the ``SubQuestionList`` function.
    """

    def __init__(
        self,
        program: OpenAIPydanticProgram,
        verbose: bool = False,
    ) -> None:
        """Store the pydantic program used to produce sub-questions.

        Args:
            program: Program producing a ``SubQuestionList`` from
                ``query_str`` and ``tools_str`` template variables.
            verbose: Whether to log intermediate output.
        """
        self._program = program
        self._verbose = verbose

    @classmethod
    def from_defaults(
        cls,
        prompt_template_str: str = DEFAULT_OPENAI_SUB_QUESTION_PROMPT_TMPL,
        llm: Optional[LLM] = None,
        verbose: bool = False,
    ) -> "OpenAIQuestionGenerator":
        """Build a generator with a default OpenAI LLM and prompt.

        Args:
            prompt_template_str: Prompt with ``{tools_str}`` and
                ``{query_str}`` placeholders.
            llm: LLM to use; defaults to ``OpenAI(model=DEFAULT_MODEL_NAME)``.
            verbose: Forwarded to the underlying program.
        """
        llm = llm or OpenAI(model=DEFAULT_MODEL_NAME)
        program = OpenAIPydanticProgram.from_defaults(
            output_cls=SubQuestionList,
            llm=llm,
            prompt_template_str=prompt_template_str,
            verbose=verbose,
        )
        return cls(program, verbose)

    def _get_prompts(self) -> PromptDictType:
        """Get prompts."""
        return {"question_gen_prompt": self._program.prompt}

    def _update_prompts(self, prompts: PromptDictType) -> None:
        """Update prompts."""
        if "question_gen_prompt" in prompts:
            self._program.prompt = prompts["question_gen_prompt"]

    def generate(
        self, tools: Sequence[ToolMetadata], query: QueryBundle
    ) -> List[SubQuestion]:
        """Synchronously generate sub-questions for ``query`` over ``tools``."""
        tools_str = build_tools_text(tools)
        query_str = query.query_str
        question_list = cast(
            SubQuestionList, self._program(query_str=query_str, tools_str=tools_str)
        )
        return question_list.items

    async def agenerate(
        self, tools: Sequence[ToolMetadata], query: QueryBundle
    ) -> List[SubQuestion]:
        """Asynchronously generate sub-questions for ``query`` over ``tools``."""
        tools_str = build_tools_text(tools)
        query_str = query.query_str
        # Mirror ``generate``: trust the program's declared output class via
        # ``cast`` rather than re-validating with a runtime ``assert`` (which
        # is stripped under ``python -O`` and was inconsistent with the sync
        # path).
        question_list = cast(
            SubQuestionList,
            await self._program.acall(query_str=query_str, tools_str=tools_str),
        )
        return question_list.items