faiss_rag_enterprise/llama_index/query_engine/pandas/output_parser.py

"""Pandas output parser."""

import logging
from typing import Any, Dict, Optional

import numpy as np
import pandas as pd

from llama_index.exec_utils import safe_eval, safe_exec
from llama_index.output_parsers.base import ChainableOutputParser
from llama_index.output_parsers.utils import parse_code_markdown

logger = logging.getLogger(__name__)


def default_output_processor(
    output: str, df: pd.DataFrame, **output_kwargs: Any
) -> str:
    """Process outputs in a default manner."""
    import ast
    import sys
    import traceback

    if sys.version_info < (3, 9):
        logger.warning(
            "Python version must be >= 3.9 in order to use "
            "the default output processor, which executes "
            "the Python query. Instead, we will return the "
            "raw Python instructions as a string."
        )
        return output

    local_vars = {"df": df}

    output = parse_code_markdown(output, only_last=True)[0]

    # NOTE: inspired from langchain's tool
    # see langchain.tools.python.tool (PythonAstREPLTool)
    try:
        tree = ast.parse(output)
        module = ast.Module(tree.body[:-1], type_ignores=[])
        safe_exec(ast.unparse(module), {}, local_vars)  # type: ignore
        module_end = ast.Module(tree.body[-1:], type_ignores=[])
        module_end_str = ast.unparse(module_end)  # type: ignore
        if module_end_str.strip("'\"") != module_end_str:
            # if there's leading/trailing quotes, then we need to eval
            # string to get the actual expression
            module_end_str = safe_eval(module_end_str, {"np": np}, local_vars)
        try:
            # str(pd.dataframe) will truncate output by display.max_colwidth
            # set width temporarily to extract more text
            if "max_colwidth" in output_kwargs:
                pd.set_option("display.max_colwidth", output_kwargs["max_colwidth"])
            output_str = str(safe_eval(module_end_str, {"np": np}, local_vars))
            pd.reset_option("display.max_colwidth")
            return output_str

        except Exception:
            raise
    except Exception as e:
        err_string = (
            "There was an error running the output as Python code. "
            f"Error message: {e}"
        )
        traceback.print_exc()
        return err_string


class PandasInstructionParser(ChainableOutputParser):
    """Pandas instruction parser.

    This 'output parser' takes in pandas instructions (in Python code) and
    executes them to return an output.

    """

    def __init__(
        self, df: pd.DataFrame, output_kwargs: Optional[Dict[str, Any]] = None
    ) -> None:
        """Initialize params."""
        self.df = df
        self.output_kwargs = output_kwargs or {}

    def parse(self, output: str) -> Any:
        """Parse, validate, and correct errors programmatically."""
        return default_output_processor(output, self.df, **self.output_kwargs)