# faiss_rag_enterprise/llama_index/query_engine/pandas/output_parser.py
#
# 87 lines
# 2.9 KiB
# Python

"""Pandas output parser."""
import logging
from typing import Any, Dict, Optional
import numpy as np
import pandas as pd
from llama_index.exec_utils import safe_eval, safe_exec
from llama_index.output_parsers.base import ChainableOutputParser
from llama_index.output_parsers.utils import parse_code_markdown
logger = logging.getLogger(__name__)
def default_output_processor(
    output: str, df: pd.DataFrame, **output_kwargs: Any
) -> str:
    """Execute pandas instructions against ``df`` and return the result as text.

    The code is extracted from ``output`` with ``parse_code_markdown``. Every
    statement except the last is run with ``safe_exec``; the last statement is
    evaluated with ``safe_eval`` and its value is converted with ``str()``.

    Args:
        output: Text containing the Python instructions to run.
        df: Dataframe exposed to the executed code under the name ``df``.
        **output_kwargs: Optional settings. ``max_colwidth`` temporarily
            overrides pandas' ``display.max_colwidth`` while the result is
            rendered, so long cell values are not truncated.

    Returns:
        The stringified result of the last statement. On Python < 3.9 the
        raw ``output`` is returned unchanged. If parsing or running the code
        raises, a message describing the error is returned instead.
    """
    import ast
    import contextlib
    import sys
    import traceback

    if sys.version_info < (3, 9):
        logger.warning(
            "Python version must be >= 3.9 in order to use "
            "the default output processor, which executes "
            "the Python query. Instead, we will return the "
            "raw Python instructions as a string."
        )
        return output

    local_vars = {"df": df}

    # NOTE(review): the return type of parse_code_markdown is not visible
    # from this file. Accept either a bare string or a sequence of strings so
    # a string result is not silently cut down to its first character.
    parsed = parse_code_markdown(output, only_last=True)
    output = parsed if isinstance(parsed, str) else parsed[0]

    # NOTE: inspired from langchain's tool
    # see langchain.tools.python.tool (PythonAstREPLTool)
    # SECURITY: this runs code taken from ``output``. Any sandboxing is
    # whatever safe_exec / safe_eval provide -- confirm before exposing this
    # to untrusted input.
    try:
        tree = ast.parse(output)
        module = ast.Module(tree.body[:-1], type_ignores=[])
        safe_exec(ast.unparse(module), {}, local_vars)  # type: ignore
        module_end = ast.Module(tree.body[-1:], type_ignores=[])
        module_end_str = ast.unparse(module_end)  # type: ignore
        if module_end_str.strip("'\"") != module_end_str:
            # if there's leading/trailing quotes, then we need to eval
            # string to get the actual expression
            module_end_str = safe_eval(module_end_str, {"np": np}, local_vars)

        # str(pd.DataFrame) truncates cells to display.max_colwidth, so widen
        # it only for the duration of the rendering. option_context restores
        # the caller's previous value even when evaluation raises, and the
        # global option is left untouched when no override was requested.
        if "max_colwidth" in output_kwargs:
            width_override: Any = pd.option_context(
                "display.max_colwidth", output_kwargs["max_colwidth"]
            )
        else:
            width_override = contextlib.nullcontext()
        with width_override:
            return str(safe_eval(module_end_str, {"np": np}, local_vars))
    except Exception as e:
        err_string = (
            "There was an error running the output as Python code. "
            f"Error message: {e}"
        )
        traceback.print_exc()
        return err_string
class PandasInstructionParser(ChainableOutputParser):
    """Output parser that runs pandas instructions.

    The text given to ``parse`` is treated as Python code operating on the
    stored dataframe; it is executed and the outcome is handed back.
    """

    def __init__(
        self, df: pd.DataFrame, output_kwargs: Optional[Dict[str, Any]] = None
    ) -> None:
        """Keep the dataframe and the optional processor settings."""
        self.df = df
        # Fall back to a fresh dict when no (or empty) settings are supplied.
        self.output_kwargs = output_kwargs if output_kwargs else {}

    def parse(self, output: str) -> Any:
        """Run ``output`` through the default processor and return its result."""
        frame = self.df
        settings = self.output_kwargs
        return default_output_processor(output, frame, **settings)