87 lines
2.9 KiB
Python
87 lines
2.9 KiB
Python
"""Pandas output parser."""
|
|
|
|
import logging
|
|
from typing import Any, Dict, Optional
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
from llama_index.exec_utils import safe_eval, safe_exec
|
|
from llama_index.output_parsers.base import ChainableOutputParser
|
|
from llama_index.output_parsers.utils import parse_code_markdown
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def default_output_processor(
    output: str, df: pd.DataFrame, **output_kwargs: Any
) -> str:
    """Execute the pandas code found in ``output`` and return its result as text.

    The last code block extracted by ``parse_code_markdown`` is parsed with
    ``ast``. Every statement except the last one is executed with ``df``
    available as a local variable; the last statement is then evaluated as an
    expression (with ``np`` available as a global) and its value is converted
    with ``str``.

    Args:
        output: Text containing the Python/pandas instructions to run.
        df: DataFrame exposed to the executed code under the name ``df``.
        **output_kwargs: Optional settings. Only ``max_colwidth`` is read; when
            present it temporarily overrides pandas' ``display.max_colwidth``
            while the result is being stringified.

    Returns:
        The stringified result of the final expression. If parsing or running
        the code raises, a message describing the error is returned instead.
        On Python < 3.9 the raw ``output`` is returned unchanged, because
        ``ast.unparse`` is not available there.
    """
    import ast
    import sys
    import traceback

    if sys.version_info < (3, 9):
        logger.warning(
            "Python version must be >= 3.9 in order to use "
            "the default output processor, which executes "
            "the Python query. Instead, we will return the "
            "raw Python instructions as a string."
        )
        return output

    local_vars = {"df": df}

    output = parse_code_markdown(output, only_last=True)[0]

    # SECURITY NOTE(review): the code executed below is whatever text was
    # passed in (typically model-generated). Any sandboxing is entirely up to
    # safe_exec / safe_eval, whose implementation is not visible here.

    # NOTE: inspired from langchain's tool
    # see langchain.tools.python.tool (PythonAstREPLTool)
    try:
        tree = ast.parse(output)

        # Run everything but the last statement for its side effects on
        # local_vars (assignments, imports, ...).
        module = ast.Module(tree.body[:-1], type_ignores=[])
        safe_exec(ast.unparse(module), {}, local_vars)  # type: ignore

        # The last statement is the expression whose value we report.
        module_end = ast.Module(tree.body[-1:], type_ignores=[])
        module_end_str = ast.unparse(module_end)  # type: ignore
        if module_end_str.strip("'\"") != module_end_str:
            # if there's leading/trailing quotes, then we need to eval
            # string to get the actual expression
            module_end_str = safe_eval(module_end_str, {"np": np}, local_vars)

        # str(pd.DataFrame) truncates cell text at display.max_colwidth, so
        # widen it while stringifying when the caller asked for it.
        # option_context restores the *previous* value on exit, including when
        # the evaluation raises, so the process-wide pandas option is never
        # left modified and a caller's own setting is never reset to default.
        if "max_colwidth" in output_kwargs:
            with pd.option_context(
                "display.max_colwidth", output_kwargs["max_colwidth"]
            ):
                return str(safe_eval(module_end_str, {"np": np}, local_vars))
        return str(safe_eval(module_end_str, {"np": np}, local_vars))
    except Exception as e:
        # Best-effort contract: report the failure as text instead of raising.
        err_string = (
            "There was an error running the output as Python code. "
            f"Error message: {e}"
        )
        traceback.print_exc()
        return err_string
|
|
|
|
|
|
class PandasInstructionParser(ChainableOutputParser):
    """Output parser that runs pandas instructions against a stored DataFrame.

    The text given to ``parse`` is treated as Python/pandas code. It is handed
    to ``default_output_processor`` together with the DataFrame supplied at
    construction time, and whatever that function returns is the parse result.
    """

    def __init__(
        self, df: pd.DataFrame, output_kwargs: Optional[Dict[str, Any]] = None
    ) -> None:
        """Keep the DataFrame and the optional processor keyword arguments."""
        # A missing (or empty) mapping is normalised to a fresh empty dict.
        self.output_kwargs = output_kwargs if output_kwargs else {}
        self.df = df

    def parse(self, output: str) -> Any:
        """Run the instructions in ``output`` and return the processed result."""
        extra_kwargs = self.output_kwargs
        return default_output_processor(output, self.df, **extra_kwargs)
|