Mirror of https://github.com/hwchase17/langchain.git (synced 2025-06-29 01:48:57 +00:00)
Added support for a Pandas DataFrame OutputParser (#13257)
**Description:** Added support for a Pandas DataFrame OutputParser with format instructions, along with unit tests and a demo notebook. Namely, we've added the ability to request data from a DataFrame, have the LLM parse the request, and then use that request to retrieve a well-formatted response.

Within LangChain, it seamlessly integrates with language models like OpenAI's `text-davinci-003`, facilitating streamlined interaction using the format instructions (just like the other output parsers).

This parser structures its requests as `<operation/column/row>[<optional_array_params>]`. The instructions detail permissible operations, valid columns, and array formats, ensuring clarity and adherence to the required format. For example:

- When the LLM receives the input: "Retrieve the mean of `num_legs` from rows 1 to 3."
- The provided format instructions guide the LLM to structure the request as: "mean:num_legs[1..3]".

The parser processes this formatted request, leveraging the LLM's understanding to extract the mean of `num_legs` from rows 1 to 3 within the Pandas DataFrame.

This integration allows users to communicate requests naturally, with the LLM transforming these instructions into structured commands understood by the `PandasDataFrameOutputParser`. The format instructions act as a bridge between natural language queries and precise DataFrame operations, optimizing communication and data retrieval.

**Issue:**
- https://github.com/langchain-ai/langchain/issues/11532

**Dependencies:** No additional dependencies :)

**Tag maintainer:** @baskaryan

**Twitter handle:** No need. :)

---------

Co-authored-by: Wasee Alam <waseealam@protonmail.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
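For reference, a minimal end-to-end sketch of the flow described above, mirroring the demo notebook added in this PR (the DataFrame and query are illustrative):

```python
import pandas as pd
from langchain.llms import OpenAI
from langchain.output_parsers import PandasDataFrameOutputParser
from langchain.prompts import PromptTemplate

# Illustrative DataFrame, taken from the demo notebook below.
df = pd.DataFrame({"num_legs": [2, 4, 8, 0], "num_wings": [2, 0, 0, 0]})
parser = PandasDataFrameOutputParser(dataframe=df)

# Inject the parser's format instructions into the prompt.
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

model = OpenAI(model_name="text-davinci-003", temperature=0.5)
_input = prompt.format_prompt(query="Retrieve the mean of num_legs from rows 1 to 3.")
output = model(_input.to_string())  # expected to be something like "mean:num_legs[1..3]"
print(parser.parse(output))         # e.g. {'mean': 4.0}
```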
This commit is contained in:
parent 235bdb9fa7
commit 41a4c06a94

229  docs/docs/modules/model_io/output_parsers/pandas_dataframe.ipynb  (Normal file)
@@ -0,0 +1,229 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Pandas DataFrame Parser\n",
    "\n",
    "A Pandas DataFrame is a popular data structure in the Python programming language, commonly used for data manipulation and analysis. It provides a comprehensive set of tools for working with structured data, making it a versatile option for tasks such as data cleaning, transformation, and analysis.\n",
    "\n",
    "This output parser allows users to specify an arbitrary Pandas DataFrame and query LLMs for data in the form of a formatted dictionary that extracts data from the corresponding DataFrame. Keep in mind that large language models are leaky abstractions! You'll have to use an LLM with sufficient capacity to generate a well-formed query as per the defined format instructions.\n",
    "\n",
    "Use Pandas' DataFrame object to declare the DataFrame you wish to perform queries on."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pprint\n",
    "from typing import Any, Dict\n",
    "\n",
    "import pandas as pd\n",
    "from langchain.llms import OpenAI\n",
    "from langchain.output_parsers import PandasDataFrameOutputParser\n",
    "from langchain.prompts import PromptTemplate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_name = \"text-davinci-003\"\n",
    "temperature = 0.5\n",
    "model = OpenAI(model_name=model_name, temperature=temperature)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Solely for documentation purposes.\n",
    "def format_parser_output(parser_output: Dict[str, Any]) -> None:\n",
    "    for key in parser_output.keys():\n",
    "        parser_output[key] = parser_output[key].to_dict()\n",
    "    return pprint.PrettyPrinter(width=4, compact=True).pprint(parser_output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define your desired Pandas DataFrame.\n",
    "df = pd.DataFrame(\n",
    "    {\n",
    "        \"num_legs\": [2, 4, 8, 0],\n",
    "        \"num_wings\": [2, 0, 0, 0],\n",
    "        \"num_specimen_seen\": [10, 2, 1, 8],\n",
    "    }\n",
    ")\n",
    "\n",
    "# Set up a parser + inject instructions into the prompt template.\n",
    "parser = PandasDataFrameOutputParser(dataframe=df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LLM Output: column:num_wings\n",
      "{'num_wings': {0: 2,\n",
      "               1: 0,\n",
      "               2: 0,\n",
      "               3: 0}}\n"
     ]
    }
   ],
   "source": [
    "# Here's an example of a column operation being performed.\n",
    "df_query = \"Retrieve the num_wings column.\"\n",
    "\n",
    "# Set up the prompt.\n",
    "prompt = PromptTemplate(\n",
    "    template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
    "    input_variables=[\"query\"],\n",
    "    partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
    ")\n",
    "\n",
    "_input = prompt.format_prompt(query=df_query)\n",
    "output = model(_input.to_string())\n",
    "print(\"LLM Output:\", output)\n",
    "parser_output = parser.parse(output)\n",
    "\n",
    "format_parser_output(parser_output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LLM Output: row:1\n",
      "{'1': {'num_legs': 4,\n",
      "       'num_specimen_seen': 2,\n",
      "       'num_wings': 0}}\n"
     ]
    }
   ],
   "source": [
    "# Here's an example of a row operation being performed.\n",
    "df_query = \"Retrieve the first row.\"\n",
    "\n",
    "# Set up the prompt.\n",
    "prompt = PromptTemplate(\n",
    "    template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
    "    input_variables=[\"query\"],\n",
    "    partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
    ")\n",
    "\n",
    "_input = prompt.format_prompt(query=df_query)\n",
    "output = model(_input.to_string())\n",
    "print(\"LLM Output:\", output)\n",
    "parser_output = parser.parse(output)\n",
    "\n",
    "format_parser_output(parser_output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LLM Output: mean:num_legs[1..3]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'mean': 4.0}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Here's an example of a random Pandas DataFrame operation limiting the number of rows\n",
    "df_query = \"Retrieve the average of the num_legs column from rows 1 to 3.\"\n",
    "\n",
    "# Set up the prompt.\n",
    "prompt = PromptTemplate(\n",
    "    template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
    "    input_variables=[\"query\"],\n",
    "    partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
    ")\n",
    "\n",
    "_input = prompt.format_prompt(query=df_query)\n",
    "output = model(_input.to_string())\n",
    "print(\"LLM Output:\", output)\n",
    "parser.parse(output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Here's an example of a poorly formatted query\n",
    "df_query = \"Retrieve the mean of the num_fingers column.\"\n",
    "\n",
    "# Set up the prompt.\n",
    "prompt = PromptTemplate(\n",
    "    template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
    "    input_variables=[\"query\"],\n",
    "    partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
    ")\n",
    "\n",
    "_input = prompt.format_prompt(query=df_query)\n",
    "output = model(_input.to_string())  # Expected Output: \"Invalid column: num_fingers\".\n",
    "print(\"LLM Output:\", output)\n",
    "parser.parse(output)  # Expected Output: Will raise an OutputParserException."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
@@ -28,6 +28,7 @@ from langchain.output_parsers.openai_tools import (
    JsonOutputToolsParser,
    PydanticToolsParser,
)
from langchain.output_parsers.pandas_dataframe import PandasDataFrameOutputParser
from langchain.output_parsers.pydantic import PydanticOutputParser
from langchain.output_parsers.rail_parser import GuardrailsOutputParser
from langchain.output_parsers.regex import RegexParser

@@ -47,6 +48,7 @@ __all__ = [
    "MarkdownListOutputParser",
    "NumberedListOutputParser",
    "OutputFixingParser",
    "PandasDataFrameOutputParser",
    "PydanticOutputParser",
    "RegexDictParser",
    "RegexParser",
@@ -41,3 +41,25 @@ Here are the output tags:
```
{tags}
```"""


PANDAS_DATAFRAME_FORMAT_INSTRUCTIONS = """The output should be a string formatted as the operation, followed by a colon, followed by the column or row to be queried on, followed by optional array parameters.
1. The column names are limited to the possible columns below.
2. Arrays must either be a comma-separated list of numbers formatted as [1,3,5], or a range of numbers formatted as [0..4].
3. Remember that arrays are optional and not necessarily required.
4. If the column is not in the possible columns or the operation is not a valid Pandas DataFrame operation, return why it is invalid as a sentence starting with either "Invalid column" or "Invalid operation".

As examples of the format:
1. String "column:num_legs" is a well-formatted instance which gets the column num_legs, where num_legs is a possible column.
2. String "row:1" is a well-formatted instance which gets row 1.
3. String "column:num_legs[1,2]" is a well-formatted instance which gets the column num_legs for rows 1 and 2, where num_legs is a possible column.
4. String "row:1[num_legs]" is a well-formatted instance which gets row 1, but for just column num_legs, where num_legs is a possible column.
5. String "mean:num_legs[1..3]" is a well-formatted instance which takes the mean of num_legs from rows 1 to 3, where num_legs is a possible column and mean is a valid Pandas DataFrame operation.
6. String "do_something:num_legs" is a badly-formatted instance, where do_something is not a valid Pandas DataFrame operation.
7. String "mean:invalid_col" is a badly-formatted instance, where invalid_col is not a possible column.

Here are the possible columns:
```
{columns}
```
"""
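To make the grammar above concrete, here is a small sketch (not part of the diff) showing roughly which pandas calls the well-formed strings correspond to, using an illustrative DataFrame with a num_legs column:

```python
import pandas as pd

# Illustrative DataFrame; any DataFrame with a num_legs column would do.
df = pd.DataFrame({"num_legs": [2, 4, 8, 0], "num_wings": [2, 0, 0, 0]})

# "column:num_legs"      -> the whole column
print(df["num_legs"])
# "row:1"                -> a single row
print(df.iloc[1])
# "column:num_legs[1,2]" -> the column restricted to rows 1 and 2
print(df["num_legs"].iloc[[1, 2]])
# "mean:num_legs[1..3]"  -> an aggregation over rows 1 through 3 (inclusive)
print(df["num_legs"].iloc[1:4].mean())  # 4.0
```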
157  libs/langchain/langchain/output_parsers/pandas_dataframe.py  (Normal file)
@@ -0,0 +1,157 @@
import re
from typing import Any, Dict, List, Tuple, Union

from langchain.output_parsers.format_instructions import (
    PANDAS_DATAFRAME_FORMAT_INSTRUCTIONS,
)
from langchain.pydantic_v1 import validator
from langchain.schema import BaseOutputParser, OutputParserException


class PandasDataFrameOutputParser(BaseOutputParser):
    """Parse an output using Pandas DataFrame format."""

    """The Pandas DataFrame to parse."""
    dataframe: Any

    @validator("dataframe")
    def validate_dataframe(cls, val: Any) -> Any:
        import pandas as pd

        if issubclass(type(val), pd.DataFrame):
            return val
        if pd.DataFrame(val).empty:
            raise ValueError("DataFrame cannot be empty.")

        raise TypeError(
            "Wrong type for 'dataframe', must be a subclass \
                of Pandas DataFrame (pd.DataFrame)"
        )

    def parse_array(
        self, array: str, original_request_params: str
    ) -> Tuple[List[Union[int, str]], str]:
        parsed_array: List[Union[int, str]] = []

        # Check if the format is [1,3,5]
        if re.match(r"\[\d+(,\s*\d+)*\]", array):
            parsed_array = [int(i) for i in re.findall(r"\d+", array)]
        # Check if the format is [1..5]
        elif re.match(r"\[(\d+)\.\.(\d+)\]", array):
            match = re.match(r"\[(\d+)\.\.(\d+)\]", array)
            if match:
                start, end = map(int, match.groups())
                parsed_array = list(range(start, end + 1))
            else:
                raise OutputParserException(
                    f"Unable to parse the array provided in {array}. \
                        Please check the format instructions."
                )
        # Check if the format is ["column_name"]
        elif re.match(r"\[[a-zA-Z0-9_]+(?:,[a-zA-Z0-9_]+)*\]", array):
            match = re.match(r"\[[a-zA-Z0-9_]+(?:,[a-zA-Z0-9_]+)*\]", array)
            if match:
                parsed_array = list(map(str, match.group().strip("[]").split(",")))
            else:
                raise OutputParserException(
                    f"Unable to parse the array provided in {array}. \
                        Please check the format instructions."
                )

        # Validate the array
        if not parsed_array:
            raise OutputParserException(
                f"Invalid array format in '{original_request_params}'. \
                    Please check the format instructions."
            )
        elif (
            isinstance(parsed_array[0], int)
            and parsed_array[-1] > self.dataframe.index.max()
        ):
            raise OutputParserException(
                f"The maximum index {parsed_array[-1]} exceeds the maximum index of \
                    the Pandas DataFrame {self.dataframe.index.max()}."
            )

        return parsed_array, original_request_params.split("[")[0]

    def parse(self, request: str) -> Dict[str, Any]:
        stripped_request_params = None
        splitted_request = request.strip().split(":")
        if len(splitted_request) != 2:
            raise OutputParserException(
                f"Request '{request}' is not correctly formatted. \
                    Please refer to the format instructions."
            )
        result = {}
        try:
            request_type, request_params = splitted_request
            if request_type in {"Invalid column", "Invalid operation"}:
                raise OutputParserException(
                    f"{request}. Please check the format instructions."
                )
            array_exists = re.search(r"(\[.*?\])", request_params)
            if array_exists:
                parsed_array, stripped_request_params = self.parse_array(
                    array_exists.group(1), request_params
                )
                if request_type == "column":
                    filtered_df = self.dataframe[
                        self.dataframe.index.isin(parsed_array)
                    ]
                    if len(parsed_array) == 1:
                        result[stripped_request_params] = filtered_df[
                            stripped_request_params
                        ].iloc[parsed_array[0]]
                    else:
                        result[stripped_request_params] = filtered_df[
                            stripped_request_params
                        ]
                elif request_type == "row":
                    filtered_df = self.dataframe[
                        self.dataframe.columns.intersection(parsed_array)
                    ]
                    if len(parsed_array) == 1:
                        result[stripped_request_params] = filtered_df.iloc[
                            int(stripped_request_params)
                        ][parsed_array[0]]
                    else:
                        result[stripped_request_params] = filtered_df.iloc[
                            int(stripped_request_params)
                        ]
                else:
                    filtered_df = self.dataframe[
                        self.dataframe.index.isin(parsed_array)
                    ]
                    result[request_type] = getattr(
                        filtered_df[stripped_request_params], request_type
                    )()
            else:
                if request_type == "column":
                    result[request_params] = self.dataframe[request_params]
                elif request_type == "row":
                    result[request_params] = self.dataframe.iloc[int(request_params)]
                else:
                    result[request_type] = getattr(
                        self.dataframe[request_params], request_type
                    )()
        except (AttributeError, IndexError, KeyError):
            if request_type not in {"column", "row"}:
                raise OutputParserException(
                    f"Unsupported request type '{request_type}'. \
                        Please check the format instructions."
                )
            raise OutputParserException(
                f"""Requested index {
                    request_params
                    if stripped_request_params is None
                    else stripped_request_params
                } is out of bounds."""
            )

        return result

    def get_format_instructions(self) -> str:
        return PANDAS_DATAFRAME_FORMAT_INSTRUCTIONS.format(
            columns=", ".join(self.dataframe.columns)
        )
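As a quick illustration of the return shapes produced by `parse` (a sketch, not part of the diff; the DataFrame is hypothetical):

```python
import pandas as pd
from langchain.output_parsers import PandasDataFrameOutputParser

df = pd.DataFrame({"num_legs": [2, 4, 8, 0], "num_wings": [2, 0, 0, 0]})
parser = PandasDataFrameOutputParser(dataframe=df)

print(parser.parse("column:num_legs"))      # {'num_legs': <the full column as a Series>}
print(parser.parse("row:1"))                # {'1': <row 1 as a Series>}
print(parser.parse("mean:num_legs[1..3]"))  # {'mean': 4.0}
# Malformed or out-of-range requests raise OutputParserException.
```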
@@ -11,6 +11,7 @@ EXPECTED_ALL = [
    "MarkdownListOutputParser",
    "NumberedListOutputParser",
    "OutputFixingParser",
    "PandasDataFrameOutputParser",
    "PydanticOutputParser",
    "RegexDictParser",
    "RegexParser",
@@ -0,0 +1,110 @@
"""Test PandasDataframeParser"""
import pandas as pd

from langchain.output_parsers.pandas_dataframe import PandasDataFrameOutputParser
from langchain.schema import OutputParserException

df = pd.DataFrame(
    {"chicken": [1, 2, 3, 4], "veggies": [5, 4, 3, 2], "steak": [9, 8, 7, 6]}
)

parser = PandasDataFrameOutputParser(dataframe=df)


# Test Invalid Column
def test_pandas_output_parser_col_no_array() -> None:
    try:
        parser.parse("column:num_legs")
        assert False, "Should have raised OutputParserException"
    except OutputParserException:
        assert True


# Test Row with invalid array (above DataFrame max index)
def test_pandas_output_parser_col_oob() -> None:
    try:
        parser.parse("row:10")
        assert False, "Should have raised OutputParserException"
    except OutputParserException:
        assert True


# Test Column with array [x]
def test_pandas_output_parser_col_first_elem() -> None:
    expected_output = {"chicken": 1}
    actual_output = parser.parse("column:chicken[0]")
    assert actual_output == expected_output


# Test Column with array [x,y,z]
def test_pandas_output_parser_col_multi_elem() -> None:
    expected_output = {"chicken": pd.Series([1, 2], name="chicken", dtype="int64")}
    actual_output = parser.parse("column:chicken[0, 1]")
    for key in actual_output.keys():
        assert expected_output["chicken"].equals(actual_output[key])


# Test Row with invalid row entry
def test_pandas_output_parser_row_no_array() -> None:
    try:
        parser.parse("row:5")
        assert False, "Should have raised OutputParserException"
    except OutputParserException:
        assert True


# Test Row with valid row entry
def test_pandas_output_parser_row_first() -> None:
    expected_output = {"1": pd.Series({"chicken": 2, "veggies": 4, "steak": 8})}
    actual_output = parser.parse("row:1")
    assert actual_output["1"].equals(expected_output["1"])


# Test Row with invalid col entry
def test_pandas_output_parser_row_no_column() -> None:
    try:
        parser.parse("row:1[num_legs]")
        assert False, "Should have raised OutputParserException"
    except OutputParserException:
        assert True


# Test Row with valid col entry
def test_pandas_output_parser_row_col_1() -> None:
    expected_output = {"1": 2}
    actual_output = parser.parse("row:1[chicken]")
    assert actual_output == expected_output


def test_pandas_output_parser_special_ops() -> None:
    expected_output = [
        {"mean": 3.0},
        {"median": 3.0},
        {"min": 2},
        {"max": 4},
        {"var": 1.0},
        {"std": 1.0},
        {"count": 3},
        {"quantile": 3.0},
    ]

    actual_output = [
        parser.parse("mean:chicken[1..3]"),
        parser.parse("median:chicken[1..3]"),
        parser.parse("min:chicken[1..3]"),
        parser.parse("max:chicken[1..3]"),
        parser.parse("var:chicken[1..3]"),
        parser.parse("std:chicken[1..3]"),
        parser.parse("count:chicken[1..3]"),
        parser.parse("quantile:chicken[1..3]"),
    ]

    assert actual_output == expected_output


def test_pandas_output_parser_invalid_special_op() -> None:
    try:
        parser.parse("riemann_sum:chicken")
        assert False, "Should have raised OutputParserException"
    except OutputParserException:
        assert True