Compare commits

...

2 Commits

Author SHA1 Message Date
vowelparrot
6dc02a63fd Add testing 2023-04-30 15:36:08 -07:00
Sami Liedes
98444779c5 Pandas agent: Pass callback manager to Python tool. (#3646)
The callback manager was not passed to the Python tool, making it hard
to observe some intermediate steps.

NOTE: The mypy hints seem to suggest that the tool does not accept None
as an input—but I didn't check the code—so I use get_callback_manager()
to get the real default callback manager in case None is passed to the
Pandas tool.

Co-authored-by: Sami Liedes <sami.liedes@rocket-science.ch>
2023-04-30 14:13:03 -07:00
7 changed files with 463 additions and 45 deletions

View File

@@ -14,11 +14,26 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": null,
"id": "0cdd9bf5", "id": "65a8ebbe-cd83-401a-8ca7-cb6541121d0c",
"metadata": {}, "metadata": {
"tags": []
},
"outputs": [], "outputs": [],
"source": [ "source": [
"!pip install \"pandas\" > /dev/null"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "0cdd9bf5",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.callbacks import tracing_enabled\n",
"from langchain.agents import create_pandas_dataframe_agent" "from langchain.agents import create_pandas_dataframe_agent"
] ]
}, },
@@ -26,7 +41,9 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 2,
"id": "051ebe84", "id": "051ebe84",
"metadata": {}, "metadata": {
"tags": []
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.llms import OpenAI\n", "from langchain.llms import OpenAI\n",
@@ -37,17 +54,19 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 7,
"id": "4185ff46", "id": "4185ff46",
"metadata": {}, "metadata": {
"tags": []
},
"outputs": [], "outputs": [],
"source": [ "source": [
"agent = create_pandas_dataframe_agent(OpenAI(temperature=0), df, verbose=True)" "agent = create_pandas_dataframe_agent(OpenAI(temperature=0), df, verbose=False)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 9,
"id": "a9207a2e", "id": "a9207a2e",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@@ -55,32 +74,17 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"\n", "Error in on_llm_start callback: Parent run with UUID 1b224cee-9715-4f0b-ba33-527468c2cebb not found.\n",
"\n", "Error in on_llm_end callback: No LLMRun found to be traced\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", "Error in on_llm_start callback: Parent run with UUID 51658270-136c-4763-9c8a-49db12e2efc9 not found.\n",
"\u001b[32;1m\u001b[1;3mThought: I need to count the number of rows\n", "Error in on_llm_end callback: No LLMRun found to be traced\n",
"Action: python_repl_ast\n", "There are 891 rows.\n"
"Action Input: len(df)\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m891\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: There are 891 rows in the dataframe.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
] ]
},
{
"data": {
"text/plain": [
"'There are 891 rows in the dataframe.'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
} }
], ],
"source": [ "source": [
"agent.run(\"how many rows are there?\")" "with tracing_enabled():\n",
" print(agent.run(\"how many rows are there?\"))"
] ]
}, },
{ {
@@ -96,11 +100,91 @@
"\n", "\n",
"\n", "\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\n",
"\n",
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3m\n",
"You are working with a pandas dataframe in Python. The name of the dataframe is `df`.\n",
"You should use the tools below to answer the question posed of you:\n",
"\n",
"python_repl_ast: A Python shell. Use this to execute python commands. Input should be a valid python command. When using this tool, sometimes output is abbreviated - make sure it does not look abbreviated before using it in your answer.\n",
"\n",
"Use the following format:\n",
"\n",
"Question: the input question you must answer\n",
"Thought: you should always think about what to do\n",
"Action: the action to take, should be one of [python_repl_ast]\n",
"Action Input: the input to the action\n",
"Observation: the result of the action\n",
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
"Thought: I now know the final answer\n",
"Final Answer: the final answer to the original input question\n",
"\n",
"\n",
"This is the result of `print(df.head())`:\n",
"| | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |\n",
"|---:|--------------:|-----------:|---------:|:----------------------------------------------------|:-------|------:|--------:|--------:|:-----------------|--------:|:--------|:-----------|\n",
"| 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22 | 1 | 0 | A/5 21171 | 7.25 | nan | S |\n",
"| 1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Thayer) | female | 38 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |\n",
"| 2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26 | 0 | 0 | STON/O2. 3101282 | 7.925 | nan | S |\n",
"| 3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35 | 1 | 0 | 113803 | 53.1 | C123 | S |\n",
"| 4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35 | 0 | 0 | 373450 | 8.05 | nan | S |\n",
"\n",
"Begin!\n",
"Question: how many people have more than 3 sibligngs\n",
"\u001b[0m\n",
"Error in on_llm_start callback: Parent run with UUID 6b86a7f0-bdf6-43e8-b94d-937c743b60f5 not found.\n",
"Error in on_llm_end callback: No LLMRun found to be traced\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mThought: I need to count the number of people with more than 3 siblings\n", "\u001b[32;1m\u001b[1;3mThought: I need to count the number of people with more than 3 siblings\n",
"Action: python_repl_ast\n", "Action: python_repl_ast\n",
"Action Input: df[df['SibSp'] > 3].shape[0]\u001b[0m\n", "Action Input: df[df['SibSp'] > 3].shape[0]\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m30\u001b[0m\n", "Observation: \u001b[36;1m\u001b[1;3m30\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", "Thought:\n",
"\n",
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3m\n",
"You are working with a pandas dataframe in Python. The name of the dataframe is `df`.\n",
"You should use the tools below to answer the question posed of you:\n",
"\n",
"python_repl_ast: A Python shell. Use this to execute python commands. Input should be a valid python command. When using this tool, sometimes output is abbreviated - make sure it does not look abbreviated before using it in your answer.\n",
"\n",
"Use the following format:\n",
"\n",
"Question: the input question you must answer\n",
"Thought: you should always think about what to do\n",
"Action: the action to take, should be one of [python_repl_ast]\n",
"Action Input: the input to the action\n",
"Observation: the result of the action\n",
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
"Thought: I now know the final answer\n",
"Final Answer: the final answer to the original input question\n",
"\n",
"\n",
"This is the result of `print(df.head())`:\n",
"| | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |\n",
"|---:|--------------:|-----------:|---------:|:----------------------------------------------------|:-------|------:|--------:|--------:|:-----------------|--------:|:--------|:-----------|\n",
"| 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22 | 1 | 0 | A/5 21171 | 7.25 | nan | S |\n",
"| 1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Thayer) | female | 38 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |\n",
"| 2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26 | 0 | 0 | STON/O2. 3101282 | 7.925 | nan | S |\n",
"| 3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35 | 1 | 0 | 113803 | 53.1 | C123 | S |\n",
"| 4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35 | 0 | 0 | 373450 | 8.05 | nan | S |\n",
"\n",
"Begin!\n",
"Question: how many people have more than 3 sibligngs\n",
"Thought: I need to count the number of people with more than 3 siblings\n",
"Action: python_repl_ast\n",
"Action Input: df[df['SibSp'] > 3].shape[0]\n",
"Observation: 30\n",
"Thought:\u001b[0m\n",
"Error in on_llm_start callback: Parent run with UUID 73a825e3-1270-4735-b40b-208a0915e8be not found.\n",
"Error in on_llm_end callback: No LLMRun found to be traced\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: 30 people have more than 3 siblings.\u001b[0m\n", "Final Answer: 30 people have more than 3 siblings.\u001b[0m\n",
"\n", "\n",
"\u001b[1m> Finished chain.\u001b[0m\n" "\u001b[1m> Finished chain.\u001b[0m\n"
@@ -134,23 +218,253 @@
"\n", "\n",
"\n", "\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\n",
"\n",
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3m\n",
"You are working with a pandas dataframe in Python. The name of the dataframe is `df`.\n",
"You should use the tools below to answer the question posed of you:\n",
"\n",
"python_repl_ast: A Python shell. Use this to execute python commands. Input should be a valid python command. When using this tool, sometimes output is abbreviated - make sure it does not look abbreviated before using it in your answer.\n",
"\n",
"Use the following format:\n",
"\n",
"Question: the input question you must answer\n",
"Thought: you should always think about what to do\n",
"Action: the action to take, should be one of [python_repl_ast]\n",
"Action Input: the input to the action\n",
"Observation: the result of the action\n",
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
"Thought: I now know the final answer\n",
"Final Answer: the final answer to the original input question\n",
"\n",
"\n",
"This is the result of `print(df.head())`:\n",
"| | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |\n",
"|---:|--------------:|-----------:|---------:|:----------------------------------------------------|:-------|------:|--------:|--------:|:-----------------|--------:|:--------|:-----------|\n",
"| 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22 | 1 | 0 | A/5 21171 | 7.25 | nan | S |\n",
"| 1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Thayer) | female | 38 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |\n",
"| 2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26 | 0 | 0 | STON/O2. 3101282 | 7.925 | nan | S |\n",
"| 3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35 | 1 | 0 | 113803 | 53.1 | C123 | S |\n",
"| 4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35 | 0 | 0 | 373450 | 8.05 | nan | S |\n",
"\n",
"Begin!\n",
"Question: whats the square root of the average age?\n",
"\u001b[0m\n",
"Error in on_llm_start callback: Parent run with UUID 8d771bc6-208a-4580-b43d-ec92ca04e39f not found.\n",
"Error in on_llm_end callback: No LLMRun found to be traced\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mThought: I need to calculate the average age first\n", "\u001b[32;1m\u001b[1;3mThought: I need to calculate the average age first\n",
"Action: python_repl_ast\n", "Action: python_repl_ast\n",
"Action Input: df['Age'].mean()\u001b[0m\n", "Action Input: df['Age'].mean()\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m29.69911764705882\u001b[0m\n", "Observation: \u001b[36;1m\u001b[1;3m29.69911764705882\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I can now calculate the square root\n", "Thought:\n",
"\n",
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3m\n",
"You are working with a pandas dataframe in Python. The name of the dataframe is `df`.\n",
"You should use the tools below to answer the question posed of you:\n",
"\n",
"python_repl_ast: A Python shell. Use this to execute python commands. Input should be a valid python command. When using this tool, sometimes output is abbreviated - make sure it does not look abbreviated before using it in your answer.\n",
"\n",
"Use the following format:\n",
"\n",
"Question: the input question you must answer\n",
"Thought: you should always think about what to do\n",
"Action: the action to take, should be one of [python_repl_ast]\n",
"Action Input: the input to the action\n",
"Observation: the result of the action\n",
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
"Thought: I now know the final answer\n",
"Final Answer: the final answer to the original input question\n",
"\n",
"\n",
"This is the result of `print(df.head())`:\n",
"| | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |\n",
"|---:|--------------:|-----------:|---------:|:----------------------------------------------------|:-------|------:|--------:|--------:|:-----------------|--------:|:--------|:-----------|\n",
"| 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22 | 1 | 0 | A/5 21171 | 7.25 | nan | S |\n",
"| 1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Thayer) | female | 38 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |\n",
"| 2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26 | 0 | 0 | STON/O2. 3101282 | 7.925 | nan | S |\n",
"| 3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35 | 1 | 0 | 113803 | 53.1 | C123 | S |\n",
"| 4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35 | 0 | 0 | 373450 | 8.05 | nan | S |\n",
"\n",
"Begin!\n",
"Question: whats the square root of the average age?\n",
"Thought: I need to calculate the average age first\n",
"Action: python_repl_ast\n",
"Action Input: df['Age'].mean()\n",
"Observation: 29.69911764705882\n",
"Thought:\u001b[0m\n",
"Error in on_llm_start callback: Parent run with UUID b30b9eac-eee3-406c-b7c3-a3db3b549948 not found.\n",
"Error in on_llm_end callback: No LLMRun found to be traced\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m I now need to calculate the square root of the average age\n",
"Action: python_repl_ast\n", "Action: python_repl_ast\n",
"Action Input: math.sqrt(df['Age'].mean())\u001b[0m\n", "Action Input: math.sqrt(df['Age'].mean())\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mname 'math' is not defined\u001b[0m\n", "Observation: \u001b[36;1m\u001b[1;3mname 'math' is not defined\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I need to import the math library\n", "Thought:\n",
"\n",
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3m\n",
"You are working with a pandas dataframe in Python. The name of the dataframe is `df`.\n",
"You should use the tools below to answer the question posed of you:\n",
"\n",
"python_repl_ast: A Python shell. Use this to execute python commands. Input should be a valid python command. When using this tool, sometimes output is abbreviated - make sure it does not look abbreviated before using it in your answer.\n",
"\n",
"Use the following format:\n",
"\n",
"Question: the input question you must answer\n",
"Thought: you should always think about what to do\n",
"Action: the action to take, should be one of [python_repl_ast]\n",
"Action Input: the input to the action\n",
"Observation: the result of the action\n",
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
"Thought: I now know the final answer\n",
"Final Answer: the final answer to the original input question\n",
"\n",
"\n",
"This is the result of `print(df.head())`:\n",
"| | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |\n",
"|---:|--------------:|-----------:|---------:|:----------------------------------------------------|:-------|------:|--------:|--------:|:-----------------|--------:|:--------|:-----------|\n",
"| 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22 | 1 | 0 | A/5 21171 | 7.25 | nan | S |\n",
"| 1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Thayer) | female | 38 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |\n",
"| 2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26 | 0 | 0 | STON/O2. 3101282 | 7.925 | nan | S |\n",
"| 3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35 | 1 | 0 | 113803 | 53.1 | C123 | S |\n",
"| 4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35 | 0 | 0 | 373450 | 8.05 | nan | S |\n",
"\n",
"Begin!\n",
"Question: whats the square root of the average age?\n",
"Thought: I need to calculate the average age first\n",
"Action: python_repl_ast\n",
"Action Input: df['Age'].mean()\n",
"Observation: 29.69911764705882\n",
"Thought: I now need to calculate the square root of the average age\n",
"Action: python_repl_ast\n",
"Action Input: math.sqrt(df['Age'].mean())\n",
"Observation: name 'math' is not defined\n",
"Thought:\u001b[0m\n",
"Error in on_llm_start callback: Parent run with UUID 5c463668-dfa6-45e5-856d-2da5c91516d8 not found.\n",
"Error in on_llm_end callback: No LLMRun found to be traced\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m I need to import the math library\n",
"Action: python_repl_ast\n", "Action: python_repl_ast\n",
"Action Input: import math\u001b[0m\n", "Action Input: import math\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mNone\u001b[0m\n", "Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I can now calculate the square root\n", "Thought:\n",
"\n",
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3m\n",
"You are working with a pandas dataframe in Python. The name of the dataframe is `df`.\n",
"You should use the tools below to answer the question posed of you:\n",
"\n",
"python_repl_ast: A Python shell. Use this to execute python commands. Input should be a valid python command. When using this tool, sometimes output is abbreviated - make sure it does not look abbreviated before using it in your answer.\n",
"\n",
"Use the following format:\n",
"\n",
"Question: the input question you must answer\n",
"Thought: you should always think about what to do\n",
"Action: the action to take, should be one of [python_repl_ast]\n",
"Action Input: the input to the action\n",
"Observation: the result of the action\n",
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
"Thought: I now know the final answer\n",
"Final Answer: the final answer to the original input question\n",
"\n",
"\n",
"This is the result of `print(df.head())`:\n",
"| | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |\n",
"|---:|--------------:|-----------:|---------:|:----------------------------------------------------|:-------|------:|--------:|--------:|:-----------------|--------:|:--------|:-----------|\n",
"| 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22 | 1 | 0 | A/5 21171 | 7.25 | nan | S |\n",
"| 1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Thayer) | female | 38 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |\n",
"| 2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26 | 0 | 0 | STON/O2. 3101282 | 7.925 | nan | S |\n",
"| 3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35 | 1 | 0 | 113803 | 53.1 | C123 | S |\n",
"| 4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35 | 0 | 0 | 373450 | 8.05 | nan | S |\n",
"\n",
"Begin!\n",
"Question: whats the square root of the average age?\n",
"Thought: I need to calculate the average age first\n",
"Action: python_repl_ast\n",
"Action Input: df['Age'].mean()\n",
"Observation: 29.69911764705882\n",
"Thought: I now need to calculate the square root of the average age\n",
"Action: python_repl_ast\n",
"Action Input: math.sqrt(df['Age'].mean())\n",
"Observation: name 'math' is not defined\n",
"Thought: I need to import the math library\n",
"Action: python_repl_ast\n",
"Action Input: import math\n",
"Observation: \n",
"Thought:\u001b[0m\n",
"Error in on_llm_start callback: Parent run with UUID 3e09fe75-cb84-4b8d-83a1-7037d8e12aab not found.\n",
"Error in on_llm_end callback: No LLMRun found to be traced\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m I now need to calculate the square root of the average age\n",
"Action: python_repl_ast\n", "Action: python_repl_ast\n",
"Action Input: math.sqrt(df['Age'].mean())\u001b[0m\n", "Action Input: math.sqrt(df['Age'].mean())\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m5.449689683556195\u001b[0m\n", "Observation: \u001b[36;1m\u001b[1;3m5.449689683556195\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", "Thought:\n",
"\n",
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3m\n",
"You are working with a pandas dataframe in Python. The name of the dataframe is `df`.\n",
"You should use the tools below to answer the question posed of you:\n",
"\n",
"python_repl_ast: A Python shell. Use this to execute python commands. Input should be a valid python command. When using this tool, sometimes output is abbreviated - make sure it does not look abbreviated before using it in your answer.\n",
"\n",
"Use the following format:\n",
"\n",
"Question: the input question you must answer\n",
"Thought: you should always think about what to do\n",
"Action: the action to take, should be one of [python_repl_ast]\n",
"Action Input: the input to the action\n",
"Observation: the result of the action\n",
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
"Thought: I now know the final answer\n",
"Final Answer: the final answer to the original input question\n",
"\n",
"\n",
"This is the result of `print(df.head())`:\n",
"| | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |\n",
"|---:|--------------:|-----------:|---------:|:----------------------------------------------------|:-------|------:|--------:|--------:|:-----------------|--------:|:--------|:-----------|\n",
"| 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22 | 1 | 0 | A/5 21171 | 7.25 | nan | S |\n",
"| 1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Thayer) | female | 38 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |\n",
"| 2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26 | 0 | 0 | STON/O2. 3101282 | 7.925 | nan | S |\n",
"| 3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35 | 1 | 0 | 113803 | 53.1 | C123 | S |\n",
"| 4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35 | 0 | 0 | 373450 | 8.05 | nan | S |\n",
"\n",
"Begin!\n",
"Question: whats the square root of the average age?\n",
"Thought: I need to calculate the average age first\n",
"Action: python_repl_ast\n",
"Action Input: df['Age'].mean()\n",
"Observation: 29.69911764705882\n",
"Thought: I now need to calculate the square root of the average age\n",
"Action: python_repl_ast\n",
"Action Input: math.sqrt(df['Age'].mean())\n",
"Observation: name 'math' is not defined\n",
"Thought: I need to import the math library\n",
"Action: python_repl_ast\n",
"Action Input: import math\n",
"Observation: \n",
"Thought: I now need to calculate the square root of the average age\n",
"Action: python_repl_ast\n",
"Action Input: math.sqrt(df['Age'].mean())\n",
"Observation: 5.449689683556195\n",
"Thought:\u001b[0m\n",
"Error in on_llm_start callback: Parent run with UUID 32e38f5f-a71a-4040-89c5-cca7752ba72d not found.\n",
"Error in on_llm_end callback: No LLMRun found to be traced\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: 5.449689683556195\u001b[0m\n", "Final Answer: 5.449689683556195\u001b[0m\n",
"\n", "\n",
"\u001b[1m> Finished chain.\u001b[0m\n" "\u001b[1m> Finished chain.\u001b[0m\n"
@@ -196,7 +510,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.9.1" "version": "3.11.2"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -1,19 +1,41 @@
"""Agent for working with pandas objects.""" """Agent for working with pandas objects."""
import warnings
from typing import Any, List, Optional from typing import Any, List, Optional
from langchain.agents.agent import AgentExecutor from langchain.agents.agent import AgentExecutor
from langchain.agents.agent_toolkits.pandas.prompt import PREFIX, SUFFIX from langchain.agents.agent_toolkits.pandas.prompt import PREFIX, SUFFIX
from langchain.agents.mrkl.base import ZeroShotAgent from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.callbacks.base import BaseCallbackManager from langchain.callbacks.base import BaseCallbackManager
from langchain.callbacks.manager import CallbackManager, Callbacks
from langchain.chains.llm import LLMChain from langchain.chains.llm import LLMChain
from langchain.llms.base import BaseLLM from langchain.llms.base import BaseLLM
from langchain.tools.python.tool import PythonAstREPLTool from langchain.tools.python.tool import PythonAstREPLTool
def _get_callbacks(
callbacks: Callbacks,
callbacks_manager: Optional[BaseCallbackManager],
verbose: bool,
) -> CallbackManager:
if callbacks_manager is not None and callbacks is not None:
raise ValueError(
"Arguments 'callbacks' and "
"'callbacks_manager' are mutually exclusive. "
" Please only provide 'callbacks'"
)
if callbacks_manager is not None:
warnings.warn(
"callback_manager is deprecated. Please use callbacks instead.",
DeprecationWarning,
)
callbacks = callbacks_manager
return CallbackManager.configure(inheritable_callbacks=callbacks, verbose=verbose)
def create_pandas_dataframe_agent( def create_pandas_dataframe_agent(
llm: BaseLLM, llm: BaseLLM,
df: Any, df: Any,
callback_manager: Optional[BaseCallbackManager] = None, callbacks: Callbacks = None,
prefix: str = PREFIX, prefix: str = PREFIX,
suffix: str = SUFFIX, suffix: str = SUFFIX,
input_variables: Optional[List[str]] = None, input_variables: Optional[List[str]] = None,
@@ -22,16 +44,44 @@ def create_pandas_dataframe_agent(
max_iterations: Optional[int] = 15, max_iterations: Optional[int] = 15,
max_execution_time: Optional[float] = None, max_execution_time: Optional[float] = None,
early_stopping_method: str = "force", early_stopping_method: str = "force",
callback_manager: Optional[BaseCallbackManager] = None,
**kwargs: Any, **kwargs: Any,
) -> AgentExecutor: ) -> AgentExecutor:
"""Construct a pandas agent from an LLM and dataframe.""" """Construct a pandas agent from an LLM and dataframe.
Args:
llm: The LLM to use
df: The dataframe to provide to the REPL
callbacks: The callbacks to use for tracing
prefix: The prefix to use for the prompt
suffix: The suffix to use for the prompt
input_variables: The input variables
verbose: Whether to print output
return_intermediate_steps: Whether to return intermediate steps
max_iterations: The maximum number of iterations before exiting
max_execution_time: The maximum execution time before exiting
early_stopping_method: The early stopping method
callback_manager: (DEPRECATED) Use 'callbacks' instead
"""
import pandas as pd import pandas as pd
if not isinstance(df, pd.DataFrame): if not isinstance(df, pd.DataFrame):
raise ValueError(f"Expected pandas object, got {type(df)}") raise ValueError(f"Expected pandas object, got {type(df)}")
if input_variables is None: if input_variables is None:
input_variables = ["df", "input", "agent_scratchpad"] input_variables = ["df", "input", "agent_scratchpad"]
tools = [PythonAstREPLTool(locals={"df": df})]
callback_manager_ = _get_callbacks(
callbacks=callbacks,
callbacks_manager=callback_manager,
verbose=verbose,
)
tools = [
PythonAstREPLTool(
locals={"df": df},
callbacks=callback_manager_,
)
]
prompt = ZeroShotAgent.create_prompt( prompt = ZeroShotAgent.create_prompt(
tools, prefix=prefix, suffix=suffix, input_variables=input_variables tools, prefix=prefix, suffix=suffix, input_variables=input_variables
) )
@@ -39,22 +89,22 @@ def create_pandas_dataframe_agent(
llm_chain = LLMChain( llm_chain = LLMChain(
llm=llm, llm=llm,
prompt=partial_prompt, prompt=partial_prompt,
callback_manager=callback_manager, callbacks=callback_manager_,
) )
tool_names = [tool.name for tool in tools] tool_names = [tool.name for tool in tools]
agent = ZeroShotAgent( agent = ZeroShotAgent(
llm_chain=llm_chain, llm_chain=llm_chain,
allowed_tools=tool_names, allowed_tools=tool_names,
callback_manager=callback_manager, callbacks=callback_manager_,
**kwargs, **kwargs,
) )
return AgentExecutor.from_agent_and_tools( return AgentExecutor.from_agent_and_tools(
agent=agent, agent=agent,
tools=tools, tools=tools,
callbacks=callback_manager_,
verbose=verbose, verbose=verbose,
return_intermediate_steps=return_intermediate_steps, return_intermediate_steps=return_intermediate_steps,
max_iterations=max_iterations, max_iterations=max_iterations,
max_execution_time=max_execution_time, max_execution_time=max_execution_time,
early_stopping_method=early_stopping_method, early_stopping_method=early_stopping_method,
callback_manager=callback_manager,
) )

View File

@@ -6,6 +6,7 @@ from mypy_extensions import Arg, KwArg
from langchain.agents.tools import Tool from langchain.agents.tools import Tool
from langchain.callbacks.base import BaseCallbackManager from langchain.callbacks.base import BaseCallbackManager
from langchain.callbacks.manager import Callbacks
from langchain.chains.api import news_docs, open_meteo_docs, podcast_docs, tmdb_docs from langchain.chains.api import news_docs, open_meteo_docs, podcast_docs, tmdb_docs
from langchain.chains.api.base import APIChain from langchain.chains.api.base import APIChain
from langchain.chains.llm_math.base import LLMMathChain from langchain.chains.llm_math.base import LLMMathChain
@@ -256,6 +257,7 @@ def load_tools(
tool_names: List[str], tool_names: List[str],
llm: Optional[BaseLLM] = None, llm: Optional[BaseLLM] = None,
callback_manager: Optional[BaseCallbackManager] = None, callback_manager: Optional[BaseCallbackManager] = None,
callbacks: Optional[Callbacks] = None,
**kwargs: Any, **kwargs: Any,
) -> List[BaseTool]: ) -> List[BaseTool]:
"""Load tools based on their name. """Load tools based on their name.

View File

@@ -5,7 +5,7 @@ from typing import Optional
from langchain.callbacks.manager import ( from langchain.callbacks.manager import (
AsyncCallbackManagerForToolRun, AsyncCallbackManagerForToolRun,
RunManager, CallbackManagerForToolRun,
) )
from langchain.tools.base import BaseTool from langchain.tools.base import BaseTool
@@ -20,7 +20,9 @@ class StdInInquireTool(BaseTool):
" question (to disambiguate) or a request for more context." " question (to disambiguate) or a request for more context."
) )
def _run(self, prompt: str, run_manager: Optional[RunManager] = None) -> str: def _run(
self, prompt: str, run_manager: Optional[CallbackManagerForToolRun] = None
) -> str:
"""Prompt the user for more input.""" """Prompt the user for more input."""
return input(f"\n{prompt}") return input(f"\n{prompt}")

View File

@@ -0,0 +1,50 @@
import pandas as pd
import pytest
from langchain.agents.agent import AgentExecutor
from langchain.agents.agent_toolkits.pandas.base import (
_get_callbacks,
create_pandas_dataframe_agent,
)
from langchain.callbacks.manager import CallbackManager
from tests.unit_tests.llms.fake_llm import FakeLLM
# Test cases for _get_callbacks function
def test_get_callbacks_default() -> None:
    """With neither argument supplied, a CallbackManager is still returned."""
    resolved = _get_callbacks(callbacks=None, callbacks_manager=None, verbose=False)
    assert isinstance(resolved, CallbackManager)
def test_get_callbacks_with_callback_manager() -> None:
    """Supplying both callbacks and a legacy manager must be rejected."""
    cb = CallbackManager.configure()
    # Match on the message so an unrelated ValueError cannot satisfy the test.
    with pytest.raises(ValueError, match="mutually exclusive"):
        _get_callbacks(cb, cb, False)
def test_get_callbacks_deprecation_warning() -> None:
    """Passing only the legacy manager emits a DeprecationWarning."""
    legacy_manager = CallbackManager.configure()
    with pytest.warns(DeprecationWarning):
        _get_callbacks(
            callbacks=None,
            callbacks_manager=legacy_manager,
            verbose=False,
        )
# Test cases for create_pandas_dataframe_agent function
def test_create_agent_with_non_dataframe() -> None:
    """A ``df`` argument that is not a DataFrame raises ValueError."""
    not_a_frame = "Not a dataframe"
    with pytest.raises(ValueError, match="Expected pandas object, got"):
        create_pandas_dataframe_agent(FakeLLM(), not_a_frame)
def test_create_agent_with_dataframe() -> None:
    """An empty DataFrame is enough to build an AgentExecutor."""
    empty_frame = pd.DataFrame()
    executor = create_pandas_dataframe_agent(FakeLLM(), empty_frame)
    assert isinstance(executor, AgentExecutor)
def test_create_agent_with_callback_manager() -> None:
    """The factory rejects ``callbacks`` combined with ``callback_manager``."""
    cb = CallbackManager.configure()
    # Match on the message: the factory also raises ValueError for a
    # non-DataFrame input, which must not be able to satisfy this test.
    with pytest.raises(ValueError, match="mutually exclusive"):
        create_pandas_dataframe_agent(
            FakeLLM(),
            pd.DataFrame(),
            callback_manager=cb,
            callbacks=cb,
        )