diff --git a/docs/extras/modules/agents/toolkits/xorbits.ipynb b/docs/extras/modules/agents/toolkits/xorbits.ipynb new file mode 100644 index 00000000000..dd3e6a108a5 --- /dev/null +++ b/docs/extras/modules/agents/toolkits/xorbits.ipynb @@ -0,0 +1,742 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Xorbits Agent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook shows how to use agents to interact with [Xorbits Pandas](https://doc.xorbits.io/en/latest/reference/pandas/index.html) dataframe and [Xorbits Numpy](https://doc.xorbits.io/en/latest/reference/numpy/index.html) ndarray. It is mostly optimized for question answering.\n", + "\n", + "**NOTE: this agent calls the Python agent under the hood, which executes LLM generated Python code - this can be bad if the LLM generated Python code is harmful. Use cautiously.**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pandas examples" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2023-07-13T08:06:33.955439Z", + "start_time": "2023-07-13T08:06:33.767539500Z" + } + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "05b7c067b1114ce9a8aef4a58a5d5fef", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0.00/100 [00:00 Entering new chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mThought: I need to count the number of rows and columns\n", + "Action: python_repl_ast\n", + "Action Input: data.shape\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m(891, 12)\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: There are 891 rows and 12 columns.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'There are 891 rows and 12 columns.'" + ] + }, + "execution_count": 2, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "agent.run(\"How many rows and columns are there?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2023-07-13T08:11:23.189275300Z", + "start_time": "2023-07-13T08:11:11.029030900Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8c63d745a7eb41a484043a5dba357997", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0.00/100 [00:00 Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'There are 216 people in pclass 1.'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.run(\"How many people are in pclass 1?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mThought: I need to calculate the mean age\n", + "Action: python_repl_ast\n", + "Action Input: data['Age'].mean()\u001b[0m" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "29af2e29f2d64a3397c212812adf0e9b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0.00/100 [00:00 Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'The mean age is 29.69911764705882.'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.run(\"whats the mean age?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new 
chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mThought: I need to group the data by sex and then find the average age for each group\n", + "Action: python_repl_ast\n", + "Action Input: data.groupby('Sex')['Age'].mean()\u001b[0m" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c3d28625c35946fd91ebc2a47f8d8c5b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0.00/100 [00:00 Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'The average age for female passengers is 27.92 and the average age for male passengers is 30.73.'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.run(\"Group the data by sex and find the average age for each group\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c72aab63b20d47599f4f9806f6887a69", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0.00/100 [00:00 30) & (data['Fare'] > 30) & (data['Fare'] < 50) & ((data['Pclass'] == 1) | (data['Pclass'] == 2))].shape[0]\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m20\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: 20\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'20'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.run(\n", + " \"Show the number of people whose age is greater than 30 and fare is between 30 and 50 , and pclass is either 1 or 2\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Numpy examples" + ] + }, + { + 
"cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fa8baf315a0c41c89392edc4a24b76f5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0.00/100 [00:00 Entering new chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mThought: I need to find out the shape of the array\n", + "Action: python_repl_ast\n", + "Action Input: data.shape\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m(6,)\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", + "Final Answer: The shape of the array is (6,).\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'The shape of the array is (6,).'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.run(\"Give the shape of the array \")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mThought: I need to access the 2nd element of the array\n", + "Action: python_repl_ast\n", + "Action Input: data[1]\u001b[0m" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "64efcc74f81f404eb0a7d3f0326cd8b3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0.00/100 [00:00 Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'2'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.run(\"Give the 2nd element of the array \")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", 
+ "\u001b[32;1m\u001b[1;3mThought: I need to reshape the array and then transpose it\n", + "Action: python_repl_ast\n", + "Action Input: np.reshape(data, (2,3)).T\u001b[0m" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fce51acf6fb347c0b400da67c6750534", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0.00/100 [00:00 Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'The reshaped and transposed array is [[1 4], [2 5], [3 6]].'" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.run(\n", + " \"Reshape the array into a 2-dimensional array with 2 rows and 3 columns, and then transpose it\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mThought: I need to reshape the array and then sum it\n", + "Action: python_repl_ast\n", + "Action Input: np.sum(np.reshape(data, (3,2)), axis=0)\u001b[0m" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "27fd4a0bbf694936bc41a6991064dec2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0.00/100 [00:00 Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'The sum of the array along the first axis is [9, 12].'" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.run(\n", + " \"Reshape the array into a 2-dimensional array with 3 rows and 2 columns and sum the array along the first axis\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a591b6d7913f45cba98d2f3b71a5120a", + "version_major": 2, + 
"version_minor": 0 + }, + "text/plain": [ + " 0%| | 0.00/100 [00:00 Entering new chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mThought: I need to use the numpy covariance function\n", + "Action: python_repl_ast\n", + "Action Input: np.cov(data)\u001b[0m" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5fe40f83cfae48d0919c147627b5839f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0.00/100 [00:00 Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'The covariance matrix is [[1. 1. 1.], [1. 1. 1.], [1. 1. 1.]].'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.run(\"calculate the covariance matrix\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mThought: I need to use the SVD function\n", + "Action: python_repl_ast\n", + "Action Input: U, S, V = np.linalg.svd(data)\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I now have the U matrix\n", + "Final Answer: U = [[-0.70710678 -0.70710678]\n", + " [-0.70710678 0.70710678]]\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'U = [[-0.70710678 -0.70710678]\\n [-0.70710678 0.70710678]]'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent.run(\"compute the U of Singular Value Decomposition of the matrix\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + 
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/langchain/agents/__init__.py b/langchain/agents/__init__.py index 37d6e0c019c..a7213959a84 100644 --- a/langchain/agents/__init__.py +++ b/langchain/agents/__init__.py @@ -19,6 +19,7 @@ from langchain.agents.agent_toolkits import ( create_sql_agent, create_vectorstore_agent, create_vectorstore_router_agent, + create_xorbits_agent, ) from langchain.agents.agent_types import AgentType from langchain.agents.conversational.base import ConversationalAgent @@ -74,4 +75,5 @@ __all__ = [ "load_huggingface_tool", "load_tools", "tool", + "create_xorbits_agent", ] diff --git a/langchain/agents/agent_toolkits/__init__.py b/langchain/agents/agent_toolkits/__init__.py index c0edc98f073..fb582de0e75 100644 --- a/langchain/agents/agent_toolkits/__init__.py +++ b/langchain/agents/agent_toolkits/__init__.py @@ -35,6 +35,7 @@ from langchain.agents.agent_toolkits.vectorstore.toolkit import ( VectorStoreRouterToolkit, VectorStoreToolkit, ) +from langchain.agents.agent_toolkits.xorbits.base import create_xorbits_agent from langchain.agents.agent_toolkits.zapier.toolkit import ZapierToolkit __all__ = [ @@ -66,4 +67,5 @@ __all__ = [ "PlayWrightBrowserToolkit", "AzureCognitiveServicesToolkit", "O365Toolkit", + "create_xorbits_agent", ] diff --git a/langchain/agents/agent_toolkits/xorbits/__init__.py b/langchain/agents/agent_toolkits/xorbits/__init__.py new file mode 100644 index 00000000000..71bf7b70ea2 --- /dev/null +++ b/langchain/agents/agent_toolkits/xorbits/__init__.py @@ -0,0 +1 @@ +"""Xorbits toolkit.""" diff --git a/langchain/agents/agent_toolkits/xorbits/base.py b/langchain/agents/agent_toolkits/xorbits/base.py new file mode 100644 index 00000000000..70411ef61b0 --- /dev/null +++ b/langchain/agents/agent_toolkits/xorbits/base.py @@ -0,0 +1,90 @@ +"""Agent for working with xorbits objects.""" +from typing import 
def create_xorbits_agent(
    llm: BaseLLM,
    data: Any,
    callback_manager: Optional[BaseCallbackManager] = None,
    prefix: str = "",
    suffix: str = "",
    input_variables: Optional[List[str]] = None,
    verbose: bool = False,
    return_intermediate_steps: bool = False,
    max_iterations: Optional[int] = 15,
    max_execution_time: Optional[float] = None,
    early_stopping_method: str = "force",
    agent_executor_kwargs: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> AgentExecutor:
    """Construct an agent that answers questions about a Xorbits object.

    The agent is a ``ZeroShotAgent`` equipped with a single
    ``PythonAstREPLTool`` whose local namespace exposes the object under the
    name ``data``. NOTE: that tool runs Python code produced by the LLM, so
    only use this agent with models and inputs you trust.

    Args:
        llm: Language model that drives the agent.
        data: A ``xorbits.pandas.DataFrame`` or ``xorbits.numpy.ndarray``.
        callback_manager: Passed to the LLM chain, the agent and the executor.
        prefix: Prompt prefix; an empty string selects the built-in
            DataFrame or ndarray prefix matching the type of ``data``.
        suffix: Prompt suffix; an empty string selects the built-in
            DataFrame or ndarray suffix matching the type of ``data``.
        input_variables: Prompt input variables. Defaults to
            ``["data", "input", "agent_scratchpad"]``; ``data`` is filled in
            here with a short textual preview of the object.
        verbose: Forwarded to the ``AgentExecutor``.
        return_intermediate_steps: Forwarded to the ``AgentExecutor``.
        max_iterations: Forwarded to the ``AgentExecutor``.
        max_execution_time: Forwarded to the ``AgentExecutor``.
        early_stopping_method: Forwarded to the ``AgentExecutor``.
        agent_executor_kwargs: Extra keyword arguments for
            ``AgentExecutor.from_agent_and_tools``.
        **kwargs: Extra keyword arguments for the ``ZeroShotAgent``
            constructor.

    Returns:
        An ``AgentExecutor`` wrapping the configured agent and its tool.

    Raises:
        ValueError: If the ``xorbits`` package is not installed, or if
            ``data`` is neither a Xorbits DataFrame nor a Xorbits ndarray.
    """
    try:
        from xorbits import numpy as np
        from xorbits import pandas as pd
    except ImportError as e:
        # ValueError is kept (rather than ImportError) so existing callers
        # that catch it keep working; the original error is chained.
        raise ValueError(
            "Xorbits package not installed, please install with `pip install xorbits`"
        ) from e

    if not isinstance(data, (pd.DataFrame, np.ndarray)):
        raise ValueError(
            f"Expected Xorbits DataFrame or ndarray object, got {type(data)}"
        )
    if input_variables is None:
        input_variables = ["data", "input", "agent_scratchpad"]
    tools = [PythonAstREPLTool(locals={"data": data})]

    # Pick the prompt templates that match the kind of object we were given.
    is_dataframe = isinstance(data, pd.DataFrame)
    if is_dataframe:
        default_prefix, default_suffix = PD_PREFIX, PD_SUFFIX
    else:
        default_prefix, default_suffix = NP_PREFIX, NP_SUFFIX

    prompt = ZeroShotAgent.create_prompt(
        tools,
        prefix=prefix or default_prefix,
        suffix=suffix or default_suffix,
        input_variables=input_variables,
    )

    # Build the short preview that is substituted for ``{data}`` in the prompt.
    if is_dataframe:
        partial_input = str(data.head())
    else:
        # Show the first half of the array, but never fewer than one element:
        # ``len(data) // 2`` is 0 for a single-element array, which would
        # leave the model with an empty preview.
        preview_len = max(1, len(data) // 2)
        partial_input = str(data[:preview_len])

    partial_prompt = prompt.partial(data=partial_input)
    llm_chain = LLMChain(
        llm=llm,
        prompt=partial_prompt,
        callback_manager=callback_manager,
    )
    tool_names = [tool.name for tool in tools]
    agent = ZeroShotAgent(
        llm_chain=llm_chain,
        allowed_tools=tool_names,
        callback_manager=callback_manager,
        **kwargs,
    )
    return AgentExecutor.from_agent_and_tools(
        agent=agent,
        tools=tools,
        callback_manager=callback_manager,
        verbose=verbose,
        return_intermediate_steps=return_intermediate_steps,
        max_iterations=max_iterations,
        max_execution_time=max_execution_time,
        early_stopping_method=early_stopping_method,
        **(agent_executor_kwargs or {}),
    )
+You should use the tools below to answer the question posed of you:""" + +NP_SUFFIX = """ +This is the result of `print(data)`: +{data} + +Begin! +Question: {input} +{agent_scratchpad}""" diff --git a/tests/unit_tests/agents/test_public_api.py b/tests/unit_tests/agents/test_public_api.py index b22367a6e37..1ae893edfad 100644 --- a/tests/unit_tests/agents/test_public_api.py +++ b/tests/unit_tests/agents/test_public_api.py @@ -36,6 +36,7 @@ _EXPECTED = [ "load_huggingface_tool", "load_tools", "tool", + "create_xorbits_agent", ]