Compare commits

...

3 Commits

Author SHA1 Message Date
Harrison Chase
1065eefac1 get working with agents 2023-04-27 16:25:03 -07:00
Harrison Chase
151767fdca Merge branch 'master' into vwp/file_io 2023-04-27 15:32:15 -07:00
vowelparrot
53ce63ed5e Add Other File Utilities 2023-04-20 07:07:43 -07:00
25 changed files with 1396 additions and 90 deletions

View File

@@ -21,7 +21,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 2,
"id": "ac561cc4",
"metadata": {},
"outputs": [],
@@ -34,7 +34,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 3,
"id": "07e96d99",
"metadata": {},
"outputs": [],
@@ -66,7 +66,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 4,
"id": "a069c4b6",
"metadata": {},
"outputs": [],
@@ -76,9 +76,67 @@
},
{
"cell_type": "code",
"execution_count": 12,
"id": "e603cd7d",
"execution_count": 5,
"id": "cfd776ee",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Answer the following questions as best you can. You have access to the following tools:\n",
"\n",
"> Search: useful for when you need to answer questions about current events. You should ask targeted questions\n",
"Args: {{'tool_input': {{'type': 'string'}}}}\n",
"> Calculator: useful for when you need to answer questions about math\n",
"Args: {{'tool_input': {{'type': 'string'}}}}\n",
"> FooBar DB: useful for when you need to answer questions about FooBar. Input should be in the form of a question containing full context\n",
"Args: {{'tool_input': {{'type': 'string'}}}}\n",
"\n",
"The way you use the tools is by specifying a json blob.\n",
"Specifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\n",
"\n",
"The only values that should be in the \"action\" field are: Search, Calculator, FooBar DB\n",
"\n",
"The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:\n",
"\n",
"```\n",
"{{\n",
" \"action\": $TOOL_NAME,\n",
" \"action_input\": {{\n",
" ...\n",
" }}\n",
"}}\n",
"```\n",
"\n",
"ALWAYS use the following format:\n",
"\n",
"Question: the input question you must answer\n",
"Thought: you should always think about what to do\n",
"Action:\n",
"```\n",
"$JSON_BLOB\n",
"```\n",
"Observation: the result of the action\n",
"... (this Thought/Action/Observation can repeat N times)\n",
"Thought: I now know the final answer\n",
"Final Answer: the final answer to the original input question\n",
"\n",
"Begin! Reminder to always use the exact characters `Final Answer` when responding.\n"
]
}
],
"source": [
"print(mrkl.agent.llm_chain.prompt.messages[0].prompt.template)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e603cd7d",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
@@ -87,39 +145,58 @@
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mThought: The first question requires a search, while the second question requires a calculator.\n",
"\u001b[32;1m\u001b[1;3mQuestion: Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\n",
"Thought: I need to use two different tools for this question. First, I need to use Search to find out who Leo DiCaprio's girlfriend is. Then, I need to use Calculator to calculate her current age raised to the 0.43 power.\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"Search\",\n",
" \"action_input\": \"Leo DiCaprio girlfriend\"\n",
" \"action_input\": {\n",
" \"query\": \"Who is Leo DiCaprio's girlfriend?\"\n",
" }\n",
"}\n",
"```\n",
"\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mGigi Hadid: 2022 Leo and Gigi were first linked back in September 2022, when a source told Us Weekly that Leo had his “sights set\" on her (alarming way to put it, but okay).\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3mFor the second question, I need to calculate the age raised to the 0.43 power. I will use the calculator tool.\n",
"Observation: \u001b[36;1m\u001b[1;3mLeonardo DiCaprio has split from girlfriend Camila Morrone. Getty. The Titanic actor hasn't been in a relationship with a woman over the age of ...\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3mI need to use Search again to find out Camila Morrone's current age.\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"Search\",\n",
" \"action_input\": {\n",
" \"query\": \"Camila Morrone age\"\n",
" }\n",
"}\n",
"```\n",
"\n",
"\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m25 years\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3mNow I need to use Calculator to calculate her current age raised to the 0.43 power.\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"Calculator\",\n",
" \"action_input\": \"((2022-1995)^0.43)\"\n",
" \"action_input\": {\n",
" \"expression\": \"25**(0.43)\"\n",
" }\n",
"}\n",
"```\n",
"\n",
"\u001b[0m\n",
"\n",
"\u001b[1m> Entering new LLMMathChain chain...\u001b[0m\n",
"((2022-1995)^0.43)\u001b[32;1m\u001b[1;3m\n",
"25**(0.43)\u001b[32;1m\u001b[1;3m\n",
"```text\n",
"(2022-1995)**0.43\n",
"25**(0.43)\n",
"```\n",
"...numexpr.evaluate(\"(2022-1995)**0.43\")...\n",
"...numexpr.evaluate(\"25**(0.43)\")...\n",
"\u001b[0m\n",
"Answer: \u001b[33;1m\u001b[1;3m4.125593352125936\u001b[0m\n",
"Answer: \u001b[33;1m\u001b[1;3m3.991298452658078\u001b[0m\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\n",
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 4.125593352125936\u001b[0m\n",
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 3.991298452658078\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3mI now know the final answer.\n",
"Final Answer: Gigi Hadid is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is approximately 4.13.\u001b[0m\n",
"Final Answer: 3.991298452658078\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
@@ -127,10 +204,10 @@
{
"data": {
"text/plain": [
"\"Gigi Hadid is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is approximately 4.13.\""
"'3.991298452658078'"
]
},
"execution_count": 12,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -141,7 +218,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 7,
"id": "a5c07010",
"metadata": {},
"outputs": [
@@ -152,26 +229,28 @@
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mQuestion: What is the full name of the artist who recently released an album called 'The Storm Before the Calm' and are they in the FooBar database? If so, what albums of theirs are in the FooBar database?\n",
"Thought: I should use the Search tool to find the answer to the first part of the question and then use the FooBar DB tool to find the answer to the second part.\n",
"\u001b[32;1m\u001b[1;3mThought: I should use the Search tool to find the name of the artist who recently released an album called 'The Storm Before the Calm'\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"Search\",\n",
" \"action_input\": \"Who recently released an album called 'The Storm Before the Calm'\"\n",
" \"action_input\": {\n",
" \"query\": \"artist who recently released album 'The Storm Before the Calm'\"\n",
" }\n",
"}\n",
"```\n",
"\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mAlanis Morissette\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3mNow that I know the artist's name, I can use the FooBar DB tool to find out if they are in the database and what albums of theirs are in it.\n",
"Thought:\u001b[32;1m\u001b[1;3mI should use the FooBar DB tool to check if Alanis Morissette is in the database and what albums of hers are in it.\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"FooBar DB\",\n",
" \"action_input\": \"What albums does Alanis Morissette have in the database?\"\n",
" \"action_input\": {\n",
" \"question\": \"What albums does Alanis Morissette have in the database?\"\n",
" }\n",
"}\n",
"```\n",
"\n",
"\u001b[0m\n",
"\n",
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
@@ -183,8 +262,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/harrisonchase/workplace/langchain/langchain/sql_database.py:191: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.\n",
" sample_rows = connection.execute(command)\n"
"/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/sqlalchemy/sql/sqltypes.py:726: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.\n",
" util.warn(\n"
]
},
{
@@ -197,8 +276,8 @@
"\u001b[1m> Finished chain.\u001b[0m\n",
"\n",
"Observation: \u001b[38;5;200m\u001b[1;3m Alanis Morissette has the album Jagged Little Pill in the database.\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3mThe artist Alanis Morissette is in the FooBar database and has the album Jagged Little Pill in it.\n",
"Final Answer: Alanis Morissette is in the FooBar database and has the album Jagged Little Pill in it.\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3mI have found that Alanis Morissette is the artist who recently released an album called 'The Storm Before the Calm' and that she has the album Jagged Little Pill in the FooBar database.\n",
"Final Answer: Alanis Morissette is the artist who recently released an album called 'The Storm Before the Calm' and she has the album Jagged Little Pill in the FooBar database.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
@@ -206,10 +285,10 @@
{
"data": {
"text/plain": [
"'Alanis Morissette is in the FooBar database and has the album Jagged Little Pill in it.'"
"\"Alanis Morissette is the artist who recently released an album called 'The Storm Before the Calm' and she has the album Jagged Little Pill in the FooBar database.\""
]
},
"execution_count": 13,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}

View File

@@ -0,0 +1,623 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# File System Tools\n",
"\n",
"LangChain provides tools for interacting with a local file system out of the box. This notebook walks through some of them.\n",
"\n",
"Note: these tools are not recommended for use outside a sandboxed environment! "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First, we'll import the tools."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.tools.file_management import (\n",
" ReadFileTool,\n",
" CopyFileTool,\n",
" DeleteFileTool,\n",
" MoveFileTool,\n",
" WriteFileTool,\n",
" ListDirectoryTool,\n",
" FileManagementToolkit,\n",
")\n",
"\n",
"from pathlib import Path\n",
"from tempfile import TemporaryDirectory\n",
"\n",
"# We'll make a temporary directory to avoid clutter\n",
"working_directory = TemporaryDirectory()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## The FileManagementToolkit\n",
"\n",
"If you want to provide all the file tooling to your agent, it's easy to do so with the toolkit. We'll pass the temporary directory in as the root directory, which serves as a workspace for the LLM.\n",
"\n",
"It's recommended to always pass in a root directory: without one, it's easy for the LLM to pollute the working directory, and there isn't any validation against\n",
"straightforward prompt injection."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[CopyFileTool(name='copy_file', description='Create a copy of a file in a specified location', args_schema=<class 'langchain.tools.file_management.copy.FileCopyInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x123c72d00>, root_dir='/var/folders/bm/ylzhm36n075cslb9fvvbgq640000gn/T/tmp5xwpqkd9'),\n",
" DeleteFileTool(name='file_delete', description='Delete a file', args_schema=<class 'langchain.tools.file_management.delete.FileDeleteInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x123c72d00>, root_dir='/var/folders/bm/ylzhm36n075cslb9fvvbgq640000gn/T/tmp5xwpqkd9'),\n",
" FileSearchTool(name='file_search', description='Recursively search for files in a subdirectory that match the regex pattern', args_schema=<class 'langchain.tools.file_management.file_search.FileSearchInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x123c72d00>, root_dir='/var/folders/bm/ylzhm36n075cslb9fvvbgq640000gn/T/tmp5xwpqkd9'),\n",
" MoveFileTool(name='move_file', description='Move or rename a file from one location to another', args_schema=<class 'langchain.tools.file_management.move.FileMoveInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x123c72d00>, root_dir='/var/folders/bm/ylzhm36n075cslb9fvvbgq640000gn/T/tmp5xwpqkd9'),\n",
" ReadFileTool(name='read_file', description='Read file from disk', args_schema=<class 'langchain.tools.file_management.read.ReadFileInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x123c72d00>, root_dir='/var/folders/bm/ylzhm36n075cslb9fvvbgq640000gn/T/tmp5xwpqkd9'),\n",
" WriteFileTool(name='write_file', description='Write file to disk', args_schema=<class 'langchain.tools.file_management.write.WriteFileInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x123c72d00>, root_dir='/var/folders/bm/ylzhm36n075cslb9fvvbgq640000gn/T/tmp5xwpqkd9'),\n",
" ListDirectoryTool(name='list_directory', description='List files and directories in a specified folder', args_schema=<class 'langchain.tools.file_management.list_dir.DirectoryListingInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x123c72d00>, root_dir='/var/folders/bm/ylzhm36n075cslb9fvvbgq640000gn/T/tmp5xwpqkd9')]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"toolkit = FileManagementToolkit(root_dir=str(working_directory.name)) # If you don't provide a root_dir, operations will default to the current working directory\n",
"toolkit.get_tools()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Selecting File System Tools\n",
"\n",
"If you only want to select certain tools, you can pass them in as arguments when initializing the toolkit, or you can individually initialize the desired tools."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[ReadFileTool(name='read_file', description='Read file from disk', args_schema=<class 'langchain.tools.file_management.read.ReadFileInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x123c72d00>, root_dir=\"<TemporaryDirectory '/var/folders/bm/ylzhm36n075cslb9fvvbgq640000gn/T/tmp5xwpqkd9'>\"),\n",
" ListDirectoryTool(name='list_directory', description='List files and directories in a specified folder', args_schema=<class 'langchain.tools.file_management.list_dir.DirectoryListingInput'>, return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x123c72d00>, root_dir=\"<TemporaryDirectory '/var/folders/bm/ylzhm36n075cslb9fvvbgq640000gn/T/tmp5xwpqkd9'>\")]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"FileManagementToolkit(root_dir=str(working_directory), selected_tools=[\"read_file\", \"list_directory\"]).get_tools()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Connecting to an Agent\n",
"\n",
"\n",
"These tools can be passed to any agent."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import re\n",
"from typing import Union\n",
"from langchain import OpenAI\n",
"from langchain.agents import initialize_agent, AgentType\n",
"from langchain.agents.chat.output_parser import AgentAction, AgentFinish, ChatOutputParser, OutputParserException"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"tools = toolkit.get_tools()\n",
"# for tool in tools:\n",
"# args_schema = re.sub(\"}\", \"}}\", re.sub(\"{\", \"{{\", str(tool.args)))\n",
"# tool.description = tool.description + f\" Args Schema: {args_schema}\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"llm = OpenAI(temperature=0)\n",
"agent = initialize_agent(tools=tools, llm=llm, verbose=True, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Answer the following questions as best you can. You have access to the following tools:\n",
"\n",
"> copy_file: Create a copy of a file in a specified location\n",
"Args: {{'source_path': {{'title': 'Source Path', 'description': 'Path of the file to copy', 'type': 'string'}}, 'destination_path': {{'title': 'Destination Path', 'description': 'Path to save the copied file', 'type': 'string'}}}}\n",
"> file_delete: Delete a file\n",
"Args: {{'file_path': {{'title': 'File Path', 'description': 'Path of the file to delete', 'type': 'string'}}}}\n",
"> file_search: Recursively search for files in a subdirectory that match the regex pattern\n",
"Args: {{'dir_path': {{'title': 'Dir Path', 'description': 'Subdirectory to search in.', 'default': '.', 'type': 'string'}}, 'pattern': {{'title': 'Pattern', 'description': 'Unix shell regex, where * matches everything.', 'type': 'string'}}}}\n",
"> move_file: Move or rename a file from one location to another\n",
"Args: {{'source_path': {{'title': 'Source Path', 'description': 'Path of the file to move', 'type': 'string'}}, 'destination_path': {{'title': 'Destination Path', 'description': 'New path for the moved file', 'type': 'string'}}}}\n",
"> read_file: Read file from disk\n",
"Args: {{'file_path': {{'title': 'File Path', 'description': 'name of file', 'type': 'string'}}}}\n",
"> write_file: Write file to disk\n",
"Args: {{'file_path': {{'title': 'File Path', 'description': 'name of file', 'type': 'string'}}, 'text': {{'title': 'Text', 'description': 'text to write to file', 'type': 'string'}}, 'append': {{'title': 'Append', 'description': 'Whether to append to an existing file.', 'default': False, 'type': 'boolean'}}}}\n",
"> list_directory: List files and directories in a specified folder\n",
"Args: {{'dir_path': {{'title': 'Dir Path', 'description': 'Subdirectory to list.', 'default': '.', 'type': 'string'}}}}\n",
"\n",
"The way you use the tools is by specifying a json blob.\n",
"Specifically, this json should have a `action` key (with the name of the tool to use) and a `action_input` key (with the input to the tool going here).\n",
"\n",
"The only values that should be in the \"action\" field are: copy_file, file_delete, file_search, move_file, read_file, write_file, list_directory\n",
"\n",
"The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. Here is an example of a valid $JSON_BLOB:\n",
"\n",
"```\n",
"{{\n",
" \"action\": $TOOL_NAME,\n",
" \"action_input\": {{\n",
" ...\n",
" }}\n",
"}}\n",
"```\n",
"\n",
"ALWAYS use the following format:\n",
"\n",
"Question: the input question you must answer\n",
"Thought: you should always think about what to do\n",
"Action:\n",
"```\n",
"$JSON_BLOB\n",
"```\n",
"Observation: the result of the action\n",
"... (this Thought/Action/Observation can repeat N times)\n",
"Thought: I now know the final answer\n",
"Final Answer: the final answer to the original input question\n",
"\n",
"Begin! Reminder to always use the exact characters `Final Answer` when responding.\n"
]
}
],
"source": [
"print(agent.agent.llm_chain.prompt.messages[0].prompt.template)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mThought: I need to write a file to disk\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"write_file\",\n",
" \"action_input\": {\n",
" \"file_path\": \"secret_poem.txt\",\n",
" \"text\": \"The parrot spoke in a language so wise,\\nIt could outwit the cleverest of spies.\\nIt could solve puzzles and riddles galore,\\nAnd it knew more than any parrot before.\",\n",
" \"append\": false\n",
" }\n",
"}\n",
"```\n",
"\u001b[0m\n",
"Observation: \u001b[33;1m\u001b[1;3mFile written successfully to secret_poem.txt.\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: File written successfully to secret_poem.txt.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'input': \"Write a secret poem about a super-intelligent parrot to 'secret_poem.txt'\",\n",
" 'output': 'File written successfully to secret_poem.txt.'}"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent(\"Write a secret poem about a super-intelligent parrot to 'secret_poem.txt'\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Thought: I need to read a file\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"read_file\",\n",
" \"action_input\": {\n",
" \"file_path\": \"secret_poem.txt\"\n",
" }\n",
"}\n",
"```\n",
"\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mThe parrot spoke in a language so wise,\n",
"It could outwit the cleverest of spies.\n",
"It could solve puzzles and riddles galore,\n",
"And it knew more than any parrot before.\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: The parrot spoke in a language so wise, It could outwit the cleverest of spies. It could solve puzzles and riddles galore, And it knew more than any parrot before.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'input': 'Read the secret poem for me.',\n",
" 'output': 'The parrot spoke in a language so wise, It could outwit the cleverest of spies. It could solve puzzles and riddles galore, And it knew more than any parrot before.'}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This agent has no memory, but it can search the file system to find the answer\n",
"agent(\"Read the secret poem for me.\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Thought: I need to move a file from one location to another\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"move_file\",\n",
" \"action_input\": {\n",
" \"source_path\": \"secret_poem.txt\",\n",
" \"destination_path\": \"todo_list.txt\"\n",
" }\n",
"}\n",
"```\n",
"\u001b[0m\n",
"Observation: \u001b[31;1m\u001b[1;3mFile moved successfully from secret_poem.txt to todo_list.txt.\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: The secret poem has been moved from secret_poem.txt to todo_list.txt.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'input': \"Move the secret poem so people think it's just a todo list.\",\n",
" 'output': 'The secret poem has been moved from secret_poem.txt to todo_list.txt.'}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent(\"Move the secret poem so people think it's just a todo list.\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Thought: I need to copy a file\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"copy_file\",\n",
" \"action_input\": {\n",
" \"source_path\": \"todo_list.txt\",\n",
" \"destination_path\": \"copied_todo_list.txt\"\n",
" }\n",
"}\n",
"```\n",
"\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mFile copied successfully from todo_list.txt to copied_todo_list.txt.\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
"Final Answer: The todo list has been successfully copied.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'input': 'Copy the todo list please.',\n",
" 'output': 'The todo list has been successfully copied.'}"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent(\"Copy the todo list please.\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Thought: I need to delete a file\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"file_delete\",\n",
" \"action_input\": {\n",
" \"file_path\": \"todo_list.txt\"\n",
" }\n",
"}\n",
"```\n",
"\u001b[0m\n",
"Observation: \u001b[33;1m\u001b[1;3mFile deleted successfully: todo_list.txt.\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I have successfully deleted the file\n",
"Final Answer: File deleted successfully: todo_list.txt.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'input': 'Remove the original todo list file, please',\n",
" 'output': 'File deleted successfully: todo_list.txt.'}"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent(\"Remove the original todo list file, please\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[PosixPath('/var/folders/bm/ylzhm36n075cslb9fvvbgq640000gn/T/tmp5xwpqkd9/copied_todo_list.txt')]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The original file has been deleted; only the copy remains\n",
"list(Path(working_directory.name).rglob('*.txt'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Example searching for files in a directory**"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from uuid import uuid4\n",
"uuids = set()\n",
"sub_dir = Path(working_directory.name) / str(uuid4()) / str(uuid4())\n",
"sub_dir.mkdir(parents=True)\n",
"with (sub_dir / 'some_file.py').open('w') as f:\n",
" f.write(\"print(42)\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mThought: I need to find a python file and read its contents\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"file_search\",\n",
" \"action_input\": {\n",
" \"dir_path\": \".\",\n",
" \"pattern\": \"*.py\"\n",
" }\n",
"}\n",
"```\n",
"\u001b[0m\n",
"Observation: \u001b[38;5;200m\u001b[1;3mf8270807-dfb9-4519-b89c-534a2d96b58d/77f7d658-2da7-4ab5-8ad2-e094ae99e37f/some_file.py\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I need to read the contents of the file\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"read_file\",\n",
" \"action_input\": {\n",
" \"file_path\": \"f8270807-dfb9-4519-b89c-534a2d96b58d/77f7d658-2da7-4ab5-8ad2-e094ae99e37f/some_file.py\"\n",
" }\n",
"}\n",
"```\n",
"\n",
"\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mprint(42)\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I now know the contents of the file\n",
"Final Answer: The python file contains the line \"print(42)\".\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'input': 'Find a python file and tell me what it says',\n",
" 'output': 'The python file contains the line \"print(42)\".'}"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# It can search the file system to find files\n",
"agent(\"Find a python file and tell me what it says\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -1,6 +1,7 @@
from typing import Any, List, Optional, Sequence, Tuple
from pydantic import Field
import re
from langchain.agents.agent import Agent, AgentOutputParser
from langchain.agents.chat.output_parser import ChatOutputParser
@@ -62,10 +63,13 @@ class ChatAgent(Agent):
format_instructions: str = FORMAT_INSTRUCTIONS,
input_variables: Optional[List[str]] = None,
) -> BasePromptTemplate:
tool_strings = "\n".join([f"{tool.name}: {tool.description}" for tool in tools])
tool_strings = []
for tool in tools:
args_schema = re.sub("}", "}}", re.sub("{", "{{", str(tool.args)))
tool_strings.append(f"> {tool.name}: {tool.description}\nArgs: {args_schema}")
tool_names = ", ".join([tool.name for tool in tools])
format_instructions = format_instructions.format(tool_names=tool_names)
template = "\n\n".join([prefix, tool_strings, format_instructions, suffix])
template = "\n\n".join([prefix, "\n".join(tool_strings), format_instructions, suffix])
messages = [
SystemMessagePromptTemplate.from_template(template),
HumanMessagePromptTemplate.from_template("{input}\n\n{agent_scratchpad}"),

View File

@@ -13,14 +13,22 @@ class ChatOutputParser(AgentOutputParser):
return FORMAT_INSTRUCTIONS
def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
if FINAL_ANSWER_ACTION in text:
ticks_index = text.find("```")
final_answer_index = text.find(FINAL_ANSWER_ACTION)
if ticks_index != -1 and (
final_answer_index == -1 or ticks_index < final_answer_index
):
try:
action = text.split("```")[1]
response = json.loads(action.strip())
return AgentAction(response["action"], response["action_input"], text)
except Exception:
raise OutputParserException(f"Could not parse LLM output: {text}")
if final_answer_index != -1:
return AgentFinish(
{"output": text.split(FINAL_ANSWER_ACTION)[-1].strip()}, text
)
try:
action = text.split("```")[1]
response = json.loads(action.strip())
return AgentAction(response["action"], response["action_input"], text)
except Exception:
raise OutputParserException(f"Could not parse LLM output: {text}")
raise OutputParserException(f"Could not parse LLM output: {text}")

View File

@@ -10,7 +10,9 @@ The $JSON_BLOB should only contain a SINGLE action, do NOT return a list of mult
```
{{{{
"action": $TOOL_NAME,
"action_input": $INPUT
"action_input": {{{{
...
}}}}
}}}}
```

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
from typing import Any, List, Optional, Sequence, Tuple
import re
from pydantic import Field
@@ -64,9 +65,10 @@ class ConversationalChatAgent(Agent):
input_variables: Optional[List[str]] = None,
output_parser: Optional[BaseOutputParser] = None,
) -> BasePromptTemplate:
tool_strings = "\n".join(
[f"> {tool.name}: {tool.description}" for tool in tools]
)
tool_strings = []
for tool in tools:
args_schema = re.sub("}", "}}}}", re.sub("{", "{{{{", str(tool.args)))
tool_strings.append(f"> {tool.name}: {tool.description}\nArgs: {args_schema}")
tool_names = ", ".join([tool.name for tool in tools])
_output_parser = output_parser or cls._get_default_output_parser()
format_instructions = human_message.format(

View File

@@ -19,7 +19,7 @@ Markdown code snippet formatted in the following schema:
```json
{{{{
"action": string \\ The action to take. Must be one of {tool_names}
"action_input": string \\ The input to the action
"action_input": json blob \\ The input to the action
}}}}
```

View File

@@ -33,12 +33,14 @@ class Tool(BaseTool):
if self.args_schema is not None:
return self.args_schema.schema()["properties"]
else:
inferred_model = validate_arguments(self.func).model # type: ignore
return get_filtered_args(inferred_model, self.func)
return {'tool_input': {'type': 'string'}}
def _run(self, *args: Any, **kwargs: Any) -> str:
"""Use the tool."""
return self.func(*args, **kwargs)
foo = list(args) + list(kwargs.values())
if len(foo) != 1:
raise ValueError
return self.func(*foo)
async def _arun(self, *args: Any, **kwargs: Any) -> str:
"""Use the tool asynchronously."""
@@ -75,7 +77,7 @@ def tool(
*args: Union[str, Callable],
return_direct: bool = False,
args_schema: Optional[Type[BaseModel]] = None,
infer_schema: bool = True,
infer_schema: bool = False,
) -> Callable:
"""Make tools out of functions, can be used with or without arguments.

View File

@@ -0,0 +1,21 @@
"""File Management Tools."""
from langchain.tools.file_management.copy import CopyFileTool
from langchain.tools.file_management.delete import DeleteFileTool
from langchain.tools.file_management.file_search import FileSearchTool
from langchain.tools.file_management.list_dir import ListDirectoryTool
from langchain.tools.file_management.move import MoveFileTool
from langchain.tools.file_management.read import ReadFileTool
from langchain.tools.file_management.toolkit import FileManagementToolkit
from langchain.tools.file_management.write import WriteFileTool
# Names re-exported by this package: each tool class imported above plus
# the FileManagementToolkit.
__all__ = [
"CopyFileTool",
"DeleteFileTool",
"FileSearchTool",
"MoveFileTool",
"ReadFileTool",
"WriteFileTool",
"ListDirectoryTool",
"FileManagementToolkit",
]

View File

@@ -0,0 +1,46 @@
import shutil
from typing import Type
from pydantic import BaseModel, Field
from langchain.tools.file_management.utils import (
INVALID_PATH_TEMPLATE,
BaseFileTool,
FileValidationError,
)
class FileCopyInput(BaseModel):
    """Argument schema for CopyFileTool: where to copy from and where to copy to."""

    source_path: str = Field(
        ...,
        description="Path of the file to copy",
    )
    destination_path: str = Field(
        ...,
        description="Path to save the copied file",
    )
class CopyFileTool(BaseFileTool):
    """Tool that copies one file to another location via shutil.copy2."""

    name: str = "copy_file"
    args_schema: Type[BaseModel] = FileCopyInput
    description: str = "Create a copy of a file in a specified location"

    def _run(self, source_path: str, destination_path: str) -> str:
        """Copy source_path to destination_path and report the outcome as text.

        Both paths are resolved through get_relative_path, source first. A
        FileValidationError for either one produces the access-denied
        message for that argument; any exception during the copy is
        returned as an "Error: ..." string rather than raised.
        """
        resolved = {}
        # Validate in the same order as the arguments so the first bad path
        # is the one reported and the second is never resolved.
        for arg_name, raw_value in (
            ("source_path", source_path),
            ("destination_path", destination_path),
        ):
            try:
                resolved[arg_name] = self.get_relative_path(raw_value)
            except FileValidationError:
                return INVALID_PATH_TEMPLATE.format(
                    arg_name=arg_name, value=raw_value
                )
        try:
            shutil.copy2(
                resolved["source_path"],
                resolved["destination_path"],
                follow_symlinks=False,
            )
            return f"File copied successfully from {source_path} to {destination_path}."
        except Exception as exc:
            return "Error: " + str(exc)

    async def _arun(self, source_path: str, destination_path: str) -> str:
        """Async variant; not implemented."""
        # TODO: Add aiofiles method
        raise NotImplementedError

View File

@@ -0,0 +1,39 @@
import os
from typing import Type
from pydantic import BaseModel, Field
from langchain.tools.file_management.utils import (
INVALID_PATH_TEMPLATE,
BaseFileTool,
FileValidationError,
)
class FileDeleteInput(BaseModel):
    """Argument schema for DeleteFileTool: the single path to remove."""

    file_path: str = Field(
        ...,
        description="Path of the file to delete",
    )
class DeleteFileTool(BaseFileTool):
    """Tool that removes a single file with os.remove."""

    name: str = "file_delete"
    args_schema: Type[BaseModel] = FileDeleteInput
    description: str = "Delete a file"

    def _run(self, file_path: str) -> str:
        """Delete file_path and describe the result as a string.

        Returns the access-denied message when path validation fails, a
        "no such file" error when the resolved path does not exist, and an
        "Error: ..." string for any exception raised by os.remove.
        """
        try:
            target = self.get_relative_path(file_path)
        except FileValidationError:
            return INVALID_PATH_TEMPLATE.format(arg_name="file_path", value=file_path)
        if target.exists():
            try:
                os.remove(target)
                return f"File deleted successfully: {file_path}."
            except Exception as exc:
                return "Error: " + str(exc)
        return f"Error: no such file or directory: {file_path}"

    async def _arun(self, file_path: str) -> str:
        """Async variant; not implemented."""
        # TODO: Add aiofiles method
        raise NotImplementedError

View File

@@ -0,0 +1,55 @@
import fnmatch
import os
from typing import Type
from pydantic import BaseModel, Field
from langchain.tools.file_management.utils import (
INVALID_PATH_TEMPLATE,
BaseFileTool,
FileValidationError,
)
class FileSearchInput(BaseModel):
    """Argument schema for FileSearchTool: a directory and a filename pattern."""

    # Optional; the search starts at "." when omitted.
    dir_path: str = Field(default=".", description="Subdirectory to search in.")
    # Required; FileSearchTool hands this to fnmatch.filter.
    pattern: str = Field(
        ..., description="Unix shell regex, where * matches everything."
    )
class FileSearchTool(BaseFileTool):
    """Tool that walks a directory tree and lists filenames matching a pattern."""

    name: str = "file_search"
    args_schema: Type[BaseModel] = FileSearchInput
    description: str = (
        "Recursively search for files in a subdirectory that match the regex pattern"
    )

    def _run(self, pattern: str, dir_path: str = ".") -> str:
        """Return newline-joined paths, relative to dir_path, of matching files.

        Filenames are matched with fnmatch.filter while os.walk descends
        from the resolved directory. Returns the access-denied message when
        dir_path fails validation, a "No files found" message when nothing
        matches, and an "Error: ..." string for any exception during the walk.
        """
        try:
            search_root = self.get_relative_path(dir_path)
        except FileValidationError:
            return INVALID_PATH_TEMPLATE.format(arg_name="dir_path", value=dir_path)
        try:
            found = [
                os.path.relpath(os.path.join(parent, filename), search_root)
                for parent, _, filenames in os.walk(search_root)
                for filename in fnmatch.filter(filenames, pattern)
            ]
            if not found:
                return f"No files found for pattern {pattern} in directory {dir_path}"
            return "\n".join(found)
        except Exception as exc:
            return "Error: " + str(exc)

    async def _arun(self, dir_path: str, pattern: str) -> str:
        """Async variant; not implemented."""
        # TODO: Add aiofiles method
        raise NotImplementedError

View File

@@ -0,0 +1,40 @@
import os
from typing import Type
from pydantic import BaseModel, Field
from langchain.tools.file_management.utils import (
INVALID_PATH_TEMPLATE,
BaseFileTool,
FileValidationError,
)
class DirectoryListingInput(BaseModel):
    """Argument schema for ListDirectoryTool: the directory to list."""

    # Optional; "." is used when the caller supplies nothing.
    dir_path: str = Field(
        default=".",
        description="Subdirectory to list.",
    )
class ListDirectoryTool(BaseFileTool):
    """Tool that lists the entries of one directory using os.listdir."""

    name: str = "list_directory"
    args_schema: Type[BaseModel] = DirectoryListingInput
    description: str = "List files and directories in a specified folder"

    def _run(self, dir_path: str = ".") -> str:
        """Return the directory's entry names joined by newlines.

        Returns the access-denied message when dir_path fails validation, a
        "No files found" message for an empty directory, and an
        "Error: ..." string for any exception raised while listing.
        """
        try:
            listing_root = self.get_relative_path(dir_path)
        except FileValidationError:
            return INVALID_PATH_TEMPLATE.format(arg_name="dir_path", value=dir_path)
        try:
            names = os.listdir(listing_root)
            if not names:
                return f"No files found in directory {dir_path}"
            return "\n".join(names)
        except Exception as exc:
            return "Error: " + str(exc)

    async def _arun(self, dir_path: str) -> str:
        """Async variant; not implemented."""
        # TODO: Add aiofiles method
        raise NotImplementedError

View File

@@ -0,0 +1,49 @@
import shutil
from typing import Type
from pydantic import BaseModel, Field
from langchain.tools.file_management.utils import (
INVALID_PATH_TEMPLATE,
BaseFileTool,
FileValidationError,
)
class FileMoveInput(BaseModel):
    """Argument schema for MoveFileTool: the current path and the new path."""

    source_path: str = Field(
        ...,
        description="Path of the file to move",
    )
    destination_path: str = Field(
        ...,
        description="New path for the moved file",
    )
class MoveFileTool(BaseFileTool):
    """Tool that moves or renames a file via shutil.move."""

    name: str = "move_file"
    args_schema: Type[BaseModel] = FileMoveInput
    description: str = "Move or rename a file from one location to another"

    def _run(self, source_path: str, destination_path: str) -> str:
        """Move source_path to destination_path and report the outcome as text.

        Args:
            source_path: Path of the file to move, as supplied by the caller.
            destination_path: Path the file should end up at.

        Returns:
            A success message, the access-denied message naming whichever
            argument failed path validation, a "no such file" error when
            the resolved source does not exist, or an "Error: ..." string
            for any exception raised by the move itself.
        """
        try:
            source_path_ = self.get_relative_path(source_path)
        except FileValidationError:
            return INVALID_PATH_TEMPLATE.format(
                arg_name="source_path", value=source_path
            )
        try:
            destination_path_ = self.get_relative_path(destination_path)
        except FileValidationError:
            # Report the caller-supplied value: the resolved name is never
            # bound when validation raises, so reading it here would itself
            # raise UnboundLocalError instead of returning the message.
            return INVALID_PATH_TEMPLATE.format(
                arg_name="destination_path", value=destination_path
            )
        if not source_path_.exists():
            return f"Error: no such file or directory {source_path}"
        try:
            # shutil.move expects str args in 3.8
            shutil.move(str(source_path_), str(destination_path_))
            return f"File moved successfully from {source_path} to {destination_path}."
        except Exception as e:
            return "Error: " + str(e)

    async def _arun(self, source_path: str, destination_path: str) -> str:
        """Async variant; not implemented."""
        # TODO: Add aiofiles method
        raise NotImplementedError

View File

@@ -1,10 +1,12 @@
from pathlib import Path
from typing import Optional, Type
from typing import Type
from pydantic import BaseModel, Field
from langchain.tools.base import BaseTool
from langchain.tools.file_management.utils import get_validated_relative_path
from langchain.tools.file_management.utils import (
INVALID_PATH_TEMPLATE,
BaseFileTool,
FileValidationError,
)
class ReadFileInput(BaseModel):
@@ -13,21 +15,18 @@ class ReadFileInput(BaseModel):
file_path: str = Field(..., description="name of file")
class ReadFileTool(BaseTool):
class ReadFileTool(BaseFileTool):
name: str = "read_file"
args_schema: Type[BaseModel] = ReadFileInput
description: str = "Read file from disk"
root_dir: Optional[str] = None
"""Directory to read file from.
If specified, raises an error for file_paths oustide root_dir."""
def _run(self, file_path: str) -> str:
read_path = (
get_validated_relative_path(Path(self.root_dir), file_path)
if self.root_dir
else Path(file_path)
)
try:
read_path = self.get_relative_path(file_path)
except FileValidationError:
return INVALID_PATH_TEMPLATE.format(arg_name="file_path", value=file_path)
if not read_path.exists():
return f"Error: no such file or directory: {file_path}"
try:
with read_path.open("r", encoding="utf-8") as f:
content = f.read()
@@ -35,6 +34,6 @@ class ReadFileTool(BaseTool):
except Exception as e:
return "Error: " + str(e)
async def _arun(self, tool_input: str) -> str:
async def _arun(self, file_path: str) -> str:
# TODO: Add aiofiles method
raise NotImplementedError

View File

@@ -0,0 +1,58 @@
"""Toolkit for interacting with the local filesystem."""
from __future__ import annotations
from typing import List, Optional
from pydantic import root_validator
from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.tools import BaseTool
from langchain.tools.file_management.copy import CopyFileTool
from langchain.tools.file_management.delete import DeleteFileTool
from langchain.tools.file_management.file_search import FileSearchTool
from langchain.tools.file_management.list_dir import ListDirectoryTool
from langchain.tools.file_management.move import MoveFileTool
from langchain.tools.file_management.read import ReadFileTool
from langchain.tools.file_management.write import WriteFileTool
# Registry used by FileManagementToolkit: maps the default value of each tool
# class's `name` field to the class itself.
_FILE_TOOLS = {
tool_cls.__fields__["name"].default: tool_cls
for tool_cls in [
CopyFileTool,
DeleteFileTool,
FileSearchTool,
MoveFileTool,
ReadFileTool,
WriteFileTool,
ListDirectoryTool,
]
}
class FileManagementToolkit(BaseToolkit):
    """Toolkit that bundles the local file management tools."""

    root_dir: Optional[str] = None
    """Passed to every tool built by get_tools as its root_dir."""
    selected_tools: Optional[List[str]] = None
    """Names of the tools to expose; every registered tool when unset or empty."""

    @root_validator
    def validate_tools(cls, values: dict) -> dict:
        """Raise ValueError if any selected tool name is not in _FILE_TOOLS."""
        requested = values.get("selected_tools") or []
        for tool_name in requested:
            if tool_name in _FILE_TOOLS:
                continue
            raise ValueError(
                f"File Tool of name {tool_name} not supported."
                f" Permitted tools: {list(_FILE_TOOLS)}"
            )
        return values

    def get_tools(self) -> List[BaseTool]:
        """Instantiate the selected tools (or all of them) with this root_dir."""
        tool_names = self.selected_tools or _FILE_TOOLS.keys()
        return [_FILE_TOOLS[tool_name](root_dir=self.root_dir) for tool_name in tool_names]

View File

@@ -1,5 +1,10 @@
import sys
from pathlib import Path
from typing import Any, Optional
from pydantic import BaseModel
from langchain.tools.base import BaseTool
def is_relative_to(path: Path, root: Path) -> bool:
@@ -14,6 +19,35 @@ def is_relative_to(path: Path, root: Path) -> bool:
return False
# Message template returned by the file tools when a path fails validation.
# Format it with `arg_name` (the offending argument) and `value` (what was passed).
INVALID_PATH_TEMPLATE = "".join(
    (
        "Error: Access denied to {arg_name}: {value}.",
        " Permission granted exclusively to the current working directory",
    )
)
class FileValidationError(ValueError):
    """Raised when a requested path resolves outside the permitted root directory.

    Subclasses ValueError, so handlers written for ValueError still catch it.
    """
class BaseFileTool(BaseTool, BaseModel):
    """Shared base for the file tools: stores root_dir and resolves user paths."""

    root_dir: Optional[str] = None
    """When set, user-supplied paths are resolved and validated against it."""

    def get_relative_path(self, file_path: str) -> Path:
        """Turn a user-supplied path into the Path the tool should operate on.

        With no root_dir the path is wrapped in Path unchanged; otherwise it
        is delegated to get_validated_relative_path, whose exceptions
        propagate to the caller.
        """
        if self.root_dir is not None:
            return get_validated_relative_path(Path(self.root_dir), file_path)
        return Path(file_path)

    def _run(self, *args: Any, **kwargs: Any) -> str:
        """Placeholder; raises NotImplementedError unless overridden."""
        raise NotImplementedError

    async def _arun(self, *args: Any, **kwargs: Any) -> str:
        """Async placeholder; raises NotImplementedError unless overridden."""
        raise NotImplementedError
def get_validated_relative_path(root: Path, user_path: str) -> Path:
"""Resolve a relative path, raising an error if not within the root directory."""
# Note, this still permits symlinks from outside that point within the root.
@@ -22,5 +56,7 @@ def get_validated_relative_path(root: Path, user_path: str) -> Path:
full_path = (root / user_path).resolve()
if not is_relative_to(full_path, root):
raise ValueError(f"Path {user_path} is outside of the allowed directory {root}")
raise FileValidationError(
f"Path {user_path} is outside of the allowed directory {root}"
)
return full_path

View File

@@ -1,10 +1,12 @@
from pathlib import Path
from typing import Optional, Type
from typing import Type
from pydantic import BaseModel, Field
from langchain.tools.base import BaseTool
from langchain.tools.file_management.utils import get_validated_relative_path
from langchain.tools.file_management.utils import (
INVALID_PATH_TEMPLATE,
BaseFileTool,
FileValidationError,
)
class WriteFileInput(BaseModel):
@@ -12,31 +14,30 @@ class WriteFileInput(BaseModel):
file_path: str = Field(..., description="name of file")
text: str = Field(..., description="text to write to file")
append: bool = Field(
default=False, description="Whether to append to an existing file."
)
class WriteFileTool(BaseTool):
class WriteFileTool(BaseFileTool):
name: str = "write_file"
args_schema: Type[BaseModel] = WriteFileInput
description: str = "Write file to disk"
root_dir: Optional[str] = None
"""Directory to write file to.
If specified, raises an error for file_paths oustide root_dir."""
def _run(self, file_path: str, text: str) -> str:
write_path = (
get_validated_relative_path(Path(self.root_dir), file_path)
if self.root_dir
else Path(file_path)
)
def _run(self, file_path: str, text: str, append: bool = False) -> str:
try:
write_path = self.get_relative_path(file_path)
except FileValidationError:
return INVALID_PATH_TEMPLATE.format(arg_name="file_path", value=file_path)
try:
write_path.parent.mkdir(exist_ok=True, parents=False)
with write_path.open("w", encoding="utf-8") as f:
mode = "a" if append else "w"
with write_path.open(mode, encoding="utf-8") as f:
f.write(text)
return f"File written successfully to {file_path}."
except Exception as e:
return "Error: " + str(e)
async def _arun(self, file_path: str, text: str) -> str:
async def _arun(self, file_path: str, text: str, append: bool = False) -> str:
# TODO: Add aiofiles method
raise NotImplementedError

View File

@@ -0,0 +1,54 @@
"""Test the FileCopy tool."""
from pathlib import Path
from tempfile import TemporaryDirectory
from langchain.tools.file_management.copy import CopyFileTool
from langchain.tools.file_management.utils import (
INVALID_PATH_TEMPLATE,
)
def test_copy_file_with_root_dir() -> None:
    """Copy by relative paths when the tool is confined to a root dir."""
    payload = "Hello, world!"
    with TemporaryDirectory() as temp_dir:
        tool = CopyFileTool(root_dir=temp_dir)
        root = Path(temp_dir)
        source_file, destination_file = root / "source.txt", root / "destination.txt"
        source_file.write_text(payload)
        tool.run({"source_path": "source.txt", "destination_path": "destination.txt"})
        # The source must survive the copy and both files must hold the payload.
        for copied in (source_file, destination_file):
            assert copied.exists()
        for copied in (source_file, destination_file):
            assert copied.read_text() == payload
def test_copy_file_errs_outside_root_dir() -> None:
    """Paths escaping the root dir yield the access-denied message for source_path."""
    escaping_source = "../source.txt"
    with TemporaryDirectory() as temp_dir:
        tool = CopyFileTool(root_dir=temp_dir)
        tool_input = {
            "source_path": escaping_source,
            "destination_path": "../destination.txt",
        }
        result = tool.run(tool_input)
        expected = INVALID_PATH_TEMPLATE.format(
            arg_name="source_path", value=escaping_source
        )
        assert result == expected
def test_copy_file() -> None:
    """Copy by absolute paths when no root dir is configured."""
    payload = "Hello, world!"
    with TemporaryDirectory() as temp_dir:
        tool = CopyFileTool()
        root = Path(temp_dir)
        source_file, destination_file = root / "source.txt", root / "destination.txt"
        source_file.write_text(payload)
        tool_input = {
            "source_path": str(source_file),
            "destination_path": str(destination_file),
        }
        tool.run(tool_input)
        for copied in (source_file, destination_file):
            assert copied.exists()
        for copied in (source_file, destination_file):
            assert copied.read_text() == payload

View File

@@ -0,0 +1,43 @@
"""Test the FileSearch tool."""
from pathlib import Path
from tempfile import TemporaryDirectory
from langchain.tools.file_management.file_search import FileSearchTool
from langchain.tools.file_management.utils import (
INVALID_PATH_TEMPLATE,
)
def test_file_search_with_root_dir() -> None:
    """Search "." under a root dir and expect only the .txt file back."""
    fixtures = (("file1.txt", "File 1 content"), ("file2.log", "File 2 content"))
    with TemporaryDirectory() as temp_dir:
        tool = FileSearchTool(root_dir=temp_dir)
        for filename, content in fixtures:
            (Path(temp_dir) / filename).write_text(content)
        output = tool.run({"dir_path": ".", "pattern": "*.txt"})
        matches = output.split("\n")
        assert len(matches) == 1
        assert Path(matches[0]).name == "file1.txt"
def test_file_search_errs_outside_root_dir() -> None:
    """Searching the parent of the root dir yields the access-denied message."""
    with TemporaryDirectory() as temp_dir:
        tool = FileSearchTool(root_dir=temp_dir)
        tool_input = {"dir_path": "..", "pattern": "*.txt"}
        result = tool.run(tool_input)
        expected = INVALID_PATH_TEMPLATE.format(arg_name="dir_path", value="..")
        assert result == expected
def test_file_search() -> None:
    """Search an explicit directory with no root dir configured."""
    fixtures = (("file1.txt", "File 1 content"), ("file2.log", "File 2 content"))
    with TemporaryDirectory() as temp_dir:
        tool = FileSearchTool()
        for filename, content in fixtures:
            (Path(temp_dir) / filename).write_text(content)
        output = tool.run({"dir_path": temp_dir, "pattern": "*.txt"})
        matches = output.split("\n")
        assert len(matches) == 1
        assert Path(matches[0]).name == "file1.txt"

View File

@@ -0,0 +1,41 @@
"""Test the DirectoryListing tool."""
from pathlib import Path
from tempfile import TemporaryDirectory
from langchain.tools.file_management.list_dir import ListDirectoryTool
from langchain.tools.file_management.utils import (
INVALID_PATH_TEMPLATE,
)
def test_list_directory_with_root_dir() -> None:
    """List "." under a root dir and expect exactly the two created files."""
    fixtures = {"file1.txt": "File 1 content", "file2.txt": "File 2 content"}
    with TemporaryDirectory() as temp_dir:
        tool = ListDirectoryTool(root_dir=temp_dir)
        for filename, content in fixtures.items():
            (Path(temp_dir) / filename).write_text(content)
        listing = tool.run({"dir_path": "."})
        entries = listing.split("\n")
        assert set(entries) == {"file1.txt", "file2.txt"}
def test_list_directory_errs_outside_root_dir() -> None:
    """Listing the parent of the root dir yields the access-denied message."""
    with TemporaryDirectory() as temp_dir:
        tool = ListDirectoryTool(root_dir=temp_dir)
        tool_input = {"dir_path": ".."}
        result = tool.run(tool_input)
        expected = INVALID_PATH_TEMPLATE.format(arg_name="dir_path", value="..")
        assert result == expected
def test_list_directory() -> None:
    """List an explicit directory with no root dir configured."""
    fixtures = {"file1.txt": "File 1 content", "file2.txt": "File 2 content"}
    with TemporaryDirectory() as temp_dir:
        tool = ListDirectoryTool()
        for filename, content in fixtures.items():
            (Path(temp_dir) / filename).write_text(content)
        listing = tool.run({"dir_path": temp_dir})
        entries = listing.split("\n")
        assert set(entries) == {"file1.txt", "file2.txt"}

View File

@@ -0,0 +1,52 @@
"""Test the FileMove tool."""
from pathlib import Path
from tempfile import TemporaryDirectory
from langchain.tools.file_management.move import MoveFileTool
from langchain.tools.file_management.utils import (
INVALID_PATH_TEMPLATE,
)
def test_move_file_with_root_dir() -> None:
    """Move by relative paths when the tool is confined to a root dir."""
    payload = "Hello, world!"
    with TemporaryDirectory() as temp_dir:
        tool = MoveFileTool(root_dir=temp_dir)
        root = Path(temp_dir)
        source_file, destination_file = root / "source.txt", root / "destination.txt"
        source_file.write_text(payload)
        tool_input = {"source_path": "source.txt", "destination_path": "destination.txt"}
        tool.run(tool_input)
        # A move leaves nothing behind at the source.
        assert not source_file.exists()
        assert destination_file.exists()
        assert destination_file.read_text() == payload
def test_move_file_errs_outside_root_dir() -> None:
    """Paths escaping the root dir yield the access-denied message for source_path."""
    escaping_source = "../source.txt"
    with TemporaryDirectory() as temp_dir:
        tool = MoveFileTool(root_dir=temp_dir)
        tool_input = {
            "source_path": escaping_source,
            "destination_path": "../destination.txt",
        }
        result = tool.run(tool_input)
        expected = INVALID_PATH_TEMPLATE.format(
            arg_name="source_path", value=escaping_source
        )
        assert result == expected
def test_move_file() -> None:
    """Move by absolute paths when no root dir is configured."""
    payload = "Hello, world!"
    with TemporaryDirectory() as temp_dir:
        tool = MoveFileTool()
        root = Path(temp_dir)
        source_file, destination_file = root / "source.txt", root / "destination.txt"
        source_file.write_text(payload)
        tool_input = {
            "source_path": str(source_file),
            "destination_path": str(destination_file),
        }
        tool.run(tool_input)
        assert not source_file.exists()
        assert destination_file.exists()
        assert destination_file.read_text() == payload

View File

@@ -0,0 +1,46 @@
"""Test the FileManagementToolkit."""
from tempfile import TemporaryDirectory
import pytest
from langchain.tools.base import BaseTool
from langchain.tools.file_management.toolkit import FileManagementToolkit
def test_file_toolkit_get_tools() -> None:
    """With no selection, get_tools returns a non-empty list of BaseTool instances."""
    with TemporaryDirectory() as temp_dir:
        tools = FileManagementToolkit(root_dir=temp_dir).get_tools()
        assert len(tools) > 0
        for tool in tools:
            assert isinstance(tool, BaseTool)
def test_file_toolkit_get_tools_with_selection() -> None:
    """get_tools returns exactly the tools named in selected_tools."""
    wanted = ["read_file", "write_file"]
    with TemporaryDirectory() as temp_dir:
        toolkit = FileManagementToolkit(root_dir=temp_dir, selected_tools=wanted)
        tools = toolkit.get_tools()
        assert len(tools) == 2
        tool_names = [tool.name for tool in tools]
        assert "read_file" in tool_names
        assert "write_file" in tool_names
def test_file_toolkit_invalid_tool() -> None:
    """Constructing the toolkit with an unknown tool name raises ValueError."""
    with TemporaryDirectory() as temp_dir, pytest.raises(ValueError):
        FileManagementToolkit(root_dir=temp_dir, selected_tools=["invalid_tool"])
def test_file_toolkit_root_dir() -> None:
    """Every tool that has a root_dir attribute carries the toolkit's root_dir."""
    with TemporaryDirectory() as temp_dir:
        tools = FileManagementToolkit(root_dir=temp_dir).get_tools()
        for tool in tools:
            # Tools without a root_dir attribute are outside this check.
            if not hasattr(tool, "root_dir"):
                continue
            assert tool.root_dir == temp_dir

View File

@@ -6,7 +6,10 @@ from tempfile import TemporaryDirectory
import pytest
from langchain.tools.file_management.utils import get_validated_relative_path
from langchain.tools.file_management.utils import (
FileValidationError,
get_validated_relative_path,
)
def test_get_validated_relative_path_errs_on_absolute() -> None:
@@ -14,7 +17,7 @@ def test_get_validated_relative_path_errs_on_absolute() -> None:
root = Path(__file__).parent
user_path = "/bin/bash"
matches = f"Path {user_path} is outside of the allowed directory {root}"
with pytest.raises(ValueError, match=matches):
with pytest.raises(FileValidationError, match=matches):
get_validated_relative_path(root, user_path)
@@ -23,7 +26,7 @@ def test_get_validated_relative_path_errs_on_parent_dir() -> None:
root = Path(__file__).parent
user_path = "data/sub/../../../sibling"
matches = f"Path {user_path} is outside of the allowed directory {root}"
with pytest.raises(ValueError, match=matches):
with pytest.raises(FileValidationError, match=matches):
get_validated_relative_path(root, user_path)
@@ -49,7 +52,7 @@ def test_get_validated_relative_path_errs_for_symlink_outside_root() -> None:
matches = (
f"Path {user_path} is outside of the allowed directory {root.resolve()}"
)
with pytest.raises(ValueError, match=matches):
with pytest.raises(FileValidationError, match=matches):
get_validated_relative_path(root, user_path)
symlink_path.unlink()

View File

@@ -3,8 +3,9 @@
from pathlib import Path
from tempfile import TemporaryDirectory
import pytest
from langchain.tools.file_management.utils import (
INVALID_PATH_TEMPLATE,
)
from langchain.tools.file_management.write import WriteFileTool
@@ -21,8 +22,10 @@ def test_write_file_errs_outside_root_dir() -> None:
"""Test the WriteFile tool when a root dir is specified."""
with TemporaryDirectory() as temp_dir:
tool = WriteFileTool(root_dir=temp_dir)
with pytest.raises(ValueError):
tool.run({"file_path": "../file.txt", "text": "Hello, world!"})
result = tool.run({"file_path": "../file.txt", "text": "Hello, world!"})
assert result == INVALID_PATH_TEMPLATE.format(
arg_name="file_path", value="../file.txt"
)
def test_write_file() -> None: