stash

2026-03-18 11:07:36 +00:00 · 2023-04-18 19:09:26 -07:00
parent a5c996189c
commit 8aa784bfbd
1 changed files with 657 additions and 0 deletions
--- a/docs/use_cases/autonomous_agents/marathon_times.ipynb
+++ b/docs/use_cases/autonomous_agents/marathon_times.ipynb
@@ -0,0 +1,657 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "14f8b67b",
+   "metadata": {},
+   "source": [
+    "## AutoGPT\n",
+    "\n",
+    "* Implementation of https://github.com/Significant-Gravitas/Auto-GPT \n",
+    "* With LangChain primitives (LLMs, PromptTemplates, VectorStores, Embeddings, Tools)\n",
+    "\n",
+    "### Workflow\n",
+    "\n",
+    "[Temporary] Ensure you have the correct LangChain version installed locally\n",
+    "  \n",
+    "*  `export SERPAPI_API_KEY`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a0866af1-43a8-4160-a9cf-9ffe64074493",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !pip install playwright\n",
+    "# !playwright install  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d27e21c5-8556-46bf-936a-f6e119ebd3a0",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# ! playwright install  "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1cff42fd",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# General \n",
+    "from datetime import datetime\n",
+    "import os\n",
+    "import pandas as pd\n",
+    "from typing import Optional\n",
+    "import matplotlib.pyplot as plt\n",
+    "from langchain.experimental.autonomous_agents.autogpt.agent import AutoGPT\n",
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from langchain.utilities import SerpAPIWrapper\n",
+    "from langchain.tools.human.tool import HumanInputRun\n",
+    "from langchain.tools.file_management.read import ReadFileTool\n",
+    "from langchain.tools.file_management.write import WriteFileTool\n",
+    "from langchain.tools.wikipedia.tool import WikipediaQueryRun\n",
+    "from langchain.utilities import WikipediaAPIWrapper\n",
+    "\n",
+    "from langchain.document_loaders.url_selenium import SeleniumURLLoader\n",
+    "from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "338dfc5d-178a-49ba-9187-bd08b648ab29",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from langchain.document_loaders.url_playwright import PlaywrightURLLoader, Document"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "192496a7",
+   "metadata": {},
+   "source": [
+    "### Set up tools\n",
+    "\n",
+    "* We'll set up an AutoGPT with a `search` tool, a `write-file` tool, and a `read-file` tool"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "708a426f",
+   "metadata": {},
+   "source": [
+    "Define any `tools` you want to use here"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cef4c150-0ef1-4a33-836b-01062fec134e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Tools\n",
+    "from typing import Optional\n",
+    "from langchain.agents import Tool, tool\n",
+    "from langchain.utilities import SerpAPIWrapper\n",
+    "from langchain.chains.llm_requests import LLMRequestsChain\n",
+    "from langchain.tools.file_management.read import ReadFileTool\n",
+    "from langchain.tools.file_management.write import WriteFileTool\n",
+    "from langchain.tools.requests.tool import RequestsGetTool, TextRequestsWrapper\n",
+    "\n",
+    "@tool\n",
+    "def process_csv(csv_file_path: str, instructions: str, output_path: Optional[str] = None) -> str:\n",
+    "    \"\"\"Process a CSV with pandas in a limited REPL. Only use this after writing data to disk as a csv file. Any figures must be saved to disk to be viewed by the human. Instructions should be written in natural language, not code. Assume the dataframe is already loaded.\"\"\"\n",
+    "    # Load the CSV up front; a failure is returned as text rather than raised.\n",
+    "    try:\n",
+    "        df = pd.read_csv(csv_file_path)\n",
+    "    except Exception as e:\n",
+    "        return f\"Error: {e}\"\n",
+    "    # `llm` is a notebook-level name and must be defined before this tool is called.\n",
+    "    agent = create_pandas_dataframe_agent(llm, df, max_iterations=30, verbose=True)\n",
+    "    if output_path is not None:\n",
+    "        # Pass the destination to the pandas agent as part of its natural-language task.\n",
+    "        instructions += f\" Save output to disk at {output_path}\"\n",
+    "    # Errors raised while the pandas agent runs are also reported as text.\n",
+    "    try:\n",
+    "        return agent.run(instructions)\n",
+    "    except Exception as e:\n",
+    "        return f\"Error: {e}\"\n",
+    "    \n",
+    "@tool\n",
+    "def show_image(image_path: str) -> str:\n",
+    "    \"\"\"Show an image from disk\"\"\"\n",
+    "    # Only the read is guarded; a read failure is reported as text.\n",
+    "    try:\n",
+    "        pixels = plt.imread(image_path)\n",
+    "    except Exception as err:\n",
+    "        return f\"Error: {err}\"\n",
+    "    else:\n",
+    "        plt.imshow(pixels)\n",
+    "        return f\"Showed image at {image_path}\"\n",
+    "\n",
+    "@tool\n",
+    "def current_time() -> str:\n",
+    "    \"\"\"Show the current time\"\"\"\n",
+    "    # `datetime` is the class brought in by `from datetime import datetime`, so `.now()` is called on it directly.\n",
+    "    return f\"Current time: {datetime.now()}\"\n",
+    "\n",
+    "@tool\n",
+    "def scrape_links(url: str) -> str:\n",
+    "    \"\"\"Scrape links from a webpage.\"\"\"\n",
+    "    # Local imports keep this tool self-contained within the notebook.\n",
+    "    from urllib.parse import urljoin\n",
+    "    from bs4 import BeautifulSoup\n",
+    "\n",
+    "    # Fetch the page text; a failure is returned as text rather than raised.\n",
+    "    try:\n",
+    "        html = TextRequestsWrapper().get(url)\n",
+    "    except Exception as e:\n",
+    "        return f\"Error: {e}\"\n",
+    "    if not html:\n",
+    "        return \"Error: Could not get response\"\n",
+    "    soup = BeautifulSoup(html, \"html.parser\")\n",
+    "\n",
+    "    for script in soup([\"script\", \"style\"]):\n",
+    "        script.extract()\n",
+    "\n",
+    "    # Resolve each href against the page URL and emit one \"text (url)\" entry per line.\n",
+    "    hyperlinks = [(a.get_text(strip=True), urljoin(url, a[\"href\"])) for a in soup.find_all(\"a\", href=True)]\n",
+    "\n",
+    "    return \"\\n\".join(f\"{text} ({link})\" for text, link in hyperlinks)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "51c07298-00e0-42d6-8aff-bd2e6bbd35a3",
+   "metadata": {},
+   "source": [
+    "### Free DuckDuckGo (DDG) Search"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4afdedb2-f295-4ab8-9397-3640f5eeeed3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# !pip install duckduckgo_search"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "45f143de-e49e-4e27-88eb-ee44a4fdf933",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "from duckduckgo_search import ddg"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e2e799f4-86fb-4190-a298-4ae5c7b7a540",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "@tool\n",
+    "def search(query: str, num_results: int = 8) -> str:\n",
+    "    \"\"\"Useful for general internet search queries.\"\"\"\n",
+    "    # An empty query skips the lookup; it and an empty result set both serialize to a bare \"[]\".\n",
+    "    hits = ddg(query, max_results=num_results) if query else None\n",
+    "    if not hits:\n",
+    "        return json.dumps([])\n",
+    "\n",
+    "    return json.dumps(list(hits), ensure_ascii=False, indent=4)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fbd24d78-93a1-4645-bb3a-cac6fb2fc1a2",
+   "metadata": {},
+   "source": [
+    "### Define Web Browser Tool\n",
+    "\n",
+    "This is optional but can provide more information to the model relative to the other tools"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e3dcd773-a462-404e-b3dd-822a5f773e0d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# !pip install playwright\n",
+    "# !playwright install  >/dev/null\n",
+    "# !pip install bs4\n",
+    "# !pip install nest_asyncio"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "70856c54-aa54-42ca-bf4e-53950bfa6e5d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from langchain.docstore.document import Document\n",
+    "from langchain.chains.summarize import load_summarize_chain\n",
+    "from langchain.chains import RetrievalQA\n",
+    "\n",
+    "# chain = load_summarize_chain(llm)\n",
+    "\n",
+    "# @tool\n",
+    "# def summarize_text(text: str) -> str:\n",
+    "#     \"\"\"Summarize the provided text. You have a limited memory, so summarization is useful.\"\"\"\n",
+    "#     docs = [Document(page_content=text)]\n",
+    "#     return chain.run(docs).strip()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ed22eb3f-f374-4dc5-bc44-57f7468bff51",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+    "from langchain.document_loaders import WebBaseLoader\n",
+    "\n",
+    "text_splitter = RecursiveCharacterTextSplitter(\n",
+    "    # Set a really small chunk size, just to show.\n",
+    "    chunk_size = 500,\n",
+    "    chunk_overlap  = 20,\n",
+    "    length_function = len,\n",
+    ")\n",
+    "\n",
+    "loader = WebBaseLoader(\"https://beta.ruff.rs/docs/faq/\")\n",
+    "docs = loader.load()\n",
+    "ruff_texts = text_splitter.split_documents(docs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "26b497d7-8e52-4c7f-8e7e-da0a48820a3c",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "async def async_load_playwright(url: str) -> str:\n",
+    "    \"\"\"Load a single URL with Playwright and return its text content.\n",
+    "\n",
+    "    The page is opened in headless Chromium, parsed with BeautifulSoup,\n",
+    "    stripped of script/style elements, and reduced to one non-empty text\n",
+    "    chunk per line.\n",
+    "\n",
+    "    Args:\n",
+    "        url: Address of the page to load.\n",
+    "\n",
+    "    Returns:\n",
+    "        The extracted text, or a string of the form \"Error: ...\" if loading\n",
+    "        or parsing raised an exception.\n",
+    "    \"\"\"\n",
+    "    from bs4 import BeautifulSoup\n",
+    "    from playwright.async_api import async_playwright\n",
+    "\n",
+    "    results = \"\"\n",
+    "    async with async_playwright() as p:\n",
+    "        browser = await p.chromium.launch(headless=True)\n",
+    "        try:\n",
+    "            page = await browser.new_page()\n",
+    "            await page.goto(url)\n",
+    "\n",
+    "            page_source = await page.content()\n",
+    "            soup = BeautifulSoup(page_source, \"html.parser\")\n",
+    "\n",
+    "            # Drop script and style elements so only text content remains.\n",
+    "            for script in soup([\"script\", \"style\"]):\n",
+    "                script.extract()\n",
+    "\n",
+    "            # Strip each line, split on double spaces, and keep the non-empty pieces.\n",
+    "            text = soup.get_text()\n",
+    "            lines = (line.strip() for line in text.splitlines())\n",
+    "            chunks = (phrase.strip() for line in lines for phrase in line.split(\"  \"))\n",
+    "            results = \"\\n\".join(chunk for chunk in chunks if chunk)\n",
+    "        except Exception as e:\n",
+    "            results = f\"Error: {e}\"\n",
+    "        finally:\n",
+    "            # Close the browser on every exit path, including cancellation.\n",
+    "            await browser.close()\n",
+    "    return results\n",
+    "\n",
+    "def run_async(coro):\n",
+    "    \"\"\"Run a coroutine to completion from synchronous code and return its result.\"\"\"\n",
+    "    import asyncio\n",
+    "    import nest_asyncio\n",
+    "    # Patch asyncio before grabbing the loop so run_until_complete can be used on a loop that is already running.\n",
+    "    nest_asyncio.apply()\n",
+    "    return asyncio.get_event_loop().run_until_complete(coro)\n",
+    "\n",
+    "@tool\n",
+    "def browse_web_page(url: str) -> str:\n",
+    "    \"\"\"Useful for browsing websites and scraping the text information.\"\"\"\n",
+    "    # Bridge from the synchronous tool interface to the async Playwright loader.\n",
+    "    page_text = run_async(async_load_playwright(url))\n",
+    "    return page_text\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "75c3e2e3-b257-4237-82b1-7a804dd54e6a",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "de963739-8d7d-4661-a7ed-71a6fe7e2596",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# !pip install wikipedia >/dev/null"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "db32f7b5-b315-4d47-8b0d-9103e673c1eb",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "wikipedia_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8e39ee28",
+   "metadata": {},
+   "source": [
+    "### Set up memory\n",
+    "\n",
+    "* The memory here is used for the agent's intermediate steps"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1df7b724",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Memory\n",
+    "import faiss\n",
+    "from langchain.vectorstores import FAISS\n",
+    "from langchain.docstore import InMemoryDocstore\n",
+    "from langchain.embeddings import OpenAIEmbeddings\n",
+    "\n",
+    "embeddings_model = OpenAIEmbeddings()\n",
+    "embedding_size = 1536\n",
+    "index = faiss.IndexFlatL2(embedding_size)\n",
+    "vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e40fd657",
+   "metadata": {},
+   "source": [
+    "### Setup model and AutoGPT\n",
+    "\n",
+    "`Model set-up`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e86326b3-af8c-4165-a5a6-38f64c79f921",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from typing import Any\n",
+    "from langchain.schema import BaseRetriever, BaseLanguageModel\n",
+    "from functools import partial\n",
+    "from langchain.tools import BaseTool\n",
+    "from langchain.chains.retrieval_qa.base import BaseRetrievalQA\n",
+    "from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain\n",
+    "\n",
+    "# def query_memory(query: str, retriever: Any, llm: BaseLanguageModel) -> str:\n",
+    "#     \"\"\"Query your memories.\"\"\"\n",
+    "#     # qa_chain = \n",
+    "#     return qa_chain.run(\n",
+    "#         query\n",
+    "#     )\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9413033e-9e3b-4a50-89a9-b56be8c7af45",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Define the chat model before its first use so the notebook runs top to bottom on a fresh kernel.\n",
+    "llm = ChatOpenAI(model_name=\"gpt-4\", temperature=1.0)\n",
+    "# Question-answering chain over the agent's memory store, retrieving a single document per query (k=1).\n",
+    "retrieval_qa_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(search_kwargs={\"k\": 1}))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3a6ebccd-45e7-485a-8be9-4fef5a82d437",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "class QueryMemoryTool(BaseTool):\n",
+    "    \"\"\"Query your memories.\"\"\"\n",
+    "    name = \"query memory\"\n",
+    "    description = \"Query your memories of past actions and thoughts.\"\n",
+    "    # Chain that answers a question against the retriever it was built with.\n",
+    "    retrieval_qa_chain: ConversationalRetrievalChain\n",
+    "    \n",
+    "    def _run(self, query: str) -> str:\n",
+    "        # Each call starts with an empty chat history, so queries are independent of one another.\n",
+    "        result = self.retrieval_qa_chain({\"question\": query, \"chat_history\": []})\n",
+    "        # The chain returns a dict of outputs; hand back only the answer text to honour the `-> str` contract.\n",
+    "        return result[\"answer\"]\n",
+    "    \n",
+    "    async def _arun(self, query: str) -> str:\n",
+    "        # Asynchronous execution is not supported by this tool.\n",
+    "        raise NotImplementedError\n",
+    "\n",
+    "\n",
+    "query_memory_tool = QueryMemoryTool(\n",
+    "    name = \"query memory\",\n",
+    "    description=\"Query your memories of past actions and thoughts.\",\n",
+    "    retrieval_qa_chain=retrieval_qa_chain,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4d6c4ea1-a592-408c-ad39-33114927e416",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "query_memory_tool(\"What do you remember?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "88c8b184-67d7-4c35-84ae-9b14bef8c4e3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "tools = [\n",
+    "    # Tool(\n",
+    "    #     name = \"google search\",\n",
+    "    #     func=search.run,\n",
+    "    #     description=\"Simple search method useful for answering simple questions about current events. You should ask targeted questions\"\n",
+    "    # ),\n",
+    "    search,\n",
+    "    WriteFileTool(),\n",
+    "    ReadFileTool(),\n",
+    "    process_csv,\n",
+    "    show_image,\n",
+    "    RequestsGetTool(requests_wrapper=TextRequestsWrapper()),\n",
+    "    current_time,\n",
+    "    browse_web_page,\n",
+    "    wikipedia_tool,\n",
+    "    query_memory_tool\n",
+    "    # huggingface_image_generation,\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "709c08c2",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "llm = ChatOpenAI(model_name=\"gpt-4\", temperature=1.0)\n",
+    "agent = AutoGPT.from_llm_and_tools(\n",
+    "    ai_name=\"Tom\",\n",
+    "    ai_role=\"Assistant\",\n",
+    "    tools=tools,\n",
+    "    llm=llm,\n",
+    "    memory=vectorstore.as_retriever(search_kwargs={\"k\": 8}),\n",
+    "    # human_in_the_loop=True, # Set to True if you want to add feedback at each step.\n",
+    ")\n",
+    "# agent.chain.verbose = True"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fc9b51ba",
+   "metadata": {},
+   "source": [
+    "### AutoGPT as a researcher / data munger \n",
+    "\n",
+    "#### `inflation` and `college tuition`\n",
+    " \n",
+    "Let's use AutoGPT as researcher and data munger / cleaner.\n",
+    "  \n",
+    "I spent a lot of time over the years crawling data sources and cleaning data. \n",
+    "\n",
+    "Let's see if AutoGPT can do all of this for us!\n",
+    "\n",
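+    "The active prompt below asks for a marathon training plan; the prompt comparing `inflation` and `college tuition` is kept in the commented-out cell that follows it."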
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "64455d70-a134-4d11-826a-33e34c2ce287",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "agent.run([\"Using the 'data/' folder as scratch, find a marathon in Northern California in July come up with a training program for me as a table.\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3aba5e30",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# agent.run([\"Using the 'data/' folder as scratch, get me the yearly % change in US college tuition and the yearly % change in US inflation (US CPI) every year since 1980.\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f78012cf",
+   "metadata": {},
+   "source": [
+    "The command runs and writes output to `data`.\n",
+    "   \n",
+    "`cleaned_college_tuition_inflation_percent_change.csv` is written.\n",
+    "\n",
+    "We write some simple code to plot this."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c33c7a24",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the CSV and index it by year in one step\n",
+    "d = pd.read_csv(\"data/cleaned_college_tuition_inflation_percent_change.csv\").set_index(\"Year\")\n",
+    "# Compound the yearly changes into cumulative series (scaled by 100)\n",
+    "# NOTE(review): this assumes the change columns hold fractions (0.05 for 5%), not percentages -- confirm against the CSV.\n",
+    "d['College Tuition % Change Cumulative'] = (d['College Tuition % Change'] + 1).cumprod() * 100\n",
+    "d['Inflation % Change Cumulative'] = (d['Inflation % Change'] + 1).cumprod() * 100\n",
+    "# Draw both cumulative series on the same axes\n",
+    "d[['College Tuition % Change Cumulative','Inflation % Change Cumulative']].plot(color=['blue','green'])\n",
+    "plt.ylabel(\"Cumulative Percent Change\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f31d09ea",
+   "metadata": {},
+   "source": [
+    "Of course, we would want to inspect and verify the results."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0e4cc1a1-8d72-4b3b-bba5-1ec5420d3eab",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}