From 8aa784bfbd33c2ea2af07f588d65ad15793bba06 Mon Sep 17 00:00:00 2001 From: vowelparrot <130414180+vowelparrot@users.noreply.github.com> Date: Tue, 18 Apr 2023 19:09:26 -0700 Subject: [PATCH] stash --- .../autonomous_agents/marathon_times.ipynb | 657 ++++++++++++++++++ 1 file changed, 657 insertions(+) create mode 100644 docs/use_cases/autonomous_agents/marathon_times.ipynb diff --git a/docs/use_cases/autonomous_agents/marathon_times.ipynb b/docs/use_cases/autonomous_agents/marathon_times.ipynb new file mode 100644 index 00000000000..1d9ca34758d --- /dev/null +++ b/docs/use_cases/autonomous_agents/marathon_times.ipynb @@ -0,0 +1,657 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "14f8b67b", + "metadata": {}, + "source": [ + "## AutoGPT\n", + "\n", + "* Implementation of https://github.com/Significant-Gravitas/Auto-GPT \n", + "* With LangChain primitives (LLMs, PromptTemplates, VectorStores, Embeddings, Tools)\n", + "\n", + "### Workflow\n", + "\n", + "[Temporary] Ensure you have correct Langchain version locally\n", + " \n", + "* `export SERPAPI_API_KEY`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0866af1-43a8-4160-a9cf-9ffe64074493", + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install playwright\n", + "# !playwright install " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d27e21c5-8556-46bf-936a-f6e119ebd3a0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# ! 
playwright install " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1cff42fd", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# General \n", + "from datetime import datetime\n", + "import os\n", + "import pandas as pd\n", + "from typing import Optional\n", + "import matplotlib.pyplot as plt\n", + "from langchain.experimental.autonomous_agents.autogpt.agent import AutoGPT\n", + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.utilities import SerpAPIWrapper\n", + "from langchain.tools.human.tool import HumanInputRun\n", + "from langchain.tools.file_management.read import ReadFileTool\n", + "from langchain.tools.file_management.write import WriteFileTool\n", + "from langchain.tools.wikipedia.tool import WikipediaQueryRun\n", + "from langchain.utilities import WikipediaAPIWrapper\n", + "\n", + "from langchain.document_loaders.url_selenium import SeleniumURLLoader\n", + "from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "338dfc5d-178a-49ba-9187-bd08b648ab29", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.document_loaders.url_playwright import PlaywrightURLLoader, Document" + ] + }, + { + "cell_type": "markdown", + "id": "192496a7", + "metadata": {}, + "source": [ + "### Set up tools\n", + "\n", + "* We'll set up an AutoGPT with a `search` tool, and `write-file` tool, and a `read-file` tool" + ] + }, + { + "cell_type": "markdown", + "id": "708a426f", + "metadata": {}, + "source": [ + "Define any `tools` you want to use here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cef4c150-0ef1-4a33-836b-01062fec134e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Tools\n", + "from typing import Optional\n", + "from langchain.agents import Tool, tool\n", + "from langchain.utilities import SerpAPIWrapper\n", + "from 
langchain.chains.llm_requests import LLMRequestsChain\n", + "from langchain.tools.file_management.read import ReadFileTool\n", + "from langchain.tools.file_management.write import WriteFileTool\n", + "from langchain.tools.requests.tool import RequestsGetTool, TextRequestsWrapper\n", + "\n", + "@tool\n", + "def process_csv(csv_file_path: str, instructions: str, output_path: Optional[str] = None) -> str:\n", + " \"\"\"Process a CSV by with pandas in a limited REPL. Only use this after writing data to disk as a csv file. Any figures must be saved to disk to be viewed by the human. Instructions should be written in natural language, not code. Assume the dataframe is already loaded.\"\"\"\n", + " try:\n", + " df = pd.read_csv(csv_file_path)\n", + " except Exception as e:\n", + " return f\"Error: {e}\"\n", + " agent = create_pandas_dataframe_agent(llm, df, max_iterations=30, verbose=True)\n", + " if output_path is not None:\n", + " instructions += f\" Save output to disk at {output_path}\"\n", + " try:\n", + " return agent.run(instructions)\n", + " except Exception as e:\n", + " return f\"Error: {e}\"\n", + " \n", + "@tool\n", + "def show_image(image_path: str) -> str:\n", + " \"\"\"Show an image from disk\"\"\"\n", + " try:\n", + " img = plt.imread(image_path)\n", + " except Exception as e:\n", + " return f\"Error: {e}\"\n", + " plt.imshow(img)\n", + " return f\"Showed image at {image_path}\"\n", + "\n", + "@tool\n", + "def current_time() -> str:\n", + " \"\"\"Show the current time\"\"\"\n", + " return f\"Current time: {datetime.datetime.now()}\" \n", + "\n", + "@tool\n", + "def scrape_links(url: str) -> str:\n", + " \"\"\"Scrape links from a webpage.\"\"\"\n", + " response, error_message = get_response(url)\n", + " if error_message:\n", + " return error_message\n", + " if not response:\n", + " return \"Error: Could not get response\"\n", + " soup = BeautifulSoup(response.text, \"html.parser\")\n", + "\n", + " for script in soup([\"script\", \"style\"]):\n", + " 
script.extract()\n", + "\n", + " hyperlinks = extract_hyperlinks(soup, url)\n", + "\n", + " return format_hyperlinks(hyperlinks)\n" + ] + }, + { + "cell_type": "markdown", + "id": "51c07298-00e0-42d6-8aff-bd2e6bbd35a3", + "metadata": {}, + "source": [ + "# Free DDG Search" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4afdedb2-f295-4ab8-9397-3640f5eeeed3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# !pip install duckduckgo_search" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45f143de-e49e-4e27-88eb-ee44a4fdf933", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import json\n", + "from duckduckgo_search import ddg" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2e799f4-86fb-4190-a298-4ae5c7b7a540", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "@tool\n", + "def search(query: str, num_results: int = 8) -> str:\n", + " \"\"\"Useful for general internet search queries.\"\"\"\n", + " search_results = []\n", + " if not query:\n", + " return json.dumps(search_results)\n", + "\n", + " results = ddg(query, max_results=num_results)\n", + " if not results:\n", + " return json.dumps(search_results)\n", + "\n", + " for j in results:\n", + " search_results.append(j)\n", + "\n", + " return json.dumps(search_results, ensure_ascii=False, indent=4)" + ] + }, + { + "cell_type": "markdown", + "id": "fbd24d78-93a1-4645-bb3a-cac6fb2fc1a2", + "metadata": {}, + "source": [ + "### Define Web Browser Tool\n", + "\n", + "This is optional but can provide more information to the model relative to the other tools" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3dcd773-a462-404e-b3dd-822a5f773e0d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# !pip install playwright\n", + "# !playwright install >/dev/null\n", + "# !pip install bs4\n", + "# !pip install nest_asyncio" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "70856c54-aa54-42ca-bf4e-53950bfa6e5d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.docstore.document import Document\n", + "from langchain.chains.summarize import load_summarize_chain\n", + "from langchain.chains import RetrievalQA\n", + "\n", + "# chain = load_summarize_chain(llm)\n", + "\n", + "# @tool\n", + "# def summarize_text(text: str) -> str:\n", + "# \"\"\"Summarize the provided text. You have a limited memory, so summarization is useful.\"\"\"\n", + "# docs = [Document(page_content=text)]\n", + "# return chain.run(docs).strip()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed22eb3f-f374-4dc5-bc44-57f7468bff51", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain.document_loaders import WebBaseLoader\n", + "\n", + "text_splitter = RecursiveCharacterTextSplitter(\n", + " # Set a really small chunk size, just to show.\n", + " chunk_size = 500,\n", + " chunk_overlap = 20,\n", + " length_function = len,\n", + ")\n", + "\n", + "loader = WebBaseLoader(\"https://beta.ruff.rs/docs/faq/\")\n", + "docs = loader.load()\n", + "ruff_texts = text_splitter.split_documents(docs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26b497d7-8e52-4c7f-8e7e-da0a48820a3c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "async def async_load_playwright(url: str) -> str:\n", + " \"\"\"Load the specified URLs using Playwright and parse using BeautifulSoup.\"\"\"\n", + " from bs4 import BeautifulSoup\n", + " from playwright.async_api import async_playwright\n", + "\n", + " results = \"\"\n", + " async with async_playwright() as p:\n", + " browser = await p.chromium.launch(headless=True)\n", + " try:\n", + " page = await browser.new_page()\n", + " await page.goto(url)\n", + "\n", + " page_source = await page.content()\n", + " soup = 
BeautifulSoup(page_source, \"html.parser\")\n", + "\n", + " for script in soup([\"script\", \"style\"]):\n", + " script.extract()\n", + "\n", + " text = soup.get_text()\n", + " lines = (line.strip() for line in text.splitlines())\n", + " chunks = (phrase.strip() for line in lines for phrase in line.split(\" \"))\n", + " results = \"\\n\".join(chunk for chunk in chunks if chunk)\n", + " except Exception as e:\n", + " results = f\"Error: {e}\"\n", + " await browser.close()\n", + " return results\n", + "\n", + "def run_async(coro):\n", + " import asyncio\n", + " import nest_asyncio\n", + " nest_asyncio.apply()\n", + " event_loop = asyncio.get_event_loop()\n", + " return event_loop.run_until_complete(coro)\n", + "\n", + "@tool\n", + "def browse_web_page(url: str) -> str:\n", + " \"\"\"Useful for browsing websites and scraping the text information.\"\"\"\n", + " return run_async(async_load_playwright(url))\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75c3e2e3-b257-4237-82b1-7a804dd54e6a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de963739-8d7d-4661-a7ed-71a6fe7e2596", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# !pip install wikipedia >/dev/null" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db32f7b5-b315-4d47-8b0d-9103e673c1eb", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "wikipedia_tool = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())" + ] + }, + { + "cell_type": "markdown", + "id": "8e39ee28", + "metadata": {}, + "source": [ + "### Set up memory\n", + "\n", + "* The memory here is used for the agent's intermediate steps" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1df7b724", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Memory\n", + "import faiss\n", + "from langchain.vectorstores import FAISS\n", + "from langchain.docstore
import InMemoryDocstore\n", + "from langchain.embeddings import OpenAIEmbeddings\n", + "\n", + "embeddings_model = OpenAIEmbeddings()\n", + "embedding_size = 1536\n", + "index = faiss.IndexFlatL2(embedding_size)\n", + "vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})" + ] + }, + { + "cell_type": "markdown", + "id": "e40fd657", + "metadata": {}, + "source": [ + "### Setup model and AutoGPT\n", + "\n", + "`Model set-up`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e86326b3-af8c-4165-a5a6-38f64c79f921", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from typing import Any\n", + "from langchain.schema import BaseRetriever, BaseLanguageModel\n", + "from functools import partial\n", + "from langchain.tools import BaseTool\n", + "from langchain.chains.retrieval_qa.base import BaseRetrievalQA\n", + "from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain\n", + "\n", + "# def query_memory(query: str, retriever: Any, llm: BaseLanguageModel) -> str:\n", + "# \"\"\"Query your memories.\"\"\"\n", + "# # qa_chain = \n", + "# return qa_chain.run(\n", + "# query\n", + "# )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9413033e-9e3b-4a50-89a9-b56be8c7af45", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "retrieval_qa_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(search_kwargs={\"k\": 1}))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a6ebccd-45e7-485a-8be9-4fef5a82d437", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "class QueryMemoryTool(BaseTool):\n", + " \"\"\"Query your memories.\"\"\"\n", + " name = \"query memory\"\n", + " description = \"Query your memories of past actions and thoughts.\"\n", + " retrieval_qa_chain: ConversationalRetrievalChain\n", + " \n", + " def _run(self, query: str) -> str:\n", + " return 
self.retrieval_qa_chain({\"question\": query, \"chat_history\": []})\n", + " \n", + " async def _arun(self, query: str) -> str:\n", + " raise NotImplementedError\n", + "\n", + "\n", + "query_memory_tool = QueryMemoryTool(\n", + " name = \"query memory\",\n", + " description=\"Query your memories of past actions and thoughts.\",\n", + " retrieval_qa_chain=retrieval_qa_chain,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4d6c4ea1-a592-408c-ad39-33114927e416", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "query_memory_tool(\"What do you remember?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88c8b184-67d7-4c35-84ae-9b14bef8c4e3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "tools = [\n", + " # Tool(\n", + " # name = \"google search\",\n", + " # func=search.run,\n", + " # description=\"Simple search method useful for answering simple questions about current events. You should ask targeted questions\"\n", + " # ),\n", + " search,\n", + " WriteFileTool(),\n", + " ReadFileTool(),\n", + " process_csv,\n", + " show_image,\n", + " RequestsGetTool(requests_wrapper=TextRequestsWrapper()),\n", + " current_time,\n", + " browse_web_page,\n", + " wikipedia_tool,\n", + " query_memory_tool\n", + " # huggingface_image_generation,\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "709c08c2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "llm = ChatOpenAI(model_name=\"gpt-4\", temperature=1.0)\n", + "agent = AutoGPT.from_llm_and_tools(\n", + " ai_name=\"Tom\",\n", + " ai_role=\"Assistant\",\n", + " tools=tools,\n", + " llm=llm,\n", + " memory=vectorstore.as_retriever(search_kwargs={\"k\": 8}),\n", + " # human_in_the_loop=True, # Set to True if you want to add feedback at each step.\n", + ")\n", + "# agent.chain.verbose = True" + ] + }, + { + "cell_type": "markdown", + "id": "fc9b51ba", + "metadata": {}, + "source": [ + "### 
AutoGPT as a researcher / data munger \n", + "\n", + "#### `inflation` and `college tuition`\n", + " \n", + "Let's use AutoGPT as a researcher and data munger / cleaner.\n", + " \n", + "I spent a lot of time over the years crawling data sources and cleaning data. \n", + "\n", + "Let's see if AutoGPT can do all of this for us!\n", + "\n", + "Here is the prompt comparing `inflation` and `college tuition` (commented out below; the active cell runs a marathon-training prompt instead)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64455d70-a134-4d11-826a-33e34c2ce287", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "agent.run([\"Using the 'data/' folder as scratch, find a marathon in Northern California in July come up with a training program for me as a table.\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3aba5e30", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# agent.run([\"Using the 'data/' folder as scratch, get me the yearly % change in US college tuition and the yearly % change in US inflation (US CPI) every year since 1980.\"])" + ] + }, + { + "cell_type": "markdown", + "id": "f78012cf", + "metadata": {}, + "source": [ + "The tuition / inflation command runs and writes its output to the `data/` folder.\n", + " \n", + "`cleaned_college_tuition_inflation_percent_change.csv` is written.\n", + "\n", + "We write some simple code to plot this." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c33c7a24", + "metadata": {}, + "outputs": [], + "source": [ + "# Read\n", + "d = pd.read_csv(\"data/cleaned_college_tuition_inflation_percent_change.csv\")\n", + "d.set_index(\"Year\",inplace=True)\n", + "# Compute cumulative percent change\n", + "d['College Tuition % Change Cumulative'] = (1 + d['College Tuition % Change']).cumprod() * 100\n", + "d['Inflation % Change Cumulative'] = (1 + d['Inflation % Change']).cumprod() * 100\n", + "# Plot\n", + "d[['College Tuition % Change Cumulative','Inflation % Change Cumulative']].plot(color=['blue','green'])\n", + "plt.ylabel(\"Cumulative Percent Change\")" + ] + }, + { + "cell_type": "markdown", + "id": "f31d09ea", + "metadata": {}, + "source": [ + "Of course, we would want to inspect and verify the results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e4cc1a1-8d72-4b3b-bba5-1ec5420d3eab", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}