This commit is contained in:
vowelparrot
2023-04-18 19:09:26 -07:00
parent a5c996189c
commit 8aa784bfbd

View File

@@ -0,0 +1,657 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "14f8b67b",
"metadata": {},
"source": [
"## AutoGPT\n",
"\n",
"* Implementation of https://github.com/Significant-Gravitas/Auto-GPT \n",
"* With LangChain primitives (LLMs, PromptTemplates, VectorStores, Embeddings, Tools)\n",
"\n",
"### Workflow\n",
"\n",
"[Temporary] Ensure you have the correct LangChain version installed locally.\n",
" \n",
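"* `export OPENAI_API_KEY` (used by the chat model and the embeddings)\n",
"* `export SERPAPI_API_KEY` (only needed if you swap the DuckDuckGo search tool for SerpAPI)"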
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0866af1-43a8-4160-a9cf-9ffe64074493",
"metadata": {},
"outputs": [],
"source": [
"# !pip install playwright\n",
"# !playwright install "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d27e21c5-8556-46bf-936a-f6e119ebd3a0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# ! playwright install "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1cff42fd",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# General \n",
"from datetime import datetime\n",
"import os\n",
"import pandas as pd\n",
"from typing import Optional\n",
"import matplotlib.pyplot as plt\n",
"from langchain.experimental.autonomous_agents.autogpt.agent import AutoGPT\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.utilities import SerpAPIWrapper\n",
"from langchain.tools.human.tool import HumanInputRun\n",
"from langchain.tools.file_management.read import ReadFileTool\n",
"from langchain.tools.file_management.write import WriteFileTool\n",
"from langchain.tools.wikipedia.tool import WikipediaQueryRun\n",
"from langchain.utilities import WikipediaAPIWrapper\n",
"\n",
"from langchain.document_loaders.url_selenium import SeleniumURLLoader\n",
"from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "338dfc5d-178a-49ba-9187-bd08b648ab29",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders.url_playwright import PlaywrightURLLoader, Document"
]
},
{
"cell_type": "markdown",
"id": "192496a7",
"metadata": {},
"source": [
"### Set up tools\n",
"\n",
"* We'll set up an AutoGPT with a `search` tool, a `write-file` tool, and a `read-file` tool"
]
},
{
"cell_type": "markdown",
"id": "708a426f",
"metadata": {},
"source": [
"Define any `tools` you want to use here"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cef4c150-0ef1-4a33-836b-01062fec134e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Tools\n",
"from typing import Optional\n",
"from langchain.agents import Tool, tool\n",
"from langchain.utilities import SerpAPIWrapper\n",
"from langchain.chains.llm_requests import LLMRequestsChain\n",
"from langchain.tools.file_management.read import ReadFileTool\n",
"from langchain.tools.file_management.write import WriteFileTool\n",
"from langchain.tools.requests.tool import RequestsGetTool, TextRequestsWrapper\n",
"\n",
"@tool\n",
"def process_csv(csv_file_path: str, instructions: str, output_path: Optional[str] = None) -> str:\n",
"    \"\"\"Process a CSV with pandas in a limited REPL. Only use this after writing data to disk as a csv file. Any figures must be saved to disk to be viewed by the human. Instructions should be written in natural language, not code. Assume the dataframe is already loaded.\"\"\"\n",
"    # The docstring above is what the model sees as the tool description, so it stays a single prompt-style paragraph.\n",
"    # Failures are returned as text (not raised) so the calling agent can read them and try something else.\n",
"    try:\n",
"        df = pd.read_csv(csv_file_path)\n",
"    except Exception as e:\n",
"        return f\"Error: {e}\"\n",
"    # `llm` is the notebook-level chat model; it only has to exist by the time the tool is invoked.\n",
"    agent = create_pandas_dataframe_agent(llm, df, max_iterations=30, verbose=True)\n",
"    if output_path is not None:\n",
"        # Pass the requested destination on to the dataframe agent as part of its instructions.\n",
"        instructions += f\" Save output to disk at {output_path}\"\n",
"    try:\n",
"        return agent.run(instructions)\n",
"    except Exception as e:\n",
"        return f\"Error: {e}\"\n",
" \n",
"@tool\n",
"def show_image(image_path: str) -> str:\n",
"    \"\"\"Show an image from disk\"\"\"\n",
"    # Only the read is guarded: a bad path comes back as an error string for the agent,\n",
"    # while the drawing step runs outside the handler.\n",
"    try:\n",
"        pixels = plt.imread(image_path)\n",
"    except Exception as exc:\n",
"        return f\"Error: {exc}\"\n",
"    else:\n",
"        plt.imshow(pixels)\n",
"        return f\"Showed image at {image_path}\"\n",
"\n",
"@tool\n",
"def current_time() -> str:\n",
"    \"\"\"Show the current time\"\"\"\n",
"    # `datetime` is the class brought in by `from datetime import datetime`,\n",
"    # so `now()` is called on it directly (it has no `.datetime` attribute).\n",
"    return f\"Current time: {datetime.now()}\"\n",
"\n",
"@tool\n",
"def scrape_links(url: str) -> str:\n",
"    \"\"\"Scrape links from a webpage.\"\"\"\n",
"    # Self-contained on purpose: everything needed is imported here rather than\n",
"    # relying on helper functions from other cells.\n",
"    from urllib.parse import urljoin, urlparse\n",
"    from urllib.request import Request, urlopen\n",
"\n",
"    from bs4 import BeautifulSoup\n",
"\n",
"    # urlopen also accepts file:// and similar schemes; only fetch real web pages.\n",
"    if urlparse(url).scheme not in (\"http\", \"https\"):\n",
"        return \"Error: Invalid URL format\"\n",
"    try:\n",
"        request = Request(url, headers={\"User-Agent\": \"Mozilla/5.0\"})\n",
"        with urlopen(request, timeout=10) as response:\n",
"            html = response.read()\n",
"    except Exception as e:\n",
"        # Report failures as text so the agent can react to them.\n",
"        return f\"Error: {e}\"\n",
"\n",
"    soup = BeautifulSoup(html, \"html.parser\")\n",
"    for script in soup([\"script\", \"style\"]):\n",
"        script.extract()\n",
"\n",
"    # One `text (absolute-url)` entry per anchor; relative hrefs are resolved against the page URL.\n",
"    hyperlinks = [\n",
"        f\"{anchor.get_text(strip=True)} ({urljoin(url, anchor['href'])})\"\n",
"        for anchor in soup.find_all(\"a\", href=True)\n",
"    ]\n",
"    return \"\\n\".join(hyperlinks) if hyperlinks else \"No links found\""
]
},
{
"cell_type": "markdown",
"id": "51c07298-00e0-42d6-8aff-bd2e6bbd35a3",
"metadata": {},
"source": [
"### Free DuckDuckGo (DDG) search"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4afdedb2-f295-4ab8-9397-3640f5eeeed3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# !pip install duckduckgo_search"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "45f143de-e49e-4e27-88eb-ee44a4fdf933",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import json\n",
"from duckduckgo_search import ddg"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e2e799f4-86fb-4190-a298-4ae5c7b7a540",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"@tool\n",
"def search(query: str, num_results: int = 8) -> str:\n",
"    \"\"\"Useful for general internet search queries.\"\"\"\n",
"    # An empty query, or a lookup that yields nothing, is reported as an empty JSON list.\n",
"    if not query:\n",
"        return json.dumps([])\n",
"\n",
"    hits = ddg(query, max_results=num_results)\n",
"    if not hits:\n",
"        return json.dumps([])\n",
"\n",
"    # Pretty-print and keep non-ASCII characters readable.\n",
"    return json.dumps(list(hits), ensure_ascii=False, indent=4)"
]
},
{
"cell_type": "markdown",
"id": "fbd24d78-93a1-4645-bb3a-cac6fb2fc1a2",
"metadata": {},
"source": [
"### Define Web Browser Tool\n",
"\n",
"This is optional but can provide more information to the model relative to the other tools"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e3dcd773-a462-404e-b3dd-822a5f773e0d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# !pip install playwright\n",
"# !playwright install >/dev/null\n",
"# !pip install bs4\n",
"# !pip install nest_asyncio"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70856c54-aa54-42ca-bf4e-53950bfa6e5d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.docstore.document import Document\n",
"from langchain.chains.summarize import load_summarize_chain\n",
"from langchain.chains import RetrievalQA\n",
"\n",
"# chain = load_summarize_chain(llm)\n",
"\n",
"# @tool\n",
"# def summarize_text(text: str) -> str:\n",
"# \"\"\"Summarize the provided text. You have a limited memory, so summarization is useful.\"\"\"\n",
"# docs = [Document(page_content=text)]\n",
"# return chain.run(docs).strip()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ed22eb3f-f374-4dc5-bc44-57f7468bff51",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain.document_loaders import WebBaseLoader\n",
"\n",
"# Chunks are capped at 500 units as measured by `len`, with a 20-unit overlap between neighbours.\n",
"# NOTE(review): nothing else in this notebook reads `ruff_texts`; this cell looks like a leftover example.\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20, length_function=len)\n",
"\n",
"loader = WebBaseLoader(\"https://beta.ruff.rs/docs/faq/\")\n",
"docs = loader.load()\n",
"ruff_texts = text_splitter.split_documents(docs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26b497d7-8e52-4c7f-8e7e-da0a48820a3c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"async def async_load_playwright(url: str) -> str:\n",
"    \"\"\"Load a single URL with headless Chromium (Playwright) and return its text.\n",
"\n",
"    The page HTML is parsed with BeautifulSoup, script and style elements are removed,\n",
"    and the remaining text is returned with one non-empty chunk per line. Any exception\n",
"    raised while loading or parsing is returned as an `Error: ...` string instead of\n",
"    being propagated.\n",
"    \"\"\"\n",
"    from bs4 import BeautifulSoup\n",
"    from playwright.async_api import async_playwright\n",
"\n",
"    results = \"\"\n",
"    async with async_playwright() as p:\n",
"        browser = await p.chromium.launch(headless=True)\n",
"        try:\n",
"            page = await browser.new_page()\n",
"            await page.goto(url)\n",
"\n",
"            page_source = await page.content()\n",
"            soup = BeautifulSoup(page_source, \"html.parser\")\n",
"\n",
"            for script in soup([\"script\", \"style\"]):\n",
"                script.extract()\n",
"\n",
"            text = soup.get_text()\n",
"            lines = (line.strip() for line in text.splitlines())\n",
"            # Break only on double spaces so ordinary sentences stay on one line\n",
"            # instead of being emitted one word per line.\n",
"            chunks = (phrase.strip() for line in lines for phrase in line.split(\"  \"))\n",
"            results = \"\\n\".join(chunk for chunk in chunks if chunk)\n",
"        except Exception as e:\n",
"            results = f\"Error: {e}\"\n",
"        finally:\n",
"            # Close the browser on every exit path, not just the ones handled above.\n",
"            await browser.close()\n",
"    return results\n",
"\n",
"def run_async(coro):\n",
"    \"\"\"Run `coro` to completion on the current event loop and return its result.\"\"\"\n",
"    import asyncio\n",
"\n",
"    import nest_asyncio\n",
"\n",
"    # NOTE(review): nest_asyncio is presumably applied so run_until_complete also works\n",
"    # when a loop is already running (as in a notebook kernel) - confirm against its docs.\n",
"    nest_asyncio.apply()\n",
"    return asyncio.get_event_loop().run_until_complete(coro)\n",
"\n",
"@tool\n",
"def browse_web_page(url: str) -> str:\n",
"    \"\"\"Useful for browsing websites and scraping the text information.\"\"\"\n",
"    # Bridge the async Playwright loader into this synchronous tool entry point.\n",
"    page_text = run_async(async_load_playwright(url))\n",
"    return page_text\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "75c3e2e3-b257-4237-82b1-7a804dd54e6a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "de963739-8d7d-4661-a7ed-71a6fe7e2596",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# !pip install wikipedia >/dev/null"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db32f7b5-b315-4d47-8b0d-9103e673c1eb",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Wikipedia lookup tool, using a WikipediaAPIWrapper built with its default arguments.\n",
"wikipedia_tool = WikipediaQueryRun(\n",
"    api_wrapper=WikipediaAPIWrapper(),\n",
")"
]
},
{
"cell_type": "markdown",
"id": "8e39ee28",
"metadata": {},
"source": [
"### Set up memory\n",
"\n",
"* The memory here is used for the agent's intermediate steps"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1df7b724",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Memory\n",
"import faiss\n",
"from langchain.vectorstores import FAISS\n",
"from langchain.docstore import InMemoryDocstore\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"\n",
"# The flat L2 index is created with a fixed vector width.\n",
"# NOTE(review): 1536 is assumed to match the vectors OpenAIEmbeddings returns - confirm for the model in use.\n",
"embedding_size = 1536\n",
"embeddings_model = OpenAIEmbeddings()\n",
"index = faiss.IndexFlatL2(embedding_size)\n",
"# The store starts out empty: an empty docstore and an empty mapping.\n",
"vectorstore = FAISS(\n",
"    embeddings_model.embed_query,\n",
"    index,\n",
"    InMemoryDocstore({}),\n",
"    {},\n",
")"
]
},
{
"cell_type": "markdown",
"id": "e40fd657",
"metadata": {},
"source": [
"### Setup model and AutoGPT\n",
"\n",
"`Model set-up`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e86326b3-af8c-4165-a5a6-38f64c79f921",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from typing import Any\n",
"from langchain.schema import BaseRetriever, BaseLanguageModel\n",
"from functools import partial\n",
"from langchain.tools import BaseTool\n",
"from langchain.chains.retrieval_qa.base import BaseRetrievalQA\n",
"from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain\n",
"\n",
"# def query_memory(query: str, retriever: Any, llm: BaseLanguageModel) -> str:\n",
"# \"\"\"Query your memories.\"\"\"\n",
"# # qa_chain = \n",
"# return qa_chain.run(\n",
"# query\n",
"# )\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9413033e-9e3b-4a50-89a9-b56be8c7af45",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# The chat model must exist before the chain is built; on a fresh kernel this is the\n",
"# first cell that needs it at execution time. Same settings as the agent's model further down.\n",
"llm = ChatOpenAI(model_name=\"gpt-4\", temperature=1.0)\n",
"# k=1: ask the retriever for a single entry per question.\n",
"retrieval_qa_chain = ConversationalRetrievalChain.from_llm(\n",
"    llm=llm,\n",
"    retriever=vectorstore.as_retriever(search_kwargs={\"k\": 1}),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3a6ebccd-45e7-485a-8be9-4fef5a82d437",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class QueryMemoryTool(BaseTool):\n",
"    \"\"\"Query your memories.\"\"\"\n",
"    name = \"query memory\"\n",
"    description = \"Query your memories of past actions and thoughts.\"\n",
"    # Chain that is asked the question in `_run`.\n",
"    retrieval_qa_chain: ConversationalRetrievalChain\n",
"\n",
"    def _run(self, query: str) -> str:\n",
"        \"\"\"Ask the chain `query` with an empty chat history and return the answer text.\"\"\"\n",
"        # Calling the chain yields a dict of outputs; this method is declared to return\n",
"        # a string, so only the generated answer is handed back.\n",
"        result = self.retrieval_qa_chain({\"question\": query, \"chat_history\": []})\n",
"        return result[\"answer\"]\n",
"\n",
"    async def _arun(self, query: str) -> str:\n",
"        \"\"\"Async execution is not implemented for this tool.\"\"\"\n",
"        raise NotImplementedError\n",
"\n",
"\n",
"query_memory_tool = QueryMemoryTool(\n",
"    name = \"query memory\",\n",
"    description=\"Query your memories of past actions and thoughts.\",\n",
"    retrieval_qa_chain=retrieval_qa_chain,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4d6c4ea1-a592-408c-ad39-33114927e416",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"query_memory_tool(\"What do you remember?\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88c8b184-67d7-4c35-84ae-9b14bef8c4e3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"tools = [\n",
" # Tool(\n",
" # name = \"google search\",\n",
" # func=search.run,\n",
" # description=\"Simple search method useful for answering simple questions about current events. You should ask targeted questions\"\n",
" # ),\n",
" search,\n",
" WriteFileTool(),\n",
" ReadFileTool(),\n",
" process_csv,\n",
" show_image,\n",
" RequestsGetTool(requests_wrapper=TextRequestsWrapper()),\n",
" current_time,\n",
" browse_web_page,\n",
" wikipedia_tool,\n",
" query_memory_tool\n",
" # huggingface_image_generation,\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "709c08c2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"llm = ChatOpenAI(model_name=\"gpt-4\", temperature=1.0)\n",
"agent = AutoGPT.from_llm_and_tools(\n",
" ai_name=\"Tom\",\n",
" ai_role=\"Assistant\",\n",
" tools=tools,\n",
" llm=llm,\n",
" memory=vectorstore.as_retriever(search_kwargs={\"k\": 8}),\n",
" # human_in_the_loop=True, # Set to True if you want to add feedback at each step.\n",
")\n",
"# agent.chain.verbose = True"
]
},
{
"cell_type": "markdown",
"id": "fc9b51ba",
"metadata": {},
"source": [
"### AutoGPT as a researcher / data munger\n",
"\n",
"#### `inflation` and `college tuition`\n",
" \n",
"Let's use AutoGPT as a researcher and data munger / cleaner.\n",
" \n",
"I spent a lot of time over the years crawling data sources and cleaning data. \n",
"\n",
"Let's see if AutoGPT can do all of this for us!\n",
"\n",
"The next cell runs a marathon-training prompt; the prompt comparing `inflation` and `college tuition` follows it, commented out."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "64455d70-a134-4d11-826a-33e34c2ce287",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent.run([\"Using the 'data/' folder as scratch, find a marathon in Northern California in July come up with a training program for me as a table.\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3aba5e30",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# agent.run([\"Using the 'data/' folder as scratch, get me the yearly % change in US college tuition and the yearly % change in US inflation (US CPI) every year since 1980.\"])"
]
},
{
"cell_type": "markdown",
"id": "f78012cf",
"metadata": {},
"source": [
"When the tuition / inflation prompt is run, the agent writes its output to the `data/` folder.\n",
" \n",
"`cleaned_college_tuition_inflation_percent_change.csv` is written.\n",
"\n",
"We write some simple code to plot this."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c33c7a24",
"metadata": {},
"outputs": [],
"source": [
"csv_path = \"data/cleaned_college_tuition_inflation_percent_change.csv\"\n",
"# The file is only there after the tuition / inflation prompt has been run (it is commented\n",
"# out above), so skip the plot instead of failing when it is missing.\n",
"if os.path.exists(csv_path):\n",
"    # Read\n",
"    d = pd.read_csv(csv_path).set_index(\"Year\")\n",
"    # Compute cumulative percent change\n",
"    # NOTE(review): this treats the change columns as fractions (0.05 == 5%) - confirm against the CSV the agent wrote.\n",
"    d['College Tuition % Change Cumulative'] = (1 + d['College Tuition % Change']).cumprod() * 100\n",
"    d['Inflation % Change Cumulative'] = (1 + d['Inflation % Change']).cumprod() * 100\n",
"    # Plot\n",
"    d[['College Tuition % Change Cumulative','Inflation % Change Cumulative']].plot(color=['blue','green'])\n",
"    plt.ylabel(\"Cumulative Percent Change\")\n",
"else:\n",
"    print(f\"{csv_path} not found - run the tuition / inflation prompt first.\")"
]
},
{
"cell_type": "markdown",
"id": "f31d09ea",
"metadata": {},
"source": [
"Of course, we would want to inspect and verify the results."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0e4cc1a1-8d72-4b3b-bba5-1ec5420d3eab",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}