mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-06 09:10:27 +00:00
Compare commits
2 Commits
vwp/charac
...
harrison/s
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
186172838f | ||
|
|
44dfda990b |
@@ -27,7 +27,7 @@ FROM builder AS dependencies
|
||||
COPY pyproject.toml poetry.lock poetry.toml ./
|
||||
|
||||
# Install the Poetry dependencies (this layer will be cached as long as the dependencies don't change)
|
||||
RUN $POETRY_HOME/bin/poetry install --no-interaction --no-ansi --with test
|
||||
RUN $POETRY_HOME/bin/poetry install --no-interaction --no-ansi
|
||||
|
||||
# Use a multi-stage build to run tests
|
||||
FROM dependencies AS tests
|
||||
@@ -35,7 +35,7 @@ FROM dependencies AS tests
|
||||
# Copy the rest of the app source code (this layer will be invalidated and rebuilt whenever the source code changes)
|
||||
COPY . .
|
||||
|
||||
RUN /opt/poetry/bin/poetry install --no-interaction --no-ansi --with test
|
||||
RUN /opt/poetry/bin/poetry install --no-interaction --no-ansi
|
||||
|
||||
# Set the entrypoint to run tests using Poetry
|
||||
ENTRYPOINT ["/opt/poetry/bin/poetry", "run", "pytest"]
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
⚡ Building applications with LLMs through composability ⚡
|
||||
|
||||
[](https://github.com/hwchase17/langchain/actions/workflows/lint.yml) [](https://github.com/hwchase17/langchain/actions/workflows/test.yml) [](https://github.com/hwchase17/langchain/actions/workflows/linkcheck.yml) [](https://pepy.tech/project/langchain) [](https://opensource.org/licenses/MIT) [](https://twitter.com/langchainai) [](https://discord.gg/6adMQxSpJS)
|
||||
[](https://github.com/hwchase17/langchain/actions/workflows/lint.yml) [](https://github.com/hwchase17/langchain/actions/workflows/test.yml) [](https://github.com/hwchase17/langchain/actions/workflows/linkcheck.yml) [](https://opensource.org/licenses/MIT) [](https://twitter.com/langchainai) [](https://discord.gg/6adMQxSpJS)
|
||||
|
||||
**Production Support:** As you move your LangChains into production, we'd love to offer more comprehensive support.
|
||||
Please fill out [this form](https://forms.gle/57d8AmXBYp8PP8tZA) and we'll set up a dedicated support Slack channel.
|
||||
@@ -10,8 +10,6 @@ Please fill out [this form](https://forms.gle/57d8AmXBYp8PP8tZA) and we'll set u
|
||||
## Quick Install
|
||||
|
||||
`pip install langchain`
|
||||
or
|
||||
`conda install langchain -c conda-forge`
|
||||
|
||||
## 🤔 What is this?
|
||||
|
||||
@@ -75,7 +73,7 @@ Memory is the concept of persisting state between calls of a chain/agent. LangCh
|
||||
|
||||
[BETA] Generative models are notoriously hard to evaluate with traditional metrics. One new way of evaluating them is using language models themselves to do the evaluation. LangChain provides some prompts/chains for assisting in this.
|
||||
|
||||
For more information on these concepts, please see our [full documentation](https://langchain.readthedocs.io/en/latest/).
|
||||
For more information on these concepts, please see our [full documentation](https://langchain.readthedocs.io/en/latest/?).
|
||||
|
||||
## 💁 Contributing
|
||||
|
||||
|
||||
BIN
docs/_static/DataberryDashboard.png
vendored
BIN
docs/_static/DataberryDashboard.png
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 157 KiB |
4
docs/_static/css/custom.css
vendored
4
docs/_static/css/custom.css
vendored
@@ -11,7 +11,3 @@ pre {
|
||||
max-width: 2560px !important;
|
||||
}
|
||||
}
|
||||
|
||||
#my-component-root *, #headlessui-portal-root * {
|
||||
z-index: 1000000000000;
|
||||
}
|
||||
|
||||
58
docs/_static/js/mendablesearch.js
vendored
58
docs/_static/js/mendablesearch.js
vendored
@@ -1,58 +0,0 @@
|
||||
document.addEventListener('DOMContentLoaded', () => {
|
||||
// Load the external dependencies
|
||||
function loadScript(src, onLoadCallback) {
|
||||
const script = document.createElement('script');
|
||||
script.src = src;
|
||||
script.onload = onLoadCallback;
|
||||
document.head.appendChild(script);
|
||||
}
|
||||
|
||||
function createRootElement() {
|
||||
const rootElement = document.createElement('div');
|
||||
rootElement.id = 'my-component-root';
|
||||
document.body.appendChild(rootElement);
|
||||
return rootElement;
|
||||
}
|
||||
|
||||
|
||||
|
||||
function initializeMendable() {
|
||||
const rootElement = createRootElement();
|
||||
const { MendableFloatingButton } = Mendable;
|
||||
|
||||
|
||||
const iconSpan1 = React.createElement('span', {
|
||||
}, '🦜');
|
||||
|
||||
const iconSpan2 = React.createElement('span', {
|
||||
}, '🔗');
|
||||
|
||||
const icon = React.createElement('p', {
|
||||
style: { color: '#ffffff', fontSize: '22px',width: '48px', height: '48px', margin: '0px', padding: '0px', display: 'flex', alignItems: 'center', justifyContent: 'center', textAlign: 'center' },
|
||||
}, [iconSpan1, iconSpan2]);
|
||||
|
||||
|
||||
|
||||
|
||||
const mendableFloatingButton = React.createElement(
|
||||
MendableFloatingButton,
|
||||
{
|
||||
style: { darkMode: false, accentColor: '#010810' },
|
||||
floatingButtonStyle: { color: '#ffffff', backgroundColor: '#010810' },
|
||||
anon_key: '82842b36-3ea6-49b2-9fb8-52cfc4bde6bf', // Mendable Search Public ANON key, ok to be public
|
||||
messageSettings: {
|
||||
openSourcesInNewTab: false,
|
||||
},
|
||||
icon: icon,
|
||||
}
|
||||
);
|
||||
|
||||
ReactDOM.render(mendableFloatingButton, rootElement);
|
||||
}
|
||||
|
||||
loadScript('https://unpkg.com/react@17/umd/react.production.min.js', () => {
|
||||
loadScript('https://unpkg.com/react-dom@17/umd/react-dom.production.min.js', () => {
|
||||
loadScript('https://unpkg.com/@mendable/search@0.0.83/dist/umd/mendable.min.js', initializeMendable);
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -103,10 +103,5 @@ html_static_path = ["_static"]
|
||||
html_css_files = [
|
||||
"css/custom.css",
|
||||
]
|
||||
|
||||
html_js_files = [
|
||||
"js/mendablesearch.js",
|
||||
]
|
||||
|
||||
nb_execution_mode = "off"
|
||||
myst_enable_extensions = ["colon_fence"]
|
||||
|
||||
@@ -33,19 +33,10 @@ It implements a Question Answering app and contains instructions for deploying t
|
||||
|
||||
A minimal example on how to run LangChain on Vercel using Flask.
|
||||
|
||||
## [Digitalocean App Platform](https://github.com/homanp/digitalocean-langchain)
|
||||
|
||||
A minimal example on how to deploy LangChain to DigitalOcean App Platform.
|
||||
|
||||
## [SteamShip](https://github.com/steamship-core/steamship-langchain/)
|
||||
|
||||
This repository contains LangChain adapters for Steamship, enabling LangChain developers to rapidly deploy their apps on Steamship.
|
||||
This includes: production ready endpoints, horizontal scaling across dependencies, persistant storage of app state, multi-tenancy support, etc.
|
||||
|
||||
## [Langchain-serve](https://github.com/jina-ai/langchain-serve)
|
||||
|
||||
This repository allows users to serve local chains and agents as RESTful, gRPC, or Websocket APIs thanks to [Jina](https://docs.jina.ai/). Deploy your chains & agents with ease and enjoy independent scaling, serverless and autoscaling APIs, as well as a Streamlit playground on Jina AI Cloud.
|
||||
|
||||
## [BentoML](https://github.com/ssheng/BentoChain)
|
||||
|
||||
This repository provides an example of how to deploy a LangChain application with [BentoML](https://github.com/bentoml/BentoML). BentoML is a framework that enables the containerization of machine learning applications as standard OCI images. BentoML also allows for the automatic generation of OpenAPI and gRPC endpoints. With BentoML, you can integrate models from all popular ML frameworks and deploy them as microservices running on the most optimal hardware and scaling independently.
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Getting API Credentials\n",
|
||||
"# Getting API Credentials\n",
|
||||
"\n",
|
||||
"We'll be using quite some APIs in this notebook, here is a list and where to get them:\n",
|
||||
"\n",
|
||||
@@ -47,7 +47,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setting Up"
|
||||
"# Setting Up"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -103,7 +103,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Scenario 1: Just an LLM\n",
|
||||
"# Scenario 1: Just an LLM\n",
|
||||
"\n",
|
||||
"First, let's just run a single LLM a few times and capture the resulting prompt-answer conversation in ClearML"
|
||||
]
|
||||
@@ -361,7 +361,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Scenario 2: Creating an agent with tools\n",
|
||||
"# Scenario 2: Creating a agent with tools\n",
|
||||
"\n",
|
||||
"To show a more advanced workflow, let's create an agent with access to tools. The way ClearML tracks the results is not different though, only the table will look slightly different as there are other types of actions taken when compared to the earlier, simpler example.\n",
|
||||
"\n",
|
||||
@@ -542,7 +542,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tips and Next Steps\n",
|
||||
"# Tips and Next Steps\n",
|
||||
"\n",
|
||||
"- Make sure you always use a unique `name` argument for the `clearml_callback.flush_tracker` function. If not, the model parameters used for a run will override the previous run!\n",
|
||||
"\n",
|
||||
|
||||
@@ -1,352 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Comet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this guide we will demonstrate how to track your Langchain Experiments, Evaluation Metrics, and LLM Sessions with [Comet](https://www.comet.com/site/?utm_source=langchain&utm_medium=referral&utm_campaign=comet_notebook). \n",
|
||||
"\n",
|
||||
"<a target=\"_blank\" href=\"https://colab.research.google.com/github/hwchase17/langchain/blob/master/docs/ecosystem/comet_tracking.ipynb\">\n",
|
||||
" <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>\n",
|
||||
"</a>\n",
|
||||
"\n",
|
||||
"**Example Project:** [Comet with LangChain](https://www.comet.com/examples/comet-example-langchain/view/b5ZThK6OFdhKWVSP3fDfRtrNF/panels?utm_source=langchain&utm_medium=referral&utm_campaign=comet_notebook)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<img width=\"1280\" alt=\"comet-langchain\" src=\"https://user-images.githubusercontent.com/7529846/230326720-a9711435-9c6f-4edb-a707-94b67271ab25.png\">\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Install Comet and Dependencies"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install comet_ml langchain openai google-search-results spacy textstat pandas\n",
|
||||
"\n",
|
||||
"import sys\n",
|
||||
"!{sys.executable} -m spacy download en_core_web_sm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Initialize Comet and Set your Credentials"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can grab your [Comet API Key here](https://www.comet.com/signup?utm_source=langchain&utm_medium=referral&utm_campaign=comet_notebook) or click the link after intializing Comet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import comet_ml\n",
|
||||
"\n",
|
||||
"comet_ml.init(project_name=\"comet-example-langchain\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Set OpenAI and SerpAPI credentials"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You will need an [OpenAI API Key](https://platform.openai.com/account/api-keys) and a [SerpAPI API Key](https://serpapi.com/dashboard) to run the following examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"...\"\n",
|
||||
"#os.environ[\"OPENAI_ORGANIZATION\"] = \"...\"\n",
|
||||
"os.environ[\"SERPAPI_API_KEY\"] = \"...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Scenario 1: Using just an LLM"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
"\n",
|
||||
"from langchain.callbacks import CometCallbackHandler, StdOutCallbackHandler\n",
|
||||
"from langchain.callbacks.base import CallbackManager\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"\n",
|
||||
"comet_callback = CometCallbackHandler(\n",
|
||||
" project_name=\"comet-example-langchain\",\n",
|
||||
" complexity_metrics=True,\n",
|
||||
" stream_logs=True,\n",
|
||||
" tags=[\"llm\"],\n",
|
||||
" visualizations=[\"dep\"],\n",
|
||||
")\n",
|
||||
"manager = CallbackManager([StdOutCallbackHandler(), comet_callback])\n",
|
||||
"llm = OpenAI(temperature=0.9, callback_manager=manager, verbose=True)\n",
|
||||
"\n",
|
||||
"llm_result = llm.generate([\"Tell me a joke\", \"Tell me a poem\", \"Tell me a fact\"] * 3)\n",
|
||||
"print(\"LLM result\", llm_result)\n",
|
||||
"comet_callback.flush_tracker(llm, finish=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Scenario 2: Using an LLM in a Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks import CometCallbackHandler, StdOutCallbackHandler\n",
|
||||
"from langchain.callbacks.base import CallbackManager\n",
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"comet_callback = CometCallbackHandler(\n",
|
||||
" complexity_metrics=True,\n",
|
||||
" project_name=\"comet-example-langchain\",\n",
|
||||
" stream_logs=True,\n",
|
||||
" tags=[\"synopsis-chain\"],\n",
|
||||
")\n",
|
||||
"manager = CallbackManager([StdOutCallbackHandler(), comet_callback])\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0.9, callback_manager=manager, verbose=True)\n",
|
||||
"\n",
|
||||
"template = \"\"\"You are a playwright. Given the title of play, it is your job to write a synopsis for that title.\n",
|
||||
"Title: {title}\n",
|
||||
"Playwright: This is a synopsis for the above play:\"\"\"\n",
|
||||
"prompt_template = PromptTemplate(input_variables=[\"title\"], template=template)\n",
|
||||
"synopsis_chain = LLMChain(llm=llm, prompt=prompt_template, callback_manager=manager)\n",
|
||||
"\n",
|
||||
"test_prompts = [{\"title\": \"Documentary about Bigfoot in Paris\"}]\n",
|
||||
"print(synopsis_chain.apply(test_prompts))\n",
|
||||
"comet_callback.flush_tracker(synopsis_chain, finish=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Scenario 3: Using An Agent with Tools "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import initialize_agent, load_tools\n",
|
||||
"from langchain.callbacks import CometCallbackHandler, StdOutCallbackHandler\n",
|
||||
"from langchain.callbacks.base import CallbackManager\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"\n",
|
||||
"comet_callback = CometCallbackHandler(\n",
|
||||
" project_name=\"comet-example-langchain\",\n",
|
||||
" complexity_metrics=True,\n",
|
||||
" stream_logs=True,\n",
|
||||
" tags=[\"agent\"],\n",
|
||||
")\n",
|
||||
"manager = CallbackManager([StdOutCallbackHandler(), comet_callback])\n",
|
||||
"llm = OpenAI(temperature=0.9, callback_manager=manager, verbose=True)\n",
|
||||
"\n",
|
||||
"tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm, callback_manager=manager)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools,\n",
|
||||
" llm,\n",
|
||||
" agent=\"zero-shot-react-description\",\n",
|
||||
" callback_manager=manager,\n",
|
||||
" verbose=True,\n",
|
||||
")\n",
|
||||
"agent.run(\n",
|
||||
" \"Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\"\n",
|
||||
")\n",
|
||||
"comet_callback.flush_tracker(agent, finish=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Scenario 4: Using Custom Evaluation Metrics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The `CometCallbackManager` also allows you to define and use Custom Evaluation Metrics to assess generated outputs from your model. Let's take a look at how this works. \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"In the snippet below, we will use the [ROUGE](https://huggingface.co/spaces/evaluate-metric/rouge) metric to evaluate the quality of a generated summary of an input prompt. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install rouge-score"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from rouge_score import rouge_scorer\n",
|
||||
"\n",
|
||||
"from langchain.callbacks import CometCallbackHandler, StdOutCallbackHandler\n",
|
||||
"from langchain.callbacks.base import CallbackManager\n",
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Rouge:\n",
|
||||
" def __init__(self, reference):\n",
|
||||
" self.reference = reference\n",
|
||||
" self.scorer = rouge_scorer.RougeScorer([\"rougeLsum\"], use_stemmer=True)\n",
|
||||
"\n",
|
||||
" def compute_metric(self, generation, prompt_idx, gen_idx):\n",
|
||||
" prediction = generation.text\n",
|
||||
" results = self.scorer.score(target=self.reference, prediction=prediction)\n",
|
||||
"\n",
|
||||
" return {\n",
|
||||
" \"rougeLsum_score\": results[\"rougeLsum\"].fmeasure,\n",
|
||||
" \"reference\": self.reference,\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"reference = \"\"\"\n",
|
||||
"The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building.\n",
|
||||
"It was the first structure to reach a height of 300 metres.\n",
|
||||
"\n",
|
||||
"It is now taller than the Chrysler Building in New York City by 5.2 metres (17 ft)\n",
|
||||
"Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France .\n",
|
||||
"\"\"\"\n",
|
||||
"rouge_score = Rouge(reference=reference)\n",
|
||||
"\n",
|
||||
"template = \"\"\"Given the following article, it is your job to write a summary.\n",
|
||||
"Article:\n",
|
||||
"{article}\n",
|
||||
"Summary: This is the summary for the above article:\"\"\"\n",
|
||||
"prompt_template = PromptTemplate(input_variables=[\"article\"], template=template)\n",
|
||||
"\n",
|
||||
"comet_callback = CometCallbackHandler(\n",
|
||||
" project_name=\"comet-example-langchain\",\n",
|
||||
" complexity_metrics=False,\n",
|
||||
" stream_logs=True,\n",
|
||||
" tags=[\"custom_metrics\"],\n",
|
||||
" custom_metrics=rouge_score.compute_metric,\n",
|
||||
")\n",
|
||||
"manager = CallbackManager([StdOutCallbackHandler(), comet_callback])\n",
|
||||
"llm = OpenAI(temperature=0.9, callback_manager=manager, verbose=True)\n",
|
||||
"\n",
|
||||
"synopsis_chain = LLMChain(llm=llm, prompt=prompt_template, callback_manager=manager)\n",
|
||||
"\n",
|
||||
"test_prompts = [\n",
|
||||
" {\n",
|
||||
" \"article\": \"\"\"\n",
|
||||
" The tower is 324 metres (1,063 ft) tall, about the same height as\n",
|
||||
" an 81-storey building, and the tallest structure in Paris. Its base is square,\n",
|
||||
" measuring 125 metres (410 ft) on each side.\n",
|
||||
" During its construction, the Eiffel Tower surpassed the\n",
|
||||
" Washington Monument to become the tallest man-made structure in the world,\n",
|
||||
" a title it held for 41 years until the Chrysler Building\n",
|
||||
" in New York City was finished in 1930.\n",
|
||||
"\n",
|
||||
" It was the first structure to reach a height of 300 metres.\n",
|
||||
" Due to the addition of a broadcasting aerial at the top of the tower in 1957,\n",
|
||||
" it is now taller than the Chrysler Building by 5.2 metres (17 ft).\n",
|
||||
"\n",
|
||||
" Excluding transmitters, the Eiffel Tower is the second tallest\n",
|
||||
" free-standing structure in France after the Millau Viaduct.\n",
|
||||
" \"\"\"\n",
|
||||
" }\n",
|
||||
"]\n",
|
||||
"print(synopsis_chain.apply(test_prompts))\n",
|
||||
"comet_callback.flush_tracker(synopsis_chain, finish=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
# Databerry
|
||||
|
||||
This page covers how to use the [Databerry](https://databerry.ai) within LangChain.
|
||||
|
||||
## What is Databerry?
|
||||
|
||||
Databerry is an [open source](https://github.com/gmpetrov/databerry) document retrievial platform that helps to connect your personal data with Large Language Models.
|
||||
|
||||

|
||||
|
||||
## Quick start
|
||||
|
||||
Retrieving documents stored in Databerry from LangChain is very easy!
|
||||
|
||||
```python
|
||||
from langchain.retrievers import DataberryRetriever
|
||||
|
||||
retriever = DataberryRetriever(
|
||||
datastore_url="https://api.databerry.ai/query/clg1xg2h80000l708dymr0fxc",
|
||||
# api_key="DATABERRY_API_KEY", # optional if datastore is public
|
||||
# top_k=10 # optional
|
||||
)
|
||||
|
||||
docs = retriever.get_relevant_documents("What's Databerry?")
|
||||
```
|
||||
@@ -8,9 +8,8 @@ This page covers how to use the Deep Lake ecosystem within LangChain.
|
||||
|
||||
## More Resources
|
||||
1. [Ultimate Guide to LangChain & Deep Lake: Build ChatGPT to Answer Questions on Your Financial Data](https://www.activeloop.ai/resources/ultimate-guide-to-lang-chain-deep-lake-build-chat-gpt-to-answer-questions-on-your-financial-data/)
|
||||
2. [Twitter the-algorithm codebase analysis with Deep Lake](../use_cases/code/twitter-the-algorithm-analysis-deeplake.ipynb)
|
||||
3. Here is [whitepaper](https://www.deeplake.ai/whitepaper) and [academic paper](https://arxiv.org/pdf/2209.10785.pdf) for Deep Lake
|
||||
4. Here is a set of additional resources available for review: [Deep Lake](https://github.com/activeloopai/deeplake), [Getting Started](https://docs.activeloop.ai/getting-started) and [Tutorials](https://docs.activeloop.ai/hub-tutorials)
|
||||
1. Here is [whitepaper](https://www.deeplake.ai/whitepaper) and [academic paper](https://arxiv.org/pdf/2209.10785.pdf) for Deep Lake
|
||||
2. Here is a set of additional resources available for review: [Deep Lake](https://github.com/activeloopai/deeplake), [Getting Started](https://docs.activeloop.ai/getting-started) and [Tutorials](https://docs.activeloop.ai/hub-tutorials)
|
||||
|
||||
## Installation and Setup
|
||||
- Install the Python package with `pip install deeplake`
|
||||
|
||||
@@ -1,21 +1,21 @@
|
||||
# GPT4All
|
||||
|
||||
This page covers how to use the `GPT4All` wrapper within LangChain. The tutorial is divided into two parts: installation and setup, followed by usage with an example.
|
||||
This page covers how to use the `GPT4All` wrapper within LangChain.
|
||||
It is broken into two parts: installation and setup, and then usage with an example.
|
||||
|
||||
## Installation and Setup
|
||||
- Install the Python package with `pip install pyllamacpp`
|
||||
- Download a [GPT4All model](https://github.com/nomic-ai/pyllamacpp#supported-model) and place it in your desired directory
|
||||
- Download a [GPT4All model](https://github.com/nomic-ai/gpt4all) and place it in your desired directory
|
||||
|
||||
## Usage
|
||||
|
||||
### GPT4All
|
||||
|
||||
To use the GPT4All wrapper, you need to provide the path to the pre-trained model file and the model's configuration.
|
||||
|
||||
```python
|
||||
from langchain.llms import GPT4All
|
||||
|
||||
# Instantiate the model. Callbacks support token-wise streaming
|
||||
# Instantiate the model
|
||||
model = GPT4All(model="./models/gpt4all-model.bin", n_ctx=512, n_threads=8)
|
||||
|
||||
# Generate text
|
||||
@@ -24,24 +24,14 @@ response = model("Once upon a time, ")
|
||||
|
||||
You can also customize the generation parameters, such as n_predict, temp, top_p, top_k, and others.
|
||||
|
||||
To stream the model's predictions, add in a CallbackManager.
|
||||
Example:
|
||||
|
||||
```python
|
||||
from langchain.llms import GPT4All
|
||||
from langchain.callbacks.base import CallbackManager
|
||||
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
||||
# There are many CallbackHandlers supported, such as
|
||||
# from langchain.callbacks.streamlit import StreamlitCallbackHandler
|
||||
|
||||
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
|
||||
model = GPT4All(model="./models/gpt4all-model.bin", n_ctx=512, n_threads=8, callback_handler=callback_handler, verbose=True)
|
||||
|
||||
# Generate text. Tokens are streamed through the callback manager.
|
||||
model("Once upon a time, ")
|
||||
model = GPT4All(model="./models/gpt4all-model.bin", n_predict=55, temp=0)
|
||||
response = model("Once upon a time, ")
|
||||
```
|
||||
|
||||
## Model File
|
||||
|
||||
You can find links to model file downloads in the [pyllamacpp](https://github.com/nomic-ai/pyllamacpp) repository.
|
||||
You can find links to model file downloads at the [GPT4all](https://github.com/nomic-ai/gpt4all) repository. They will need to be converted to `ggml` format to work, as specified in the [pyllamacpp](https://github.com/nomic-ai/pyllamacpp) repository.
|
||||
|
||||
For a more detailed walkthrough of this, see [this notebook](../modules/models/llms/integrations/gpt4all.ipynb)
|
||||
For a more detailed walkthrough of this, see [this notebook](../modules/models/llms/integrations/gpt4all.ipynb)
|
||||
@@ -1,6 +1,6 @@
|
||||
# Graphsignal
|
||||
|
||||
This page covers how to use [Graphsignal](https://app.graphsignal.com) to trace and monitor LangChain. Graphsignal enables full visibility into your application. It provides latency breakdowns by chains and tools, exceptions with full context, data monitoring, compute/GPU utilization, OpenAI cost analytics, and more.
|
||||
This page covers how to use the Graphsignal ecosystem to trace and monitor LangChain.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
@@ -10,7 +10,7 @@ This page covers how to use [Graphsignal](https://app.graphsignal.com) to trace
|
||||
|
||||
## Tracing and Monitoring
|
||||
|
||||
Graphsignal automatically instruments and starts tracing and monitoring chains. Traces and metrics are then available in your [Graphsignal dashboards](https://app.graphsignal.com).
|
||||
Graphsignal automatically instruments and starts tracing and monitoring chains. Traces, metrics and errors are then available in your [Graphsignal dashboard](https://app.graphsignal.com/). No prompts or other sensitive data are sent to Graphsignal cloud, only statistics and metadata.
|
||||
|
||||
Initialize the tracer by providing a deployment name:
|
||||
|
||||
@@ -20,13 +20,7 @@ import graphsignal
|
||||
graphsignal.configure(deployment='my-langchain-app-prod')
|
||||
```
|
||||
|
||||
To additionally trace any function or code, you can use a decorator or a context manager:
|
||||
|
||||
```python
|
||||
@graphsignal.trace_function
|
||||
def handle_request():
|
||||
chain.run("some initial text")
|
||||
```
|
||||
In order to trace full runs and see a breakdown by chains and tools, you can wrap the calling routine or use a decorator:
|
||||
|
||||
```python
|
||||
with graphsignal.start_trace('my-chain'):
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
# Zilliz
|
||||
|
||||
This page covers how to use the Zilliz Cloud ecosystem within LangChain.
|
||||
Zilliz uses the Milvus integration.
|
||||
It is broken into two parts: installation and setup, and then references to specific Milvus wrappers.
|
||||
|
||||
## Installation and Setup
|
||||
- Install the Python SDK with `pip install pymilvus`
|
||||
## Wrappers
|
||||
|
||||
### VectorStore
|
||||
|
||||
There exists a wrapper around Zilliz indexes, allowing you to use it as a vectorstore,
|
||||
whether for semantic search or example selection.
|
||||
|
||||
To import this vectorstore:
|
||||
```python
|
||||
from langchain.vectorstores import Milvus
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the Miluvs wrapper, see [this notebook](../modules/indexes/vectorstores/examples/zilliz.ipynb)
|
||||
@@ -1,5 +1,5 @@
|
||||
LangChain Gallery
|
||||
=================
|
||||
=============
|
||||
|
||||
Lots of people have built some pretty awesome stuff with LangChain.
|
||||
This is a collection of our favorites.
|
||||
@@ -223,7 +223,7 @@ Open Source
|
||||
Answer questions about the documentation of any project
|
||||
|
||||
Misc. Colab Notebooks
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
.. panels::
|
||||
:body: text-center
|
||||
|
||||
@@ -9,8 +9,6 @@ To get started, install LangChain with the following command:
|
||||
|
||||
```bash
|
||||
pip install langchain
|
||||
# or
|
||||
conda install langchain -c conda-forge
|
||||
```
|
||||
|
||||
|
||||
|
||||
@@ -71,8 +71,6 @@ The above modules can be used in a variety of ways. LangChain also provides guid
|
||||
|
||||
- `Querying Tabular Data <./use_cases/tabular.html>`_: If you want to understand how to use LLMs to query data that is stored in a tabular format (csvs, SQL, dataframes, etc) you should read this page.
|
||||
|
||||
- `Code Understanding <./use_cases/code.html>`_: If you want to understand how to use LLMs to query source code from github, you should read this page.
|
||||
|
||||
- `Interacting with APIs <./use_cases/apis.html>`_: Enabling LLMs to interact with APIs is extremely powerful in order to give them more up-to-date information and allow them to take actions.
|
||||
|
||||
- `Extraction <./use_cases/extraction.html>`_: Extract structured information from text.
|
||||
@@ -92,7 +90,6 @@ The above modules can be used in a variety of ways. LangChain also provides guid
|
||||
./use_cases/question_answering.md
|
||||
./use_cases/chatbots.md
|
||||
./use_cases/tabular.rst
|
||||
./use_cases/code.md
|
||||
./use_cases/apis.md
|
||||
./use_cases/summarization.md
|
||||
./use_cases/extraction.md
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "68b24990",
|
||||
"metadata": {},
|
||||
@@ -10,7 +9,7 @@
|
||||
"\n",
|
||||
"This notebook covers how to combine agents and vectorstores. The use case for this is that you've ingested your data into a vectorstore and want to interact with it in an agentic manner.\n",
|
||||
"\n",
|
||||
"The recommended method for doing so is to create a RetrievalQA and then use that as a tool in the overall agent. Let's take a look at doing this below. You can do this with multiple different vectordbs, and use the agent as a way to route between them. There are two different ways of doing this - you can either let the agent use the vectorstores as normal tools, or you can set `return_direct=True` to really just use the agent as a router."
|
||||
"The reccomended method for doing so is to create a RetrievalQA and then use that as a tool in the overall agent. Let's take a look at doing this below. You can do this with multiple different vectordbs, and use the agent as a way to route between them. There are two different ways of doing this - you can either let the agent use the vectorstores as normal tools, or you can set `return_direct=True` to really just use the agent as a router."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -176,7 +176,7 @@
|
||||
" llm = OpenAI(temperature=0)\n",
|
||||
" tools = load_tools([\"llm-math\", \"serpapi\"], llm=llm)\n",
|
||||
" agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION verbose=True\n",
|
||||
" )\n",
|
||||
" agent.run(q)\n",
|
||||
"\n",
|
||||
|
||||
@@ -26,8 +26,6 @@ For documentation on how to create a custom agent, see the below.
|
||||
./agents/custom_llm_agent.ipynb
|
||||
./agents/custom_llm_chat_agent.ipynb
|
||||
./agents/custom_mrkl_agent.ipynb
|
||||
./agents/custom_multi_action_agent.ipynb
|
||||
./agents/custom_agent_with_tool_retrieval.ipynb
|
||||
|
||||
We also have documentation for an in-depth dive into each agent type.
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@
|
||||
" Returns:\n",
|
||||
" Action specifying what tool to use.\n",
|
||||
" \"\"\"\n",
|
||||
" return AgentAction(tool=\"Search\", tool_input=kwargs[\"input\"], log=\"\")\n",
|
||||
" return AgentAction(tool=\"Search\", tool_input=\"foo\", log=\"\")\n",
|
||||
"\n",
|
||||
" async def aplan(\n",
|
||||
" self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any\n",
|
||||
@@ -92,7 +92,7 @@
|
||||
" Returns:\n",
|
||||
" Action specifying what tool to use.\n",
|
||||
" \"\"\"\n",
|
||||
" return AgentAction(tool=\"Search\", tool_input=kwargs[\"input\"], log=\"\")"
|
||||
" return AgentAction(tool=\"Search\", tool_input=\"foo\", log=\"\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,478 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ba5f8741",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Custom Agent with Tool Retrieval\n",
|
||||
"\n",
|
||||
"This notebook builds off of [this notebook](custom_llm_agent.ipynb) and assumes familiarity with how agents work.\n",
|
||||
"\n",
|
||||
"The novel idea introduced in this notebook is the idea of using retrieval to select the set of tools to use to answer an agent query. This is useful when you have many many tools to select from. You cannot put the description of all the tools in the prompt (because of context length issues) so instead you dynamically select the N tools you do want to consider using at run time.\n",
|
||||
"\n",
|
||||
"In this notebook we will create a somewhat contrieved example. We will have one legitimate tool (search) and then 99 fake tools which are just nonsense. We will then add a step in the prompt template that takes the user input and retrieves tool relevant to the query."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fea4812c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up environment\n",
|
||||
"\n",
|
||||
"Do necessary imports, etc."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "9af9734e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser\n",
|
||||
"from langchain.prompts import StringPromptTemplate\n",
|
||||
"from langchain import OpenAI, SerpAPIWrapper, LLMChain\n",
|
||||
"from typing import List, Union\n",
|
||||
"from langchain.schema import AgentAction, AgentFinish\n",
|
||||
"import re"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6df0253f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up tools\n",
|
||||
"\n",
|
||||
"We will create one legitimate tool (search) and then 99 fake tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "becda2a1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define which tools the agent can use to answer user queries\n",
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"search_tool = Tool(\n",
|
||||
" name = \"Search\",\n",
|
||||
" func=search.run,\n",
|
||||
" description=\"useful for when you need to answer questions about current events\"\n",
|
||||
" )\n",
|
||||
"def fake_func(inp: str) -> str:\n",
|
||||
" return \"foo\"\n",
|
||||
"fake_tools = [\n",
|
||||
" Tool(\n",
|
||||
" name=f\"foo-{i}\", \n",
|
||||
" func=fake_func, \n",
|
||||
" description=f\"a silly function that you can use to get more information about the number {i}\"\n",
|
||||
" ) \n",
|
||||
" for i in range(99)\n",
|
||||
"]\n",
|
||||
"ALL_TOOLS = [search_tool] + fake_tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "17362717",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tool Retriever\n",
|
||||
"\n",
|
||||
"We will use a vectorstore to create embeddings for each tool description. Then, for an incoming query we can create embeddings for that query and do a similarity search for relevant tools."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "77c4be4b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.vectorstores import FAISS\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.schema import Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9092a158",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = [Document(page_content=t.description, metadata={\"index\": i}) for i, t in enumerate(ALL_TOOLS)]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "affc4e56",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store = FAISS.from_documents(docs, OpenAIEmbeddings())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "735a7566",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = vector_store.as_retriever()\n",
|
||||
"\n",
|
||||
"def get_tools(query):\n",
|
||||
" docs = retriever.get_relevant_documents(query)\n",
|
||||
" return [ALL_TOOLS[d.metadata[\"index\"]] for d in docs]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7699afd7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can now test this retriever to see if it seems to work."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "425f2886",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Tool(name='Search', description='useful for when you need to answer questions about current events', return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x114b28a90>, func=<bound method SerpAPIWrapper.run of SerpAPIWrapper(search_engine=<class 'serpapi.google_search.GoogleSearch'>, params={'engine': 'google', 'google_domain': 'google.com', 'gl': 'us', 'hl': 'en'}, serpapi_api_key='c657176b327b17e79b55306ab968d164ee2369a7c7fa5b3f8a5f7889903de882', aiosession=None)>, coroutine=None),\n",
|
||||
" Tool(name='foo-95', description='a silly function that you can use to get more information about the number 95', return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x114b28a90>, func=<function fake_func at 0x15e5bd1f0>, coroutine=None),\n",
|
||||
" Tool(name='foo-12', description='a silly function that you can use to get more information about the number 12', return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x114b28a90>, func=<function fake_func at 0x15e5bd1f0>, coroutine=None),\n",
|
||||
" Tool(name='foo-15', description='a silly function that you can use to get more information about the number 15', return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x114b28a90>, func=<function fake_func at 0x15e5bd1f0>, coroutine=None)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"get_tools(\"whats the weather?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "4036dd19",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Tool(name='foo-13', description='a silly function that you can use to get more information about the number 13', return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x114b28a90>, func=<function fake_func at 0x15e5bd1f0>, coroutine=None),\n",
|
||||
" Tool(name='foo-12', description='a silly function that you can use to get more information about the number 12', return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x114b28a90>, func=<function fake_func at 0x15e5bd1f0>, coroutine=None),\n",
|
||||
" Tool(name='foo-14', description='a silly function that you can use to get more information about the number 14', return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x114b28a90>, func=<function fake_func at 0x15e5bd1f0>, coroutine=None),\n",
|
||||
" Tool(name='foo-11', description='a silly function that you can use to get more information about the number 11', return_direct=False, verbose=False, callback_manager=<langchain.callbacks.shared.SharedCallbackManager object at 0x114b28a90>, func=<function fake_func at 0x15e5bd1f0>, coroutine=None)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"get_tools(\"whats the number 13?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2e7a075c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prompt Template\n",
|
||||
"\n",
|
||||
"The prompt template is pretty standard, because we're not actually changing that much logic in the actual prompt template, but rather we are just changing how retrieval is done."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "339b1bb8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set up the base template\n",
|
||||
"template = \"\"\"Answer the following questions as best you can, but speaking as a pirate might speak. You have access to the following tools:\n",
|
||||
"\n",
|
||||
"{tools}\n",
|
||||
"\n",
|
||||
"Use the following format:\n",
|
||||
"\n",
|
||||
"Question: the input question you must answer\n",
|
||||
"Thought: you should always think about what to do\n",
|
||||
"Action: the action to take, should be one of [{tool_names}]\n",
|
||||
"Action Input: the input to the action\n",
|
||||
"Observation: the result of the action\n",
|
||||
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
|
||||
"Thought: I now know the final answer\n",
|
||||
"Final Answer: the final answer to the original input question\n",
|
||||
"\n",
|
||||
"Begin! Remember to speak as a pirate when giving your final answer. Use lots of \"Arg\"s\n",
|
||||
"\n",
|
||||
"Question: {input}\n",
|
||||
"{agent_scratchpad}\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1583acdc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The custom prompt template now has the concept of a tools_getter, which we call on the input to select the tools to use"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"id": "fd969d31",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Callable\n",
|
||||
"# Set up a prompt template\n",
|
||||
"class CustomPromptTemplate(StringPromptTemplate):\n",
|
||||
" # The template to use\n",
|
||||
" template: str\n",
|
||||
" ############## NEW ######################\n",
|
||||
" # The list of tools available\n",
|
||||
" tools_getter: Callable\n",
|
||||
" \n",
|
||||
" def format(self, **kwargs) -> str:\n",
|
||||
" # Get the intermediate steps (AgentAction, Observation tuples)\n",
|
||||
" # Format them in a particular way\n",
|
||||
" intermediate_steps = kwargs.pop(\"intermediate_steps\")\n",
|
||||
" thoughts = \"\"\n",
|
||||
" for action, observation in intermediate_steps:\n",
|
||||
" thoughts += action.log\n",
|
||||
" thoughts += f\"\\nObservation: {observation}\\nThought: \"\n",
|
||||
" # Set the agent_scratchpad variable to that value\n",
|
||||
" kwargs[\"agent_scratchpad\"] = thoughts\n",
|
||||
" ############## NEW ######################\n",
|
||||
" tools = self.tools_getter(kwargs[\"input\"])\n",
|
||||
" # Create a tools variable from the list of tools provided\n",
|
||||
" kwargs[\"tools\"] = \"\\n\".join([f\"{tool.name}: {tool.description}\" for tool in tools])\n",
|
||||
" # Create a list of tool names for the tools provided\n",
|
||||
" kwargs[\"tool_names\"] = \", \".join([tool.name for tool in tools])\n",
|
||||
" return self.template.format(**kwargs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"id": "798ef9fb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt = CustomPromptTemplate(\n",
|
||||
" template=template,\n",
|
||||
" tools_getter=get_tools,\n",
|
||||
" # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically\n",
|
||||
" # This includes the `intermediate_steps` variable because that is needed\n",
|
||||
" input_variables=[\"input\", \"intermediate_steps\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ef3a1af3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Output Parser\n",
|
||||
"\n",
|
||||
"The output parser is unchanged from the previous notebook, since we are not changing anything about the output format."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"id": "7c6fe0d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class CustomOutputParser(AgentOutputParser):\n",
|
||||
" \n",
|
||||
" def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:\n",
|
||||
" # Check if agent should finish\n",
|
||||
" if \"Final Answer:\" in llm_output:\n",
|
||||
" return AgentFinish(\n",
|
||||
" # Return values is generally always a dictionary with a single `output` key\n",
|
||||
" # It is not recommended to try anything else at the moment :)\n",
|
||||
" return_values={\"output\": llm_output.split(\"Final Answer:\")[-1].strip()},\n",
|
||||
" log=llm_output,\n",
|
||||
" )\n",
|
||||
" # Parse out the action and action input\n",
|
||||
" regex = r\"Action\\s*\\d*\\s*:(.*?)\\nAction\\s*\\d*\\s*Input\\s*\\d*\\s*:[\\s]*(.*)\"\n",
|
||||
" match = re.search(regex, llm_output, re.DOTALL)\n",
|
||||
" if not match:\n",
|
||||
" raise ValueError(f\"Could not parse LLM output: `{llm_output}`\")\n",
|
||||
" action = match.group(1).strip()\n",
|
||||
" action_input = match.group(2)\n",
|
||||
" # Return the action and action input\n",
|
||||
" return AgentAction(tool=action, tool_input=action_input.strip(\" \").strip('\"'), log=llm_output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"id": "d278706a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"output_parser = CustomOutputParser()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "170587b1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up LLM, stop sequence, and the agent\n",
|
||||
"\n",
|
||||
"Also the same as the previous notebook"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 56,
|
||||
"id": "f9d4c374",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"id": "9b1cc2a2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# LLM chain consisting of the LLM and a prompt\n",
|
||||
"llm_chain = LLMChain(llm=llm, prompt=prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"id": "e4f5092f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tool_names = [tool.name for tool in tools]\n",
|
||||
"agent = LLMSingleActionAgent(\n",
|
||||
" llm_chain=llm_chain, \n",
|
||||
" output_parser=output_parser,\n",
|
||||
" stop=[\"\\nObservation:\"], \n",
|
||||
" allowed_tools=tool_names\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "aa8a5326",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use the Agent\n",
|
||||
"\n",
|
||||
"Now we can use it!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"id": "490604e9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"id": "653b1617",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out what the weather is in SF\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: Weather in SF\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[36;1m\u001b[1;3mMostly cloudy skies early, then partly cloudy in the afternoon. High near 60F. ENE winds shifting to W at 10 to 15 mph. Humidity71%. UV Index6 of 10.\u001b[0m\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: 'Arg, 'tis mostly cloudy skies early, then partly cloudy in the afternoon. High near 60F. ENE winds shiftin' to W at 10 to 15 mph. Humidity71%. UV Index6 of 10.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"'Arg, 'tis mostly cloudy skies early, then partly cloudy in the afternoon. High near 60F. ENE winds shiftin' to W at 10 to 15 mph. Humidity71%. UV Index6 of 10.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 60,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.run(\"What's the weather in SF?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2481ee76",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "18784188d7ecd866c0586ac068b02361a6896dc3a29b64f5cc957f09c590acef"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -42,7 +42,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 11,
|
||||
"id": "9af9734e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -67,7 +67,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 28,
|
||||
"id": "becda2a1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -99,7 +99,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 7,
|
||||
"id": "339b1bb8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -128,7 +128,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 22,
|
||||
"id": "fd969d31",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -159,7 +159,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 23,
|
||||
"id": "798ef9fb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -187,7 +187,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 13,
|
||||
"id": "7c6fe0d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -204,7 +204,7 @@
|
||||
" log=llm_output,\n",
|
||||
" )\n",
|
||||
" # Parse out the action and action input\n",
|
||||
" regex = r\"Action\\s*\\d*\\s*:(.*?)\\nAction\\s*\\d*\\s*Input\\s*\\d*\\s*:[\\s]*(.*)\"\n",
|
||||
" regex = r\"Action: (.*?)[\\n]*Action Input:[\\s]*(.*)\"\n",
|
||||
" match = re.search(regex, llm_output, re.DOTALL)\n",
|
||||
" if not match:\n",
|
||||
" raise ValueError(f\"Could not parse LLM output: `{llm_output}`\")\n",
|
||||
@@ -216,7 +216,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 14,
|
||||
"id": "d278706a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -236,7 +236,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 16,
|
||||
"id": "f9d4c374",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -268,7 +268,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 24,
|
||||
"id": "9b1cc2a2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -279,7 +279,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 25,
|
||||
"id": "e4f5092f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -305,7 +305,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 26,
|
||||
"id": "490604e9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -315,7 +315,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 27,
|
||||
"id": "653b1617",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -326,12 +326,11 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out the population of Canada in 2023\n",
|
||||
"Action: Search\n",
|
||||
"\u001b[32;1m\u001b[1;3mAction: Search\n",
|
||||
"Action Input: Population of Canada in 2023\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[36;1m\u001b[1;3mThe current population of Canada is 38,658,314 as of Wednesday, April 12, 2023, based on Worldometer elaboration of the latest United Nations data.\u001b[0m\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: Arrr, there be 38,658,314 people livin' in Canada as of 2023!\u001b[0m\n",
|
||||
"Observation:\u001b[36;1m\u001b[1;3m38,648,380\u001b[0m\u001b[32;1m\u001b[1;3m That's a lot of people!\n",
|
||||
"Final Answer: Arrr, there be 38,648,380 people livin' in Canada come 2023!\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -339,10 +338,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Arrr, there be 38,658,314 people livin' in Canada as of 2023!\""
|
||||
"\"Arrr, there be 38,648,380 people livin' in Canada come 2023!\""
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -351,203 +350,10 @@
|
||||
"agent_executor.run(\"How many people live in canada as of 2023?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d5b4a078",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Adding Memory\n",
|
||||
"\n",
|
||||
"If you want to add memory to the agent, you'll need to:\n",
|
||||
"\n",
|
||||
"1. Add a place in the custom prompt for the chat_history\n",
|
||||
"2. Add a memory object to the agent executor."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "94fffda1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set up the base template\n",
|
||||
"template_with_history = \"\"\"Answer the following questions as best you can, but speaking as a pirate might speak. You have access to the following tools:\n",
|
||||
"\n",
|
||||
"{tools}\n",
|
||||
"\n",
|
||||
"Use the following format:\n",
|
||||
"\n",
|
||||
"Question: the input question you must answer\n",
|
||||
"Thought: you should always think about what to do\n",
|
||||
"Action: the action to take, should be one of [{tool_names}]\n",
|
||||
"Action Input: the input to the action\n",
|
||||
"Observation: the result of the action\n",
|
||||
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
|
||||
"Thought: I now know the final answer\n",
|
||||
"Final Answer: the final answer to the original input question\n",
|
||||
"\n",
|
||||
"Begin! Remember to speak as a pirate when giving your final answer. Use lots of \"Arg\"s\n",
|
||||
"\n",
|
||||
"Previous conversation history:\n",
|
||||
"{history}\n",
|
||||
"\n",
|
||||
"New question: {input}\n",
|
||||
"{agent_scratchpad}\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "f58488d7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt_with_history = CustomPromptTemplate(\n",
|
||||
" template=template_with_history,\n",
|
||||
" tools=tools,\n",
|
||||
" # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically\n",
|
||||
" # This includes the `intermediate_steps` variable because that is needed\n",
|
||||
" input_variables=[\"input\", \"intermediate_steps\", \"history\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "d28d4b5a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm_chain = LLMChain(llm=llm, prompt=prompt_with_history)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "3e37b32a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tool_names = [tool.name for tool in tools]\n",
|
||||
"agent = LLMSingleActionAgent(\n",
|
||||
" llm_chain=llm_chain, \n",
|
||||
" output_parser=output_parser,\n",
|
||||
" stop=[\"\\nObservation:\"], \n",
|
||||
" allowed_tools=tool_names\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "97ea1bce",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import ConversationBufferWindowMemory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"id": "b5ad69ce",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"memory=ConversationBufferWindowMemory(k=2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"id": "b7b5c9b1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"id": "5ec4c39b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out the population of Canada in 2023\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: Population of Canada in 2023\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[36;1m\u001b[1;3mThe current population of Canada is 38,658,314 as of Wednesday, April 12, 2023, based on Worldometer elaboration of the latest United Nations data.\u001b[0m\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: Arrr, there be 38,658,314 people livin' in Canada as of 2023!\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Arrr, there be 38,658,314 people livin' in Canada as of 2023!\""
|
||||
]
|
||||
},
|
||||
"execution_count": 44,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.run(\"How many people live in canada as of 2023?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"id": "b2ba45bb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out how many people live in Mexico.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: How many people live in Mexico as of 2023?\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[36;1m\u001b[1;3mThe current population of Mexico is 132,679,922 as of Tuesday, April 11, 2023, based on Worldometer elaboration of the latest United Nations data. Mexico 2020 ...\u001b[0m\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
|
||||
"Final Answer: Arrr, there be 132,679,922 people livin' in Mexico as of 2023!\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Arrr, there be 132,679,922 people livin' in Mexico as of 2023!\""
|
||||
]
|
||||
},
|
||||
"execution_count": 45,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.run(\"how about in mexico?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bd820a7a",
|
||||
"id": "adefb4c2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
|
||||
@@ -206,7 +206,7 @@
|
||||
" log=llm_output,\n",
|
||||
" )\n",
|
||||
" # Parse out the action and action input\n",
|
||||
" regex = r\"Action\\s*\\d*\\s*:(.*?)\\nAction\\s*\\d*\\s*Input\\s*\\d*\\s*:[\\s]*(.*)\"\n",
|
||||
" regex = r\"Action: (.*?)[\\n]*Action Input:[\\s]*(.*)\"\n",
|
||||
" match = re.search(regex, llm_output, re.DOTALL)\n",
|
||||
" if not match:\n",
|
||||
" raise ValueError(f\"Could not parse LLM output: `{llm_output}`\")\n",
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 23,
|
||||
"id": "9af9734e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -53,7 +53,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 24,
|
||||
"id": "becda2a1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -70,7 +70,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 25,
|
||||
"id": "339b1bb8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -99,7 +99,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 26,
|
||||
"id": "e21d2098",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -145,7 +145,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 27,
|
||||
"id": "9b1cc2a2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -155,7 +155,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 28,
|
||||
"id": "e4f5092f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -166,7 +166,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 29,
|
||||
"id": "490604e9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -176,7 +176,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 31,
|
||||
"id": "653b1617",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -190,9 +190,9 @@
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out the population of Canada\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: Population of Canada 2023\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe current population of Canada is 38,661,927 as of Sunday, April 16, 2023, based on Worldometer elaboration of the latest United Nations data.\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe current population of Canada is 38,610,447 as of Saturday, February 18, 2023, based on Worldometer elaboration of the latest United Nations data. Canada 2020 population is estimated at 37,742,154 people at mid year according to UN data.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: Arrr, Canada be havin' 38,661,927 people livin' there as of 2023!\u001b[0m\n",
|
||||
"Final Answer: Arrr, Canada be havin' 38,610,447 scallywags livin' there as of 2023!\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -200,10 +200,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Arrr, Canada be havin' 38,661,927 people livin' there as of 2023!\""
|
||||
"\"Arrr, Canada be havin' 38,610,447 scallywags livin' there as of 2023!\""
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -223,7 +223,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 32,
|
||||
"id": "43dbfa2f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -244,7 +244,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 33,
|
||||
"id": "0f087313",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -254,7 +254,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 34,
|
||||
"id": "92c75a10",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -264,7 +264,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 35,
|
||||
"id": "ac5b83bf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -274,7 +274,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 36,
|
||||
"id": "c960e4ff",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -285,16 +285,12 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I should look for recent population estimates.\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out the population of Canada in 2023.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: Canada population 2023\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m39,566,248\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I should double check this number.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: Canada population estimates 2023\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mCanada's population was estimated at 39,566,248 on January 1, 2023, after a record population growth of 1,050,110 people from January 1, 2022, to January 1, 2023.\u001b[0m\n",
|
||||
"Action Input: Population of Canada in 2023\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe current population of Canada is 38,610,447 as of Saturday, February 18, 2023, based on Worldometer elaboration of the latest United Nations data. Canada 2020 population is estimated at 37,742,154 people at mid year according to UN data.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
|
||||
"Final Answer: La popolazione del Canada è stata stimata a 39.566.248 il 1° gennaio 2023, dopo un record di crescita demografica di 1.050.110 persone dal 1° gennaio 2022 al 1° gennaio 2023.\u001b[0m\n",
|
||||
"Final Answer: La popolazione del Canada nel 2023 è stimata in 38.610.447 persone.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -302,10 +298,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'La popolazione del Canada è stata stimata a 39.566.248 il 1° gennaio 2023, dopo un record di crescita demografica di 1.050.110 persone dal 1° gennaio 2022 al 1° gennaio 2023.'"
|
||||
"'La popolazione del Canada nel 2023 è stimata in 38.610.447 persone.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 36,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
||||
@@ -28,15 +28,7 @@
|
||||
"execution_count": 2,
|
||||
"id": "f65308ab",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to default session, using empty session: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /sessions (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x10a1767c0>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import Tool\n",
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
@@ -96,20 +88,7 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to persist run: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /chain-runs (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x13fab40d0>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"Hello Bob! How can I assist you today?\"\n",
|
||||
@@ -145,20 +124,7 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to persist run: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /chain-runs (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x13fab44f0>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"Your name is Bob.\"\n",
|
||||
@@ -201,24 +167,10 @@
|
||||
" \"action\": \"Current Search\",\n",
|
||||
" \"action_input\": \"Thai food dinner recipes\"\n",
|
||||
"}\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m59 easy Thai recipes for any night of the week · Marion Grasby's Thai spicy chilli and basil fried rice · Thai curry noodle soup · Marion Grasby's Thai Spicy ...\u001b[0m\n",
|
||||
"Thought:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to persist run: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /chain-runs (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x13fae8be0>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m{\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m59 easy Thai recipes for any night of the week · Marion Grasby's Thai spicy chilli and basil fried rice · Thai curry noodle soup · Marion Grasby's ...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"Here are some Thai food dinner recipes you can make this week: Thai spicy chilli and basil fried rice, Thai curry noodle soup, and Thai Spicy ... (59 recipes in total).\"\n",
|
||||
" \"action_input\": \"Here are some Thai food dinner recipes you can make this week: Thai spicy chilli and basil fried rice, Thai curry noodle soup, and many more. You can find 59 easy Thai recipes for any night of the week on Marion Grasby's website.\"\n",
|
||||
"}\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
@@ -227,7 +179,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Here are some Thai food dinner recipes you can make this week: Thai spicy chilli and basil fried rice, Thai curry noodle soup, and Thai Spicy ... (59 recipes in total).'"
|
||||
"\"Here are some Thai food dinner recipes you can make this week: Thai spicy chilli and basil fried rice, Thai curry noodle soup, and many more. You can find 59 easy Thai recipes for any night of the week on Marion Grasby's website.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
@@ -258,25 +210,11 @@
|
||||
" \"action_input\": \"who won the world cup in 1978\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mArgentina national football team\u001b[0m\n",
|
||||
"Thought:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to persist run: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /chain-runs (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x13fae86d0>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe Argentina national football team represents Argentina in men's international football and is administered by the Argentine Football Association, the governing body for football in Argentina. Nicknamed La Albiceleste, they are the reigning world champions, having won the most recent World Cup in 2022.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"The last letter in your name is 'b', and the winner of the 1978 World Cup was the Argentina national football team.\"\n",
|
||||
" \"action_input\": \"The last letter in your name is 'b'. The Argentina national football team won the World Cup in 1978.\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"\n",
|
||||
@@ -286,7 +224,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"The last letter in your name is 'b', and the winner of the 1978 World Cup was the Argentina national football team.\""
|
||||
"\"The last letter in your name is 'b'. The Argentina national football team won the World Cup in 1978.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
@@ -315,24 +253,10 @@
|
||||
" \"action\": \"Current Search\",\n",
|
||||
" \"action_input\": \"weather in pomfret\"\n",
|
||||
"}\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m10 Day Weather-Pomfret, CT ; Sun 16. 64° · 50°. 24% · NE 7 mph ; Mon 17. 58° · 45°. 70% · ESE 8 mph ; Tue 18. 57° · 37°. 8% · WSW 15 mph.\u001b[0m\n",
|
||||
"Thought:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to persist run: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /chain-runs (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x13fa9d7f0>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m{\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mMostly cloudy with gusty winds developing during the afternoon. A few flurries or snow showers possible. High near 40F. Winds NNW at 20 to 30 mph.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"The weather in Pomfret, CT for the next 10 days is as follows: Sun 16. 64° · 50°. 24% · NE 7 mph ; Mon 17. 58° · 45°. 70% · ESE 8 mph ; Tue 18. 57° · 37°. 8% · WSW 15 mph.\"\n",
|
||||
" \"action_input\": \"The weather in Pomfret is mostly cloudy with gusty winds developing during the afternoon. A few flurries or snow showers are possible. High near 40F. Winds NNW at 20 to 30 mph.\"\n",
|
||||
"}\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
@@ -341,7 +265,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The weather in Pomfret, CT for the next 10 days is as follows: Sun 16. 64° · 50°. 24% · NE 7 mph ; Mon 17. 58° · 45°. 70% · ESE 8 mph ; Tue 18. 57° · 37°. 8% · WSW 15 mph.'"
|
||||
"'The weather in Pomfret is mostly cloudy with gusty winds developing during the afternoon. A few flurries or snow showers are possible. High near 40F. Winds NNW at 20 to 30 mph.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
"from langchain.agents import AgentType\n",
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain import OpenAI\n",
|
||||
"from langchain.utilities import SerpAPIWrapper\n",
|
||||
"from langchain.utilities import GoogleSearchAPIWrapper\n",
|
||||
"from langchain.agents import initialize_agent"
|
||||
]
|
||||
},
|
||||
@@ -34,7 +34,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"search = GoogleSearchAPIWrapper()\n",
|
||||
"tools = [\n",
|
||||
" Tool(\n",
|
||||
" name = \"Current Search\",\n",
|
||||
@@ -149,12 +149,8 @@
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Thought: Do I need to use a tool? Yes\n",
|
||||
"Action: Current Search\n",
|
||||
"Action Input: Thai food dinner recipes\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m59 easy Thai recipes for any night of the week · Marion Grasby's Thai spicy chilli and basil fried rice · Thai curry noodle soup · Marion Grasby's Thai Spicy ...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m Do I need to use a tool? No\n",
|
||||
"AI: Here are some great Thai dinner recipes you can try this week: Marion Grasby's Thai Spicy Chilli and Basil Fried Rice, Thai Curry Noodle Soup, Thai Green Curry with Coconut Rice, Thai Red Curry with Vegetables, and Thai Coconut Soup. I hope you enjoy them!\u001b[0m\n",
|
||||
"Thought: Do I need to use a tool? No\n",
|
||||
"AI: If you like Thai food, some great dinner options this week could include Thai green curry, Pad Thai, or a Thai-style stir-fry. You could also try making a Thai-style soup or salad. Enjoy!\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -162,7 +158,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Here are some great Thai dinner recipes you can try this week: Marion Grasby's Thai Spicy Chilli and Basil Fried Rice, Thai Curry Noodle Soup, Thai Green Curry with Coconut Rice, Thai Red Curry with Vegetables, and Thai Coconut Soup. I hope you enjoy them!\""
|
||||
"'If you like Thai food, some great dinner options this week could include Thai green curry, Pad Thai, or a Thai-style stir-fry. You could also try making a Thai-style soup or salad. Enjoy!'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
@@ -191,9 +187,9 @@
|
||||
"Thought: Do I need to use a tool? Yes\n",
|
||||
"Action: Current Search\n",
|
||||
"Action Input: Who won the World Cup in 1978\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mArgentina national football team\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe Cup was won by the host nation, Argentina, who defeated the Netherlands 3–1 in the final, after extra time. The final was held at River Plate's home stadium ... Amid Argentina's celebrations, there was sympathy for the Netherlands, runners-up for the second tournament running, following a 3-1 final defeat at the Estadio ... The match was won by the Argentine squad in extra time by a score of 3–1. Mario Kempes, who finished as the tournament's top scorer, was named the man of the ... May 21, 2022 ... Argentina won the World Cup for the first time in their history, beating Netherlands 3-1 in the final. This edition of the World Cup was full of ... The adidas Golden Ball is presented to the best player at each FIFA World Cup finals. Those who finish as runners-up in the vote receive the adidas Silver ... Holders West Germany failed to beat Holland and Italy and were eliminated when Berti Vogts' own goal gave Austria a 3-2 victory. Holland thrashed the Austrians ... Jun 14, 2018 ... On a clear afternoon on 1 June 1978 at the revamped El Monumental stadium in Buenos Aires' Belgrano barrio, several hundred children in white ... Dec 15, 2022 ... The tournament couldn't have gone better for the ruling junta. Argentina went on to win the championship, defeating the Netherlands, 3-1, in the ... Nov 9, 2022 ... Host: Argentina Teams: 16. Format: Group stage, second round, third-place playoff, final. Matches: 38. Goals: 102. Winner: Argentina Feb 19, 2009 ... Argentina sealed their first World Cup win on home soil when they defeated the Netherlands in an exciting final that went to extra-time. For the ...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m Do I need to use a tool? No\n",
|
||||
"AI: The last letter in your name is \"b\" and the winner of the 1978 World Cup was the Argentina national football team.\u001b[0m\n",
|
||||
"AI: The last letter in your name is 'b'. Argentina won the World Cup in 1978.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -201,7 +197,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The last letter in your name is \"b\" and the winner of the 1978 World Cup was the Argentina national football team.'"
|
||||
"\"The last letter in your name is 'b'. Argentina won the World Cup in 1978.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
@@ -230,9 +226,9 @@
|
||||
"Thought: Do I need to use a tool? Yes\n",
|
||||
"Action: Current Search\n",
|
||||
"Action Input: Current temperature in Pomfret\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mPartly cloudy skies. High around 70F. Winds W at 5 to 10 mph. Humidity41%.\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mA mixture of rain and snow showers. High 39F. Winds NNW at 5 to 10 mph. Chance of precip 50%. Snow accumulations less than one inch. Pomfret, CT Weather Forecast, with current conditions, wind, air quality, and what to expect for the next 3 days. Pomfret Center Weather Forecasts. ... Pomfret Center, CT Weather Conditionsstar_ratehome ... Tomorrow's temperature is forecast to be COOLER than today. It is 46 degrees fahrenheit, or 8 degrees celsius and feels like 46 degrees fahrenheit. The barometric pressure is 29.78 - measured by inch of mercury units - ... Pomfret Weather Forecasts. ... Pomfret, MD Weather Conditionsstar_ratehome ... Tomorrow's temperature is forecast to be MUCH COOLER than today. Additional Headlines. En Español · Share |. Current conditions at ... Pomfret CT. Tonight ... Past Weather Information · Interactive Forecast Map. Pomfret MD detailed current weather report for 20675 in Charles county, Maryland. ... Pomfret, MD weather condition is Mostly Cloudy and 43°F. Mostly Cloudy. Hazardous Weather Conditions. Hazardous Weather Outlook · En Español · Share |. Current conditions at ... South Pomfret VT. Tonight. Pomfret Center, CT Weather. Current Report for Thu Jan 5 2023. As of 2:00 PM EST. 5-Day Forecast | Road Conditions. 45°F 7°c. Feels Like 44°F. Pomfret Center CT. Today. Today: Areas of fog before 9am. Otherwise, cloudy, with a ... Otherwise, cloudy, with a temperature falling to around 33 by 5pm.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m Do I need to use a tool? No\n",
|
||||
"AI: The current temperature in Pomfret is around 70F with partly cloudy skies and winds W at 5 to 10 mph. The humidity is 41%.\u001b[0m\n",
|
||||
"AI: The current temperature in Pomfret is 45°F (7°C) and it feels like 44°F.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -240,7 +236,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The current temperature in Pomfret is around 70F with partly cloudy skies and winds W at 5 to 10 mph. The humidity is 41%.'"
|
||||
"'The current temperature in Pomfret is 45°F (7°C) and it feels like 44°F.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"id": "07e96d99",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -41,7 +41,7 @@
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"llm_math_chain = LLMMathChain(llm=llm, verbose=True)\n",
|
||||
"db = SQLDatabase.from_uri(\"sqlite:///../../../../../notebooks/Chinook.db\")\n",
|
||||
"db = SQLDatabase.from_uri(\"sqlite:///../../../../notebooks/Chinook.db\")\n",
|
||||
"db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)\n",
|
||||
"tools = [\n",
|
||||
" Tool(\n",
|
||||
@@ -64,7 +64,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"id": "a069c4b6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -74,7 +74,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"id": "e603cd7d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -88,24 +88,30 @@
|
||||
"\u001b[32;1m\u001b[1;3m I need to find out who Leo DiCaprio's girlfriend is and then calculate her age raised to the 0.43 power.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"Who is Leo DiCaprio's girlfriend?\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mDiCaprio met actor Camila Morrone in December 2017, when she was 20 and he was 43. They were spotted at Coachella and went on multiple vacations together. Some reports suggested that DiCaprio was ready to ask Morrone to marry him. The couple made their red carpet debut at the 2020 Academy Awards.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to calculate Camila Morrone's age raised to the 0.43 power.\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mCamila Morrone\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to find out Camila Morrone's age\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"How old is Camila Morrone?\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m25 years\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to calculate 25 raised to the 0.43 power\n",
|
||||
"Action: Calculator\n",
|
||||
"Action Input: 21^0.43\u001b[0m\n",
|
||||
"Action Input: 25^0.43\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMMathChain chain...\u001b[0m\n",
|
||||
"21^0.43\u001b[32;1m\u001b[1;3m\n",
|
||||
"```text\n",
|
||||
"21**0.43\n",
|
||||
"25^0.43\u001b[32;1m\u001b[1;3m\n",
|
||||
"```python\n",
|
||||
"import math\n",
|
||||
"print(math.pow(25, 0.43))\n",
|
||||
"```\n",
|
||||
"...numexpr.evaluate(\"21**0.43\")...\n",
|
||||
"\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m3.7030049853137306\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m3.991298452658078\n",
|
||||
"\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 3.7030049853137306\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
|
||||
"Final Answer: Camila Morrone is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is 3.7030049853137306.\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 3.991298452658078\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: Camila Morrone is 25 years old and her age raised to the 0.43 power is 3.991298452658078.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -113,10 +119,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Camila Morrone is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is 3.7030049853137306.\""
|
||||
"'Camila Morrone is 25 years old and her age raised to the 0.43 power is 3.991298452658078.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -127,7 +133,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"id": "a5c07010",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -141,36 +147,21 @@
|
||||
"\u001b[32;1m\u001b[1;3m I need to find out the artist's full name and then search the FooBar database for their albums.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"The Storm Before the Calm\" artist\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe Storm Before the Calm (stylized in all lowercase) is the tenth (and eighth international) studio album by Canadian-American singer-songwriter Alanis Morissette, released June 17, 2022, via Epiphany Music and Thirty Tigers, as well as by RCA Records in Europe.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now need to search the FooBar database for Alanis Morissette's albums.\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe Storm Before the Calm (stylized in all lowercase) is the tenth (and eighth international) studio album by Canadian-American singer-songwriter Alanis ...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now need to search the FooBar database for Alanis Morissette's albums\n",
|
||||
"Action: FooBar DB\n",
|
||||
"Action Input: What albums by Alanis Morissette are in the FooBar database?\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
|
||||
"What albums by Alanis Morissette are in the FooBar database?\n",
|
||||
"SQLQuery:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/harrisonchase/workplace/langchain/langchain/sql_database.py:191: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.\n",
|
||||
" sample_rows = connection.execute(command)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m SELECT \"Title\" FROM \"Album\" INNER JOIN \"Artist\" ON \"Album\".\"ArtistId\" = \"Artist\".\"ArtistId\" WHERE \"Name\" = 'Alanis Morissette' LIMIT 5;\u001b[0m\n",
|
||||
"What albums by Alanis Morissette are in the FooBar database? \n",
|
||||
"SQLQuery:\u001b[32;1m\u001b[1;3m SELECT Title FROM Album INNER JOIN Artist ON Album.ArtistId = Artist.ArtistId WHERE Artist.Name = 'Alanis Morissette' LIMIT 5;\u001b[0m\n",
|
||||
"SQLResult: \u001b[33;1m\u001b[1;3m[('Jagged Little Pill',)]\u001b[0m\n",
|
||||
"Answer:\u001b[32;1m\u001b[1;3m The albums by Alanis Morissette in the FooBar database are Jagged Little Pill.\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3m The albums by Alanis Morissette in the FooBar database are Jagged Little Pill.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
|
||||
"Final Answer: The artist who released the album 'The Storm Before the Calm' is Alanis Morissette and the albums of hers in the FooBar database are Jagged Little Pill.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The artist who released the album The Storm Before the Calm is Alanis Morissette and the albums of theirs in the FooBar database are Jagged Little Pill.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -178,10 +169,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"The artist who released the album 'The Storm Before the Calm' is Alanis Morissette and the albums of hers in the FooBar database are Jagged Little Pill.\""
|
||||
"'The artist who released the album The Storm Before the Calm is Alanis Morissette and the albums of theirs in the FooBar database are Jagged Little Pill.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 1,
|
||||
"id": "ac561cc4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -34,7 +34,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 2,
|
||||
"id": "07e96d99",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -43,7 +43,7 @@
|
||||
"llm1 = OpenAI(temperature=0)\n",
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"llm_math_chain = LLMMathChain(llm=llm1, verbose=True)\n",
|
||||
"db = SQLDatabase.from_uri(\"sqlite:///../../../../../notebooks/Chinook.db\")\n",
|
||||
"db = SQLDatabase.from_uri(\"sqlite:///../../../../notebooks/Chinook.db\")\n",
|
||||
"db_chain = SQLDatabaseChain(llm=llm1, database=db, verbose=True)\n",
|
||||
"tools = [\n",
|
||||
" Tool(\n",
|
||||
@@ -66,7 +66,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 3,
|
||||
"id": "a069c4b6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -76,7 +76,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 4,
|
||||
"id": "e603cd7d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -92,34 +92,37 @@
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Search\",\n",
|
||||
" \"action_input\": \"Leo DiCaprio girlfriend\"\n",
|
||||
" \"action_input\": \"Who is Leo DiCaprio's girlfriend?\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mGigi Hadid: 2022 Leo and Gigi were first linked back in September 2022, when a source told Us Weekly that Leo had his “sights set\" on her (alarming way to put it, but okay).\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mFor the second question, I need to calculate the age raised to the 0.43 power. I will use the calculator tool.\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mCamila Morrone\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mFor the second question, I need to use the calculator tool to raise her current age to the 0.43 power.\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Calculator\",\n",
|
||||
" \"action_input\": \"((2022-1995)^0.43)\"\n",
|
||||
" \"action_input\": \"22.0^(0.43)\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMMathChain chain...\u001b[0m\n",
|
||||
"((2022-1995)^0.43)\u001b[32;1m\u001b[1;3m\n",
|
||||
"```text\n",
|
||||
"(2022-1995)**0.43\n",
|
||||
"22.0^(0.43)\u001b[32;1m\u001b[1;3m\n",
|
||||
"```python\n",
|
||||
"import math\n",
|
||||
"print(math.pow(22.0, 0.43))\n",
|
||||
"```\n",
|
||||
"...numexpr.evaluate(\"(2022-1995)**0.43\")...\n",
|
||||
"\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m4.125593352125936\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m3.777824273683966\n",
|
||||
"\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 4.125593352125936\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 3.777824273683966\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI now know the final answer.\n",
|
||||
"Final Answer: Gigi Hadid is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is approximately 4.13.\u001b[0m\n",
|
||||
"Final Answer: Camila Morrone, 3.777824273683966.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -127,10 +130,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Gigi Hadid is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is approximately 4.13.\""
|
||||
"'Camila Morrone, 3.777824273683966.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -141,7 +144,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 5,
|
||||
"id": "a5c07010",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -153,7 +156,7 @@
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mQuestion: What is the full name of the artist who recently released an album called 'The Storm Before the Calm' and are they in the FooBar database? If so, what albums of theirs are in the FooBar database?\n",
|
||||
"Thought: I should use the Search tool to find the answer to the first part of the question and then use the FooBar DB tool to find the answer to the second part.\n",
|
||||
"Thought: I should use the Search tool to find the answer to the first part of the question and then use the FooBar DB tool to find the answer to the second part of the question.\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
@@ -163,7 +166,7 @@
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mAlanis Morissette\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mNow that I know the artist's name, I can use the FooBar DB tool to find out if they are in the database and what albums of theirs are in it.\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mNow that I have the name of the artist, I can use the FooBar DB tool to find their albums in the database.\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
@@ -175,7 +178,7 @@
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
|
||||
"What albums does Alanis Morissette have in the database?\n",
|
||||
"What albums does Alanis Morissette have in the database? \n",
|
||||
"SQLQuery:"
|
||||
]
|
||||
},
|
||||
@@ -183,7 +186,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/harrisonchase/workplace/langchain/langchain/sql_database.py:191: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.\n",
|
||||
"/Users/harrisonchase/workplace/langchain/langchain/sql_database.py:141: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.\n",
|
||||
" sample_rows = connection.execute(command)\n"
|
||||
]
|
||||
},
|
||||
@@ -191,14 +194,14 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m SELECT \"Title\" FROM \"Album\" WHERE \"ArtistId\" IN (SELECT \"ArtistId\" FROM \"Artist\" WHERE \"Name\" = 'Alanis Morissette') LIMIT 5;\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m SELECT Title FROM Album WHERE ArtistId IN (SELECT ArtistId FROM Artist WHERE Name = 'Alanis Morissette') LIMIT 5;\u001b[0m\n",
|
||||
"SQLResult: \u001b[33;1m\u001b[1;3m[('Jagged Little Pill',)]\u001b[0m\n",
|
||||
"Answer:\u001b[32;1m\u001b[1;3m Alanis Morissette has the album Jagged Little Pill in the database.\u001b[0m\n",
|
||||
"Answer:\u001b[32;1m\u001b[1;3m Alanis Morissette has the album 'Jagged Little Pill' in the database.\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3m Alanis Morissette has the album Jagged Little Pill in the database.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe artist Alanis Morissette is in the FooBar database and has the album Jagged Little Pill in it.\n",
|
||||
"Final Answer: Alanis Morissette is in the FooBar database and has the album Jagged Little Pill in it.\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3m Alanis Morissette has the album 'Jagged Little Pill' in the database.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI have found the answer to both parts of the question.\n",
|
||||
"Final Answer: The artist who recently released an album called 'The Storm Before the Calm' is Alanis Morissette. The album 'Jagged Little Pill' is in the FooBar database.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -206,10 +209,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Alanis Morissette is in the FooBar database and has the album Jagged Little Pill in it.'"
|
||||
"\"The artist who recently released an album called 'The Storm Before the Calm' is Alanis Morissette. The album 'Jagged Little Pill' is in the FooBar database.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "7e3b513e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -25,12 +25,11 @@
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m Yes.\n",
|
||||
"Follow up: Who is the reigning men's U.S. Open champion?\u001b[0m\n",
|
||||
"Intermediate answer: \u001b[36;1m\u001b[1;3mCarlos Alcaraz Garfia\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mFollow up: Where is Carlos Alcaraz Garfia from?\u001b[0m\n",
|
||||
"Intermediate answer: \u001b[36;1m\u001b[1;3mCarlos Alcaraz won the 2022 Men's single title while Poland's Iga Swiatek won the Women's single title defeating Tunisian's Ons Jabeur.\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mFollow up: Where is Carlos Alcaraz from?\u001b[0m\n",
|
||||
"Intermediate answer: \u001b[36;1m\u001b[1;3mEl Palmar, Spain\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mSo the final answer is: El Palmar, Spain\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
"\u001b[1m> Finished AgentExecutor chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -39,7 +38,7 @@
|
||||
"'El Palmar, Spain'"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -62,14 +61,6 @@
|
||||
"self_ask_with_search = initialize_agent(tools, llm, agent=AgentType.SELF_ASK_WITH_SEARCH, verbose=True)\n",
|
||||
"self_ask_with_search.run(\"What is the hometown of the reigning men's U.S. Open champion?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b2e4d6bc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -88,7 +79,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.9"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
@@ -38,7 +38,6 @@
|
||||
"source": [
|
||||
"from langchain.agents import load_tools\n",
|
||||
"from langchain.agents import initialize_agent\n",
|
||||
"from langchain.agents import AgentType\n",
|
||||
"from langchain.llms import OpenAI"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -35,7 +35,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 11,
|
||||
"id": "16c4dc59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -45,7 +45,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 12,
|
||||
"id": "46b9489d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -72,7 +72,7 @@
|
||||
"'There are 891 rows in the dataframe.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -83,7 +83,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "a96309be",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -110,7 +110,7 @@
|
||||
"'30 people have more than 3 siblings.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -121,7 +121,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "964a09f7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -143,7 +143,7 @@
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to import the math library\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: import math\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mNone\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I can now calculate the square root\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: math.sqrt(df['Age'].mean())\u001b[0m\n",
|
||||
@@ -160,7 +160,7 @@
|
||||
"'5.449689683556195'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
||||
@@ -1,409 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c7ad998d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Natural Language APIs\n",
|
||||
"\n",
|
||||
"Natural Language API Toolkits (NLAToolkits) permit LangChain Agents to efficiently plan and combine calls across endpoints. This notebook demonstrates a sample composition of the Speak, Klarna, and Spoonacluar APIs.\n",
|
||||
"\n",
|
||||
"For a detailed walkthrough of the OpenAPI chains wrapped within the NLAToolkit, see the [OpenAPI Operation Chain](openapi.ipynb) notebook.\n",
|
||||
"\n",
|
||||
"### First, import dependencies and load the LLM"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "6593f793",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import List, Optional\n",
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain.requests import Requests\n",
|
||||
"from langchain.tools import APIOperation, OpenAPISpec\n",
|
||||
"from langchain.agents import AgentType, Tool, initialize_agent\n",
|
||||
"from langchain.agents.agent_toolkits import NLAToolkit"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "dd720860",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Select the LLM to use. Here, we use text-davinci-003\n",
|
||||
"llm = OpenAI(temperature=0, max_tokens=700) # You can swap between different core LLM's here."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4cadac9d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"### Next, load the Natural Language API Toolkits"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "6b208ab0",
|
||||
"metadata": {
|
||||
"scrolled": true,
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"speak_toolkit = NLAToolkit.from_llm_and_url(llm, \"https://api.speak.com/openapi.yaml\")\n",
|
||||
"klarna_toolkit = NLAToolkit.from_llm_and_url(llm, \"https://www.klarna.com/us/shopping/public/openai/v0/api-docs/\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "16c7336f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create the Agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "730a0dc2-b4d0-46d5-a1e9-583803220973",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Slightly tweak the instructions from the default agent\n",
|
||||
"openapi_format_instructions = \"\"\"Use the following format:\n",
|
||||
"\n",
|
||||
"Question: the input question you must answer\n",
|
||||
"Thought: you should always think about what to do\n",
|
||||
"Action: the action to take, should be one of [{tool_names}]\n",
|
||||
"Action Input: what to instruct the AI Action representative.\n",
|
||||
"Observation: The Agent's response\n",
|
||||
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
|
||||
"Thought: I now know the final answer. User can't see any of my observations, API responses, links, or tools.\n",
|
||||
"Final Answer: the final answer to the original input question with the right amount of detail\n",
|
||||
"\n",
|
||||
"When responding with your Final Answer, remember that the person you are responding to CANNOT see any of your Thought/Action/Action Input/Observations, so if there is any relevant information there you need to include it explicitly in your response.\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "40a979c3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"natural_language_tools = speak_toolkit.get_tools() + klarna_toolkit.get_tools()\n",
|
||||
"mrkl = initialize_agent(natural_language_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, \n",
|
||||
" verbose=True, agent_kwargs={\"format_instructions\":openapi_format_instructions})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "794380ba",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to find out what kind of Italian clothes are available\n",
|
||||
"Action: Open_AI_Klarna_product_Api.productsUsingGET\n",
|
||||
"Action Input: Italian clothes\u001b[0m\n",
|
||||
"Observation: \u001b[31;1m\u001b[1;3mThe API response contains two products from the Alé brand in Italian Blue. The first is the Alé Colour Block Short Sleeve Jersey Men - Italian Blue, which costs $86.49, and the second is the Alé Dolid Flash Jersey Men - Italian Blue, which costs $40.00.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know what kind of Italian clothes are available and how much they cost.\n",
|
||||
"Final Answer: You can buy two products from the Alé brand in Italian Blue for your end of year party. The Alé Colour Block Short Sleeve Jersey Men - Italian Blue costs $86.49, and the Alé Dolid Flash Jersey Men - Italian Blue costs $40.00.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'You can buy two products from the Alé brand in Italian Blue for your end of year party. The Alé Colour Block Short Sleeve Jersey Men - Italian Blue costs $86.49, and the Alé Dolid Flash Jersey Men - Italian Blue costs $40.00.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"mrkl.run(\"I have an end of year party for my Italian class and have to buy some Italian clothes for it\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c61d92a8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using Auth + Adding more Endpoints\n",
|
||||
"\n",
|
||||
"Some endpoints may require user authentication via things like access tokens. Here we show how to pass in the authentication information via the `Requests` wrapper object.\n",
|
||||
"\n",
|
||||
"Since each NLATool exposes a concisee natural language interface to its wrapped API, the top level conversational agent has an easier job incorporating each endpoint to satisfy a user's request."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f0d132cc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Adding the Spoonacular endpoints.**\n",
|
||||
"\n",
|
||||
"1. Go to the [Spoonacular API Console](https://spoonacular.com/food-api/console#Profile) and make a free account.\n",
|
||||
"2. Click on `Profile` and copy your API key below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "c2368b9c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"spoonacular_api_key = \"\" # Copy from the API Console"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "fbd97c28-fef6-41b5-9600-a9611a32bfb3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Attempting to load an OpenAPI 3.0.0 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Content-Type. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Accept. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Content-Type. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Accept. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Content-Type. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Accept. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Content-Type. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Accept. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Content-Type. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Content-Type. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Content-Type. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Content-Type. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Accept. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Content-Type. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Accept. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Accept. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Accept. Valid values are ['path', 'query'] Ignoring optional parameter\n",
|
||||
"Unsupported APIPropertyLocation \"header\" for parameter Content-Type. Valid values are ['path', 'query'] Ignoring optional parameter\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"requests = Requests(headers={\"x-api-key\": spoonacular_api_key})\n",
|
||||
"spoonacular_toolkit = NLAToolkit.from_llm_and_url(\n",
|
||||
" llm, \n",
|
||||
" \"https://spoonacular.com/application/frontend/downloads/spoonacular-openapi-3.json\",\n",
|
||||
" requests=requests,\n",
|
||||
" max_text_length=1800, # If you want to truncate the response text\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "81a6edac",
|
||||
"metadata": {
|
||||
"scrolled": true,
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"34 tools loaded.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"natural_language_api_tools = (speak_toolkit.get_tools() \n",
|
||||
" + klarna_toolkit.get_tools() \n",
|
||||
" + spoonacular_toolkit.get_tools()[:30]\n",
|
||||
" )\n",
|
||||
"print(f\"{len(natural_language_api_tools)} tools loaded.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "831f772d-5cd1-4467-b494-a3172af2ff48",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create an agent with the new tools\n",
|
||||
"mrkl = initialize_agent(natural_language_api_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, \n",
|
||||
" verbose=True, agent_kwargs={\"format_instructions\":openapi_format_instructions})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "0385e04b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Make the query more complex!\n",
|
||||
"user_input = (\n",
|
||||
" \"I'm learning Italian, and my language class is having an end of year party... \"\n",
|
||||
" \" Could you help me find an Italian outfit to wear and\"\n",
|
||||
" \" an appropriate recipe to prepare so I can present for the class in Italian?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "6ebd3f55",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to find a recipe and an outfit that is Italian-themed.\n",
|
||||
"Action: spoonacular_API.searchRecipes\n",
|
||||
"Action Input: Italian\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe API response contains 10 Italian recipes, including Turkey Tomato Cheese Pizza, Broccolini Quinoa Pilaf, Bruschetta Style Pork & Pasta, Salmon Quinoa Risotto, Italian Tuna Pasta, Roasted Brussels Sprouts With Garlic, Asparagus Lemon Risotto, Italian Steamed Artichokes, Crispy Italian Cauliflower Poppers Appetizer, and Pappa Al Pomodoro.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to find an Italian-themed outfit.\n",
|
||||
"Action: Open_AI_Klarna_product_Api.productsUsingGET\n",
|
||||
"Action Input: Italian\u001b[0m\n",
|
||||
"Observation: \u001b[31;1m\u001b[1;3mI found 10 products related to 'Italian' in the API response. These products include Italian Gold Sparkle Perfectina Necklace - Gold, Italian Design Miami Cuban Link Chain Necklace - Gold, Italian Gold Miami Cuban Link Chain Necklace - Gold, Italian Gold Herringbone Necklace - Gold, Italian Gold Claddagh Ring - Gold, Italian Gold Herringbone Chain Necklace - Gold, Garmin QuickFit 22mm Italian Vacchetta Leather Band, Macy's Italian Horn Charm - Gold, Dolce & Gabbana Light Blue Italian Love Pour Homme EdT 1.7 fl oz.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
|
||||
"Final Answer: To present for your Italian language class, you could wear an Italian Gold Sparkle Perfectina Necklace - Gold, an Italian Design Miami Cuban Link Chain Necklace - Gold, or an Italian Gold Miami Cuban Link Chain Necklace - Gold. For a recipe, you could make Turkey Tomato Cheese Pizza, Broccolini Quinoa Pilaf, Bruschetta Style Pork & Pasta, Salmon Quinoa Risotto, Italian Tuna Pasta, Roasted Brussels Sprouts With Garlic, Asparagus Lemon Risotto, Italian Steamed Artichokes, Crispy Italian Cauliflower Poppers Appetizer, or Pappa Al Pomodoro.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'To present for your Italian language class, you could wear an Italian Gold Sparkle Perfectina Necklace - Gold, an Italian Design Miami Cuban Link Chain Necklace - Gold, or an Italian Gold Miami Cuban Link Chain Necklace - Gold. For a recipe, you could make Turkey Tomato Cheese Pizza, Broccolini Quinoa Pilaf, Bruschetta Style Pork & Pasta, Salmon Quinoa Risotto, Italian Tuna Pasta, Roasted Brussels Sprouts With Garlic, Asparagus Lemon Risotto, Italian Steamed Artichokes, Crispy Italian Cauliflower Poppers Appetizer, or Pappa Al Pomodoro.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"mrkl.run(user_input)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a2959462",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Thank you!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "6fcda5f0",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"In Italian, you can say 'Buon appetito' to someone to wish them to enjoy their meal. This phrase is commonly used in Italy when someone is about to eat, often at the beginning of a meal. It's similar to saying 'Bon appétit' in French or 'Guten Appetit' in German.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"natural_language_api_tools[1].run(\"Tell the LangChain audience to 'enjoy the meal' in Italian, please!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ab366dc0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -80,11 +80,11 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"tools = load_tools([\"requests_all\"] )\n",
|
||||
"llm = ChatOpenAI(temperature=0,)\n",
|
||||
"tools = load_tools([\"requests\"] )\n",
|
||||
"tools += [tool]\n",
|
||||
"\n",
|
||||
"agent_chain = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)\n",
|
||||
"agent_chain = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION verbose=True)\n",
|
||||
"agent_chain.run(\"what t shirts are available in klarna?\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"\n",
|
||||
"This notebook goes over how to use the google search component.\n",
|
||||
"\n",
|
||||
"First, you need to set up the proper API keys and environment variables. To set it up, create the GOOGLE_API_KEY in the Google Cloud credential console (https://console.cloud.google.com/apis/credentials) and a GOOGLE_CSE_ID using the Programmable Search Enginge (https://programmablesearchengine.google.com/controlpanel/create). Next, it is good to follow the instructions found [here](https://stackoverflow.com/questions/37083058/programmatically-searching-google-in-python-using-custom-search).\n",
|
||||
"First, you need to set up the proper API keys and environment variables. To set it up, follow the instructions found [here](https://stackoverflow.com/questions/37083058/programmatically-searching-google-in-python-using-custom-search).\n",
|
||||
"\n",
|
||||
"Then we will need to set some environment variables."
|
||||
]
|
||||
|
||||
@@ -1,388 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "23234b50-e6c6-4c87-9f97-259c15f36894",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"# Callbacks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "29dd6333-307c-43df-b848-65001c01733b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"LangChain provides a callback system that allows you to hook into the various stages of your LLM application. This is useful for logging, [monitoring](https://python.langchain.com/en/latest/tracing.html), [streaming](https://python.langchain.com/en/latest/modules/models/llms/examples/streaming_llm.html), and other tasks.\n",
|
||||
"\n",
|
||||
"You can subscribe to these events by using the `callback_manager` argument available throughout the API. A `CallbackManager` is an object that manages a list of `CallbackHandlers`. The `CallbackManager` will call the appropriate method on each handler when the event is triggered."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fdb72e8d-a02a-474d-96bf-f5759432afc8",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"```python\n",
|
||||
"class CallbackManager(BaseCallbackHandler):\n",
|
||||
" \"\"\"Base callback manager that can be used to handle callbacks from LangChain.\"\"\"\n",
|
||||
"\n",
|
||||
" def add_handler(self, callback: BaseCallbackHandler) -> None:\n",
|
||||
" \"\"\"Add a handler to the callback manager.\"\"\"\n",
|
||||
"\n",
|
||||
" def remove_handler(self, handler: BaseCallbackHandler) -> None:\n",
|
||||
" \"\"\"Remove a handler from the callback manager.\"\"\"\n",
|
||||
"\n",
|
||||
" def set_handler(self, handler: BaseCallbackHandler) -> None:\n",
|
||||
" \"\"\"Set handler as the only handler on the callback manager.\"\"\"\n",
|
||||
" self.set_handlers([handler])\n",
|
||||
"\n",
|
||||
" def set_handlers(self, handlers: List[BaseCallbackHandler]) -> None:\n",
|
||||
" \"\"\"Set handlers as the only handlers on the callback manager.\"\"\"\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2b6d7dba-cd20-472a-ae05-f68675cc9ea4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`CallbackHandlers` are objects that implement the `CallbackHandler` interface, which has a method for each event that can be subscribed to. The `CallbackManager` will call the appropriate method on each handler when the event is triggered."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e4592215-6604-47e2-89ff-5db3af6d1e40",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"```python\n",
|
||||
"class BaseCallbackHandler(ABC):\n",
|
||||
" \"\"\"Base callback handler that can be used to handle callbacks from langchain.\"\"\"\n",
|
||||
"\n",
|
||||
" @abstractmethod\n",
|
||||
" def on_llm_start(\n",
|
||||
" self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any\n",
|
||||
" ) -> Any:\n",
|
||||
" \"\"\"Run when LLM starts running.\"\"\"\n",
|
||||
"\n",
|
||||
" @abstractmethod\n",
|
||||
" def on_llm_new_token(self, token: str, **kwargs: Any) -> Any:\n",
|
||||
" \"\"\"Run on new LLM token. Only available when streaming is enabled.\"\"\"\n",
|
||||
"\n",
|
||||
" @abstractmethod\n",
|
||||
" def on_llm_end(self, response: LLMResult, **kwargs: Any) -> Any:\n",
|
||||
" \"\"\"Run when LLM ends running.\"\"\"\n",
|
||||
"\n",
|
||||
" @abstractmethod\n",
|
||||
" def on_llm_error(\n",
|
||||
" self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any\n",
|
||||
" ) -> Any:\n",
|
||||
" \"\"\"Run when LLM errors.\"\"\"\n",
|
||||
"\n",
|
||||
" @abstractmethod\n",
|
||||
" def on_chain_start(\n",
|
||||
" self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any\n",
|
||||
" ) -> Any:\n",
|
||||
" \"\"\"Run when chain starts running.\"\"\"\n",
|
||||
"\n",
|
||||
" @abstractmethod\n",
|
||||
" def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:\n",
|
||||
" \"\"\"Run when chain ends running.\"\"\"\n",
|
||||
"\n",
|
||||
" @abstractmethod\n",
|
||||
" def on_chain_error(\n",
|
||||
" self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any\n",
|
||||
" ) -> Any:\n",
|
||||
" \"\"\"Run when chain errors.\"\"\"\n",
|
||||
"\n",
|
||||
" @abstractmethod\n",
|
||||
" def on_tool_start(\n",
|
||||
" self, serialized: Dict[str, Any], input_str: str, **kwargs: Any\n",
|
||||
" ) -> Any:\n",
|
||||
" \"\"\"Run when tool starts running.\"\"\"\n",
|
||||
"\n",
|
||||
" @abstractmethod\n",
|
||||
" def on_tool_end(self, output: str, **kwargs: Any) -> Any:\n",
|
||||
" \"\"\"Run when tool ends running.\"\"\"\n",
|
||||
"\n",
|
||||
" @abstractmethod\n",
|
||||
" def on_tool_error(\n",
|
||||
" self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any\n",
|
||||
" ) -> Any:\n",
|
||||
" \"\"\"Run when tool errors.\"\"\"\n",
|
||||
"\n",
|
||||
" @abstractmethod\n",
|
||||
" def on_text(self, text: str, **kwargs: Any) -> Any:\n",
|
||||
" \"\"\"Run on arbitrary text.\"\"\"\n",
|
||||
"\n",
|
||||
" @abstractmethod\n",
|
||||
" def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:\n",
|
||||
" \"\"\"Run on agent action.\"\"\"\n",
|
||||
"\n",
|
||||
" @abstractmethod\n",
|
||||
" def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any:\n",
|
||||
" \"\"\"Run on agent end.\"\"\"\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d3bf3304-43fb-47ad-ae50-0637a17018a2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating and Using a Custom `CallbackHandler`\n",
|
||||
"\n",
|
||||
"By default, a shared CallbackManager with the StdOutCallbackHandler will be used by models, chains, agents, and tools. However, you can pass in your own CallbackManager with a custom CallbackHandler:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "80532dfc-d687-4147-a0c9-1f90cc3e868c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"AgentAction(tool='Search', tool_input=\"US Open men's final 2019 winner\", log=' I need to find out who won the US Open men\\'s final in 2019 and then calculate his age raised to the 0.334 power.\\nAction: Search\\nAction Input: \"US Open men\\'s final 2019 winner\"')\n",
|
||||
"Rafael Nadal defeated Daniil Medvedev in the final, 7–5, 6–3, 5–7, 4–6, 6–4 to win the men's singles tennis title at the 2019 US Open. It was his fourth US ...\n",
|
||||
"AgentAction(tool='Search', tool_input='Rafael Nadal age', log=' I need to find out the age of the winner\\nAction: Search\\nAction Input: \"Rafael Nadal age\"')\n",
|
||||
"36 years\n",
|
||||
"AgentAction(tool='Calculator', tool_input='36^0.334', log=' I now need to calculate his age raised to the 0.334 power\\nAction: Calculator\\nAction Input: 36^0.334')\n",
|
||||
"Answer: 3.3098250249682484\n",
|
||||
"\n",
|
||||
" I now know the final answer\n",
|
||||
"Final Answer: Rafael Nadal, aged 36, won the US Open men's final in 2019 and his age raised to the 0.334 power is 3.3098250249682484.\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Rafael Nadal, aged 36, won the US Open men's final in 2019 and his age raised to the 0.334 power is 3.3098250249682484.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from typing import Any, Dict, List, Optional, Union\n",
|
||||
"\n",
|
||||
"from langchain.agents import initialize_agent, load_tools\n",
|
||||
"from langchain.agents import AgentType\n",
|
||||
"from langchain.callbacks.base import CallbackManager, BaseCallbackHandler\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.schema import AgentAction, AgentFinish, LLMResult\n",
|
||||
"\n",
|
||||
"class MyCustomCallbackHandler(BaseCallbackHandler):\n",
|
||||
" \"\"\"Custom CallbackHandler.\"\"\"\n",
|
||||
"\n",
|
||||
" def on_llm_start(\n",
|
||||
" self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any\n",
|
||||
" ) -> None:\n",
|
||||
" \"\"\"Print out the prompts.\"\"\"\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
" def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:\n",
|
||||
" \"\"\"Do nothing.\"\"\"\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
" def on_llm_new_token(self, token: str, **kwargs: Any) -> None:\n",
|
||||
" \"\"\"Do nothing.\"\"\"\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
" def on_llm_error(\n",
|
||||
" self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any\n",
|
||||
" ) -> None:\n",
|
||||
" \"\"\"Do nothing.\"\"\"\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
" def on_chain_start(\n",
|
||||
" self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any\n",
|
||||
" ) -> None:\n",
|
||||
" \"\"\"Print out that we are entering a chain.\"\"\"\n",
|
||||
" class_name = serialized[\"name\"]\n",
|
||||
" print(f\"\\n\\n\\033[1m> Entering new {class_name} chain...\\033[0m\")\n",
|
||||
"\n",
|
||||
" def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:\n",
|
||||
" \"\"\"Print out that we finished a chain.\"\"\"\n",
|
||||
" print(\"\\n\\033[1m> Finished chain.\\033[0m\")\n",
|
||||
"\n",
|
||||
" def on_chain_error(\n",
|
||||
" self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any\n",
|
||||
" ) -> None:\n",
|
||||
" \"\"\"Do nothing.\"\"\"\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
" def on_tool_start(\n",
|
||||
" self,\n",
|
||||
" serialized: Dict[str, Any],\n",
|
||||
" input_str: str,\n",
|
||||
" **kwargs: Any,\n",
|
||||
" ) -> None:\n",
|
||||
" \"\"\"Do nothing.\"\"\"\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
" def on_agent_action(\n",
|
||||
" self, action: AgentAction, color: Optional[str] = None, **kwargs: Any\n",
|
||||
" ) -> Any:\n",
|
||||
" \"\"\"Run on agent action.\"\"\"\n",
|
||||
" print(action)\n",
|
||||
"\n",
|
||||
" def on_tool_end(\n",
|
||||
" self,\n",
|
||||
" output: str,\n",
|
||||
" color: Optional[str] = None,\n",
|
||||
" observation_prefix: Optional[str] = None,\n",
|
||||
" llm_prefix: Optional[str] = None,\n",
|
||||
" **kwargs: Any,\n",
|
||||
" ) -> None:\n",
|
||||
" \"\"\"If not the final action, print out observation.\"\"\"\n",
|
||||
" print(output)\n",
|
||||
"\n",
|
||||
" def on_tool_error(\n",
|
||||
" self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any\n",
|
||||
" ) -> None:\n",
|
||||
" \"\"\"Do nothing.\"\"\"\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
" def on_text(\n",
|
||||
" self,\n",
|
||||
" text: str,\n",
|
||||
" color: Optional[str] = None,\n",
|
||||
" end: str = \"\",\n",
|
||||
" **kwargs: Optional[str],\n",
|
||||
" ) -> None:\n",
|
||||
" \"\"\"Run when agent ends.\"\"\"\n",
|
||||
" print(text)\n",
|
||||
"\n",
|
||||
" def on_agent_finish(\n",
|
||||
" self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any\n",
|
||||
" ) -> None:\n",
|
||||
" \"\"\"Run on agent end.\"\"\"\n",
|
||||
" print(finish.log)\n",
|
||||
"manager = CallbackManager([MyCustomCallbackHandler()])\n",
|
||||
"llm = OpenAI(temperature=0, callback_manager=manager, verbose=True)\n",
|
||||
"tools = load_tools([\"llm-math\", \"serpapi\"], llm=llm, callback_manager=manager)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True, callback_manager=manager\n",
|
||||
")\n",
|
||||
"agent.run(\"Who won the US Open men's final in 2019? What is his age raised to the 0.334 power?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bc9785fa-4f71-4797-91a3-4fe7e57d0429",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Async Support\n",
|
||||
"\n",
|
||||
"If you are planning to use the async API, it is recommended to use `AsyncCallbackHandler` and `AsyncCallbackManager` to avoid blocking the runloop."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "c702e0c9-a961-4897-90c1-cdd13b6f16b2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"zzzz....\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"zzzz....\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"from aiohttp import ClientSession\n",
|
||||
"\n",
|
||||
"from langchain.callbacks.base import AsyncCallbackHandler, AsyncCallbackManager\n",
|
||||
"\n",
|
||||
"class MyCustomAsyncCallbackHandler(AsyncCallbackHandler):\n",
|
||||
" \"\"\"Async callback handler that can be used to handle callbacks from langchain.\"\"\"\n",
|
||||
"\n",
|
||||
" async def on_chain_start(\n",
|
||||
" self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any\n",
|
||||
" ) -> None:\n",
|
||||
" \"\"\"Run when chain starts running.\"\"\"\n",
|
||||
" print(\"zzzz....\")\n",
|
||||
" await asyncio.sleep(0.5)\n",
|
||||
" class_name = serialized[\"name\"]\n",
|
||||
" print(f\"\\n\\n\\033[1m> Entering new {class_name} chain...\\033[0m\")\n",
|
||||
"\n",
|
||||
" async def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:\n",
|
||||
" \"\"\"Run when chain ends running.\"\"\"\n",
|
||||
" print(\"zzzz....\")\n",
|
||||
" await asyncio.sleep(0.5)\n",
|
||||
" print(\"\\n\\033[1m> Finished chain.\\033[0m\")\n",
|
||||
"\n",
|
||||
"manager = AsyncCallbackManager([MyCustomAsyncCallbackHandler()])\n",
|
||||
"\n",
|
||||
"# To make async requests in Tools more efficient, you can pass in your own aiohttp.ClientSession, \n",
|
||||
"# but you must manually close the client session at the end of your program/event loop\n",
|
||||
"aiosession = ClientSession()\n",
|
||||
"llm = OpenAI(temperature=0, callback_manager=manager)\n",
|
||||
"async_tools = load_tools([\"llm-math\", \"serpapi\"], llm=llm, aiosession=aiosession, callback_manager=manager)\n",
|
||||
"async_agent = initialize_agent(async_tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True, callback_manager=manager)\n",
|
||||
"await async_agent.arun(\"Who won the US Open men's final in 2019? What is his age raised to the 0.334 power?\")\n",
|
||||
"await aiosession.close()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "86be6304-e433-4048-880c-a92a73244407",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -144,160 +144,6 @@
|
||||
"\u001b[32;1m\u001b[1;3mYou are a helpful AI Assistant. Please provide JSON arguments to agentFunc() based on the user's instructions.\n",
|
||||
"\n",
|
||||
"API_SCHEMA: ```typescript\n",
|
||||
"/* API for fetching Klarna product information */\n",
|
||||
"type productsUsingGET = (_: {\n",
|
||||
"/* A precise query that matches one very small category or product that needs to be searched for to find the products the user is looking for. If the user explicitly stated what they want, use that as a query. The query is as specific as possible to the product name or category mentioned by the user in its singular form, and don't contain any clarifiers like latest, newest, cheapest, budget, premium, expensive or similar. The query is always taken from the latest topic, if there is a new topic a new query is started. */\n",
|
||||
"\t\tq: string,\n",
|
||||
"/* number of products returned */\n",
|
||||
"\t\tsize?: number,\n",
|
||||
"/* (Optional) Minimum price in local currency for the product searched for. Either explicitly stated by the user or implicitly inferred from a combination of the user's request and the kind of product searched for. */\n",
|
||||
"\t\tmin_price?: number,\n",
|
||||
"/* (Optional) Maximum price in local currency for the product searched for. Either explicitly stated by the user or implicitly inferred from a combination of the user's request and the kind of product searched for. */\n",
|
||||
"\t\tmax_price?: number,\n",
|
||||
"}) => any;\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"USER_INSTRUCTIONS: \"whats the most expensive shirt?\"\n",
|
||||
"\n",
|
||||
"Your arguments must be plain json provided in a markdown block:\n",
|
||||
"\n",
|
||||
"ARGS: ```json\n",
|
||||
"{valid json conforming to API_SCHEMA}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Example\n",
|
||||
"-----\n",
|
||||
"\n",
|
||||
"ARGS: ```json\n",
|
||||
"{\"foo\": \"bar\", \"baz\": {\"qux\": \"quux\"}}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"The block must be no more than 1 line long, and all arguments must be valid JSON. All string arguments must be wrapped in double quotes.\n",
|
||||
"You MUST strictly comply to the types indicated by the provided schema, including all required args.\n",
|
||||
"\n",
|
||||
"If you don't have sufficient information to call the function due to things like requiring specific uuid's, you can reply with the following message:\n",
|
||||
"\n",
|
||||
"Message: ```text\n",
|
||||
"Concise response requesting the additional information that would make calling the function successful.\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Begin\n",
|
||||
"-----\n",
|
||||
"ARGS:\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m{\"q\": \"shirt\", \"size\": 1, \"max_price\": null}\u001b[0m\n",
|
||||
"\u001b[36;1m\u001b[1;3m{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]}]}\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new APIResponderChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mYou are a helpful AI assistant trained to answer user queries from API responses.\n",
|
||||
"You attempted to call an API, which resulted in:\n",
|
||||
"API_RESPONSE: {\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]}]}\n",
|
||||
"\n",
|
||||
"USER_COMMENT: \"whats the most expensive shirt?\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"If the API_RESPONSE can answer the USER_COMMENT respond with the following markdown json block:\n",
|
||||
"Response: ```json\n",
|
||||
"{\"response\": \"Human-understandable synthesis of the API_RESPONSE\"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Otherwise respond with the following markdown json block:\n",
|
||||
"Response Error: ```json\n",
|
||||
"{\"response\": \"What you did and a concise statement of the resulting error. If it can be easily fixed, provide a suggestion.\"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You MUST respond as a markdown json code block. The person you are responding to CANNOT see the API_RESPONSE, so if there is any relevant information there you must include it in your response.\n",
|
||||
"\n",
|
||||
"Begin:\n",
|
||||
"---\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\u001b[33;1m\u001b[1;3mThe most expensive shirt in the API response is the Burberry Check Poplin Shirt, which costs $360.00.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = chain(\"whats the most expensive shirt?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "c000295e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'request_args': '{\"q\": \"shirt\", \"size\": 1, \"max_price\": null}',\n",
|
||||
" 'response_text': '{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]}]}'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# View intermediate steps\n",
|
||||
"output[\"intermediate_steps\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "092bdb4d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Return raw response\n",
|
||||
"\n",
|
||||
"We can also run this chain without synthesizing the response. This will have the effect of just returning the raw API output."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "4dff3849",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = OpenAPIEndpointChain.from_api_operation(\n",
|
||||
" operation, \n",
|
||||
" llm, \n",
|
||||
" requests=Requests(), \n",
|
||||
" verbose=True,\n",
|
||||
" return_intermediate_steps=True, # Return request and response text\n",
|
||||
" raw_response=True # Return raw response\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "762499a9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new OpenAPIEndpointChain chain...\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new APIRequesterChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mYou are a helpful AI Assistant. Please provide JSON arguments to agentFunc() based on the user's instructions.\n",
|
||||
"\n",
|
||||
"API_SCHEMA: ```typescript\n",
|
||||
"/* API for fetching Klarna product information */\n",
|
||||
"type productsUsingGET = (_: {\n",
|
||||
"/* A precise query that matches one very small category or product that needs to be searched for to find the products the user is looking for. If the user explicitly stated what they want, use that as a query. The query is as specific as possible to the product name or category mentioned by the user in its singular form, and don't contain any clarifiers like latest, newest, cheapest, budget, premium, expensive or similar. The query is always taken from the latest topic, if there is a new topic a new query is started. */\n",
|
||||
"\t\tq: string,\n",
|
||||
@@ -341,7 +187,36 @@
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m{\"q\": \"shirt\", \"max_price\": null}\u001b[0m\n",
|
||||
"\u001b[36;1m\u001b[1;3m{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Cotton Shirt - Beige\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl359/3200280807/Children-s-Clothing/Burberry-Vintage-Check-Cotton-Shirt-Beige/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$229.02\",\"attributes\":[\"Material:Cotton,Elastane\",\"Color:Beige\",\"Model:Boy\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Stretch Cotton Twill Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3202342515/Clothing/Burberry-Vintage-Check-Stretch-Cotton-Twill-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$309.99\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Woman\",\"Color:Beige\",\"Properties:Stretch\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Somerton Check Shirt - Camel\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201112728/Clothing/Burberry-Somerton-Check-Shirt-Camel/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$450.00\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Man\",\"Color:Beige\"]},{\"name\":\"Magellan Outdoors Laguna Madre Solid Short Sleeve Fishing Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3203102142/Clothing/Magellan-Outdoors-Laguna-Madre-Solid-Short-Sleeve-Fishing-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$19.99\",\"attributes\":[\"Material:Polyester,Nylon\",\"Target Group:Man\",\"Color:Red,Pink,White,Blue,Purple,Beige,Black,Green\",\"Properties:Pockets\",\"Pattern:Solid Color\"]}]}\u001b[0m\n",
|
||||
"\u001b[36;1m\u001b[1;3m{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Cotton Shirt - Beige\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl359/3200280807/Children-s-Clothing/Burberry-Vintage-Check-Cotton-Shirt-Beige/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$196.30\",\"attributes\":[\"Material:Cotton,Elastane\",\"Color:Beige\",\"Model:Boy\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Somerton Check Shirt - Camel\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201112728/Clothing/Burberry-Somerton-Check-Shirt-Camel/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$450.00\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Man\",\"Color:Beige\"]},{\"name\":\"Calvin Klein Slim Fit Oxford Dress Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201839169/Clothing/Calvin-Klein-Slim-Fit-Oxford-Dress-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$24.91\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,White,Blue,Black\",\"Pattern:Solid Color\"]},{\"name\":\"Magellan Outdoors Laguna Madre Solid Short Sleeve Fishing Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3203102142/Clothing/Magellan-Outdoors-Laguna-Madre-Solid-Short-Sleeve-Fishing-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$19.99\",\"attributes\":[\"Material:Polyester,Nylon\",\"Target Group:Man\",\"Color:Red,Pink,White,Blue,Purple,Beige,Black,Green\",\"Properties:Pockets\",\"Pattern:Solid Color\"]}]}\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new APIResponderChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mYou are a helpful AI assistant trained to answer user queries from API responses.\n",
|
||||
"You attempted to call an API, which resulted in:\n",
|
||||
"API_RESPONSE: {\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Cotton Shirt - Beige\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl359/3200280807/Children-s-Clothing/Burberry-Vintage-Check-Cotton-Shirt-Beige/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$196.30\",\"attributes\":[\"Material:Cotton,Elastane\",\"Color:Beige\",\"Model:Boy\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Somerton Check Shirt - Camel\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201112728/Clothing/Burberry-Somerton-Check-Shirt-Camel/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$450.00\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Man\",\"Color:Beige\"]},{\"name\":\"Calvin Klein Slim Fit Oxford Dress Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201839169/Clothing/Calvin-Klein-Slim-Fit-Oxford-Dress-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$24.91\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,White,Blue,Black\",\"Pattern:Solid Color\"]},{\"name\":\"Magellan Outdoors Laguna Madre Solid Short Sleeve Fishing Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3203102142/Clothing/Magellan-Outdoors-Laguna-Madre-Solid-Short-Sleeve-Fishing-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$19.99\",\"attributes\":[\"Material:Polyester,Nylon\",\"Target Group:Man\",\"Color:Red,Pink,White,Blue,Purple,Beige,Black,Green\",\"Properties:Pockets\",\"Pattern:Solid Color\"]}]}\n",
|
||||
"\n",
|
||||
"USER_COMMENT: \"whats the most expensive shirt?\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"If the API_RESPONSE can answer the USER_COMMENT respond with the following markdown json block:\n",
|
||||
"Response: ```json\n",
|
||||
"{\"response\": \"Concise response to USER_COMMENT based on API_RESPONSE.\"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Otherwise respond with the following markdown json block:\n",
|
||||
"Response Error: ```json\n",
|
||||
"{\"response\": \"What you did and a concise statement of the resulting error. If it can be easily fixed, provide a suggestion.\"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You MUST respond as a markdown json code block.\n",
|
||||
"\n",
|
||||
"Begin:\n",
|
||||
"---\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\u001b[33;1m\u001b[1;3mThe most expensive shirt in this list is the 'Burberry Somerton Check Shirt - Camel' which is priced at $450.00\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -353,26 +228,25 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "4afc021a",
|
||||
"execution_count": 8,
|
||||
"id": "c000295e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'instructions': 'whats the most expensive shirt?',\n",
|
||||
" 'output': '{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Cotton Shirt - Beige\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl359/3200280807/Children-s-Clothing/Burberry-Vintage-Check-Cotton-Shirt-Beige/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$229.02\",\"attributes\":[\"Material:Cotton,Elastane\",\"Color:Beige\",\"Model:Boy\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Stretch Cotton Twill Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3202342515/Clothing/Burberry-Vintage-Check-Stretch-Cotton-Twill-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$309.99\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Woman\",\"Color:Beige\",\"Properties:Stretch\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Somerton Check Shirt - Camel\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201112728/Clothing/Burberry-Somerton-Check-Shirt-Camel/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$450.00\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Man\",\"Color:Beige\"]},{\"name\":\"Magellan Outdoors Laguna Madre Solid Short Sleeve Fishing Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3203102142/Clothing/Magellan-Outdoors-Laguna-Madre-Solid-Short-Sleeve-Fishing-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$19.99\",\"attributes\":[\"Material:Polyester,Nylon\",\"Target Group:Man\",\"Color:Red,Pink,White,Blue,Purple,Beige,Black,Green\",\"Properties:Pockets\",\"Pattern:Solid Color\"]}]}',\n",
|
||||
" 'intermediate_steps': {'request_args': '{\"q\": \"shirt\", \"max_price\": null}',\n",
|
||||
" 'response_text': '{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Cotton Shirt - Beige\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl359/3200280807/Children-s-Clothing/Burberry-Vintage-Check-Cotton-Shirt-Beige/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$229.02\",\"attributes\":[\"Material:Cotton,Elastane\",\"Color:Beige\",\"Model:Boy\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Stretch Cotton Twill Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3202342515/Clothing/Burberry-Vintage-Check-Stretch-Cotton-Twill-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$309.99\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Woman\",\"Color:Beige\",\"Properties:Stretch\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Somerton Check Shirt - Camel\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201112728/Clothing/Burberry-Somerton-Check-Shirt-Camel/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$450.00\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Man\",\"Color:Beige\"]},{\"name\":\"Magellan Outdoors Laguna Madre Solid Short Sleeve Fishing Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3203102142/Clothing/Magellan-Outdoors-Laguna-Madre-Solid-Short-Sleeve-Fishing-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$19.99\",\"attributes\":[\"Material:Polyester,Nylon\",\"Target Group:Man\",\"Color:Red,Pink,White,Blue,Purple,Beige,Black,Green\",\"Properties:Pockets\",\"Pattern:Solid Color\"]}]}'}}"
|
||||
"['{\"q\": \"shirt\", \"max_price\": null}',\n",
|
||||
" '{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Cotton Shirt - Beige\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl359/3200280807/Children-s-Clothing/Burberry-Vintage-Check-Cotton-Shirt-Beige/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$196.30\",\"attributes\":[\"Material:Cotton,Elastane\",\"Color:Beige\",\"Model:Boy\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Somerton Check Shirt - Camel\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201112728/Clothing/Burberry-Somerton-Check-Shirt-Camel/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$450.00\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Man\",\"Color:Beige\"]},{\"name\":\"Calvin Klein Slim Fit Oxford Dress Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201839169/Clothing/Calvin-Klein-Slim-Fit-Oxford-Dress-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$24.91\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,White,Blue,Black\",\"Pattern:Solid Color\"]},{\"name\":\"Magellan Outdoors Laguna Madre Solid Short Sleeve Fishing Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3203102142/Clothing/Magellan-Outdoors-Laguna-Madre-Solid-Short-Sleeve-Fishing-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$19.99\",\"attributes\":[\"Material:Polyester,Nylon\",\"Target Group:Man\",\"Color:Red,Pink,White,Blue,Purple,Beige,Black,Green\",\"Properties:Pockets\",\"Pattern:Solid Color\"]}]}']"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output"
|
||||
"# View intermediate steps\n",
|
||||
"output[\"intermediate_steps\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -574,7 +448,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -9,9 +9,9 @@
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"# SQL Chain example\n",
|
||||
"# SQLite example\n",
|
||||
"\n",
|
||||
"This example demonstrates the use of the `SQLDatabaseChain` for answering questions over a database."
|
||||
"This example showcases hooking up an LLM to answer questions over a database."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -23,10 +23,8 @@
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"Under the hood, LangChain uses SQLAlchemy to connect to SQL databases. The `SQLDatabaseChain` can therefore be used with any SQL dialect supported by SQLAlchemy, such as MS SQL, MySQL, MariaDB, PostgreSQL, Oracle SQL, and SQLite. Please refer to the SQLAlchemy documentation for more information about requirements for connecting to your database. For example, a connection to MySQL requires an appropriate connector such as PyMySQL. A URI for a MySQL connection might look like: `mysql+pymysql://user:pass@some_mysql_db_address/db_name`\n",
|
||||
"\n",
|
||||
"This demonstration uses SQLite and the example Chinook database.\n",
|
||||
"To set it up, follow the instructions on https://database.guide/2-sample-databases-sqlite/, placing the `.db` file in a notebooks folder at the root of this repository."
|
||||
"This uses the example Chinook database.\n",
|
||||
"To set it up follow the instructions on https://database.guide/2-sample-databases-sqlite/, placing the `.db` file in a notebooks folder at the root of this repository."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -681,7 +679,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.10"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -36,6 +36,25 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "7a886879",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"cannot find .env file\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%load_ext dotenv\n",
|
||||
"%dotenv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "3f2f9b8c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -232,23 +251,10 @@
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'title': 'Tragedy at sunset on the beach',\n",
|
||||
" 'era': 'Victorian England',\n",
|
||||
" 'synopsis': \"\\n\\nThe play follows the story of John, a young man from a wealthy Victorian family, who dreams of a better life for himself. He soon meets a beautiful young woman named Mary, who shares his dream. The two fall in love and decide to elope and start a new life together.\\n\\nOn their journey, they make their way to a beach at sunset, where they plan to exchange their vows of love. Unbeknownst to them, their plans are overheard by John's father, who has been tracking them. He follows them to the beach and, in a fit of rage, confronts them. \\n\\nA physical altercation ensues, and in the struggle, John's father accidentally stabs Mary in the chest with his sword. The two are left in shock and disbelief as Mary dies in John's arms, her last words being a declaration of her love for him.\\n\\nThe tragedy of the play comes to a head when John, broken and with no hope of a future, chooses to take his own life by jumping off the cliffs into the sea below. \\n\\nThe play is a powerful story of love, hope, and loss set against the backdrop of 19th century England.\",\n",
|
||||
" 'review': \"\\n\\nThe latest production from playwright X is a powerful and heartbreaking story of love and loss set against the backdrop of 19th century England. The play follows John, a young man from a wealthy Victorian family, and Mary, a beautiful young woman with whom he falls in love. The two decide to elope and start a new life together, and the audience is taken on a journey of hope and optimism for the future.\\n\\nUnfortunately, their dreams are cut short when John's father discovers them and in a fit of rage, fatally stabs Mary. The tragedy of the play is further compounded when John, broken and without hope, takes his own life. The storyline is not only realistic, but also emotionally compelling, drawing the audience in from start to finish.\\n\\nThe acting was also commendable, with the actors delivering believable and nuanced performances. The playwright and director have successfully crafted a timeless tale of love and loss that will resonate with audiences for years to come. Highly recommended.\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"overall_chain({\"title\":\"Tragedy at sunset on the beach\", \"era\": \"Victorian England\"})"
|
||||
"review = overall_chain({\"title\":\"Tragedy at sunset on the beach\", \"era\": \"Victorian England\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -5,14 +5,14 @@
|
||||
"id": "134a0785",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Chat Over Documents with Chat History\n",
|
||||
"# Chat Index\n",
|
||||
"\n",
|
||||
"This notebook goes over how to set up a chain to chat over documents with chat history using a `ConversationalRetrievalChain`. The only difference between this chain and the [RetrievalQAChain](./vector_db_qa.ipynb) is that this allows for passing in of a chat history which can be used to allow for follow up questions."
|
||||
"This notebook goes over how to set up a chain to chat with an index. The only difference between this chain and the [RetrievalQAChain](./vector_db_qa.ipynb) is that this allows for passing in of a chat history which can be used to allow for follow up questions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 1,
|
||||
"id": "70c4e529",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -36,7 +36,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 2,
|
||||
"id": "01c46e92",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -58,7 +58,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 3,
|
||||
"id": "433363a5",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -81,7 +81,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 4,
|
||||
"id": "a8930cf7",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -109,12 +109,12 @@
|
||||
"id": "3c96b118",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We now initialize the `ConversationalRetrievalChain`"
|
||||
"We now initialize the ConversationalRetrievalChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 5,
|
||||
"id": "7b4110f3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -134,7 +134,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 6,
|
||||
"id": "7fe3e730",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -148,7 +148,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 7,
|
||||
"id": "bfff9cc8",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -160,7 +160,7 @@
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -179,7 +179,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 8,
|
||||
"id": "00b4cf00",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -193,7 +193,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 9,
|
||||
"id": "f01828d1",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -202,10 +202,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' Ketanji Brown Jackson succeeded Justice Stephen Breyer on the United States Supreme Court.'"
|
||||
"' Justice Stephen Breyer'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -225,11 +225,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 11,
|
||||
"id": "562769c6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), return_source_documents=True)"
|
||||
@@ -237,11 +235,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 12,
|
||||
"id": "ea478300",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chat_history = []\n",
|
||||
@@ -251,19 +247,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 13,
|
||||
"id": "4cb75b4e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../state_of_the_union.txt'})"
|
||||
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', lookup_str='', metadata={'source': '../../state_of_the_union.txt'}, lookup_index=0)"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -283,11 +277,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 14,
|
||||
"id": "5ed8d612",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vectordbkwargs = {\"search_distance\": 0.9}"
|
||||
@@ -295,11 +287,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 15,
|
||||
"id": "6a7b3459",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), return_source_documents=True)\n",
|
||||
@@ -319,25 +309,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 16,
|
||||
"id": "e53a9d66",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain.chains.question_answering import load_qa_chain\n",
|
||||
"from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT"
|
||||
"from langchain.chains.chat_index.prompts import CONDENSE_QUESTION_PROMPT"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "bf205e35",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
@@ -355,9 +341,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "78155887",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chat_history = []\n",
|
||||
@@ -369,9 +353,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "e54b5fa2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@@ -402,9 +384,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "d1058fd2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.qa_with_sources import load_qa_with_sources_chain"
|
||||
@@ -414,9 +394,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "a6594482",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
@@ -434,9 +412,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "e2badd21",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chat_history = []\n",
|
||||
@@ -448,9 +424,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "edb31fe5",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@@ -479,7 +453,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 26,
|
||||
"id": "2efacec3-2690-4b05-8de3-a32fd2ac3911",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -489,10 +463,10 @@
|
||||
"from langchain.chains.llm import LLMChain\n",
|
||||
"from langchain.callbacks.base import CallbackManager\n",
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
|
||||
"from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT\n",
|
||||
"from langchain.chains.chat_index.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT\n",
|
||||
"from langchain.chains.question_answering import load_qa_chain\n",
|
||||
"\n",
|
||||
"# Construct a ConversationalRetrievalChain with a streaming llm for combine docs\n",
|
||||
"# Construct a ChatVectorDBChain with a streaming llm for combine docs\n",
|
||||
"# and a separate, non-streaming llm for question generation\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"streaming_llm = OpenAI(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0)\n",
|
||||
@@ -506,7 +480,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 27,
|
||||
"id": "fd6d43f4-7428-44a4-81bc-26fe88a98762",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -528,7 +502,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 28,
|
||||
"id": "5ab38978-f3e8-4fa7-808c-c79dec48379a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -538,7 +512,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Ketanji Brown Jackson succeeded Justice Stephen Breyer on the United States Supreme Court."
|
||||
" Justice Stephen Breyer"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -559,11 +533,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"execution_count": 29,
|
||||
"id": "a7ba9d8c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_chat_history(inputs) -> str:\n",
|
||||
@@ -571,16 +543,14 @@
|
||||
" for human, ai in inputs:\n",
|
||||
" res.append(f\"Human:{human}\\nAI:{ai}\")\n",
|
||||
" return \"\\n\".join(res)\n",
|
||||
"qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore.as_retriever(), get_chat_history=get_chat_history)"
|
||||
"qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0), vectorstore, get_chat_history=get_chat_history)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"execution_count": 30,
|
||||
"id": "a3e33c0d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chat_history = []\n",
|
||||
@@ -590,11 +560,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"execution_count": 31,
|
||||
"id": "936dc62f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@@ -602,7 +570,7 @@
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 33,
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -636,7 +604,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -23,9 +23,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "17fcbc0f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
@@ -40,26 +38,17 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "ef9305cc",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open(\"../../state_of_the_union.txt\") as f:\n",
|
||||
" state_of_the_union = f.read()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_text(state_of_the_union)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
"index_creator = VectorstoreIndexCreator()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "291f0117",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -71,29 +60,27 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docsearch = Chroma.from_texts(texts, embeddings, metadatas=[{\"source\": str(i)} for i in range(len(texts))]).as_retriever()"
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"loader = TextLoader(\"../../state_of_the_union.txt\")\n",
|
||||
"docsearch = index_creator.from_loaders([loader])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "d1eaf6e6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Justice Breyer\"\n",
|
||||
"docs = docsearch.get_relevant_documents(query)"
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a16e3453",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.question_answering import load_qa_chain\n",
|
||||
@@ -111,19 +98,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 19,
|
||||
"id": "fd9e6190",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' The president said that Justice Breyer has dedicated his life to serve the country and thanked him for his service.'"
|
||||
"' The president said that he was honoring Justice Breyer for his service to the country and that he was a Constitutional scholar, Army veteran, and retiring Justice of the United States Supreme Court.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -154,11 +139,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"id": "180fd4c1",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_qa_chain(OpenAI(temperature=0), chain_type=\"stuff\")"
|
||||
@@ -166,19 +149,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"id": "77fdf1aa",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'output_text': ' The president said that Justice Breyer has dedicated his life to serve the country and thanked him for his service.'}"
|
||||
"{'output_text': ' The president said that he was honoring Justice Breyer for his service to the country and that he was a Constitutional scholar, Army veteran, and retiring Justice of the United States Supreme Court.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -200,19 +181,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 7,
|
||||
"id": "5558c9e0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'output_text': ' Il presidente ha detto che Justice Breyer ha dedicato la sua vita a servire questo paese e ha ricevuto una vasta gamma di supporto.'}"
|
||||
"{'output_text': ' Il presidente ha detto che Justice Breyer ha dedicato la sua vita a servire questo paese e ha onorato la sua carriera come giudice della Corte Suprema degli Stati Uniti.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -243,11 +222,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 8,
|
||||
"id": "b0060f51",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_qa_chain(OpenAI(temperature=0), chain_type=\"map_reduce\")"
|
||||
@@ -255,19 +232,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 9,
|
||||
"id": "fbdb9137",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'output_text': ' The president said that Justice Breyer is an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court, and thanked him for his service.'}"
|
||||
"{'output_text': ' The president said, \"Justice Breyer, thank you for your service.\"'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -289,11 +264,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 10,
|
||||
"id": "452c8680",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_qa_chain(OpenAI(temperature=0), chain_type=\"map_reduce\", return_map_steps=True)"
|
||||
@@ -301,23 +274,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 11,
|
||||
"id": "90b47a75",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'intermediate_steps': [' \"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.\"',\n",
|
||||
" ' A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.',\n",
|
||||
" ' None',\n",
|
||||
" ' None',\n",
|
||||
" ' None'],\n",
|
||||
" 'output_text': ' The president said that Justice Breyer is an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court, and thanked him for his service.'}"
|
||||
" 'output_text': ' The president said, \"Justice Breyer, thank you for your service.\"'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -338,23 +309,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 13,
|
||||
"id": "af03a578",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'intermediate_steps': [\"\\nStasera vorrei onorare qualcuno che ha dedicato la sua vita a servire questo paese: il giustizia Stephen Breyer - un veterano dell'esercito, uno studioso costituzionale e un giustizia in uscita della Corte Suprema degli Stati Uniti. Giustizia Breyer, grazie per il tuo servizio.\",\n",
|
||||
" '\\nNessun testo pertinente.',\n",
|
||||
" ' Non ha detto nulla riguardo a Justice Breyer.',\n",
|
||||
" \" Non c'è testo pertinente.\"],\n",
|
||||
" 'output_text': ' Non ha detto nulla riguardo a Justice Breyer.'}"
|
||||
" \"\\nCome ho detto l'anno scorso, soprattutto ai nostri giovani americani transgender, avrò sempre il tuo sostegno come tuo Presidente, in modo che tu possa essere te stesso e raggiungere il tuo potenziale donato da Dio.\",\n",
|
||||
" '\\nNella mia amministrazione, i guardiani sono stati accolti di nuovo. Stiamo andando dietro ai criminali che hanno rubato miliardi di dollari di aiuti di emergenza destinati alle piccole imprese e a milioni di americani. E stasera, annuncio che il Dipartimento di Giustizia nominerà un procuratore capo per la frode pandemica.'],\n",
|
||||
" 'output_text': ' Non conosco la risposta alla tua domanda su cosa abbia detto il Presidente riguardo al Giustizia Breyer.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -410,11 +379,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 12,
|
||||
"id": "fb167057",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_qa_chain(OpenAI(temperature=0), chain_type=\"refine\")"
|
||||
@@ -422,19 +389,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 13,
|
||||
"id": "d8b5286e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'output_text': '\\n\\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans. He also praised Justice Breyer for his role in helping to pass the Bipartisan Infrastructure Law, which he said would be the most sweeping investment to rebuild America in history and would help the country compete for the jobs of the 21st Century.'}"
|
||||
"{'output_text': '\\n\\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his commitment to protecting the rights of LGBTQ+ Americans and his support for the bipartisan Equality Act. He also mentioned his plan to lower costs to give families a fair shot, lower the deficit, and go after criminals who stole pandemic relief funds. He also announced that the Justice Department will name a chief prosecutor for pandemic fraud.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -456,11 +421,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 14,
|
||||
"id": "a5c64200",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_qa_chain(OpenAI(temperature=0), chain_type=\"refine\", return_refine_steps=True)"
|
||||
@@ -468,23 +431,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 15,
|
||||
"id": "817546ac",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'intermediate_steps': ['\\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country and his legacy of excellence.',\n",
|
||||
" '\\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice.',\n",
|
||||
" '\\n\\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans.',\n",
|
||||
" '\\n\\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans. He also praised Justice Breyer for his role in helping to pass the Bipartisan Infrastructure Law, which is the most sweeping investment to rebuild America in history.'],\n",
|
||||
" 'output_text': '\\n\\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his support of the Equality Act and his commitment to protecting the rights of LGBTQ+ Americans. He also praised Justice Breyer for his role in helping to pass the Bipartisan Infrastructure Law, which is the most sweeping investment to rebuild America in history.'}"
|
||||
" '\\n\\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice.',\n",
|
||||
" '\\n\\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his commitment to protecting the rights of LGBTQ+ Americans and his support for the bipartisan Equality Act.',\n",
|
||||
" '\\n\\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his commitment to protecting the rights of LGBTQ+ Americans and his support for the bipartisan Equality Act. He also mentioned his plan to lower costs to give families a fair shot, lower the deficit, and go after criminals who stole pandemic relief funds. He also announced that the Justice Department will name a chief prosecutor for pandemic fraud.'],\n",
|
||||
" 'output_text': '\\n\\nThe president said that he wanted to honor Justice Breyer for his dedication to serving the country, his legacy of excellence, and his commitment to advancing liberty and justice, as well as for his commitment to protecting the rights of LGBTQ+ Americans and his support for the bipartisan Equality Act. He also mentioned his plan to lower costs to give families a fair shot, lower the deficit, and go after criminals who stole pandemic relief funds. He also announced that the Justice Department will name a chief prosecutor for pandemic fraud.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -505,23 +466,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 14,
|
||||
"id": "6664bda7",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'intermediate_steps': ['\\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese e ha reso omaggio al suo servizio.',\n",
|
||||
" \"\\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere e la risoluzione del sistema di immigrazione.\",\n",
|
||||
" \"\\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere, la risoluzione del sistema di immigrazione, la protezione degli americani LGBTQ+ e l'approvazione dell'Equality Act. Ha inoltre sottolineato l'importanza di lavorare insieme per sconfiggere l'epidemia di oppiacei.\",\n",
|
||||
" \"\\n\\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere, la risoluzione del sistema di immigrazione, la protezione degli americani LGBTQ+ e l'approvazione dell'Equality Act. Ha inoltre sottolineato l'importanza di lavorare insieme per sconfiggere l'epidemia di oppiacei e per investire in America, educare gli americani, far crescere la forza lavoro e costruire l'economia dal\"],\n",
|
||||
" 'output_text': \"\\n\\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese, ha reso omaggio al suo servizio e ha sostenuto la nomina di una top litigatrice in pratica privata, un ex difensore pubblico federale e una famiglia di insegnanti e agenti di polizia delle scuole pubbliche. Ha anche sottolineato l'importanza di avanzare la libertà e la giustizia attraverso la sicurezza delle frontiere, la risoluzione del sistema di immigrazione, la protezione degli americani LGBTQ+ e l'approvazione dell'Equality Act. Ha inoltre sottolineato l'importanza di lavorare insieme per sconfiggere l'epidemia di oppiacei e per investire in America, educare gli americani, far crescere la forza lavoro e costruire l'economia dal\"}"
|
||||
"{'intermediate_steps': ['\\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese e ha onorato la sua carriera. Ha anche detto che la sua nomina di Circuit Court of Appeals Judge Ketanji Brown Jackson continuerà il suo eccezionale lascito.',\n",
|
||||
" \"\\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese e ha onorato la sua carriera. Ha anche detto che la sua nomina di Circuit Court of Appeals Judge Ketanji Brown Jackson continuerà il suo eccezionale lascito. Ha sottolineato che la sua esperienza come avvocato di alto livello in pratica privata, come ex difensore federale pubblico e come membro di una famiglia di educatori e agenti di polizia, la rende una costruttrice di consenso. Ha anche sottolineato che, dalla sua nomina, ha ricevuto un ampio sostegno, dall'Ordine Fraterno della Polizia a ex giudici nominati da democratici e repubblicani.\",\n",
|
||||
" \"\\n\\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese e ha onorato la sua carriera. Ha anche detto che la sua nomina di Circuit Court of Appeals Judge Ketanji Brown Jackson continuerà il suo eccezionale lascito. Ha sottolineato che la sua esperienza come avvocato di alto livello in pratica privata, come ex difensore federale pubblico e come membro di una famiglia di educatori e agenti di polizia, la rende una costruttrice di consenso. Ha anche sottolineato che, dalla sua nomina, ha ricevuto un ampio sostegno, dall'Ordine Fraterno della Polizia a ex giudici nominati da democratici e repubblicani. Ha inoltre sottolineato che la nomina di Justice Breyer è un passo importante verso l'uguaglianza per tutti gli americani, in partic\",\n",
|
||||
" \"\\n\\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese e ha onorato la sua carriera. Ha anche detto che la sua nomina di Circuit Court of Appeals Judge Ketanji Brown Jackson continuerà il suo eccezionale lascito. Ha sottolineato che la sua esperienza come avvocato di alto livello in pratica privata, come ex difensore federale pubblico e come membro di una famiglia di educatori e agenti di polizia, la rende una costruttrice di consenso. Ha anche sottolineato che, dalla sua nomina, ha ricevuto un ampio sostegno, dall'Ordine Fraterno della Polizia a ex giudici nominati da democratici e repubblicani. Ha inoltre sottolineato che la nomina di Justice Breyer è un passo importante verso l'uguaglianza per tutti gli americani, in partic\"],\n",
|
||||
" 'output_text': \"\\n\\nIl presidente ha detto che Justice Breyer ha dedicato la sua vita al servizio di questo paese e ha onorato la sua carriera. Ha anche detto che la sua nomina di Circuit Court of Appeals Judge Ketanji Brown Jackson continuerà il suo eccezionale lascito. Ha sottolineato che la sua esperienza come avvocato di alto livello in pratica privata, come ex difensore federale pubblico e come membro di una famiglia di educatori e agenti di polizia, la rende una costruttrice di consenso. Ha anche sottolineato che, dalla sua nomina, ha ricevuto un ampio sostegno, dall'Ordine Fraterno della Polizia a ex giudici nominati da democratici e repubblicani. Ha inoltre sottolineato che la nomina di Justice Breyer è un passo importante verso l'uguaglianza per tutti gli americani, in partic\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -573,11 +532,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 16,
|
||||
"id": "e2bfe203",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_qa_chain(OpenAI(temperature=0), chain_type=\"map_rerank\", return_intermediate_steps=True)"
|
||||
@@ -585,11 +542,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 17,
|
||||
"id": "5c28880c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Justice Breyer\"\n",
|
||||
@@ -598,19 +553,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 18,
|
||||
"id": "80ac2db3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' The President thanked Justice Breyer for his service and honored him for dedicating his life to serve the country.'"
|
||||
"' The president thanked Justice Breyer for his service and honored him for dedicating his life to serving the country. '"
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -621,23 +574,24 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 19,
|
||||
"id": "b428fcb9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'answer': ' The President thanked Justice Breyer for his service and honored him for dedicating his life to serve the country.',\n",
|
||||
"[{'answer': ' The president thanked Justice Breyer for his service and honored him for dedicating his life to serving the country. ',\n",
|
||||
" 'score': '100'},\n",
|
||||
" {'answer': ' This document does not answer the question', 'score': '0'},\n",
|
||||
" {'answer': ' This document does not answer the question', 'score': '0'},\n",
|
||||
" {'answer': ' This document does not answer the question', 'score': '0'}]"
|
||||
" {'answer': \" The president said that Justice Breyer is a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He also said that since she's been nominated, she's received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans, and that she is a consensus builder.\",\n",
|
||||
" 'score': '100'},\n",
|
||||
" {'answer': ' The president did not mention Justice Breyer in this context.',\n",
|
||||
" 'score': '0'},\n",
|
||||
" {'answer': ' The president did not mention Justice Breyer in the given context. ',\n",
|
||||
" 'score': '0'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 23,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -658,25 +612,24 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 16,
|
||||
"id": "41b83cd8",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'intermediate_steps': [{'answer': ' Il presidente ha detto che Justice Breyer ha dedicato la sua vita a servire questo paese.',\n",
|
||||
"{'intermediate_steps': [{'answer': ' Il presidente ha detto che Justice Breyer ha dedicato la sua vita a servire questo paese e ha onorato la sua carriera.',\n",
|
||||
" 'score': '100'},\n",
|
||||
" {'answer': ' Il presidente non ha detto nulla sulla Giustizia Breyer.',\n",
|
||||
" 'score': '100'},\n",
|
||||
" {'answer': ' Non so.', 'score': '0'},\n",
|
||||
" {'answer': ' Non so.', 'score': '0'}],\n",
|
||||
" 'output_text': ' Il presidente ha detto che Justice Breyer ha dedicato la sua vita a servire questo paese.'}"
|
||||
" {'answer': ' Il presidente non ha detto nulla sulla giustizia Breyer.',\n",
|
||||
" 'score': '100'}],\n",
|
||||
" 'output_text': ' Il presidente ha detto che Justice Breyer ha dedicato la sua vita a servire questo paese e ha onorato la sua carriera.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -741,7 +694,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.9.1"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
@@ -11,7 +11,7 @@ This module contains utility functions for working with documents, different typ
|
||||
The most common way that indexes are used in chains is in a "retrieval" step.
|
||||
This step refers to taking a user's query and returning the most relevant documents.
|
||||
We draw this distinction because (1) an index can be used for other things besides retrieval, and (2) retrieval can use other logic besides an index to find relevant documents.
|
||||
We therefore have a concept of a "Retriever" interface - this is the interface that most chains work with.
|
||||
We therefor have a concept of a "Retriever" interface - this is the interface that most chains work with.
|
||||
|
||||
Most of the time when we talk about indexes and retrieval we are talking about indexing and retrieving unstructured data (like text documents).
|
||||
For interacting with structured data (SQL tables, etc) or APIs, please see the corresponding use case sections for links to relevant functionality.
|
||||
|
||||
@@ -1,87 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "66a7777e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Bilibili\n",
|
||||
"\n",
|
||||
"This loader utilizes the `bilibili-api` to fetch the text transcript from Bilibili, one of the most beloved long-form video sites in China.\n",
|
||||
"\n",
|
||||
"With this BiliBiliLoader, users can easily obtain the transcript of their desired video content on the platform."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "9ec8a3b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.bilibili import BiliBiliLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "43128d8d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install bilibili-api"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "35d6809a",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = BiliBiliLoader(\n",
|
||||
" [\"https://www.bilibili.com/video/BV1xt411o7Xu/\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -106,7 +106,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Specify a column to be used identify the document source\n",
|
||||
"# Specify a column to be used identify the document source\n",
|
||||
"\n",
|
||||
"Use the `source_column` argument to specify a column to be set as the source for the document created from each row. Otherwise `file_path` will be used as the source for all documents created from the csv file.\n",
|
||||
"\n",
|
||||
|
||||
File diff suppressed because one or more lines are too long
Submodule docs/modules/indexes/document_loaders/examples/example_data/test_repo1 deleted from 7e525a3b91
@@ -8,5 +8,4 @@
|
||||
1/23/23, 3:02 AM - User 1: I thought you were selling the blue one!
|
||||
1/23/23, 3:18 AM - User 2: No Im sorry it was my mistake, the blue one is not for sale
|
||||
1/23/23, 3:19 AM - User 1: Oh no worries! Bye
|
||||
1/23/23, 3:19 AM - User 2: Bye!
|
||||
1/23/23, 3:22_AM - User 1: And let me know if anything changes
|
||||
1/23/23, 3:19 AM - User 2: Bye!
|
||||
@@ -1,192 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Git\n",
|
||||
"\n",
|
||||
"This notebook shows how to load text files from Git repository."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load existing repository from disk"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from git import Repo\n",
|
||||
"\n",
|
||||
"repo = Repo.clone_from(\n",
|
||||
" \"https://github.com/hwchase17/langchain\", to_path=\"./example_data/test_repo1\"\n",
|
||||
")\n",
|
||||
"branch = repo.head.reference"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import GitLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = GitLoader(repo_path=\"./example_data/test_repo1/\", branch=branch)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"len(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='.venv\\n.github\\n.git\\n.mypy_cache\\n.pytest_cache\\nDockerfile' metadata={'file_path': '.dockerignore', 'file_name': '.dockerignore', 'file_type': ''}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(data[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Clone repository from url"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import GitLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = GitLoader(\n",
|
||||
" clone_url=\"https://github.com/hwchase17/langchain\",\n",
|
||||
" repo_path=\"./example_data/test_repo2/\",\n",
|
||||
" branch=\"master\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1074"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Filtering files to load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import GitLoader\n",
|
||||
"\n",
|
||||
"# eg. loading only python files\n",
|
||||
"loader = GitLoader(repo_path=\"./example_data/test_repo1/\", file_filter=lambda file_path: file_path.endswith(\".py\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -104,11 +104,10 @@
|
||||
"Efficient Data AnnotationC u s t o m i z e d M o d e l T r a i n i n gModel Cust omizationDI A Model HubDI A Pipeline SharingCommunity PlatformLa y out Detection ModelsDocument Images \n",
|
||||
"T h e C o r e L a y o u t P a r s e r L i b r a r yOCR ModuleSt or age & VisualizationLa y out Data Structur e\n",
|
||||
"Fig. 1: The overall architecture of LayoutParser . For an input document image,\n",
|
||||
"the core LayoutParser library provides a set of o\u000b",
|
||||
"\n",
|
||||
"the core LayoutParser library provides a set of o\u000B",
|
||||
"-the-shelf tools for layout\n",
|
||||
"detection, OCR, visualization, and storage, backed by a carefully designed layout\n",
|
||||
"data structure. LayoutParser also supports high level customization via e\u000ecient\n",
|
||||
"data structure. LayoutParser also supports high level customization via e\u000Ecient\n",
|
||||
"layout annotation and model training functions. These improve model accuracy\n",
|
||||
"on the target samples. The community platform enables the easy sharing of DIA\n",
|
||||
"models and whole digitization pipelines to promote reusability and reproducibility.\n",
|
||||
@@ -118,7 +117,6 @@
|
||||
"DL-based support for developing and deploying models for general computer\n",
|
||||
"vision and natural language processing problems. LayoutParser , on the other\n",
|
||||
"hand, specializes speci\f",
|
||||
"\n",
|
||||
"cally in DIA tasks. LayoutParser is also equipped with a\n",
|
||||
"community platform inspired by established model hubs such as Torch Hub [23]\n",
|
||||
"andTensorFlow Hub [1]. It enables the sharing of pretrained models as well as\n",
|
||||
@@ -127,16 +125,13 @@
|
||||
"development of DL models. Some examples include PRImA [ 3](magazine layouts),\n",
|
||||
"PubLayNet [ 38](academic paper layouts), Table Bank [ 18](tables in academic\n",
|
||||
"papers), Newspaper Navigator Dataset [ 16,17](newspaper \f",
|
||||
"\n",
|
||||
"gure layouts) and\n",
|
||||
"HJDataset [31](historical Japanese document layouts). A spectrum of models\n",
|
||||
"trained on these datasets are currently available in the LayoutParser model zoo\n",
|
||||
"to support di\u000b",
|
||||
"\n",
|
||||
"to support di\u000B",
|
||||
"erent use cases.\n",
|
||||
"3 The Core LayoutParser Library\n",
|
||||
"At the core of LayoutParser is an o\u000b",
|
||||
"\n",
|
||||
"At the core of LayoutParser is an o\u000B",
|
||||
"-the-shelf toolkit that streamlines DL-\n",
|
||||
"based document image analysis. Five components support a simple interface\n",
|
||||
"with comprehensive functionalities: 1) The layout detection models enable using\n",
|
||||
@@ -231,9 +226,7 @@
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='LayoutParser: A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1 (<28>), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1 Allen Institute for AI\\nshannons@allenai.org\\n2 Brown University\\nruochen zhang@brown.edu\\n3 Harvard University\\n{melissadell,jacob carlson}@fas.harvard.edu\\n4 University of Washington\\nbcgl@cs.washington.edu\\n5 University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser, an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io.\\nKeywords: Document Image Analysis · Deep Learning · Layout Analysis\\n· Character Recognition · Open Source library · Toolkit.\\n1\\nIntroduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classification [11,\\narXiv:2103.15348v2 [cs.CV] 21 Jun 2021\\n', lookup_str='', metadata={'file_path': 'example_data/layout-parser-paper.pdf', 'page_number': 1, 'total_pages': 16, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'LaTeX with hyperref', 'producer': 'pdfTeX-1.40.21', 'creationDate': 'D:20210622012710Z', 'modDate': 'D:20210622012710Z', 'trapped': '', 'encryption': None}, lookup_index=0)"
|
||||
]
|
||||
"text/plain": "Document(page_content='LayoutParser: A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1 (<28>), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1 Allen Institute for AI\\nshannons@allenai.org\\n2 Brown University\\nruochen zhang@brown.edu\\n3 Harvard University\\n{melissadell,jacob carlson}@fas.harvard.edu\\n4 University of Washington\\nbcgl@cs.washington.edu\\n5 University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser, an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io.\\nKeywords: Document Image Analysis · Deep Learning · Layout Analysis\\n· Character Recognition · Open Source library · Toolkit.\\n1\\nIntroduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classification [11,\\narXiv:2103.15348v2 [cs.CV] 21 Jun 2021\\n', lookup_str='', metadata={'file_path': 'example_data/layout-parser-paper.pdf', 'page_number': 1, 'total_pages': 16, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'LaTeX with hyperref', 'producer': 'pdfTeX-1.40.21', 'creationDate': 'D:20210622012710Z', 'modDate': 'D:20210622012710Z', 'trapped': '', 'encryption': None}, lookup_index=0)"
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
@@ -246,51 +239,53 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "278c881f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Fetching remote PDFs using Unstructured\n",
|
||||
"\n",
|
||||
"This covers how to load online pdfs into a document format that we can use downstream. This can be used for various online pdf sites such as https://open.umn.edu/opentextbooks/textbooks/ and https://arxiv.org/archive/\n",
|
||||
"\n",
|
||||
"Note: all other pdf loaders can also be used to fetch remote PDFs, but `OnlinePDFLoader` is a legacy function, and works specifically with `UnstructuredPDFLoader`.\n"
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0c2686fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import OnlinePDFLoader"
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "101e0b82",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = OnlinePDFLoader(\"https://arxiv.org/pdf/2302.03803.pdf\")"
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "be3ccbfa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "e1298dd6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -302,13 +297,17 @@
|
||||
],
|
||||
"source": [
|
||||
"print(data)"
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "05187b33",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
@@ -350,204 +349,55 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c90a5fe8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using PDFMiner to generate HTML text"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eb785e1c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This can be helpful for chunking texts semantically into sections as the output html content can be parsed via `BeautifulSoup` to get more structured and rich information about font size, page numbers, pdf headers/footers, etc."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "601000d7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PDFMinerPDFasHTMLLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a5525fb0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = PDFMinerPDFasHTMLLoader(\"example_data/layout-parser-paper.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "dac7ff68",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()[0] # entire pdf is loaded as a single Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "0ba9f645",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"soup = BeautifulSoup(data.page_content,'html.parser')\n",
|
||||
"content = soup.find_all('div')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "35304e21",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"cur_fs = None\n",
|
||||
"cur_text = ''\n",
|
||||
"snippets = [] # first collect all snippets that have the same font size\n",
|
||||
"for c in content:\n",
|
||||
" sp = c.find('span')\n",
|
||||
" if not sp:\n",
|
||||
" continue\n",
|
||||
" st = sp.get('style')\n",
|
||||
" if not st:\n",
|
||||
" continue\n",
|
||||
" fs = re.findall('font-size:(\\d+)px',st)\n",
|
||||
" if not fs:\n",
|
||||
" continue\n",
|
||||
" fs = int(fs[0])\n",
|
||||
" if not cur_fs:\n",
|
||||
" cur_fs = fs\n",
|
||||
" if fs == cur_fs:\n",
|
||||
" cur_text += c.text\n",
|
||||
" else:\n",
|
||||
" snippets.append((cur_text,cur_fs))\n",
|
||||
" cur_fs = fs\n",
|
||||
" cur_text = c.text\n",
|
||||
"snippets.append((cur_text,cur_fs))\n",
|
||||
"# Note: The above logic is very straightforward. One can also add more strategies such as removing duplicate snippets (as\n",
|
||||
"# headers/footers in a PDF appear on multiple pages so if we find duplicatess safe to assume that it is redundant info)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "af8adf2f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"cur_idx = -1\n",
|
||||
"semantic_snippets = []\n",
|
||||
"# Assumption: headings have higher font size than their respective content\n",
|
||||
"for s in snippets:\n",
|
||||
" # if current snippet's font size > previous section's heading => it is a new heading\n",
|
||||
" if not semantic_snippets or s[1] > semantic_snippets[cur_idx].metadata['heading_font']:\n",
|
||||
" metadata={'heading':s[0], 'content_font': 0, 'heading_font': s[1]}\n",
|
||||
" metadata.update(data.metadata)\n",
|
||||
" semantic_snippets.append(Document(page_content='',metadata=metadata))\n",
|
||||
" cur_idx += 1\n",
|
||||
" continue\n",
|
||||
" \n",
|
||||
" # if current snippet's font size <= previous section's content => content belongs to the same section (one can also create\n",
|
||||
" # a tree like structure for sub sections if needed but that may require some more thinking and may be data specific)\n",
|
||||
" if not semantic_snippets[cur_idx].metadata['content_font'] or s[1] <= semantic_snippets[cur_idx].metadata['content_font']:\n",
|
||||
" semantic_snippets[cur_idx].page_content += s[0]\n",
|
||||
" semantic_snippets[cur_idx].metadata['content_font'] = max(s[1], semantic_snippets[cur_idx].metadata['content_font'])\n",
|
||||
" continue\n",
|
||||
" \n",
|
||||
" # if current snippet's font size > previous section's content but less tha previous section's heading than also make a new \n",
|
||||
" # section (e.g. title of a pdf will have the highest font size but we don't want it to subsume all sections)\n",
|
||||
" metadata={'heading':s[0], 'content_font': 0, 'heading_font': s[1]}\n",
|
||||
" metadata.update(data.metadata)\n",
|
||||
" semantic_snippets.append(Document(page_content='',metadata=metadata))\n",
|
||||
" cur_idx += 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "db7f6674",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='Recently, various DL models and datasets have been developed for layout analysis\\ntasks. The dhSegment [22] utilizes fully convolutional networks [20] for segmen-\\ntation tasks on historical documents. Object detection-based methods like Faster\\nR-CNN [28] and Mask R-CNN [12] are used for identifying document elements [38]\\nand detecting tables [30, 26]. Most recently, Graph Neural Networks [29] have also\\nbeen used in table detection [27]. However, these models are usually implemented\\nindividually and there is no unified framework to load and use such models.\\nThere has been a surge of interest in creating open-source tools for document\\nimage processing: a search of document image analysis in Github leads to 5M\\nrelevant code pieces 6; yet most of them rely on traditional rule-based methods\\nor provide limited functionalities. The closest prior research to our work is the\\nOCR-D project7, which also tries to build a complete toolkit for DIA. However,\\nsimilar to the platform developed by Neudecker et al. [21], it is designed for\\nanalyzing historical documents, and provides no supports for recent DL models.\\nThe DocumentLayoutAnalysis project8 focuses on processing born-digital PDF\\ndocuments via analyzing the stored PDF data. Repositories like DeepLayout9\\nand Detectron2-PubLayNet10 are individual deep learning models trained on\\nlayout analysis datasets without support for the full DIA pipeline. The Document\\nAnalysis and Exploitation (DAE) platform [15] and the DeepDIVA project [2]\\naim to improve the reproducibility of DIA methods (or DL models), yet they\\nare not actively maintained. OCR engines like Tesseract [14], easyOCR11 and\\npaddleOCR12 usually do not come with comprehensive functionalities for other\\nDIA tasks like layout analysis.\\nRecent years have also seen numerous efforts to create libraries for promoting\\nreproducibility and reusability in the field of DL. Libraries like Dectectron2 [35],\\n6 The number shown is obtained by specifying the search type as ‘code’.\\n7 https://ocr-d.de/en/about\\n8 https://github.com/BobLd/DocumentLayoutAnalysis\\n9 https://github.com/leonlulu/DeepLayout\\n10 https://github.com/hpanwar08/detectron2\\n11 https://github.com/JaidedAI/EasyOCR\\n12 https://github.com/PaddlePaddle/PaddleOCR\\n4\\nZ. Shen et al.\\nFig. 1: The overall architecture of LayoutParser. For an input document image,\\nthe core LayoutParser library provides a set of off-the-shelf tools for layout\\ndetection, OCR, visualization, and storage, backed by a carefully designed layout\\ndata structure. LayoutParser also supports high level customization via efficient\\nlayout annotation and model training functions. These improve model accuracy\\non the target samples. The community platform enables the easy sharing of DIA\\nmodels and whole digitization pipelines to promote reusability and reproducibility.\\nA collection of detailed documentation, tutorials and exemplar projects make\\nLayoutParser easy to learn and use.\\nAllenNLP [8] and transformers [34] have provided the community with complete\\nDL-based support for developing and deploying models for general computer\\nvision and natural language processing problems. LayoutParser, on the other\\nhand, specializes specifically in DIA tasks. LayoutParser is also equipped with a\\ncommunity platform inspired by established model hubs such as Torch Hub [23]\\nand TensorFlow Hub [1]. It enables the sharing of pretrained models as well as\\nfull document processing pipelines that are unique to DIA tasks.\\nThere have been a variety of document data collections to facilitate the\\ndevelopment of DL models. Some examples include PRImA [3](magazine layouts),\\nPubLayNet [38](academic paper layouts), Table Bank [18](tables in academic\\npapers), Newspaper Navigator Dataset [16, 17](newspaper figure layouts) and\\nHJDataset [31](historical Japanese document layouts). A spectrum of models\\ntrained on these datasets are currently available in the LayoutParser model zoo\\nto support different use cases.\\n', metadata={'heading': '2 Related Work\\n', 'content_font': 9, 'heading_font': 11, 'source': 'example_data/layout-parser-paper.pdf'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"semantic_snippets[4]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cc2c2f4f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using PyMuPDF\n",
|
||||
"\n",
|
||||
"This is the fastest of the PDF parsing options, and contains detailed metadata about the PDF and its pages, as well as returns one document per page."
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "55f0c4d8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PyMuPDFLoader"
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "718cbfbc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = PyMuPDFLoader(\"example_data/layout-parser-paper.pdf\")"
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "f2f93a15",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "a24dfaa6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='LayoutParser: A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1 (<28>), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1 Allen Institute for AI\\nshannons@allenai.org\\n2 Brown University\\nruochen zhang@brown.edu\\n3 Harvard University\\n{melissadell,jacob carlson}@fas.harvard.edu\\n4 University of Washington\\nbcgl@cs.washington.edu\\n5 University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser, an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io.\\nKeywords: Document Image Analysis · Deep Learning · Layout Analysis\\n· Character Recognition · Open Source library · Toolkit.\\n1\\nIntroduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classification [11,\\narXiv:2103.15348v2 [cs.CV] 21 Jun 2021\\n', lookup_str='', metadata={'file_path': 'example_data/layout-parser-paper.pdf', 'page_number': 1, 'total_pages': 16, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'LaTeX with hyperref', 'producer': 'pdfTeX-1.40.21', 'creationDate': 'D:20210622012710Z', 'modDate': 'D:20210622012710Z', 'trapped': '', 'encryption': None}, lookup_index=0)"
|
||||
]
|
||||
"text/plain": "Document(page_content='LayoutParser: A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1 (<28>), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1 Allen Institute for AI\\nshannons@allenai.org\\n2 Brown University\\nruochen zhang@brown.edu\\n3 Harvard University\\n{melissadell,jacob carlson}@fas.harvard.edu\\n4 University of Washington\\nbcgl@cs.washington.edu\\n5 University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser, an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io.\\nKeywords: Document Image Analysis · Deep Learning · Layout Analysis\\n· Character Recognition · Open Source library · Toolkit.\\n1\\nIntroduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classification [11,\\narXiv:2103.15348v2 [cs.CV] 21 Jun 2021\\n', lookup_str='', metadata={'file_path': 'example_data/layout-parser-paper.pdf', 'page_number': 1, 'total_pages': 16, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'LaTeX with hyperref', 'producer': 'pdfTeX-1.40.21', 'creationDate': 'D:20210622012710Z', 'modDate': 'D:20210622012710Z', 'trapped': '', 'encryption': None}, lookup_index=0)"
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
@@ -556,30 +406,35 @@
|
||||
],
|
||||
"source": [
|
||||
"data[0]"
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "83cb52a0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Additionally, you can pass along any of the options from the [PyMuPDF documentation](https://pymupdf.readthedocs.io/en/latest/app1.html#plain-text/) as keyword arguments in the `load` call, and it will be pass along to the `get_text()` call."
|
||||
]
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1bf73c97",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "langchain_dev",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "langchain_dev"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -591,7 +446,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1dc7df1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Slack (Local Exported Zipfile)\n",
|
||||
"\n",
|
||||
"This notebook covers how to load documents from a Zipfile generated from a Slack export.\n",
|
||||
"\n",
|
||||
"In order to get this Slack export, follow these instructions:\n",
|
||||
"\n",
|
||||
"## 🧑 Instructions for ingesting your own dataset\n",
|
||||
"\n",
|
||||
"Export your Slack data. You can do this by going to your Workspace Management page and clicking the Import/Export option ({your_slack_domain}.slack.com/services/export). Then, choose the right date range and click `Start export`. Slack will send you an email and a DM when the export is ready.\n",
|
||||
"\n",
|
||||
"The download will produce a `.zip` file in your Downloads folder (or wherever your downloads can be found, depending on your OS configuration).\n",
|
||||
"\n",
|
||||
"Copy the path to the `.zip` file, and assign it as `LOCAL_ZIPFILE` below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "007c5cbf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import SlackDirectoryLoader "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a1caec59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optionally set your Slack URL. This will give you proper URLs in the docs sources.\n",
|
||||
"SLACK_WORKSPACE_URL = \"https://xxx.slack.com\"\n",
|
||||
"LOCAL_ZIPFILE = \"\" # Paste the local paty to your Slack zip file here.\n",
|
||||
"\n",
|
||||
"loader = SlackDirectoryLoader(LOCAL_ZIPFILE, SLACK_WORKSPACE_URL)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b1c30ff7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -112,79 +112,6 @@
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "a2c1c79f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Playwright URL Loader\n",
|
||||
"\n",
|
||||
"This covers how to load HTML documents from a list of URLs using the `PlaywrightURLLoader`.\n",
|
||||
"\n",
|
||||
"As in the Selenium case, Playwright allows us to load pages that need JavaScript to render.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To use the `PlaywrightURLLoader`, you will need to install `playwright` and `unstructured`. Additionally, you will need to install the Playwright Chromium browser:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "53158417",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install playwright\n",
|
||||
"!pip install \"playwright\"\n",
|
||||
"!pip install \"unstructured\"\n",
|
||||
"!playwright install"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0ab4e115",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PlaywrightURLLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ce5a9a0a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"urls = [\n",
|
||||
" \"https://www.youtube.com/watch?v=dQw4w9WgXcQ\",\n",
|
||||
" \"https://goo.gl/maps/NDSHwePEyaHMFGwh8\"\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2dc3e0bc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = PlaywrightURLLoader(urls=urls, remove_selectors=[\"header\", \"footer\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "10b79f80",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -203,7 +130,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.8.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -89,7 +89,7 @@
|
||||
"id": "150988e6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Loading multiple webpages\n",
|
||||
"# Loading multiple webpages\n",
|
||||
"\n",
|
||||
"You can also load multiple webpages at once by passing in a list of urls to the loader. This will return a list of documents in the same order as the urls passed in."
|
||||
]
|
||||
@@ -123,7 +123,7 @@
|
||||
"id": "641be294",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load multiple urls concurrently\n",
|
||||
"## Load multiple urls concurrently\n",
|
||||
"\n",
|
||||
"You can speed up the scraping process by scraping and parsing multiple urls concurrently.\n",
|
||||
"\n",
|
||||
|
||||
@@ -99,7 +99,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"loader = TextLoader('../state_of_the_union.txt', encoding='utf8')"
|
||||
"loader = TextLoader('../state_of_the_union.txt')"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,95 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "9fc6205b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Databerry\n",
|
||||
"\n",
|
||||
"This notebook shows how to use [Databerry's](https://www.databerry.ai/) retriever.\n",
|
||||
"\n",
|
||||
"First, you will need to sign up for Databerry, create a datastore, add some data and get your datastore api endpoint url"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "944e172b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query\n",
|
||||
"\n",
|
||||
"Now that our index is set up, we can set up a retriever and start querying it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "d0e6f506",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.retrievers import DataberryRetriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "f381f642",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = DataberryRetriever(\n",
|
||||
" datastore_url=\"https://clg1xg2h80000l708dymr0fxc.databerry.ai/query\",\n",
|
||||
" # api_key=\"DATABERRY_API_KEY\", # optional if datastore is public\n",
|
||||
" # top_k=10 # optional\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "20ae1a74",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='✨ Made with DaftpageOpen main menuPricingTemplatesLoginSearchHelpGetting StartedFeaturesAffiliate ProgramGetting StartedDaftpage is a new type of website builder that works like a doc.It makes website building easy, fun and offers tons of powerful features for free. Just type / in your page to get started!DaftpageCopyright © 2022 Daftpage, Inc.All rights reserved.ProductPricingTemplatesHelp & SupportHelp CenterGetting startedBlogCompanyAboutRoadmapTwitterAffiliate Program👾 Discord', metadata={'source': 'https:/daftpage.com/help/getting-started', 'score': 0.8697265}),\n",
|
||||
" Document(page_content=\"✨ Made with DaftpageOpen main menuPricingTemplatesLoginSearchHelpGetting StartedFeaturesAffiliate ProgramHelp CenterWelcome to Daftpage’s help center—the one-stop shop for learning everything about building websites with Daftpage.Daftpage is the simplest way to create websites for all purposes in seconds. Without knowing how to code, and for free!Get StartedDaftpage is a new type of website builder that works like a doc.It makes website building easy, fun and offers tons of powerful features for free. Just type / in your page to get started!Start here✨ Create your first site🧱 Add blocks🚀 PublishGuides🔖 Add a custom domainFeatures🔥 Drops🎨 Drawings👻 Ghost mode💀 Skeleton modeCant find the answer you're looking for?mail us at support@daftpage.comJoin the awesome Daftpage community on: 👾 DiscordDaftpageCopyright © 2022 Daftpage, Inc.All rights reserved.ProductPricingTemplatesHelp & SupportHelp CenterGetting startedBlogCompanyAboutRoadmapTwitterAffiliate Program👾 Discord\", metadata={'source': 'https:/daftpage.com/help', 'score': 0.86570895}),\n",
|
||||
" Document(page_content=\" is the simplest way to create websites for all purposes in seconds. Without knowing how to code, and for free!Get StartedDaftpage is a new type of website builder that works like a doc.It makes website building easy, fun and offers tons of powerful features for free. Just type / in your page to get started!Start here✨ Create your first site🧱 Add blocks🚀 PublishGuides🔖 Add a custom domainFeatures🔥 Drops🎨 Drawings👻 Ghost mode💀 Skeleton modeCant find the answer you're looking for?mail us at support@daftpage.comJoin the awesome Daftpage community on: 👾 DiscordDaftpageCopyright © 2022 Daftpage, Inc.All rights reserved.ProductPricingTemplatesHelp & SupportHelp CenterGetting startedBlogCompanyAboutRoadmapTwitterAffiliate Program👾 Discord\", metadata={'source': 'https:/daftpage.com/help', 'score': 0.8645384})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"retriever.get_relevant_documents(\"What is Daftpage?\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,7 +1,6 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "ab66dd43",
|
||||
"metadata": {},
|
||||
@@ -10,12 +9,12 @@
|
||||
"\n",
|
||||
"This notebook goes over how to use a retriever that under the hood uses Pinecone and Hybrid Search.\n",
|
||||
"\n",
|
||||
"The logic of this retriever is taken from [this documentaion](https://docs.pinecone.io/docs/hybrid-search)"
|
||||
"The logic of this retriever is largely taken from [this blog post](https://www.pinecone.io/learn/hybrid-search-intro/)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 75,
|
||||
"execution_count": 1,
|
||||
"id": "393ac030",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -32,61 +31,43 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "15390796",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pinecone # !pip install pinecone-client\n",
|
||||
"\n",
|
||||
"pinecone.init(\n",
|
||||
" api_key=\"...\", # API key here\n",
|
||||
" environment=\"...\" # find next to api key in console\n",
|
||||
")\n",
|
||||
"# choose a name for your index\n",
|
||||
"index_name = \"...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "95d5d7f9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You should only have to do this part once.\n",
|
||||
"\n",
|
||||
"Note: it's important to make sure that the \"context\" field that holds the document text in the metadata is not indexed. Currently you need to specify explicitly the fields you do want to index. For more information checkout Pinecone's [docs](https://docs.pinecone.io/docs/manage-indexes#selective-metadata-indexing)."
|
||||
"You should only have to do this part once."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"id": "3b8f7697",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"WhoAmIResponse(username='load', user_label='label', projectname='load-test')"
|
||||
]
|
||||
},
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import pinecone\n",
|
||||
"\n",
|
||||
"api_key = os.getenv(\"PINECONE_API_KEY\") or \"PINECONE_API_KEY\"\n",
|
||||
"# find environment next to your API key in the Pinecone console\n",
|
||||
"env = os.getenv(\"PINECONE_ENVIRONMENT\") or \"PINECONE_ENVIRONMENT\"\n",
|
||||
"\n",
|
||||
"index_name = \"langchain-pinecone-hybrid-search\"\n",
|
||||
"\n",
|
||||
"pinecone.init(api_key=api_key, enviroment=env)\n",
|
||||
"pinecone.whoami()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 77,
|
||||
"execution_count": null,
|
||||
"id": "cfa3a8d8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
" # create the index\n",
|
||||
"# create the index\n",
|
||||
"pinecone.create_index(\n",
|
||||
" name = index_name,\n",
|
||||
" dimension = 1536, # dimensionality of dense model\n",
|
||||
" metric = \"dotproduct\", # sparse values supported only for dotproduct\n",
|
||||
" pod_type = \"s1\",\n",
|
||||
" metadata_config={\"indexed\": []} # see explaination above\n",
|
||||
" metric = \"dotproduct\",\n",
|
||||
" pod_type = \"s1\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -100,7 +81,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 78,
|
||||
"execution_count": 4,
|
||||
"id": "bcb3c8c2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -109,19 +90,18 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "dbc025d6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get embeddings and sparse encoders\n",
|
||||
"## Get embeddings and tokenizers\n",
|
||||
"\n",
|
||||
"Embeddings are used for the dense vectors, tokenizer is used for the sparse vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 79,
|
||||
"execution_count": 5,
|
||||
"id": "2f63c911",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -130,51 +110,19 @@
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "96bf8879",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To encode the text to sparse values you can either choose SPLADE or BM25. For out of domain tasks we recommend using BM25.\n",
|
||||
"\n",
|
||||
"For more information about the sparse encoders you can checkout pinecone-text library [docs](https://pinecone-io.github.io/pinecone-text/pinecone_text.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 80,
|
||||
"execution_count": 6,
|
||||
"id": "c3f030e5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pinecone_text.sparse import BM25Encoder\n",
|
||||
"# or from pinecone_text.sparse import SpladeEncoder if you wish to work with SPLADE\n",
|
||||
"from transformers import BertTokenizerFast # !pip install transformers\n",
|
||||
"\n",
|
||||
"# use default tf-idf values\n",
|
||||
"bm25_encoder = BM25Encoder().default()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "23601ddb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The above code is using default tfids values. It's highly recommended to fit the tf-idf values to your own corpus. You can do it as follow:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"corpus = [\"foo\", \"bar\", \"world\", \"hello\"]\n",
|
||||
"\n",
|
||||
"# fit tf-idf values on your corpus\n",
|
||||
"bm25_encoder.fit(corpus)\n",
|
||||
"\n",
|
||||
"# store the values to a json file\n",
|
||||
"bm25_encoder.dump(\"bm25_values.json\")\n",
|
||||
"\n",
|
||||
"# load to your BM25Encoder object\n",
|
||||
"bm25_encoder = BM25Encoder().load(\"bm25_values.json\")\n",
|
||||
"```"
|
||||
"# load bert tokenizer from huggingface\n",
|
||||
"tokenizer = BertTokenizerFast.from_pretrained(\n",
|
||||
" 'bert-base-uncased'\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -189,12 +137,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 81,
|
||||
"execution_count": 7,
|
||||
"id": "ac77d835",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = PineconeHybridSearchRetriever(embeddings=embeddings, sparse_encoder=bm25_encoder, index=index)"
|
||||
"retriever = PineconeHybridSearchRetriever(embeddings=embeddings, index=index, tokenizer=tokenizer)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -209,16 +157,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 82,
|
||||
"execution_count": 8,
|
||||
"id": "98b1c017",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████| 1/1 [00:02<00:00, 2.27s/it]\n"
|
||||
]
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "4d6f3ee7ca754d07a1a18d100d99e0cd",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0/1 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -237,7 +192,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 83,
|
||||
"execution_count": 9,
|
||||
"id": "c0455218",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -247,7 +202,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 84,
|
||||
"execution_count": 10,
|
||||
"id": "7dfa5c29",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -257,7 +212,7 @@
|
||||
"Document(page_content='foo', metadata={})"
|
||||
]
|
||||
},
|
||||
"execution_count": 84,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -265,11 +220,19 @@
|
||||
"source": [
|
||||
"result[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "74bd9256",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -283,12 +246,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.13"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "7ec0d8babd8cabf695a1d94b1e586d626e046c9df609f6bad065d15d49f67f54"
|
||||
}
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,128 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ab66dd43",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SVM Retriever\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use a retriever that under the hood uses an SVM using scikit-learn.\n",
|
||||
"\n",
|
||||
"Largely based on https://github.com/karpathy/randomfun/blob/master/knn_vs_svm.ipynb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "393ac030",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.retrievers import SVMRetriever\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a801b57c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip install scikit-learn"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "aaf80e7f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create New Retriever with Texts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "98b1c017",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = SVMRetriever.from_texts([\"foo\", \"bar\", \"world\", \"hello\", \"foo bar\"], OpenAIEmbeddings())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "08437fa2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use Retriever\n",
|
||||
"\n",
|
||||
"We can now use the retriever!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "c0455218",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = retriever.get_relevant_documents(\"foo\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "7dfa5c29",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='foo', metadata={}),\n",
|
||||
" Document(page_content='foo bar', metadata={}),\n",
|
||||
" Document(page_content='hello', metadata={}),\n",
|
||||
" Document(page_content='world', metadata={})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "74bd9256",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,213 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "a90b7557",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Time Weighted VectorStore Retriever\n",
|
||||
"\n",
|
||||
"This retriever uses a combination of semantic similarity and recency.\n",
|
||||
"\n",
|
||||
"The algorithm for scoring them is:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"semantic_similarity + (1.0 - decay_rate) ** hours_passed\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Notably, hours_passed refers to the hours passed since the object in the retriever **was last accessed**, not since it was created. This means that frequently accessed objects remain \"fresh.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "f22cc96b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import faiss\n",
|
||||
"\n",
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"from langchain.docstore import InMemoryDocstore\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.retrievers import TimeWeightedVectorStoreRetriever\n",
|
||||
"from langchain.schema import Document\n",
|
||||
"from langchain.vectorstores import FAISS\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "6af7ea6b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Low Decay Rate\n",
|
||||
"\n",
|
||||
"A low decay rate (in this, to be extreme, we will set close to 0) means memories will be \"remembered\" for longer. A decay rate of 0 means memories never be forgotten, making this retriever equivalent to the vector lookup."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "c10e7696",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define your embedding model\n",
|
||||
"embeddings_model = OpenAIEmbeddings()\n",
|
||||
"# Initialize the vectorstore as empty\n",
|
||||
"embedding_size = 1536\n",
|
||||
"index = faiss.IndexFlatL2(embedding_size)\n",
|
||||
"vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})\n",
|
||||
"retriever = TimeWeightedVectorStoreRetriever(vectorstore=vectorstore, decay_rate=.0000000000000000000000001, k=1) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "86dbadb9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['30fac87b-0357-450c-9a92-63e0147d005c']"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"yesterday = datetime.now() - timedelta(days=1)\n",
|
||||
"retriever.add_documents([Document(page_content=\"hello world\", metadata={\"last_accessed_at\": yesterday})])\n",
|
||||
"retriever.add_documents([Document(page_content=\"hello foo\")])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "a580be32",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='hello world', metadata={'last_accessed_at': datetime.datetime(2023, 4, 17, 6, 52, 55, 670204), 'created_at': datetime.datetime(2023, 4, 17, 6, 52, 54, 819346), 'buffer_idx': 0})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# \"Hello World\" is returned first because it is most salient, and the decay rate is close to 0., meaning it's still recent enough\n",
|
||||
"retriever.get_relevant_documents(\"hello world\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "ca056896",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## High Decay Rate\n",
|
||||
"\n",
|
||||
"With a high decay factor (e.g., several 9's), the recency score quickly goes to 0! If you set this all the way to 1, recency is 0 for all objects, once again making this equivalent to a vector lookup.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "dc37669b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define your embedding model\n",
|
||||
"embeddings_model = OpenAIEmbeddings()\n",
|
||||
"# Initialize the vectorstore as empty\n",
|
||||
"embedding_size = 1536\n",
|
||||
"index = faiss.IndexFlatL2(embedding_size)\n",
|
||||
"vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})\n",
|
||||
"retriever = TimeWeightedVectorStoreRetriever(vectorstore=vectorstore, decay_rate=.999, k=1) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "fa284384",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['a8d823e6-baee-46c8-aaa2-519cfb9b59ba']"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"yesterday = datetime.now() - timedelta(days=1)\n",
|
||||
"retriever.add_documents([Document(page_content=\"hello world\", metadata={\"last_accessed_at\": yesterday})])\n",
|
||||
"retriever.add_documents([Document(page_content=\"hello foo\")])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "7558f94d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='hello foo', metadata={'last_accessed_at': datetime.datetime(2023, 4, 17, 6, 52, 57, 11210), 'created_at': datetime.datetime(2023, 4, 17, 6, 52, 56, 17459), 'buffer_idx': 1})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# \"Hello Foo\" is returned first because \"hello world\" is mostly forgotten\n",
|
||||
"retriever.get_relevant_documents(\"hello world\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bf6d8c90",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,132 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ce0f17b9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Weaviate Hybrid Search\n",
|
||||
"\n",
|
||||
"This notebook shows how to use [Weaviate hybrid search](https://weaviate.io/blog/hybrid-search-explained) as a LangChain retriever."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "c10dd962",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import weaviate\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"WEAVIATE_URL = \"...\"\n",
|
||||
"client = weaviate.Client(\n",
|
||||
" url=WEAVIATE_URL,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "f47a2bfe",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever\n",
|
||||
"from langchain.schema import Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "f2eff08e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = WeaviateHybridSearchRetriever(client, index_name=\"LangChain\", text_key=\"text\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "cd8a7b17",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = [Document(page_content=\"foo\")]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "3c5970db",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['3f79d151-fb84-44cf-85e0-8682bfe145e0']"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"retriever.add_documents(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "bf7dbb98",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='foo', metadata={})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"retriever.get_relevant_documents(\"foo\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b2bc87c1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,572 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Annoy\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the Annoy vector database.\n",
|
||||
"\n",
|
||||
"> \"Annoy (Approximate Nearest Neighbors Oh Yeah) is a C++ library with Python bindings to search for points in space that are close to a given query point. It also creates large read-only file-based data structures that are mmapped into memory so that many processes may share the same data.\"\n",
|
||||
"\n",
|
||||
"via [Annoy](https://github.com/spotify/annoy) \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "3b450bdc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"```{note}\n",
|
||||
"Annoy is read-only - once the index is built you cannot add any more emebddings!\n",
|
||||
"If you want to progressively add to your VectorStore then better choose an alternative!\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "6613d222",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create VectorStore from texts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "dc7351b5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings import HuggingFaceEmbeddings\n",
|
||||
"from langchain.vectorstores import Annoy\n",
|
||||
"\n",
|
||||
"embeddings_func = HuggingFaceEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "d2cb5f7d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"texts = [\"pizza is great\", \"I love salad\", \"my car\", \"a dog\"]\n",
|
||||
"\n",
|
||||
"# default metric is angular\n",
|
||||
"vector_store = Annoy.from_texts(texts, embeddings_func)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "a856b2d1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# allows for custom annoy parameters, defaults are n_trees=100, n_jobs=-1, metric=\"angular\"\n",
|
||||
"vector_store_v2 = Annoy.from_texts(\n",
|
||||
" texts, embeddings_func, metric=\"dot\", n_trees=100, n_jobs=1\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "8ada534a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='pizza is great', metadata={}),\n",
|
||||
" Document(page_content='I love salad', metadata={}),\n",
|
||||
" Document(page_content='my car', metadata={})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vector_store.similarity_search(\"food\", k=3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0470c5c8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(Document(page_content='pizza is great', metadata={}), 1.0944390296936035),\n",
|
||||
" (Document(page_content='I love salad', metadata={}), 1.1273186206817627),\n",
|
||||
" (Document(page_content='my car', metadata={}), 1.1580758094787598)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# the score is a distance metric, so lower is better\n",
|
||||
"vector_store.similarity_search_with_score(\"food\", k=3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "4583b231",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create VectorStore from docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "fbe898a8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "51ea6b5c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.', metadata={'source': '../../../state_of_the_union.txt'}),\n",
|
||||
" Document(page_content='Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \\n\\nIn this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \\n\\nLet each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. \\n\\nPlease rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \\n\\nThroughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \\n\\nThey keep moving. \\n\\nAnd the costs and the threats to America and the world keep rising. \\n\\nThat’s why the NATO Alliance was created to secure peace and stability in Europe after World War 2. \\n\\nThe United States is a member along with 29 other nations. \\n\\nIt matters. American diplomacy matters. American resolve matters.', metadata={'source': '../../../state_of_the_union.txt'}),\n",
|
||||
" Document(page_content='Putin’s latest attack on Ukraine was premeditated and unprovoked. \\n\\nHe rejected repeated efforts at diplomacy. \\n\\nHe thought the West and NATO wouldn’t respond. And he thought he could divide us at home. Putin was wrong. We were ready. Here is what we did. \\n\\nWe prepared extensively and carefully. \\n\\nWe spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin. \\n\\nI spent countless hours unifying our European allies. We shared with the world in advance what we knew Putin was planning and precisely how he would try to falsely justify his aggression. \\n\\nWe countered Russia’s lies with truth. \\n\\nAnd now that he has acted the free world is holding him accountable. \\n\\nAlong with twenty-seven members of the European Union including France, Germany, Italy, as well as countries like the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and many others, even Switzerland.', metadata={'source': '../../../state_of_the_union.txt'}),\n",
|
||||
" Document(page_content='We are inflicting pain on Russia and supporting the people of Ukraine. Putin is now isolated from the world more than ever. \\n\\nTogether with our allies –we are right now enforcing powerful economic sanctions. \\n\\nWe are cutting off Russia’s largest banks from the international financial system. \\n\\nPreventing Russia’s central bank from defending the Russian Ruble making Putin’s $630 Billion “war fund” worthless. \\n\\nWe are choking off Russia’s access to technology that will sap its economic strength and weaken its military for years to come. \\n\\nTonight I say to the Russian oligarchs and corrupt leaders who have bilked billions of dollars off this violent regime no more. \\n\\nThe U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. \\n\\nWe are joining with our European allies to find and seize your yachts your luxury apartments your private jets. We are coming for your ill-begotten gains.', metadata={'source': '../../../state_of_the_union.txt'}),\n",
|
||||
" Document(page_content='And tonight I am announcing that we will join our allies in closing off American air space to all Russian flights – further isolating Russia – and adding an additional squeeze –on their economy. The Ruble has lost 30% of its value. \\n\\nThe Russian stock market has lost 40% of its value and trading remains suspended. Russia’s economy is reeling and Putin alone is to blame. \\n\\nTogether with our allies we are providing support to the Ukrainians in their fight for freedom. Military assistance. Economic assistance. Humanitarian assistance. \\n\\nWe are giving more than $1 Billion in direct assistance to Ukraine. \\n\\nAnd we will continue to aid the Ukrainian people as they defend their country and to help ease their suffering. \\n\\nLet me be clear, our forces are not engaged and will not engage in conflict with Russian forces in Ukraine. \\n\\nOur forces are not going to Europe to fight in Ukraine, but to defend our NATO Allies – in the event that Putin decides to keep moving west.', metadata={'source': '../../../state_of_the_union.txt'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[:5]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "d080985b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store_from_docs = Annoy.from_documents(docs, embeddings_func)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "4931cb99",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = vector_store_from_docs.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "97969d5b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Ac\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content[:100])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "79628542",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create VectorStore via existing embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "3432eddb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embs = embeddings_func.embed_documents(texts)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "b69f8408",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = list(zip(texts, embs))\n",
|
||||
"\n",
|
||||
"vector_store_from_embeddings = Annoy.from_embeddings(data, embeddings_func)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "e260758d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(Document(page_content='pizza is great', metadata={}), 1.0944390296936035),\n",
|
||||
" (Document(page_content='I love salad', metadata={}), 1.1273186206817627),\n",
|
||||
" (Document(page_content='my car', metadata={}), 1.1580758094787598)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vector_store_from_embeddings.similarity_search_with_score(\"food\", k=3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "341390c2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Search via embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "b9bce06d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"motorbike_emb = embeddings_func.embed_query(\"motorbike\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "af2552c9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='my car', metadata={}),\n",
|
||||
" Document(page_content='a dog', metadata={}),\n",
|
||||
" Document(page_content='pizza is great', metadata={})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vector_store.similarity_search_by_vector(motorbike_emb, k=3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "c7a1a924",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(Document(page_content='my car', metadata={}), 1.0870471000671387),\n",
|
||||
" (Document(page_content='a dog', metadata={}), 1.2095637321472168),\n",
|
||||
" (Document(page_content='pizza is great', metadata={}), 1.3254905939102173)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vector_store.similarity_search_with_score_by_vector(motorbike_emb, k=3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4b77be77",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Search via docstore id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "bbd971f0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{0: '2d1498a8-a37c-4798-acb9-0016504ed798',\n",
|
||||
" 1: '2d30aecc-88e0-4469-9d51-0ef7e9858e6d',\n",
|
||||
" 2: '927f1120-985b-4691-b577-ad5cb42e011c',\n",
|
||||
" 3: '3056ddcf-a62f-48c8-bd98-b9e57a3dfcae'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vector_store.index_to_docstore_id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "6dbf3365",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='pizza is great', metadata={})"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"some_docstore_id = 0 # texts[0]\n",
|
||||
"\n",
|
||||
"vector_store.docstore._dict[vector_store.index_to_docstore_id[some_docstore_id]]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "98b27172",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(Document(page_content='pizza is great', metadata={}), 0.0),\n",
|
||||
" (Document(page_content='I love salad', metadata={}), 1.0734446048736572),\n",
|
||||
" (Document(page_content='my car', metadata={}), 1.2895267009735107)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# same document has distance 0\n",
|
||||
"vector_store.similarity_search_with_score_by_index(some_docstore_id, k=3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "6f570f69",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Save and load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "ef91cc69",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"saving config\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vector_store.save_local(\"my_annoy_index_and_docstore\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "7a9d2fce",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loaded_vector_store = Annoy.load_local(\n",
|
||||
" \"my_annoy_index_and_docstore\", embeddings=embeddings_func\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "bba77cae",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(Document(page_content='pizza is great', metadata={}), 0.0),\n",
|
||||
" (Document(page_content='I love salad', metadata={}), 1.0734446048736572),\n",
|
||||
" (Document(page_content='my car', metadata={}), 1.2895267009735107)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# same document has distance 0\n",
|
||||
"loaded_vector_store.similarity_search_with_score_by_index(some_docstore_id, k=3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "df4beb83",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Construct from scratch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "26fcf742",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import uuid\n",
|
||||
"from annoy import AnnoyIndex\n",
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"from langchain.docstore.in_memory import InMemoryDocstore\n",
|
||||
"\n",
|
||||
"metadatas = [{\"x\": \"food\"}, {\"x\": \"food\"}, {\"x\": \"stuff\"}, {\"x\": \"animal\"}]\n",
|
||||
"\n",
|
||||
"# embeddings\n",
|
||||
"embeddings = embeddings_func.embed_documents(texts)\n",
|
||||
"\n",
|
||||
"# embedding dim\n",
|
||||
"f = len(embeddings[0])\n",
|
||||
"\n",
|
||||
"# index\n",
|
||||
"metric = \"angular\"\n",
|
||||
"index = AnnoyIndex(f, metric=metric)\n",
|
||||
"for i, emb in enumerate(embeddings):\n",
|
||||
" index.add_item(i, emb)\n",
|
||||
"index.build(10)\n",
|
||||
"\n",
|
||||
"# docstore\n",
|
||||
"documents = []\n",
|
||||
"for i, text in enumerate(texts):\n",
|
||||
" metadata = metadatas[i] if metadatas else {}\n",
|
||||
" documents.append(Document(page_content=text, metadata=metadata))\n",
|
||||
"index_to_docstore_id = {i: str(uuid.uuid4()) for i in range(len(documents))}\n",
|
||||
"docstore = InMemoryDocstore(\n",
|
||||
" {index_to_docstore_id[i]: doc for i, doc in enumerate(documents)}\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"db_manually = Annoy(\n",
|
||||
" embeddings_func.embed_query, index, metric, docstore, index_to_docstore_id\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "2b3f6f5c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(Document(page_content='pizza is great', metadata={'x': 'food'}),\n",
|
||||
" 1.1314140558242798),\n",
|
||||
" (Document(page_content='I love salad', metadata={'x': 'food'}),\n",
|
||||
" 1.1668788194656372),\n",
|
||||
" (Document(page_content='my car', metadata={'x': 'stuff'}), 1.226445198059082)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db_manually.similarity_search_with_score(\"eating!\", k=3)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -17,36 +17,34 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!python3 -m pip install openai deeplake tiktoken"
|
||||
"!python3 -m pip install openai deeplake"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import DeepLake"
|
||||
"from langchain.vectorstores import DeepLake\n",
|
||||
"from langchain.document_loaders import TextLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
"os.environ['OPENAI_API_KEY'] = 'sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -62,38 +60,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"mem://langchain loaded successfully.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Evaluating ingest: 100%|██████████| 1/1 [00:04<00:00\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='mem://langchain', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (4, 1536) float32 None \n",
|
||||
" ids text (4, 1) str None \n",
|
||||
" metadata json (4, 1) str None \n",
|
||||
" text text (4, 1) str None \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = DeepLake.from_documents(docs, embeddings)\n",
|
||||
"\n",
|
||||
@@ -103,23 +72,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
@@ -134,18 +89,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/media/sdb/davit/.local/lib/python3.10/site-packages/langchain/llms/openai.py:624: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI`\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import RetrievalQA\n",
|
||||
"from langchain.llms import OpenAIChat\n",
|
||||
@@ -155,20 +101,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The president nominated Circuit Court of Appeals Judge Ketanji Brown Jackson for the United States Supreme Court and praised her qualifications and broad support from both Democrats and Republicans.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = 'What did the president say about Ketanji Brown Jackson'\n",
|
||||
"qa.run(query)"
|
||||
@@ -184,43 +119,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"mem://langchain loaded successfully.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Evaluating ingest: 100%|██████████| 1/1 [00:04<00:00\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='mem://langchain', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (42, 1536) float32 None \n",
|
||||
" ids text (42, 1) str None \n",
|
||||
" metadata json (42, 1) str None \n",
|
||||
" text text (42, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": []
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import random\n",
|
||||
"\n",
|
||||
@@ -232,30 +133,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████| 42/42 [00:00<00:00, 3456.17it/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='Vice President Harris and I ran for office with a new economic vision for America. \\n\\nInvest in America. Educate Americans. Grow the workforce. Build the economy from the bottom up \\nand the middle out, not from the top down. \\n\\nBecause we know that when the middle class grows, the poor have a ladder up and the wealthy do very well. \\n\\nAmerica used to have the best roads, bridges, and airports on Earth. \\n\\nNow our infrastructure is ranked 13th in the world. \\n\\nWe won’t be able to compete for the jobs of the 21st Century if we don’t fix that. \\n\\nThat’s why it was so important to pass the Bipartisan Infrastructure Law—the most sweeping investment to rebuild America in history. \\n\\nThis was a bipartisan effort, and I want to thank the members of both parties who worked to make it happen. \\n\\nWe’re done talking about infrastructure weeks. \\n\\nWe’re going to have an infrastructure decade.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='It is going to transform America and put us on a path to win the economic competition of the 21st Century that we face with the rest of the world—particularly with China. \\n\\nAs I’ve told Xi Jinping, it is never a good bet to bet against the American people. \\n\\nWe’ll create good jobs for millions of Americans, modernizing roads, airports, ports, and waterways all across America. \\n\\nAnd we’ll do it all to withstand the devastating effects of the climate crisis and promote environmental justice. \\n\\nWe’ll build a national network of 500,000 electric vehicle charging stations, begin to replace poisonous lead pipes—so every child—and every American—has clean water to drink at home and at school, provide affordable high-speed internet for every American—urban, suburban, rural, and tribal communities. \\n\\n4,000 projects have already been announced. \\n\\nAnd tonight, I’m announcing that this year we will start fixing over 65,000 miles of highway and 1,500 bridges in disrepair.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db.similarity_search('What did the president say about Ketanji Brown Jackson', filter={'year': 2013})"
|
||||
]
|
||||
@@ -271,23 +151,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
|
||||
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='Tonight, I’m announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWe’ll also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLet’s pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLet’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2014})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db.similarity_search('What did the president say about Ketanji Brown Jackson?', distance_metric='cos')"
|
||||
]
|
||||
@@ -303,23 +169,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
|
||||
" Document(page_content='One was stationed at bases and breathing in toxic smoke from “burn pits” that incinerated wastes of war—medical and hazard material, jet fuel, and more. \\n\\nWhen they came home, many of the world’s fittest and best trained warriors were never the same. \\n\\nHeadaches. Numbness. Dizziness. \\n\\nA cancer that would put them in a flag-draped coffin. \\n\\nI know. \\n\\nOne of those soldiers was my son Major Beau Biden. \\n\\nWe don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. \\n\\nBut I’m committed to finding out everything we can. \\n\\nCommitted to military families like Danielle Robinson from Ohio. \\n\\nThe widow of Sergeant First Class Heath Robinson. \\n\\nHe was born a soldier. Army National Guard. Combat medic in Kosovo and Iraq. \\n\\nStationed near Baghdad, just yards from burn pits the size of football fields. \\n\\nHeath’s widow Danielle is here with us tonight. They loved going to Ohio State football games. He loved building Legos with their daughter.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2014}),\n",
|
||||
" Document(page_content='As Ohio Senator Sherrod Brown says, “It’s time to bury the label “Rust Belt.” \\n\\nIt’s time. \\n\\nBut with all the bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up with the bills. \\n\\nInflation is robbing them of the gains they might otherwise feel. \\n\\nI get it. That’s why my top priority is getting prices under control. \\n\\nLook, our economy roared back faster than most predicted, but the pandemic meant that businesses had a hard time hiring enough workers to keep up production in their factories. \\n\\nThe pandemic also disrupted global supply chains. \\n\\nWhen factories close, it takes longer to make goods and get them from the warehouse to the store, and prices go up. \\n\\nLook at cars. \\n\\nLast year, there weren’t enough semiconductors to make all the cars that people wanted to buy. \\n\\nAnd guess what, prices of automobiles went up. \\n\\nSo—we have a choice. \\n\\nOne way to fight inflation is to drive down wages and make Americans poorer.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
|
||||
" Document(page_content='We can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \\n\\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \\n\\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \\n\\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \\n\\nI’ve worked on these issues a long time. \\n\\nI know what works: Investing in crime preventionand community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db.max_marginal_relevance_search('What did the president say about Ketanji Brown Jackson?')"
|
||||
]
|
||||
@@ -335,87 +187,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ['ACTIVELOOP_TOKEN'] = getpass.getpass('Activeloop Token:')"
|
||||
"!activeloop login -t <token>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Your Deep Lake dataset has been successfully created!\n",
|
||||
"The dataset is private so make sure you are logged in!\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\\"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/linkedin\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"hub://davitbun/linkedin loaded successfully.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Evaluating ingest: 100%|██████████| 1/1 [00:23<00:00\n",
|
||||
"/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://davitbun/linkedin', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (42, 1536) float32 None \n",
|
||||
" ids text (42, 1) str None \n",
|
||||
" metadata json (42, 1) str None \n",
|
||||
" text text (42, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Embed and store the texts\n",
|
||||
"dataset_path = f\"hub://{USERNAME}/{DATASET_NAME}\" # could be also ./local/path (much faster locally), s3://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
|
||||
"dataset_path = \"hub://{username}/{dataset_name}\" # could be also ./local/path (much faster locally), s3://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
|
||||
"\n",
|
||||
"embedding = OpenAIEmbeddings()\n",
|
||||
"vectordb = DeepLake.from_documents(documents=docs, embedding=embedding, dataset_path=dataset_path)"
|
||||
@@ -423,23 +209,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = db.similarity_search(query)\n",
|
||||
@@ -448,35 +220,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://davitbun/linkedin', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (42, 1536) float32 None \n",
|
||||
" ids text (42, 1) str None \n",
|
||||
" metadata json (42, 1) str None \n",
|
||||
" text text (42, 1) str None \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vectordb.ds.summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = vectordb.ds.embedding.numpy()"
|
||||
"vectordb.ds.summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -484,7 +232,9 @@
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"embeddings = vectordb.ds.embedding.numpy()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -55,7 +55,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch = OpenSearchVectorSearch.from_documents(docs, embeddings, opensearch_url=\"http://localhost:9200\")\n",
|
||||
"docsearch = OpenSearchVectorSearch.from_texts(texts, embeddings, opensearch_url=\"http://localhost:9200\")\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
@@ -94,7 +94,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch = OpenSearchVectorSearch.from_documents(docs, embeddings, opensearch_url=\"http://localhost:9200\", engine=\"faiss\", space_type=\"innerproduct\", ef_construction=256, m=48)\n",
|
||||
"docsearch = OpenSearchVectorSearch.from_texts(texts, embeddings, opensearch_url=\"http://localhost:9200\", engine=\"faiss\", space_type=\"innerproduct\", ef_construction=256, m=48)\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
@@ -133,7 +133,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch = OpenSearchVectorSearch.from_documents(docs, embeddings, opensearch_url=\"http://localhost:9200\", is_appx_search=False)\n",
|
||||
"docsearch = OpenSearchVectorSearch.from_texts(texts, embeddings, opensearch_url=\"http://localhost:9200\", is_appx_search=False)\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(\"What did the president say about Ketanji Brown Jackson\", k=1, search_type=\"script_scoring\")"
|
||||
@@ -172,10 +172,10 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch = OpenSearchVectorSearch.from_documents(docs, embeddings, opensearch_url=\"http://localhost:9200\", is_appx_search=False)\n",
|
||||
"docsearch = OpenSearchVectorSearch.from_texts(texts, embeddings, opensearch_url=\"http://localhost:9200\", is_appx_search=False)\n",
|
||||
"filter = {\"bool\": {\"filter\": {\"term\": {\"text\": \"smuggling\"}}}}\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(\"What did the president say about Ketanji Brown Jackson\", search_type=\"painless_scripting\", space_type=\"cosineSimilarity\", pre_filter=filter)"
|
||||
"docs = docsearch.similarity_search(\"What did the president say about Ketanji Brown Jackson\", search_type=\"painless_scripting\", space_type=\"cosinesimil\", pre_filter=filter)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -238,4 +238,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,34 +1,32 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Redis\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the [Redis vector database](https://redis.com/solutions/use-cases/vector-database/)."
|
||||
"This notebook shows how to use functionality related to the Redis database."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores.redis import Redis"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
@@ -39,7 +37,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -48,7 +46,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -57,7 +55,7 @@
|
||||
"'link'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -68,7 +66,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -93,14 +91,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['doc:link:d7d02e3faf1b40bbbe29a683ff75b280']\n"
|
||||
"['doc:333eadf75bd74be393acafa8bca48669']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -110,7 +108,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -129,25 +127,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load from existing index\n",
|
||||
"#Query\n",
|
||||
"rds = Redis.from_existing_index(embeddings, redis_url=\"redis://localhost:6379\", index_name='link')\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
@@ -168,7 +152,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -177,7 +161,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -193,7 +177,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -202,13 +186,31 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Here we can see it doesn't return any results because there are no relevant documents\n",
|
||||
"retriever.get_relevant_documents(\"where did ankush go to college?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -227,7 +229,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -139,7 +139,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm_chain.predict(human_input=\"Not too bad - how are you?\")"
|
||||
"llm_chain.predict(human_input=\"Not to bad - how are you?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
"In order to add a memory with an external message store to an agent we are going to do the following steps:\n",
|
||||
"\n",
|
||||
"1. We are going to create a `RedisChatMessageHistory` to connect to an external database to store the messages in.\n",
|
||||
"2. We are going to create an `LLMChain` using that chat history as memory.\n",
|
||||
"2. We are going to create an `LLMChain` useing that chat history as memory.\n",
|
||||
"3. We are going to use that `LLMChain` to create a custom Agent.\n",
|
||||
"\n",
|
||||
"For the purposes of this exercise, we are going to create a simple custom Agent that has access to a search tool and utilizes the `ConversationBufferMemory` class."
|
||||
|
||||
@@ -1,196 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Motörhead Memory\n",
|
||||
"[Motörhead](https://github.com/getmetal/motorhead) is a memory server implemented in Rust. It automatically handles incremental summarization in the background and allows for stateless applications.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"See instructions at [Motörhead](https://github.com/getmetal/motorhead) for running the server locally.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory.motorhead_memory import MotorheadMemory\n",
|
||||
"from langchain import OpenAI, LLMChain, PromptTemplate\n",
|
||||
"\n",
|
||||
"template = \"\"\"You are a chatbot having a conversation with a human.\n",
|
||||
"\n",
|
||||
"{chat_history}\n",
|
||||
"Human: {human_input}\n",
|
||||
"AI:\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate(\n",
|
||||
" input_variables=[\"chat_history\", \"human_input\"], \n",
|
||||
" template=template\n",
|
||||
")\n",
|
||||
"memory = MotorheadMemory(\n",
|
||||
" session_id=\"testing-1\",\n",
|
||||
" url=\"http://localhost:8080\",\n",
|
||||
" memory_key=\"chat_history\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"await memory.init(); # loads previous state from Motörhead 🤘\n",
|
||||
"\n",
|
||||
"llm_chain = LLMChain(\n",
|
||||
" llm=OpenAI(), \n",
|
||||
" prompt=prompt, \n",
|
||||
" verbose=True, \n",
|
||||
" memory=memory,\n",
|
||||
")\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mYou are a chatbot having a conversation with a human.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Human: hi im bob\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' Hi Bob, nice to meet you! How are you doing today?'"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm_chain.run(\"hi im bob\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mYou are a chatbot having a conversation with a human.\n",
|
||||
"\n",
|
||||
"Human: hi im bob\n",
|
||||
"AI: Hi Bob, nice to meet you! How are you doing today?\n",
|
||||
"Human: whats my name?\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' You said your name is Bob. Is that correct?'"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm_chain.run(\"whats my name?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mYou are a chatbot having a conversation with a human.\n",
|
||||
"\n",
|
||||
"Human: hi im bob\n",
|
||||
"AI: Hi Bob, nice to meet you! How are you doing today?\n",
|
||||
"Human: whats my name?\n",
|
||||
"AI: You said your name is Bob. Is that correct?\n",
|
||||
"Human: whats for dinner?\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" I'm sorry, I'm not sure what you're asking. Could you please rephrase your question?\""
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm_chain.run(\"whats for dinner?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "91c6a7ef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Postgres Chat Message History\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use Postgres to store chat message history."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d15e3302",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import PostgresChatMessageHistory\n",
|
||||
"\n",
|
||||
"history = PostgresChatMessageHistory(connection_string=\"postgresql://postgres:mypassword@localhost/chat_history\", session_id=\"foo\")\n",
|
||||
"\n",
|
||||
"history.add_user_message(\"hi!\")\n",
|
||||
"\n",
|
||||
"history.add_ai_message(\"whats up?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64fc465e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"history.messages"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -314,7 +314,7 @@
|
||||
"source": [
|
||||
"## Saving Message History\n",
|
||||
"\n",
|
||||
"You may often have to save messages, and then load them to use again. This can be done easily by first converting the messages to normal python dictionaries, saving those (as json or something) and then loading those. Here is an example of doing that."
|
||||
"You may often to save messages, and then load them to use again. This can be done easily by first converting the messages to normal python dictionaries, saving those (as json or something) and then loading those. Here is an example of doing that."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -32,8 +32,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=10)\n",
|
||||
"memory.save_context({\"input\": \"hi\"}, {\"output\": \"whats up\"})\n",
|
||||
"memory.save_context({\"input\": \"not much you\"}, {\"output\": \"not much\"})"
|
||||
"memory.save_context({\"input\": \"hi\"}, {\"ouput\": \"whats up\"})\n",
|
||||
"memory.save_context({\"input\": \"not much you\"}, {\"ouput\": \"not much\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -73,8 +73,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=10, return_messages=True)\n",
|
||||
"memory.save_context({\"input\": \"hi\"}, {\"output\": \"whats up\"})\n",
|
||||
"memory.save_context({\"input\": \"not much you\"}, {\"output\": \"not much\"})"
|
||||
"memory.save_context({\"input\": \"hi\"}, {\"ouput\": \"whats up\"})\n",
|
||||
"memory.save_context({\"input\": \"not much you\"}, {\"ouput\": \"not much\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,368 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ff4be5f3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# VectorStore-Backed Memory\n",
|
||||
"\n",
|
||||
"`VectorStoreRetrieverMemory` stores memories in a VectorDB and queries the top-K most \"salient\" docs every time it is called.\n",
|
||||
"\n",
|
||||
"This differs from most of the other Memory classes in that it doesn't explicitly track the order of interactions.\n",
|
||||
"\n",
|
||||
"In this case, the \"docs\" are previous conversation snippets. This can be useful to refer to relevant pieces of information that the AI was told earlier in the conversation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "da3384db",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.memory import VectorStoreRetrieverMemory\n",
|
||||
"from langchain.chains import ConversationChain\n",
|
||||
"from langchain.prompts import PromptTemplate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c2e7abdf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Initialize your VectorStore\n",
|
||||
"\n",
|
||||
"Depending on the store you choose, this step may look different. Consult the relevant VectorStore documentation for more details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "eef56f65",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import faiss\n",
|
||||
"\n",
|
||||
"from langchain.docstore import InMemoryDocstore\n",
|
||||
"from langchain.vectorstores import FAISS\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"embedding_size = 1536 # Dimensions of the OpenAIEmbeddings\n",
|
||||
"index = faiss.IndexFlatL2(embedding_size)\n",
|
||||
"embedding_fn = OpenAIEmbeddings().embed_query\n",
|
||||
"vectorstore = FAISS(embedding_fn, index, InMemoryDocstore({}), {})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8f4bdf92",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create your the VectorStoreRetrieverMemory\n",
|
||||
"\n",
|
||||
"The memory object is instantiated from any VectorStoreRetriever."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "e00d4938",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# In actual usage, you would set `k` to be a higher value, but we use k=1 to show that\n",
|
||||
"# the vector lookup still returns the semantically relevant information\n",
|
||||
"retriever = vectorstore.as_retriever(search_kwargs=dict(k=1))\n",
|
||||
"memory = VectorStoreRetrieverMemory(retriever=retriever)\n",
|
||||
"\n",
|
||||
"# When added to an agent, the memory object can save pertinent information from conversations or used tools\n",
|
||||
"memory.save_context({\"input\": \"My favorite food is pizza\"}, {\"output\": \"thats good to know\"})\n",
|
||||
"memory.save_context({\"input\": \"My favorite sport is soccer\"}, {\"output\": \"...\"})\n",
|
||||
"memory.save_context({\"input\": \"I don't the Celtics\"}, {\"output\": \"ok\"}) # "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "2fe28a28",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"input: My favorite sport is soccer\n",
|
||||
"output: ...\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Notice the first result returned is the memory pertaining to tax help, which the language model deems more semantically relevant\n",
|
||||
"# to a 1099 than the other documents, despite them both containing numbers.\n",
|
||||
"print(memory.load_memory_variables({\"prompt\": \"what sport should i watch?\"})[\"history\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a6d2569f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using in a chain\n",
|
||||
"Let's walk through an example, again setting `verbose=True` so we can see the prompt."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "ebd68c10",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Relevant pieces of previous conversation:\n",
|
||||
"input: My favorite food is pizza\n",
|
||||
"output: thats good to know\n",
|
||||
"\n",
|
||||
"(You do not need to use these pieces of information if not relevant)\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"Human: Hi, my name is Perry, what's up?\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" Hi Perry, I'm doing well. How about you?\""
|
||||
]
|
||||
},
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0) # Can be any valid LLM\n",
|
||||
"_DEFAULT_TEMPLATE = \"\"\"The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Relevant pieces of previous conversation:\n",
|
||||
"{history}\n",
|
||||
"\n",
|
||||
"(You do not need to use these pieces of information if not relevant)\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"Human: {input}\n",
|
||||
"AI:\"\"\"\n",
|
||||
"PROMPT = PromptTemplate(\n",
|
||||
" input_variables=[\"history\", \"input\"], template=_DEFAULT_TEMPLATE\n",
|
||||
")\n",
|
||||
"conversation_with_summary = ConversationChain(\n",
|
||||
" llm=llm, \n",
|
||||
" prompt=PROMPT,\n",
|
||||
" # We set a very low max_token_limit for the purposes of testing.\n",
|
||||
" memory=memory,\n",
|
||||
" verbose=True\n",
|
||||
")\n",
|
||||
"conversation_with_summary.predict(input=\"Hi, my name is Perry, what's up?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "86207a61",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Relevant pieces of previous conversation:\n",
|
||||
"input: My favorite sport is soccer\n",
|
||||
"output: ...\n",
|
||||
"\n",
|
||||
"(You do not need to use these pieces of information if not relevant)\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"Human: what's my favorite sport?\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' You told me earlier that your favorite sport is soccer.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Here, the basketball related content is surfaced\n",
|
||||
"conversation_with_summary.predict(input=\"what's my favorite sport?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"id": "8c669db1",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Relevant pieces of previous conversation:\n",
|
||||
"input: My favorite food is pizza\n",
|
||||
"output: thats good to know\n",
|
||||
"\n",
|
||||
"(You do not need to use these pieces of information if not relevant)\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"Human: Whats my favorite food\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' You said your favorite food is pizza.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Even though the language model is stateless, since relavent memory is fetched, it can \"reason\" about the time.\n",
|
||||
"# Timestamping memories and data is useful in general to let the agent determine temporal relevance\n",
|
||||
"conversation_with_summary.predict(input=\"Whats my favorite food\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"id": "8c09a239",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Relevant pieces of previous conversation:\n",
|
||||
"input: Hi, my name is Perry, what's up?\n",
|
||||
"response: Hi Perry, I'm doing well. How about you?\n",
|
||||
"\n",
|
||||
"(You do not need to use these pieces of information if not relevant)\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"Human: What's my name?\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' Your name is Perry.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# The memories from the conversation are automatically stored,\n",
|
||||
"# since this query best matches the introduction chat above,\n",
|
||||
"# the agent is able to 'remember' the user's name.\n",
|
||||
"conversation_with_summary.predict(input=\"What's my name?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "df27c7dc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -115,7 +115,7 @@
|
||||
"id": "a2d76826",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**The above request should now appear on your [PromptLayer dashboard](https://www.promptlayer.com).**"
|
||||
"**The above request should now appear on your [PromptLayer dashboard](https://ww.promptlayer.com).**"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -60,14 +60,14 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 14.2 ms, sys: 4.9 ms, total: 19.1 ms\n",
|
||||
"Wall time: 1.1 s\n"
|
||||
"CPU times: user 30.7 ms, sys: 18.6 ms, total: 49.3 ms\n",
|
||||
"Wall time: 791 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
"\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was...two tired!\""
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
@@ -91,14 +91,14 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 162 µs, sys: 7 µs, total: 169 µs\n",
|
||||
"Wall time: 175 µs\n"
|
||||
"CPU times: user 80 µs, sys: 0 ns, total: 80 µs\n",
|
||||
"Wall time: 83.9 µs\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
"\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was...two tired!\""
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
@@ -252,249 +252,6 @@
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "684eab55",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## GPTCache\n",
|
||||
"\n",
|
||||
"We can use [GPTCache](https://github.com/zilliztech/GPTCache) for exact match caching OR to cache results based on semantic similarity\n",
|
||||
"\n",
|
||||
"Let's first start with an example of exact match"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "14a82124",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import gptcache\n",
|
||||
"from gptcache.processor.pre import get_prompt\n",
|
||||
"from gptcache.manager.factory import get_data_manager\n",
|
||||
"from langchain.cache import GPTCache\n",
|
||||
"\n",
|
||||
"# Avoid multiple caches using the same file, causing different llm model caches to affect each other\n",
|
||||
"i = 0\n",
|
||||
"file_prefix = \"data_map\"\n",
|
||||
"\n",
|
||||
"def init_gptcache_map(cache_obj: gptcache.Cache):\n",
|
||||
" global i\n",
|
||||
" cache_path = f'{file_prefix}_{i}.txt'\n",
|
||||
" cache_obj.init(\n",
|
||||
" pre_embedding_func=get_prompt,\n",
|
||||
" data_manager=get_data_manager(data_path=cache_path),\n",
|
||||
" )\n",
|
||||
" i += 1\n",
|
||||
"\n",
|
||||
"langchain.llm_cache = GPTCache(init_gptcache_map)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "9e4ecfd1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 8.6 ms, sys: 3.82 ms, total: 12.4 ms\n",
|
||||
"Wall time: 881 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# The first time, it is not yet in cache, so it should take longer\n",
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "c98bbe3b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 286 µs, sys: 21 µs, total: 307 µs\n",
|
||||
"Wall time: 316 µs\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# The second time it is, so it goes faster\n",
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "502b6076",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's now show an example of similarity caching"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "b3c663bb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import gptcache\n",
|
||||
"from gptcache.processor.pre import get_prompt\n",
|
||||
"from gptcache.manager.factory import get_data_manager\n",
|
||||
"from langchain.cache import GPTCache\n",
|
||||
"from gptcache.manager import get_data_manager, CacheBase, VectorBase\n",
|
||||
"from gptcache import Cache\n",
|
||||
"from gptcache.embedding import Onnx\n",
|
||||
"from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation\n",
|
||||
"\n",
|
||||
"# Avoid multiple caches using the same file, causing different llm model caches to affect each other\n",
|
||||
"i = 0\n",
|
||||
"file_prefix = \"data_map\"\n",
|
||||
"llm_cache = Cache()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def init_gptcache_map(cache_obj: gptcache.Cache):\n",
|
||||
" global i\n",
|
||||
" cache_path = f'{file_prefix}_{i}.txt'\n",
|
||||
" onnx = Onnx()\n",
|
||||
" cache_base = CacheBase('sqlite')\n",
|
||||
" vector_base = VectorBase('faiss', dimension=onnx.dimension)\n",
|
||||
" data_manager = get_data_manager(cache_base, vector_base, max_size=10, clean_size=2)\n",
|
||||
" cache_obj.init(\n",
|
||||
" pre_embedding_func=get_prompt,\n",
|
||||
" embedding_func=onnx.to_embeddings,\n",
|
||||
" data_manager=data_manager,\n",
|
||||
" similarity_evaluation=SearchDistanceEvaluation(),\n",
|
||||
" )\n",
|
||||
" i += 1\n",
|
||||
"\n",
|
||||
"langchain.llm_cache = GPTCache(init_gptcache_map)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "8c273ced",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 1.01 s, sys: 153 ms, total: 1.16 s\n",
|
||||
"Wall time: 2.49 s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# The first time, it is not yet in cache, so it should take longer\n",
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "93e21a5f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 745 ms, sys: 13.2 ms, total: 758 ms\n",
|
||||
"Wall time: 136 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# This is an exact match, so it finds it in the cache\n",
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "c4bb024b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 737 ms, sys: 7.79 ms, total: 745 ms\n",
|
||||
"Wall time: 135 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# This is not an exact match, but semantically within distance so it hits!\n",
|
||||
"llm(\"Tell me joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "934943dc",
|
||||
|
||||
@@ -43,18 +43,22 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tokens Used: 42\n",
|
||||
"\tPrompt Tokens: 4\n",
|
||||
"\tCompletion Tokens: 38\n",
|
||||
"Total Tokens: 39\n",
|
||||
"Prompt Tokens: 4\n",
|
||||
"Completion Tokens: 35\n",
|
||||
"Successful Requests: 1\n",
|
||||
"Total Cost (USD): $0.00084\n"
|
||||
"Total Cost (USD): $0.0007800000000000001\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" result = llm(\"Tell me a joke\")\n",
|
||||
" print(cb)"
|
||||
" print(f\"Total Tokens: {cb.total_tokens}\")\n",
|
||||
" print(f\"Prompt Tokens: {cb.prompt_tokens}\")\n",
|
||||
" print(f\"Completion Tokens: {cb.completion_tokens}\")\n",
|
||||
" print(f\"Successful Requests: {cb.successful_requests}\")\n",
|
||||
" print(f\"Total Cost (USD): ${cb.total_cost}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -186,7 +186,7 @@
|
||||
"source": [
|
||||
"**Number of Tokens:** You can also estimate how many tokens a piece of text will be in that model. This is useful because models have a context length (and cost more for more tokens), which means you need to be aware of how long the text you are passing in is.\n",
|
||||
"\n",
|
||||
"Notice that by default the tokens are estimated using [tiktoken](https://github.com/openai/tiktoken) (except for legacy version <3.8, where a Hugging Face tokenizer is used)"
|
||||
"Notice that by default the tokens are estimated using a HuggingFace tokenizer."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# GPT4All\n",
|
||||
"# GPT4all\n",
|
||||
"\n",
|
||||
"This example goes over how to use LangChain to interact with GPT4All models"
|
||||
]
|
||||
@@ -15,7 +15,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install pyllamacpp > /dev/null"
|
||||
"!pip install pyllamacpp"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -24,10 +24,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate, LLMChain\n",
|
||||
"from langchain.llms import GPT4All\n",
|
||||
"from langchain.callbacks.base import CallbackManager\n",
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler"
|
||||
"from langchain import PromptTemplate, LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -43,70 +41,15 @@
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Specify Model\n",
|
||||
"\n",
|
||||
"To run locally, download a compatible ggml-formatted model. For more info, visit https://github.com/nomic-ai/pyllamacpp\n",
|
||||
"\n",
|
||||
"Note that new models are uploaded regularly - check the link above for the most recent `.bin` URL"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_path = './models/gpt4all-lora-quantized-ggml.bin' # replace with your desired local file path"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Uncomment the below block to download a model. You may want to update `url` to a new version."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import requests\n",
|
||||
"\n",
|
||||
"# from pathlib import Path\n",
|
||||
"# from tqdm import tqdm\n",
|
||||
"\n",
|
||||
"# Path(local_path).parent.mkdir(parents=True, exist_ok=True)\n",
|
||||
"\n",
|
||||
"# # Example model. Check https://github.com/nomic-ai/pyllamacpp for the latest models.\n",
|
||||
"# url = 'https://the-eye.eu/public/AI/models/nomic-ai/gpt4all/gpt4all-lora-quantized-ggml.bin'\n",
|
||||
"\n",
|
||||
"# # send a GET request to the URL to download the file. Stream since it's large\n",
|
||||
"# response = requests.get(url, stream=True)\n",
|
||||
"\n",
|
||||
"# # open the file in binary mode and write the contents of the response to it in chunks\n",
|
||||
"# # This is a large file, so be prepared to wait.\n",
|
||||
"# with open(local_path, 'wb') as f:\n",
|
||||
"# for chunk in tqdm(response.iter_content(chunk_size=8192)):\n",
|
||||
"# if chunk:\n",
|
||||
"# f.write(chunk)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Callbacks support token-wise streaming\n",
|
||||
"callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])\n",
|
||||
"# Verbose is required to pass to the callback manager\n",
|
||||
"llm = GPT4All(model=local_path, callback_manager=callback_manager, verbose=True)"
|
||||
"# You'll need to download a compatible model and convert it to ggml.\n",
|
||||
"# See: https://github.com/nomic-ai/gpt4all for more information.\n",
|
||||
"llm = GPT4All(model=\"./models/gpt4all-model.bin\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -146,7 +89,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"execution_count": 3,
|
||||
"id": "3acf0069",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -20,7 +20,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The FIFA World Cup is a football tournament that is played every 4 years. The year 1994 was the 44th FIFA World Cup. The final answer: Brazil.\n"
|
||||
"The Seattle Seahawks won the Super Bowl in 2010. Justin Beiber was born in 2010. The final answer: Seattle Seahawks.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -33,7 +33,7 @@
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=HuggingFaceHub(repo_id=\"google/flan-t5-xl\", model_kwargs={\"temperature\":0, \"max_length\":64}))\n",
|
||||
"\n",
|
||||
"question = \"Who won the FIFA World Cup in the year 1994? \"\n",
|
||||
"question = \"What NFL team won the Super Bowl in the year Justin Beiber was born?\"\n",
|
||||
"\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
@@ -41,7 +41,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "843a3837",
|
||||
"id": "ae4559c7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -63,7 +63,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.12"
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -32,8 +32,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings(model=\"your-embeddings-deployment-name\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
"\n",
|
||||
"And then one optional one:\n",
|
||||
"\n",
|
||||
"- `parse_with_prompt(str, PromptValue) -> Any`: A method which takes in a string (assumed to be the response from a language model) and a prompt (assumed to the prompt that generated such a response) and parses it into some structure. The prompt is largely provided in the event the OutputParser wants to retry or fix the output in some way, and needs information from the prompt to do so.\n",
|
||||
"- `parse_with_prompt(str) -> Any`: A method which takes in a string (assumed to be the response from a language model) and a prompt (assumed to the prompt that generated such a response) and parses it into some structure. The prompt is largely provided in the event the OutputParser wants to retry or fix the output in some way, and needs information from the prompt to do so.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Below we go over the main type of output parser, the `PydanticOutputParser`. See the `examples` folder for other options."
|
||||
|
||||
@@ -55,12 +55,6 @@ The following use cases require specific installs and api keys:
|
||||
- _LlamaCpp_:
|
||||
- Install requirements with `pip install llama-cpp-python`
|
||||
- Download model and convert following [llama.cpp instructions](https://github.com/ggerganov/llama.cpp)
|
||||
- _Milvus_:
|
||||
- Install requirements with `pip install pymilvus`
|
||||
- In order to setup a local cluster, take a look [here](https://milvus.io/docs).
|
||||
- _Zilliz_:
|
||||
- Install requirements with `pip install pymilvus`
|
||||
- To get up and running, take a look [here](https://zilliz.com/doc/quick_start).
|
||||
|
||||
|
||||
If you are using the `NLTKTextSplitter` or the `SpacyTextSplitter`, you will also need to install the appropriate models. For example, if you want to use the `SpacyTextSplitter`, you will need to install the `en_core_web_sm` model with `python -m spacy download en_core_web_sm`. Similarly, if you want to use the `NLTKTextSplitter`, you will need to install the `punkt` model with `python -m nltk.downloader punkt`.
|
||||
|
||||
@@ -1,565 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "517a9fd4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# BabyAGI User Guide\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how to implement [BabyAGI](https://github.com/yoheinakajima/babyagi/tree/main) by [Yohei Nakajima](https://twitter.com/yoheinakajima). BabyAGI is an AI agent that can generate and pretend to execute tasks based on a given objective.\n",
|
||||
"\n",
|
||||
"This guide will help you understand the components to create your own recursive agents.\n",
|
||||
"\n",
|
||||
"Although BabyAGI uses specific vectorstores/model providers (Pinecone, OpenAI), one of the benefits of implementing it with LangChain is that you can easily swap those out for different options. In this implementation we use a FAISS vectorstore (because it runs locally and is free)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "556af556",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Install and Import Required Modules"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 116,
|
||||
"id": "c8a354b6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from collections import deque\n",
|
||||
"from typing import Dict, List, Optional, Any\n",
|
||||
"\n",
|
||||
"from langchain import LLMChain, OpenAI, PromptTemplate\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.llms import BaseLLM\n",
|
||||
"from langchain.vectorstores.base import VectorStore\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"from langchain.chains.base import Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09f70772",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Connect to the Vector Store\n",
|
||||
"\n",
|
||||
"Depending on what vectorstore you use, this step may look different."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 71,
|
||||
"id": "794045d4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.vectorstores import FAISS\n",
|
||||
"from langchain.docstore import InMemoryDocstore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"id": "6e0305eb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define your embedding model\n",
|
||||
"embeddings_model = OpenAIEmbeddings()\n",
|
||||
"# Initialize the vectorstore as empty\n",
|
||||
"import faiss\n",
|
||||
"\n",
|
||||
"embedding_size = 1536\n",
|
||||
"index = faiss.IndexFlatL2(embedding_size)\n",
|
||||
"vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0f3b72bf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Define the Chains\n",
|
||||
"\n",
|
||||
"BabyAGI relies on three LLM chains:\n",
|
||||
"- Task creation chain to select new tasks to add to the list\n",
|
||||
"- Task prioritization chain to re-prioritize tasks\n",
|
||||
"- Execution Chain to execute the tasks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"id": "bf4bd5cd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class TaskCreationChain(LLMChain):\n",
|
||||
" \"\"\"Chain to generates tasks.\"\"\"\n",
|
||||
"\n",
|
||||
" @classmethod\n",
|
||||
" def from_llm(cls, llm: BaseLLM, verbose: bool = True) -> LLMChain:\n",
|
||||
" \"\"\"Get the response parser.\"\"\"\n",
|
||||
" task_creation_template = (\n",
|
||||
" \"You are a task creation AI that uses the result of an execution agent\"\n",
|
||||
" \" to create new tasks with the following objective: {objective},\"\n",
|
||||
" \" The last completed task has the result: {result}.\"\n",
|
||||
" \" This result was based on this task description: {task_description}.\"\n",
|
||||
" \" These are incomplete tasks: {incomplete_tasks}.\"\n",
|
||||
" \" Based on the result, create new tasks to be completed\"\n",
|
||||
" \" by the AI system that do not overlap with incomplete tasks.\"\n",
|
||||
" \" Return the tasks as an array.\"\n",
|
||||
" )\n",
|
||||
" prompt = PromptTemplate(\n",
|
||||
" template=task_creation_template,\n",
|
||||
" input_variables=[\n",
|
||||
" \"result\",\n",
|
||||
" \"task_description\",\n",
|
||||
" \"incomplete_tasks\",\n",
|
||||
" \"objective\",\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 74,
|
||||
"id": "b6488ffe",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class TaskPrioritizationChain(LLMChain):\n",
|
||||
" \"\"\"Chain to prioritize tasks.\"\"\"\n",
|
||||
"\n",
|
||||
" @classmethod\n",
|
||||
" def from_llm(cls, llm: BaseLLM, verbose: bool = True) -> LLMChain:\n",
|
||||
" \"\"\"Get the response parser.\"\"\"\n",
|
||||
" task_prioritization_template = (\n",
|
||||
" \"You are a task prioritization AI tasked with cleaning the formatting of and reprioritizing\"\n",
|
||||
" \" the following tasks: {task_names}.\"\n",
|
||||
" \" Consider the ultimate objective of your team: {objective}.\"\n",
|
||||
" \" Do not remove any tasks. Return the result as a numbered list, like:\"\n",
|
||||
" \" #. First task\"\n",
|
||||
" \" #. Second task\"\n",
|
||||
" \" Start the task list with number {next_task_id}.\"\n",
|
||||
" )\n",
|
||||
" prompt = PromptTemplate(\n",
|
||||
" template=task_prioritization_template,\n",
|
||||
" input_variables=[\"task_names\", \"next_task_id\", \"objective\"],\n",
|
||||
" )\n",
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 84,
|
||||
"id": "b43cd580",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class ExecutionChain(LLMChain):\n",
|
||||
" \"\"\"Chain to execute tasks.\"\"\"\n",
|
||||
"\n",
|
||||
" @classmethod\n",
|
||||
" def from_llm(cls, llm: BaseLLM, verbose: bool = True) -> LLMChain:\n",
|
||||
" \"\"\"Get the response parser.\"\"\"\n",
|
||||
" execution_template = (\n",
|
||||
" \"You are an AI who performs one task based on the following objective: {objective}.\"\n",
|
||||
" \" Take into account these previously completed tasks: {context}.\"\n",
|
||||
" \" Your task: {task}.\"\n",
|
||||
" \" Response:\"\n",
|
||||
" )\n",
|
||||
" prompt = PromptTemplate(\n",
|
||||
" template=execution_template,\n",
|
||||
" input_variables=[\"objective\", \"context\", \"task\"],\n",
|
||||
" )\n",
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3ad996c5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Define the BabyAGI Controller\n",
|
||||
"\n",
|
||||
"BabyAGI composes the chains defined above in a (potentially-)infinite loop."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 85,
|
||||
"id": "0ada0636",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_next_task(\n",
|
||||
" task_creation_chain: LLMChain,\n",
|
||||
" result: Dict,\n",
|
||||
" task_description: str,\n",
|
||||
" task_list: List[str],\n",
|
||||
" objective: str,\n",
|
||||
") -> List[Dict]:\n",
|
||||
" \"\"\"Get the next task.\"\"\"\n",
|
||||
" incomplete_tasks = \", \".join(task_list)\n",
|
||||
" response = task_creation_chain.run(\n",
|
||||
" result=result,\n",
|
||||
" task_description=task_description,\n",
|
||||
" incomplete_tasks=incomplete_tasks,\n",
|
||||
" objective=objective,\n",
|
||||
" )\n",
|
||||
" new_tasks = response.split(\"\\n\")\n",
|
||||
" return [{\"task_name\": task_name} for task_name in new_tasks if task_name.strip()]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 86,
|
||||
"id": "d35250ad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def prioritize_tasks(\n",
|
||||
" task_prioritization_chain: LLMChain,\n",
|
||||
" this_task_id: int,\n",
|
||||
" task_list: List[Dict],\n",
|
||||
" objective: str,\n",
|
||||
") -> List[Dict]:\n",
|
||||
" \"\"\"Prioritize tasks.\"\"\"\n",
|
||||
" task_names = [t[\"task_name\"] for t in task_list]\n",
|
||||
" next_task_id = int(this_task_id) + 1\n",
|
||||
" response = task_prioritization_chain.run(\n",
|
||||
" task_names=task_names, next_task_id=next_task_id, objective=objective\n",
|
||||
" )\n",
|
||||
" new_tasks = response.split(\"\\n\")\n",
|
||||
" prioritized_task_list = []\n",
|
||||
" for task_string in new_tasks:\n",
|
||||
" if not task_string.strip():\n",
|
||||
" continue\n",
|
||||
" task_parts = task_string.strip().split(\".\", 1)\n",
|
||||
" if len(task_parts) == 2:\n",
|
||||
" task_id = task_parts[0].strip()\n",
|
||||
" task_name = task_parts[1].strip()\n",
|
||||
" prioritized_task_list.append({\"task_id\": task_id, \"task_name\": task_name})\n",
|
||||
" return prioritized_task_list"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 87,
|
||||
"id": "e3f1840c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def _get_top_tasks(vectorstore, query: str, k: int) -> List[str]:\n",
|
||||
" \"\"\"Get the top k tasks based on the query.\"\"\"\n",
|
||||
" results = vectorstore.similarity_search_with_score(query, k=k)\n",
|
||||
" if not results:\n",
|
||||
" return []\n",
|
||||
" sorted_results, _ = zip(*sorted(results, key=lambda x: x[1], reverse=True))\n",
|
||||
" return [str(item.metadata[\"task\"]) for item in sorted_results]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def execute_task(\n",
|
||||
" vectorstore, execution_chain: LLMChain, objective: str, task: str, k: int = 5\n",
|
||||
") -> str:\n",
|
||||
" \"\"\"Execute a task.\"\"\"\n",
|
||||
" context = _get_top_tasks(vectorstore, query=objective, k=k)\n",
|
||||
" return execution_chain.run(objective=objective, context=context, task=task)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 137,
|
||||
"id": "1e978938",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class BabyAGI(Chain, BaseModel):\n",
|
||||
" \"\"\"Controller model for the BabyAGI agent.\"\"\"\n",
|
||||
"\n",
|
||||
" task_list: deque = Field(default_factory=deque)\n",
|
||||
" task_creation_chain: TaskCreationChain = Field(...)\n",
|
||||
" task_prioritization_chain: TaskPrioritizationChain = Field(...)\n",
|
||||
" execution_chain: ExecutionChain = Field(...)\n",
|
||||
" task_id_counter: int = Field(1)\n",
|
||||
" vectorstore: VectorStore = Field(init=False)\n",
|
||||
" max_iterations: Optional[int] = None\n",
|
||||
"\n",
|
||||
" class Config:\n",
|
||||
" \"\"\"Configuration for this pydantic object.\"\"\"\n",
|
||||
"\n",
|
||||
" arbitrary_types_allowed = True\n",
|
||||
"\n",
|
||||
" def add_task(self, task: Dict):\n",
|
||||
" self.task_list.append(task)\n",
|
||||
"\n",
|
||||
" def print_task_list(self):\n",
|
||||
" print(\"\\033[95m\\033[1m\" + \"\\n*****TASK LIST*****\\n\" + \"\\033[0m\\033[0m\")\n",
|
||||
" for t in self.task_list:\n",
|
||||
" print(str(t[\"task_id\"]) + \": \" + t[\"task_name\"])\n",
|
||||
"\n",
|
||||
" def print_next_task(self, task: Dict):\n",
|
||||
" print(\"\\033[92m\\033[1m\" + \"\\n*****NEXT TASK*****\\n\" + \"\\033[0m\\033[0m\")\n",
|
||||
" print(str(task[\"task_id\"]) + \": \" + task[\"task_name\"])\n",
|
||||
"\n",
|
||||
" def print_task_result(self, result: str):\n",
|
||||
" print(\"\\033[93m\\033[1m\" + \"\\n*****TASK RESULT*****\\n\" + \"\\033[0m\\033[0m\")\n",
|
||||
" print(result)\n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def input_keys(self) -> List[str]:\n",
|
||||
" return [\"objective\"]\n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def output_keys(self) -> List[str]:\n",
|
||||
" return []\n",
|
||||
"\n",
|
||||
" def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]:\n",
|
||||
" \"\"\"Run the agent.\"\"\"\n",
|
||||
" objective = inputs[\"objective\"]\n",
|
||||
" first_task = inputs.get(\"first_task\", \"Make a todo list\")\n",
|
||||
" self.add_task({\"task_id\": 1, \"task_name\": first_task})\n",
|
||||
" num_iters = 0\n",
|
||||
" while True:\n",
|
||||
" if self.task_list:\n",
|
||||
" self.print_task_list()\n",
|
||||
"\n",
|
||||
" # Step 1: Pull the first task\n",
|
||||
" task = self.task_list.popleft()\n",
|
||||
" self.print_next_task(task)\n",
|
||||
"\n",
|
||||
" # Step 2: Execute the task\n",
|
||||
" result = execute_task(\n",
|
||||
" self.vectorstore, self.execution_chain, objective, task[\"task_name\"]\n",
|
||||
" )\n",
|
||||
" this_task_id = int(task[\"task_id\"])\n",
|
||||
" self.print_task_result(result)\n",
|
||||
"\n",
|
||||
" # Step 3: Store the result in Pinecone\n",
|
||||
" result_id = f\"result_{task['task_id']}\"\n",
|
||||
" self.vectorstore.add_texts(\n",
|
||||
" texts=[result],\n",
|
||||
" metadatas=[{\"task\": task[\"task_name\"]}],\n",
|
||||
" ids=[result_id],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Step 4: Create new tasks and reprioritize task list\n",
|
||||
" new_tasks = get_next_task(\n",
|
||||
" self.task_creation_chain,\n",
|
||||
" result,\n",
|
||||
" task[\"task_name\"],\n",
|
||||
" [t[\"task_name\"] for t in self.task_list],\n",
|
||||
" objective,\n",
|
||||
" )\n",
|
||||
" for new_task in new_tasks:\n",
|
||||
" self.task_id_counter += 1\n",
|
||||
" new_task.update({\"task_id\": self.task_id_counter})\n",
|
||||
" self.add_task(new_task)\n",
|
||||
" self.task_list = deque(\n",
|
||||
" prioritize_tasks(\n",
|
||||
" self.task_prioritization_chain,\n",
|
||||
" this_task_id,\n",
|
||||
" list(self.task_list),\n",
|
||||
" objective,\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" num_iters += 1\n",
|
||||
" if self.max_iterations is not None and num_iters == self.max_iterations:\n",
|
||||
" print(\n",
|
||||
" \"\\033[91m\\033[1m\" + \"\\n*****TASK ENDING*****\\n\" + \"\\033[0m\\033[0m\"\n",
|
||||
" )\n",
|
||||
" break\n",
|
||||
" return {}\n",
|
||||
"\n",
|
||||
" @classmethod\n",
|
||||
" def from_llm(\n",
|
||||
" cls, llm: BaseLLM, vectorstore: VectorStore, verbose: bool = False, **kwargs\n",
|
||||
" ) -> \"BabyAGI\":\n",
|
||||
" \"\"\"Initialize the BabyAGI Controller.\"\"\"\n",
|
||||
" task_creation_chain = TaskCreationChain.from_llm(llm, verbose=verbose)\n",
|
||||
" task_prioritization_chain = TaskPrioritizationChain.from_llm(\n",
|
||||
" llm, verbose=verbose\n",
|
||||
" )\n",
|
||||
" execution_chain = ExecutionChain.from_llm(llm, verbose=verbose)\n",
|
||||
" return cls(\n",
|
||||
" task_creation_chain=task_creation_chain,\n",
|
||||
" task_prioritization_chain=task_prioritization_chain,\n",
|
||||
" execution_chain=execution_chain,\n",
|
||||
" vectorstore=vectorstore,\n",
|
||||
" **kwargs,\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "05ba762e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Run the BabyAGI\n",
|
||||
"\n",
|
||||
"Now it's time to create the BabyAGI controller and watch it try to accomplish your objective."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 138,
|
||||
"id": "3d220b69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"OBJECTIVE = \"Write a weather report for SF today\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 139,
|
||||
"id": "8a8e5543",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 140,
|
||||
"id": "3d69899b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Logging of LLMChains\n",
|
||||
"verbose = False\n",
|
||||
"# If None, will keep on going forever\n",
|
||||
"max_iterations: Optional[int] = 3\n",
|
||||
"baby_agi = BabyAGI.from_llm(\n",
|
||||
" llm=llm, vectorstore=vectorstore, verbose=verbose, max_iterations=max_iterations\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 141,
|
||||
"id": "f7957b51",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[95m\u001b[1m\n",
|
||||
"*****TASK LIST*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"1: Make a todo list\n",
|
||||
"\u001b[92m\u001b[1m\n",
|
||||
"*****NEXT TASK*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"1: Make a todo list\n",
|
||||
"\u001b[93m\u001b[1m\n",
|
||||
"*****TASK RESULT*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"1. Check the temperature range for the day.\n",
|
||||
"2. Gather temperature data for SF today.\n",
|
||||
"3. Analyze the temperature data and create a weather report.\n",
|
||||
"4. Publish the weather report.\n",
|
||||
"\u001b[95m\u001b[1m\n",
|
||||
"*****TASK LIST*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"2: Gather data on the expected temperature range for the day.\n",
|
||||
"3: Collect data on the expected precipitation for the day.\n",
|
||||
"4: Analyze the data and create a weather report.\n",
|
||||
"5: Check the current weather conditions in SF.\n",
|
||||
"6: Publish the weather report.\n",
|
||||
"\u001b[92m\u001b[1m\n",
|
||||
"*****NEXT TASK*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"2: Gather data on the expected temperature range for the day.\n",
|
||||
"\u001b[93m\u001b[1m\n",
|
||||
"*****TASK RESULT*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I have gathered data on the expected temperature range for the day in San Francisco. The forecast is for temperatures to range from a low of 55 degrees Fahrenheit to a high of 68 degrees Fahrenheit.\n",
|
||||
"\u001b[95m\u001b[1m\n",
|
||||
"*****TASK LIST*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"3: Check the current weather conditions in SF.\n",
|
||||
"4: Calculate the average temperature for the day in San Francisco.\n",
|
||||
"5: Determine the probability of precipitation for the day in San Francisco.\n",
|
||||
"6: Identify any potential weather warnings or advisories for the day in San Francisco.\n",
|
||||
"7: Research any historical weather patterns for the day in San Francisco.\n",
|
||||
"8: Compare the expected temperature range to the historical average for the day in San Francisco.\n",
|
||||
"9: Collect data on the expected precipitation for the day.\n",
|
||||
"10: Analyze the data and create a weather report.\n",
|
||||
"11: Publish the weather report.\n",
|
||||
"\u001b[92m\u001b[1m\n",
|
||||
"*****NEXT TASK*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"3: Check the current weather conditions in SF.\n",
|
||||
"\u001b[93m\u001b[1m\n",
|
||||
"*****TASK RESULT*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"I am checking the current weather conditions in SF. According to the data I have gathered, the temperature in SF today is currently around 65 degrees Fahrenheit with clear skies. The temperature range for the day is expected to be between 60 and 70 degrees Fahrenheit.\n",
|
||||
"\u001b[91m\u001b[1m\n",
|
||||
"*****TASK ENDING*****\n",
|
||||
"\u001b[0m\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'objective': 'Write a weather report for SF today'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 141,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"baby_agi({\"objective\": OBJECTIVE})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "898a210b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,647 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "517a9fd4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# BabyAGI with Tools\n",
|
||||
"\n",
|
||||
"This notebook builds on top of [baby agi](baby_agi.ipynb), but shows how you can swap out the execution chain. The previous execution chain was just an LLM which made stuff up. By swapping it out with an agent that has access to tools, we can hopefully get real reliable information"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "556af556",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Install and Import Required Modules"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "c8a354b6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from collections import deque\n",
|
||||
"from typing import Dict, List, Optional, Any\n",
|
||||
"\n",
|
||||
"from langchain import LLMChain, OpenAI, PromptTemplate\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.llms import BaseLLM\n",
|
||||
"from langchain.vectorstores.base import VectorStore\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"from langchain.chains.base import Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09f70772",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Connect to the Vector Store\n",
|
||||
"\n",
|
||||
"Depending on what vectorstore you use, this step may look different."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "794045d4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install faiss-cpu > /dev/null\n",
|
||||
"%pip install google-search-results > /dev/null\n",
|
||||
"from langchain.vectorstores import FAISS\n",
|
||||
"from langchain.docstore import InMemoryDocstore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "6e0305eb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define your embedding model\n",
|
||||
"embeddings_model = OpenAIEmbeddings()\n",
|
||||
"# Initialize the vectorstore as empty\n",
|
||||
"import faiss\n",
|
||||
"\n",
|
||||
"embedding_size = 1536\n",
|
||||
"index = faiss.IndexFlatL2(embedding_size)\n",
|
||||
"vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0f3b72bf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Define the Chains\n",
|
||||
"\n",
|
||||
"BabyAGI relies on three LLM chains:\n",
|
||||
"- Task creation chain to select new tasks to add to the list\n",
|
||||
"- Task prioritization chain to re-prioritize tasks\n",
|
||||
"- Execution Chain to execute the tasks\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"NOTE: in this notebook, the Execution chain will now be an agent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "bf4bd5cd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class TaskCreationChain(LLMChain):\n",
|
||||
" \"\"\"Chain to generates tasks.\"\"\"\n",
|
||||
"\n",
|
||||
" @classmethod\n",
|
||||
" def from_llm(cls, llm: BaseLLM, verbose: bool = True) -> LLMChain:\n",
|
||||
" \"\"\"Get the response parser.\"\"\"\n",
|
||||
" task_creation_template = (\n",
|
||||
" \"You are an task creation AI that uses the result of an execution agent\"\n",
|
||||
" \" to create new tasks with the following objective: {objective},\"\n",
|
||||
" \" The last completed task has the result: {result}.\"\n",
|
||||
" \" This result was based on this task description: {task_description}.\"\n",
|
||||
" \" These are incomplete tasks: {incomplete_tasks}.\"\n",
|
||||
" \" Based on the result, create new tasks to be completed\"\n",
|
||||
" \" by the AI system that do not overlap with incomplete tasks.\"\n",
|
||||
" \" Return the tasks as an array.\"\n",
|
||||
" )\n",
|
||||
" prompt = PromptTemplate(\n",
|
||||
" template=task_creation_template,\n",
|
||||
" input_variables=[\n",
|
||||
" \"result\",\n",
|
||||
" \"task_description\",\n",
|
||||
" \"incomplete_tasks\",\n",
|
||||
" \"objective\",\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "b6488ffe",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class TaskPrioritizationChain(LLMChain):\n",
|
||||
" \"\"\"Chain to prioritize tasks.\"\"\"\n",
|
||||
"\n",
|
||||
" @classmethod\n",
|
||||
" def from_llm(cls, llm: BaseLLM, verbose: bool = True) -> LLMChain:\n",
|
||||
" \"\"\"Get the response parser.\"\"\"\n",
|
||||
" task_prioritization_template = (\n",
|
||||
" \"You are an task prioritization AI tasked with cleaning the formatting of and reprioritizing\"\n",
|
||||
" \" the following tasks: {task_names}.\"\n",
|
||||
" \" Consider the ultimate objective of your team: {objective}.\"\n",
|
||||
" \" Do not remove any tasks. Return the result as a numbered list, like:\"\n",
|
||||
" \" #. First task\"\n",
|
||||
" \" #. Second task\"\n",
|
||||
" \" Start the task list with number {next_task_id}.\"\n",
|
||||
" )\n",
|
||||
" prompt = PromptTemplate(\n",
|
||||
" template=task_prioritization_template,\n",
|
||||
" input_variables=[\"task_names\", \"next_task_id\", \"objective\"],\n",
|
||||
" )\n",
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"id": "b43cd580",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import ZeroShotAgent, Tool, AgentExecutor\n",
|
||||
"from langchain import OpenAI, SerpAPIWrapper, LLMChain\n",
|
||||
"\n",
|
||||
"todo_prompt = PromptTemplate.from_template(\n",
|
||||
" \"You are a planner who is an expert at coming up with a todo list for a given objective. Come up with a todo list for this objective: {objective}\"\n",
|
||||
")\n",
|
||||
"todo_chain = LLMChain(llm=OpenAI(temperature=0), prompt=todo_prompt)\n",
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"tools = [\n",
|
||||
" Tool(\n",
|
||||
" name=\"Search\",\n",
|
||||
" func=search.run,\n",
|
||||
" description=\"useful for when you need to answer questions about current events\",\n",
|
||||
" ),\n",
|
||||
" Tool(\n",
|
||||
" name=\"TODO\",\n",
|
||||
" func=todo_chain.run,\n",
|
||||
" description=\"useful for when you need to come up with todo lists. Input: an objective to create a todo list for. Output: a todo list for that objective. Please be very clear what the objective is!\",\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"prefix = \"\"\"You are an AI who performs one task based on the following objective: {objective}. Take into account these previously completed tasks: {context}.\"\"\"\n",
|
||||
"suffix = \"\"\"Question: {task}\n",
|
||||
"{agent_scratchpad}\"\"\"\n",
|
||||
"prompt = ZeroShotAgent.create_prompt(\n",
|
||||
" tools,\n",
|
||||
" prefix=prefix,\n",
|
||||
" suffix=suffix,\n",
|
||||
" input_variables=[\"objective\", \"task\", \"context\", \"agent_scratchpad\"],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3ad996c5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Define the BabyAGI Controller\n",
|
||||
"\n",
|
||||
"BabyAGI composes the chains defined above in a (potentially-)infinite loop."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"id": "0ada0636",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_next_task(\n",
|
||||
" task_creation_chain: LLMChain,\n",
|
||||
" result: Dict,\n",
|
||||
" task_description: str,\n",
|
||||
" task_list: List[str],\n",
|
||||
" objective: str,\n",
|
||||
") -> List[Dict]:\n",
|
||||
" \"\"\"Get the next task.\"\"\"\n",
|
||||
" incomplete_tasks = \", \".join(task_list)\n",
|
||||
" response = task_creation_chain.run(\n",
|
||||
" result=result,\n",
|
||||
" task_description=task_description,\n",
|
||||
" incomplete_tasks=incomplete_tasks,\n",
|
||||
" objective=objective,\n",
|
||||
" )\n",
|
||||
" new_tasks = response.split(\"\\n\")\n",
|
||||
" return [{\"task_name\": task_name} for task_name in new_tasks if task_name.strip()]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"id": "d35250ad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def prioritize_tasks(\n",
|
||||
" task_prioritization_chain: LLMChain,\n",
|
||||
" this_task_id: int,\n",
|
||||
" task_list: List[Dict],\n",
|
||||
" objective: str,\n",
|
||||
") -> List[Dict]:\n",
|
||||
" \"\"\"Prioritize tasks.\"\"\"\n",
|
||||
" task_names = [t[\"task_name\"] for t in task_list]\n",
|
||||
" next_task_id = int(this_task_id) + 1\n",
|
||||
" response = task_prioritization_chain.run(\n",
|
||||
" task_names=task_names, next_task_id=next_task_id, objective=objective\n",
|
||||
" )\n",
|
||||
" new_tasks = response.split(\"\\n\")\n",
|
||||
" prioritized_task_list = []\n",
|
||||
" for task_string in new_tasks:\n",
|
||||
" if not task_string.strip():\n",
|
||||
" continue\n",
|
||||
" task_parts = task_string.strip().split(\".\", 1)\n",
|
||||
" if len(task_parts) == 2:\n",
|
||||
" task_id = task_parts[0].strip()\n",
|
||||
" task_name = task_parts[1].strip()\n",
|
||||
" prioritized_task_list.append({\"task_id\": task_id, \"task_name\": task_name})\n",
|
||||
" return prioritized_task_list"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"id": "e3f1840c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def _get_top_tasks(vectorstore, query: str, k: int) -> List[str]:\n",
|
||||
" \"\"\"Get the top k tasks based on the query.\"\"\"\n",
|
||||
" results = vectorstore.similarity_search_with_score(query, k=k)\n",
|
||||
" if not results:\n",
|
||||
" return []\n",
|
||||
" sorted_results, _ = zip(*sorted(results, key=lambda x: x[1], reverse=True))\n",
|
||||
" return [str(item.metadata[\"task\"]) for item in sorted_results]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def execute_task(\n",
|
||||
" vectorstore, execution_chain: LLMChain, objective: str, task: str, k: int = 5\n",
|
||||
") -> str:\n",
|
||||
" \"\"\"Execute a task.\"\"\"\n",
|
||||
" context = _get_top_tasks(vectorstore, query=objective, k=k)\n",
|
||||
" return execution_chain.run(objective=objective, context=context, task=task)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"id": "1e978938",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class BabyAGI(Chain, BaseModel):\n",
|
||||
" \"\"\"Controller model for the BabyAGI agent.\"\"\"\n",
|
||||
"\n",
|
||||
" task_list: deque = Field(default_factory=deque)\n",
|
||||
" task_creation_chain: TaskCreationChain = Field(...)\n",
|
||||
" task_prioritization_chain: TaskPrioritizationChain = Field(...)\n",
|
||||
" execution_chain: AgentExecutor = Field(...)\n",
|
||||
" task_id_counter: int = Field(1)\n",
|
||||
" vectorstore: VectorStore = Field(init=False)\n",
|
||||
" max_iterations: Optional[int] = None\n",
|
||||
"\n",
|
||||
" class Config:\n",
|
||||
" \"\"\"Configuration for this pydantic object.\"\"\"\n",
|
||||
"\n",
|
||||
" arbitrary_types_allowed = True\n",
|
||||
"\n",
|
||||
" def add_task(self, task: Dict):\n",
|
||||
" self.task_list.append(task)\n",
|
||||
"\n",
|
||||
" def print_task_list(self):\n",
|
||||
" print(\"\\033[95m\\033[1m\" + \"\\n*****TASK LIST*****\\n\" + \"\\033[0m\\033[0m\")\n",
|
||||
" for t in self.task_list:\n",
|
||||
" print(str(t[\"task_id\"]) + \": \" + t[\"task_name\"])\n",
|
||||
"\n",
|
||||
" def print_next_task(self, task: Dict):\n",
|
||||
" print(\"\\033[92m\\033[1m\" + \"\\n*****NEXT TASK*****\\n\" + \"\\033[0m\\033[0m\")\n",
|
||||
" print(str(task[\"task_id\"]) + \": \" + task[\"task_name\"])\n",
|
||||
"\n",
|
||||
" def print_task_result(self, result: str):\n",
|
||||
" print(\"\\033[93m\\033[1m\" + \"\\n*****TASK RESULT*****\\n\" + \"\\033[0m\\033[0m\")\n",
|
||||
" print(result)\n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def input_keys(self) -> List[str]:\n",
|
||||
" return [\"objective\"]\n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def output_keys(self) -> List[str]:\n",
|
||||
" return []\n",
|
||||
"\n",
|
||||
" def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]:\n",
|
||||
" \"\"\"Run the agent.\"\"\"\n",
|
||||
" objective = inputs[\"objective\"]\n",
|
||||
" first_task = inputs.get(\"first_task\", \"Make a todo list\")\n",
|
||||
" self.add_task({\"task_id\": 1, \"task_name\": first_task})\n",
|
||||
" num_iters = 0\n",
|
||||
" while True:\n",
|
||||
" if self.task_list:\n",
|
||||
" self.print_task_list()\n",
|
||||
"\n",
|
||||
" # Step 1: Pull the first task\n",
|
||||
" task = self.task_list.popleft()\n",
|
||||
" self.print_next_task(task)\n",
|
||||
"\n",
|
||||
" # Step 2: Execute the task\n",
|
||||
" result = execute_task(\n",
|
||||
" self.vectorstore, self.execution_chain, objective, task[\"task_name\"]\n",
|
||||
" )\n",
|
||||
" this_task_id = int(task[\"task_id\"])\n",
|
||||
" self.print_task_result(result)\n",
|
||||
"\n",
|
||||
" # Step 3: Store the result in Pinecone\n",
|
||||
" result_id = f\"result_{task['task_id']}\"\n",
|
||||
" self.vectorstore.add_texts(\n",
|
||||
" texts=[result],\n",
|
||||
" metadatas=[{\"task\": task[\"task_name\"]}],\n",
|
||||
" ids=[result_id],\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Step 4: Create new tasks and reprioritize task list\n",
|
||||
" new_tasks = get_next_task(\n",
|
||||
" self.task_creation_chain,\n",
|
||||
" result,\n",
|
||||
" task[\"task_name\"],\n",
|
||||
" [t[\"task_name\"] for t in self.task_list],\n",
|
||||
" objective,\n",
|
||||
" )\n",
|
||||
" for new_task in new_tasks:\n",
|
||||
" self.task_id_counter += 1\n",
|
||||
" new_task.update({\"task_id\": self.task_id_counter})\n",
|
||||
" self.add_task(new_task)\n",
|
||||
" self.task_list = deque(\n",
|
||||
" prioritize_tasks(\n",
|
||||
" self.task_prioritization_chain,\n",
|
||||
" this_task_id,\n",
|
||||
" list(self.task_list),\n",
|
||||
" objective,\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" num_iters += 1\n",
|
||||
" if self.max_iterations is not None and num_iters == self.max_iterations:\n",
|
||||
" print(\n",
|
||||
" \"\\033[91m\\033[1m\" + \"\\n*****TASK ENDING*****\\n\" + \"\\033[0m\\033[0m\"\n",
|
||||
" )\n",
|
||||
" break\n",
|
||||
" return {}\n",
|
||||
"\n",
|
||||
" @classmethod\n",
|
||||
" def from_llm(\n",
|
||||
" cls, llm: BaseLLM, vectorstore: VectorStore, verbose: bool = False, **kwargs\n",
|
||||
" ) -> \"BabyAGI\":\n",
|
||||
" \"\"\"Initialize the BabyAGI Controller.\"\"\"\n",
|
||||
" task_creation_chain = TaskCreationChain.from_llm(llm, verbose=verbose)\n",
|
||||
" task_prioritization_chain = TaskPrioritizationChain.from_llm(\n",
|
||||
" llm, verbose=verbose\n",
|
||||
" )\n",
|
||||
" llm_chain = LLMChain(llm=llm, prompt=prompt)\n",
|
||||
" tool_names = [tool.name for tool in tools]\n",
|
||||
" agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names)\n",
|
||||
" agent_executor = AgentExecutor.from_agent_and_tools(\n",
|
||||
" agent=agent, tools=tools, verbose=True\n",
|
||||
" )\n",
|
||||
" return cls(\n",
|
||||
" task_creation_chain=task_creation_chain,\n",
|
||||
" task_prioritization_chain=task_prioritization_chain,\n",
|
||||
" execution_chain=agent_executor,\n",
|
||||
" vectorstore=vectorstore,\n",
|
||||
" **kwargs,\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "05ba762e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Run the BabyAGI\n",
|
||||
"\n",
|
||||
"Now it's time to create the BabyAGI controller and watch it try to accomplish your objective."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"id": "3d220b69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"OBJECTIVE = \"Write a weather report for SF today\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"id": "8a8e5543",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"id": "3d69899b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Logging of LLMChains\n",
|
||||
"verbose = False\n",
|
||||
"# If None, will keep on going forever\n",
|
||||
"max_iterations: Optional[int] = 3\n",
|
||||
"baby_agi = BabyAGI.from_llm(\n",
|
||||
" llm=llm, vectorstore=vectorstore, verbose=verbose, max_iterations=max_iterations\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"id": "f7957b51",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[95m\u001b[1m\n",
|
||||
"*****TASK LIST*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"1: Make a todo list\n",
|
||||
"\u001b[92m\u001b[1m\n",
|
||||
"*****NEXT TASK*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"1: Make a todo list\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to gather data on the current weather conditions in SF\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: Current weather conditions in SF\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mHigh 67F. Winds WNW at 10 to 15 mph. Clear to partly cloudy.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to make a todo list\n",
|
||||
"Action: TODO\n",
|
||||
"Action Input: Write a weather report for SF today\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3m\n",
|
||||
"\n",
|
||||
"1. Research current weather conditions in San Francisco\n",
|
||||
"2. Gather data on temperature, humidity, wind speed, and other relevant weather conditions\n",
|
||||
"3. Analyze data to determine current weather trends\n",
|
||||
"4. Write a brief introduction to the weather report\n",
|
||||
"5. Describe current weather conditions in San Francisco\n",
|
||||
"6. Discuss any upcoming weather changes\n",
|
||||
"7. Summarize the weather report\n",
|
||||
"8. Proofread and edit the report\n",
|
||||
"9. Submit the report\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: A weather report for SF today should include research on current weather conditions in San Francisco, gathering data on temperature, humidity, wind speed, and other relevant weather conditions, analyzing data to determine current weather trends, writing a brief introduction to the weather report, describing current weather conditions in San Francisco, discussing any upcoming weather changes, summarizing the weather report, proofreading and editing the report, and submitting the report.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\u001b[93m\u001b[1m\n",
|
||||
"*****TASK RESULT*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"A weather report for SF today should include research on current weather conditions in San Francisco, gathering data on temperature, humidity, wind speed, and other relevant weather conditions, analyzing data to determine current weather trends, writing a brief introduction to the weather report, describing current weather conditions in San Francisco, discussing any upcoming weather changes, summarizing the weather report, proofreading and editing the report, and submitting the report.\n",
|
||||
"\u001b[95m\u001b[1m\n",
|
||||
"*****TASK LIST*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"2: Gather data on temperature, humidity, wind speed, and other relevant weather conditions\n",
|
||||
"3: Analyze data to determine current weather trends\n",
|
||||
"4: Write a brief introduction to the weather report\n",
|
||||
"5: Describe current weather conditions in San Francisco\n",
|
||||
"6: Discuss any upcoming weather changes\n",
|
||||
"7: Summarize the weather report\n",
|
||||
"8: Proofread and edit the report\n",
|
||||
"9: Submit the report\n",
|
||||
"1: Research current weather conditions in San Francisco\n",
|
||||
"\u001b[92m\u001b[1m\n",
|
||||
"*****NEXT TASK*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"2: Gather data on temperature, humidity, wind speed, and other relevant weather conditions\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to search for the current weather conditions in SF\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: Current weather conditions in SF\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mHigh 67F. Winds WNW at 10 to 15 mph. Clear to partly cloudy.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to make a todo list\n",
|
||||
"Action: TODO\n",
|
||||
"Action Input: Create a weather report for SF today\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3m\n",
|
||||
"\n",
|
||||
"1. Gather current weather data for SF, including temperature, wind speed, humidity, and precipitation.\n",
|
||||
"2. Research historical weather data for SF to compare current conditions.\n",
|
||||
"3. Analyze current and historical data to determine any trends or patterns.\n",
|
||||
"4. Create a visual representation of the data, such as a graph or chart.\n",
|
||||
"5. Write a summary of the weather report, including key findings and any relevant information.\n",
|
||||
"6. Publish the weather report on a website or other platform.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: Today in San Francisco, the temperature is 67F with winds WNW at 10 to 15 mph. The sky is clear to partly cloudy.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\u001b[93m\u001b[1m\n",
|
||||
"*****TASK RESULT*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"Today in San Francisco, the temperature is 67F with winds WNW at 10 to 15 mph. The sky is clear to partly cloudy.\n",
|
||||
"\u001b[95m\u001b[1m\n",
|
||||
"*****TASK LIST*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"3: Research current weather conditions in San Francisco\n",
|
||||
"4: Compare the current weather conditions in San Francisco to the average for this time of year.\n",
|
||||
"5: Identify any potential weather-related hazards in the area.\n",
|
||||
"6: Research any historical weather patterns in San Francisco.\n",
|
||||
"7: Analyze data to determine current weather trends\n",
|
||||
"8: Include any relevant data from nearby cities in the report.\n",
|
||||
"9: Include any relevant data from the National Weather Service in the report.\n",
|
||||
"10: Include any relevant data from local news sources in the report.\n",
|
||||
"11: Include any relevant data from online weather sources in the report.\n",
|
||||
"12: Include any relevant data from local meteorologists in the report.\n",
|
||||
"13: Include any relevant data from local weather stations in the report.\n",
|
||||
"14: Include any relevant data from satellite images in the report.\n",
|
||||
"15: Describe current weather conditions in San Francisco\n",
|
||||
"16: Discuss any upcoming weather changes\n",
|
||||
"17: Write a brief introduction to the weather report\n",
|
||||
"18: Summarize the weather report\n",
|
||||
"19: Proofread and edit the report\n",
|
||||
"20: Submit the report\n",
|
||||
"\u001b[92m\u001b[1m\n",
|
||||
"*****NEXT TASK*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"3: Research current weather conditions in San Francisco\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to search for current weather conditions in San Francisco\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: Current weather conditions in San Francisco\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mTodaySun 04/09 High 67 · 1% Precip. ; TonightSun 04/09 Low 49 · 9% Precip. ; TomorrowMon 04/10 High 64 · 11% Precip.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: Today in San Francisco, the high temperature is 67 degrees with 1% chance of precipitation. The low temperature tonight is 49 degrees with 9% chance of precipitation. Tomorrow's high temperature is 64 degrees with 11% chance of precipitation.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\u001b[93m\u001b[1m\n",
|
||||
"*****TASK RESULT*****\n",
|
||||
"\u001b[0m\u001b[0m\n",
|
||||
"Today in San Francisco, the high temperature is 67 degrees with 1% chance of precipitation. The low temperature tonight is 49 degrees with 9% chance of precipitation. Tomorrow's high temperature is 64 degrees with 11% chance of precipitation.\n",
|
||||
"\u001b[91m\u001b[1m\n",
|
||||
"*****TASK ENDING*****\n",
|
||||
"\u001b[0m\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'objective': 'Write a weather report for SF today'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 54,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"baby_agi({\"objective\": OBJECTIVE})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "898a210b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,693 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# CAMEL Role-Playing Autonomous Cooperative Agents\n",
|
||||
"\n",
|
||||
"This is a langchain implementation of paper: \"CAMEL: Communicative Agents for “Mind” Exploration of Large Scale Language Model Society\".\n",
|
||||
"\n",
|
||||
"Overview:\n",
|
||||
"\n",
|
||||
"The rapid advancement of conversational and chat-based language models has led to remarkable progress in complex task-solving. However, their success heavily relies on human input to guide the conversation, which can be challenging and time-consuming. This paper explores the potential of building scalable techniques to facilitate autonomous cooperation among communicative agents and provide insight into their \"cognitive\" processes. To address the challenges of achieving autonomous cooperation, we propose a novel communicative agent framework named role-playing. Our approach involves using inception prompting to guide chat agents toward task completion while maintaining consistency with human intentions. We showcase how role-playing can be used to generate conversational data for studying the behaviors and capabilities of chat agents, providing a valuable resource for investigating conversational language models. Our contributions include introducing a novel communicative agent framework, offering a scalable approach for studying the cooperative behaviors and capabilities of multi-agent systems, and open-sourcing our library to support research on communicative agents and beyond.\n",
|
||||
"\n",
|
||||
"The original implementation: https://github.com/lightaime/camel\n",
|
||||
"\n",
|
||||
"Project website: https://www.camel-ai.org/\n",
|
||||
"\n",
|
||||
"Arxiv paper: https://arxiv.org/abs/2303.17760\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Import LangChain related modules "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import List\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.prompts.chat import (\n",
|
||||
" SystemMessagePromptTemplate,\n",
|
||||
" HumanMessagePromptTemplate,\n",
|
||||
")\n",
|
||||
"from langchain.schema import (\n",
|
||||
" AIMessage,\n",
|
||||
" HumanMessage,\n",
|
||||
" SystemMessage,\n",
|
||||
" BaseMessage,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Define a CAMEL agent helper class"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class CAMELAgent:\n",
|
||||
"\n",
|
||||
" def __init__(\n",
|
||||
" self,\n",
|
||||
" system_message: SystemMessage,\n",
|
||||
" model: ChatOpenAI,\n",
|
||||
" ) -> None:\n",
|
||||
" self.system_message = system_message\n",
|
||||
" self.model = model\n",
|
||||
" self.init_messages()\n",
|
||||
"\n",
|
||||
" def reset(self) -> None:\n",
|
||||
" self.init_messages()\n",
|
||||
" return self.stored_messages\n",
|
||||
"\n",
|
||||
" def init_messages(self) -> None:\n",
|
||||
" self.stored_messages = [self.system_message]\n",
|
||||
"\n",
|
||||
" def update_messages(self, message: BaseMessage) -> List[BaseMessage]:\n",
|
||||
" self.stored_messages.append(message)\n",
|
||||
" return self.stored_messages\n",
|
||||
"\n",
|
||||
" def step(\n",
|
||||
" self,\n",
|
||||
" input_message: HumanMessage,\n",
|
||||
" ) -> AIMessage:\n",
|
||||
" messages = self.update_messages(input_message)\n",
|
||||
"\n",
|
||||
" output_message = self.model(messages)\n",
|
||||
" self.update_messages(output_message)\n",
|
||||
"\n",
|
||||
" return output_message\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup OpenAI API key and roles and task for role-playing"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
|
||||
"\n",
|
||||
"assistant_role_name = \"Python Programmer\"\n",
|
||||
"user_role_name = \"Stock Trader\"\n",
|
||||
"task = \"Develop a trading bot for the stock market\"\n",
|
||||
"word_limit = 50 # word limit for task brainstorming"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a task specify agent for brainstorming and get the specified task"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Specified task: Develop a Python-based swing trading bot that scans market trends, monitors stocks, and generates trading signals to help a stock trader to place optimal buy and sell orders with defined stop losses and profit targets.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"task_specifier_sys_msg = SystemMessage(content=\"You can make a task more specific.\")\n",
|
||||
"task_specifier_prompt = (\n",
|
||||
"\"\"\"Here is a task that {assistant_role_name} will help {user_role_name} to complete: {task}.\n",
|
||||
"Please make it more specific. Be creative and imaginative.\n",
|
||||
"Please reply with the specified task in {word_limit} words or less. Do not add anything else.\"\"\"\n",
|
||||
")\n",
|
||||
"task_specifier_template = HumanMessagePromptTemplate.from_template(template=task_specifier_prompt)\n",
|
||||
"task_specify_agent = CAMELAgent(task_specifier_sys_msg, ChatOpenAI(temperature=1.0))\n",
|
||||
"task_specifier_msg = task_specifier_template.format_messages(assistant_role_name=assistant_role_name,\n",
|
||||
" user_role_name=user_role_name,\n",
|
||||
" task=task, word_limit=word_limit)[0]\n",
|
||||
"specified_task_msg = task_specify_agent.step(task_specifier_msg)\n",
|
||||
"print(f\"Specified task: {specified_task_msg.content}\")\n",
|
||||
"specified_task = specified_task_msg.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create inception prompts for AI assistant and AI user for role-playing"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"assistant_inception_prompt = (\n",
|
||||
"\"\"\"Never forget you are a {assistant_role_name} and I am a {user_role_name}. Never flip roles! Never instruct me!\n",
|
||||
"We share a common interest in collaborating to successfully complete a task.\n",
|
||||
"You must help me to complete the task.\n",
|
||||
"Here is the task: {task}. Never forget our task!\n",
|
||||
"I must instruct you based on your expertise and my needs to complete the task.\n",
|
||||
"\n",
|
||||
"I must give you one instruction at a time.\n",
|
||||
"You must write a specific solution that appropriately completes the requested instruction.\n",
|
||||
"You must decline my instruction honestly if you cannot perform the instruction due to physical, moral, legal reasons or your capability and explain the reasons.\n",
|
||||
"Do not add anything else other than your solution to my instruction.\n",
|
||||
"You are never supposed to ask me any questions you only answer questions.\n",
|
||||
"You are never supposed to reply with a flake solution. Explain your solutions.\n",
|
||||
"Your solution must be declarative sentences and simple present tense.\n",
|
||||
"Unless I say the task is completed, you should always start with:\n",
|
||||
"\n",
|
||||
"Solution: <YOUR_SOLUTION>\n",
|
||||
"\n",
|
||||
"<YOUR_SOLUTION> should be specific and provide preferable implementations and examples for task-solving.\n",
|
||||
"Always end <YOUR_SOLUTION> with: Next request.\"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"user_inception_prompt = (\n",
|
||||
"\"\"\"Never forget you are a {user_role_name} and I am a {assistant_role_name}. Never flip roles! You will always instruct me.\n",
|
||||
"We share a common interest in collaborating to successfully complete a task.\n",
|
||||
"I must help you to complete the task.\n",
|
||||
"Here is the task: {task}. Never forget our task!\n",
|
||||
"You must instruct me based on my expertise and your needs to complete the task ONLY in the following two ways:\n",
|
||||
"\n",
|
||||
"1. Instruct with a necessary input:\n",
|
||||
"Instruction: <YOUR_INSTRUCTION>\n",
|
||||
"Input: <YOUR_INPUT>\n",
|
||||
"\n",
|
||||
"2. Instruct without any input:\n",
|
||||
"Instruction: <YOUR_INSTRUCTION>\n",
|
||||
"Input: None\n",
|
||||
"\n",
|
||||
"The \"Instruction\" describes a task or question. The paired \"Input\" provides further context or information for the requested \"Instruction\".\n",
|
||||
"\n",
|
||||
"You must give me one instruction at a time.\n",
|
||||
"I must write a response that appropriately completes the requested instruction.\n",
|
||||
"I must decline your instruction honestly if I cannot perform the instruction due to physical, moral, legal reasons or my capability and explain the reasons.\n",
|
||||
"You should instruct me not ask me questions.\n",
|
||||
"Now you must start to instruct me using the two ways described above.\n",
|
||||
"Do not add anything else other than your instruction and the optional corresponding input!\n",
|
||||
"Keep giving me instructions and necessary inputs until you think the task is completed.\n",
|
||||
"When the task is completed, you must only reply with a single word <CAMEL_TASK_DONE>.\n",
|
||||
"Never say <CAMEL_TASK_DONE> unless my responses have solved your task.\"\"\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a helper helper to get system messages for AI assistant and AI user from role names and the task"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_sys_msgs(assistant_role_name: str, user_role_name: str, task: str):\n",
|
||||
" \n",
|
||||
" assistant_sys_template = SystemMessagePromptTemplate.from_template(template=assistant_inception_prompt)\n",
|
||||
" assistant_sys_msg = assistant_sys_template.format_messages(assistant_role_name=assistant_role_name, user_role_name=user_role_name, task=task)[0]\n",
|
||||
" \n",
|
||||
" user_sys_template = SystemMessagePromptTemplate.from_template(template=user_inception_prompt)\n",
|
||||
" user_sys_msg = user_sys_template.format_messages(assistant_role_name=assistant_role_name, user_role_name=user_role_name, task=task)[0]\n",
|
||||
" \n",
|
||||
" return assistant_sys_msg, user_sys_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create AI assistant agent and AI user agent from obtained system messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"assistant_sys_msg, user_sys_msg = get_sys_msgs(assistant_role_name, user_role_name, specified_task)\n",
|
||||
"assistant_agent = CAMELAgent(assistant_sys_msg, ChatOpenAI(temperature=0.2))\n",
|
||||
"user_agent = CAMELAgent(user_sys_msg, ChatOpenAI(temperature=0.2))\n",
|
||||
"\n",
|
||||
"# Reset agents\n",
|
||||
"assistant_agent.reset()\n",
|
||||
"user_agent.reset()\n",
|
||||
"\n",
|
||||
"# Initialize chats \n",
|
||||
"assistant_msg = HumanMessage(\n",
|
||||
" content=(f\"{user_sys_msg.content}. \"\n",
|
||||
" \"Now start to give me introductions one by one. \"\n",
|
||||
" \"Only reply with Instruction and Input.\"))\n",
|
||||
"\n",
|
||||
"user_msg = HumanMessage(content=f\"{assistant_sys_msg.content}\")\n",
|
||||
"user_msg = assistant_agent.step(user_msg)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Start role-playing session to solve the task!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Original task prompt:\n",
|
||||
"Develop a trading bot for the stock market\n",
|
||||
"\n",
|
||||
"Specified task prompt:\n",
|
||||
"Develop a Python-based swing trading bot that scans market trends, monitors stocks, and generates trading signals to help a stock trader to place optimal buy and sell orders with defined stop losses and profit targets.\n",
|
||||
"\n",
|
||||
"AI User (Stock Trader):\n",
|
||||
"\n",
|
||||
"Instruction: Install the necessary Python libraries for data analysis and trading.\n",
|
||||
"Input: None\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI Assistant (Python Programmer):\n",
|
||||
"\n",
|
||||
"Solution: We can install the necessary Python libraries using pip, a package installer for Python. We can install pandas, numpy, matplotlib, and ta-lib for data analysis and trading. We can use the following command to install these libraries:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"pip install pandas numpy matplotlib ta-lib\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Next request.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI User (Stock Trader):\n",
|
||||
"\n",
|
||||
"Instruction: Import the necessary libraries in the Python script.\n",
|
||||
"Input: None\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI Assistant (Python Programmer):\n",
|
||||
"\n",
|
||||
"Solution: We can import the necessary libraries in the Python script using the import statement. We need to import pandas, numpy, matplotlib, and ta-lib for data analysis and trading. We can use the following code to import these libraries:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import talib as ta\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Next request.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI User (Stock Trader):\n",
|
||||
"\n",
|
||||
"Instruction: Load historical stock data into a pandas DataFrame.\n",
|
||||
"Input: The path to the CSV file containing the historical stock data.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI Assistant (Python Programmer):\n",
|
||||
"\n",
|
||||
"Solution: We can load historical stock data into a pandas DataFrame using the `read_csv()` function from pandas. We need to pass the path to the CSV file containing the historical stock data as an argument to this function. We can use the following code to load the historical stock data:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"df = pd.read_csv('path/to/csv/file.csv')\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"This will load the historical stock data into a pandas DataFrame called `df`. Next request.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI User (Stock Trader):\n",
|
||||
"\n",
|
||||
"Instruction: Preprocess the historical stock data by setting the date column as the index and sorting the DataFrame in ascending order by date.\n",
|
||||
"Input: None.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI Assistant (Python Programmer):\n",
|
||||
"\n",
|
||||
"Solution: We can preprocess the historical stock data by setting the date column as the index and sorting the DataFrame in ascending order by date using the `set_index()` and `sort_index()` functions from pandas. We can use the following code to preprocess the historical stock data:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"df = df.set_index('date')\n",
|
||||
"df = df.sort_index(ascending=True)\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"This will set the date column as the index and sort the DataFrame in ascending order by date. Next request.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI User (Stock Trader):\n",
|
||||
"\n",
|
||||
"Instruction: Calculate the short-term and long-term moving averages for the stock data using the `ta.SMA()` function from ta-lib.\n",
|
||||
"Input: The period for the short-term moving average and the period for the long-term moving average.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI Assistant (Python Programmer):\n",
|
||||
"\n",
|
||||
"Solution: We can calculate the short-term and long-term moving averages for the stock data using the `ta.SMA()` function from ta-lib. We need to pass the period for the short-term moving average and the period for the long-term moving average as arguments to this function. We can use the following code to calculate the short-term and long-term moving averages:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"short_ma = ta.SMA(df['close'], timeperiod=short_period)\n",
|
||||
"long_ma = ta.SMA(df['close'], timeperiod=long_period)\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"This will calculate the short-term and long-term moving averages for the stock data and store them in the `short_ma` and `long_ma` variables, respectively. Next request.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI User (Stock Trader):\n",
|
||||
"\n",
|
||||
"Instruction: Create a new DataFrame that combines the historical stock data with the short-term and long-term moving averages.\n",
|
||||
"Input: None.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI Assistant (Python Programmer):\n",
|
||||
"\n",
|
||||
"Solution: We can create a new DataFrame that combines the historical stock data with the short-term and long-term moving averages using the `concat()` function from pandas. We need to pass the historical stock data, the short-term moving average, and the long-term moving average as arguments to this function. We can use the following code to create the new DataFrame:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"new_df = pd.concat([df, short_ma, long_ma], axis=1)\n",
|
||||
"new_df.columns = ['open', 'high', 'low', 'close', 'volume', 'short_ma', 'long_ma']\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"This will create a new DataFrame called `new_df` that combines the historical stock data with the short-term and long-term moving averages. The columns of the new DataFrame are named 'open', 'high', 'low', 'close', 'volume', 'short_ma', and 'long_ma'. Next request.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI User (Stock Trader):\n",
|
||||
"\n",
|
||||
"Instruction: Create a new column in the DataFrame that indicates when to buy or sell the stock based on the crossover of the short-term and long-term moving averages.\n",
|
||||
"Input: None.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI Assistant (Python Programmer):\n",
|
||||
"\n",
|
||||
"Solution: We can create a new column in the DataFrame that indicates when to buy or sell the stock based on the crossover of the short-term and long-term moving averages. We can use the following code to create the new column:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"new_df['signal'] = np.where(new_df['short_ma'] > new_df['long_ma'], 1, -1)\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"This will create a new column called 'signal' in the DataFrame that indicates when to buy or sell the stock based on the crossover of the short-term and long-term moving averages. If the short-term moving average is greater than the long-term moving average, the signal is 1 (buy), otherwise the signal is -1 (sell). Next request.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI User (Stock Trader):\n",
|
||||
"\n",
|
||||
"Instruction: Create a new column in the DataFrame that indicates the profit or loss for each trade based on the buy and sell signals and the defined stop loss and profit target.\n",
|
||||
"Input: The stop loss and profit target as percentages.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI Assistant (Python Programmer):\n",
|
||||
"\n",
|
||||
"Solution: We can create a new column in the DataFrame that indicates the profit or loss for each trade based on the buy and sell signals and the defined stop loss and profit target. We need to pass the stop loss and profit target as percentages as arguments to this function. We can use the following code to create the new column:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"stop_loss = stop_loss_percent / 100\n",
|
||||
"profit_target = profit_target_percent / 100\n",
|
||||
"\n",
|
||||
"new_df['pnl'] = 0.0\n",
|
||||
"buy_price = 0.0\n",
|
||||
"for i in range(1, len(new_df)):\n",
|
||||
" if new_df['signal'][i] == 1 and new_df['signal'][i-1] == -1:\n",
|
||||
" buy_price = new_df['close'][i]\n",
|
||||
" elif new_df['signal'][i] == -1 and new_df['signal'][i-1] == 1:\n",
|
||||
" sell_price = new_df['close'][i]\n",
|
||||
" if sell_price <= buy_price * (1 - stop_loss):\n",
|
||||
" new_df['pnl'][i] = -stop_loss\n",
|
||||
" elif sell_price >= buy_price * (1 + profit_target):\n",
|
||||
" new_df['pnl'][i] = profit_target\n",
|
||||
" else:\n",
|
||||
" new_df['pnl'][i] = (sell_price - buy_price) / buy_price\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"This will create a new column called 'pnl' in the DataFrame that indicates the profit or loss for each trade based on the buy and sell signals and the defined stop loss and profit target. The stop loss and profit target are calculated based on the stop_loss_percent and profit_target_percent variables, respectively. The buy and sell prices are stored in the buy_price and sell_price variables, respectively. If the sell price is less than or equal to the stop loss, the profit or loss is set to -stop_loss. If the sell price is greater than or equal to the profit target, the profit or loss is set to profit_target. Otherwise, the profit or loss is calculated as (sell_price - buy_price) / buy_price. Next request.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI User (Stock Trader):\n",
|
||||
"\n",
|
||||
"Instruction: Calculate the total profit or loss for all trades.\n",
|
||||
"Input: None.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI Assistant (Python Programmer):\n",
|
||||
"\n",
|
||||
"Solution: We can calculate the total profit or loss for all trades by summing the values in the 'pnl' column of the DataFrame. We can use the following code to calculate the total profit or loss:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"total_pnl = new_df['pnl'].sum()\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"This will calculate the total profit or loss for all trades and store it in the total_pnl variable. Next request.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI User (Stock Trader):\n",
|
||||
"\n",
|
||||
"Instruction: Visualize the stock data, short-term moving average, and long-term moving average using a line chart.\n",
|
||||
"Input: None.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI Assistant (Python Programmer):\n",
|
||||
"\n",
|
||||
"Solution: We can visualize the stock data, short-term moving average, and long-term moving average using a line chart using the `plot()` function from pandas. We can use the following code to visualize the data:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"plt.figure(figsize=(12,6))\n",
|
||||
"plt.plot(new_df.index, new_df['close'], label='Close')\n",
|
||||
"plt.plot(new_df.index, new_df['short_ma'], label='Short MA')\n",
|
||||
"plt.plot(new_df.index, new_df['long_ma'], label='Long MA')\n",
|
||||
"plt.xlabel('Date')\n",
|
||||
"plt.ylabel('Price')\n",
|
||||
"plt.title('Stock Data with Moving Averages')\n",
|
||||
"plt.legend()\n",
|
||||
"plt.show()\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"This will create a line chart that visualizes the stock data, short-term moving average, and long-term moving average. The x-axis represents the date and the y-axis represents the price. The chart also includes a legend that labels each line. Next request.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI User (Stock Trader):\n",
|
||||
"\n",
|
||||
"Instruction: Visualize the buy and sell signals using a scatter plot.\n",
|
||||
"Input: None.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI Assistant (Python Programmer):\n",
|
||||
"\n",
|
||||
"Solution: We can visualize the buy and sell signals using a scatter plot using the `scatter()` function from matplotlib. We can use the following code to visualize the signals:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"buy_signals = new_df[new_df['signal'] == 1]\n",
|
||||
"sell_signals = new_df[new_df['signal'] == -1]\n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(12,6))\n",
|
||||
"plt.scatter(buy_signals.index, buy_signals['close'], label='Buy', marker='^', color='green')\n",
|
||||
"plt.scatter(sell_signals.index, sell_signals['close'], label='Sell', marker='v', color='red')\n",
|
||||
"plt.plot(new_df.index, new_df['close'], label='Close')\n",
|
||||
"plt.xlabel('Date')\n",
|
||||
"plt.ylabel('Price')\n",
|
||||
"plt.title('Buy and Sell Signals')\n",
|
||||
"plt.legend()\n",
|
||||
"plt.show()\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"This will create a scatter plot that visualizes the buy and sell signals. The x-axis represents the date and the y-axis represents the price. The chart also includes a legend that labels each point. Next request.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI User (Stock Trader):\n",
|
||||
"\n",
|
||||
"Instruction: Print the total profit or loss for all trades.\n",
|
||||
"Input: None.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI Assistant (Python Programmer):\n",
|
||||
"\n",
|
||||
"Solution: We can print the total profit or loss for all trades using the `print()` function. We can use the following code to print the total profit or loss:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"print('Total Profit/Loss: {:.2%}'.format(total_pnl))\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"This will print the total profit or loss for all trades in percentage format with two decimal places. Next request.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI User (Stock Trader):\n",
|
||||
"\n",
|
||||
"Instruction: Finalize the code by putting all the code snippets together in a single Python script.\n",
|
||||
"Input: The necessary inputs for each code snippet.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI Assistant (Python Programmer):\n",
|
||||
"\n",
|
||||
"Solution: Here is the complete Python script that puts all the code snippets together:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import talib as ta\n",
|
||||
"\n",
|
||||
"# Load historical stock data into a pandas DataFrame\n",
|
||||
"df = pd.read_csv('path/to/csv/file.csv')\n",
|
||||
"\n",
|
||||
"# Preprocess the historical stock data\n",
|
||||
"df = df.set_index('date')\n",
|
||||
"df = df.sort_index(ascending=True)\n",
|
||||
"\n",
|
||||
"# Calculate the short-term and long-term moving averages\n",
|
||||
"short_period = 10\n",
|
||||
"long_period = 30\n",
|
||||
"short_ma = ta.SMA(df['close'], timeperiod=short_period)\n",
|
||||
"long_ma = ta.SMA(df['close'], timeperiod=long_period)\n",
|
||||
"\n",
|
||||
"# Create a new DataFrame that combines the historical stock data with the short-term and long-term moving averages\n",
|
||||
"new_df = pd.concat([df, short_ma, long_ma], axis=1)\n",
|
||||
"new_df.columns = ['open', 'high', 'low', 'close', 'volume', 'short_ma', 'long_ma']\n",
|
||||
"\n",
|
||||
"# Create a new column in the DataFrame that indicates when to buy or sell the stock based on the crossover of the short-term and long-term moving averages\n",
|
||||
"new_df['signal'] = np.where(new_df['short_ma'] > new_df['long_ma'], 1, -1)\n",
|
||||
"\n",
|
||||
"# Create a new column in the DataFrame that indicates the profit or loss for each trade based on the buy and sell signals and the defined stop loss and profit target\n",
|
||||
"stop_loss_percent = 5\n",
|
||||
"profit_target_percent = 10\n",
|
||||
"stop_loss = stop_loss_percent / 100\n",
|
||||
"profit_target = profit_target_percent / 100\n",
|
||||
"new_df['pnl'] = 0.0\n",
|
||||
"buy_price = 0.0\n",
|
||||
"for i in range(1, len(new_df)):\n",
|
||||
" if new_df['signal'][i] == 1 and new_df['signal'][i-1] == -1:\n",
|
||||
" buy_price = new_df['close'][i]\n",
|
||||
" elif new_df['signal'][i] == -1 and new_df['signal'][i-1] == 1:\n",
|
||||
" sell_price = new_df['close'][i]\n",
|
||||
" if sell_price <= buy_price * (1 - stop_loss):\n",
|
||||
" new_df['pnl'][i] = -stop_loss\n",
|
||||
" elif sell_price >= buy_price * (1 + profit_target):\n",
|
||||
" new_df['pnl'][i] = profit_target\n",
|
||||
" else:\n",
|
||||
" new_df['pnl'][i] = (sell_price - buy_price) / buy_price\n",
|
||||
"\n",
|
||||
"# Calculate the total profit or loss for all trades\n",
|
||||
"total_pnl = new_df['pnl'].sum()\n",
|
||||
"\n",
|
||||
"# Visualize the stock data, short-term moving average, and long-term moving average using a line chart\n",
|
||||
"plt.figure(figsize=(12,6))\n",
|
||||
"plt.plot(new_df.index, new_df['close'], label='Close')\n",
|
||||
"plt.plot(new_df.index, new_df['short_ma'], label='Short MA')\n",
|
||||
"plt.plot(new_df.index, new_df['long_ma'], label='Long MA')\n",
|
||||
"plt.xlabel('Date')\n",
|
||||
"plt.ylabel('Price')\n",
|
||||
"plt.title('Stock Data with Moving Averages')\n",
|
||||
"plt.legend()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"# Visualize the buy and sell signals using a scatter plot\n",
|
||||
"buy_signals = new_df[new_df['signal'] == 1]\n",
|
||||
"sell_signals = new_df[new_df['signal'] == -1]\n",
|
||||
"plt.figure(figsize=(12,6))\n",
|
||||
"plt.scatter(buy_signals.index, buy_signals['close'], label='Buy', marker='^', color='green')\n",
|
||||
"plt.scatter(sell_signals.index, sell_signals['close'], label='Sell', marker='v', color='red')\n",
|
||||
"plt.plot(new_df.index, new_df['close'], label='Close')\n",
|
||||
"plt.xlabel('Date')\n",
|
||||
"plt.ylabel('Price')\n",
|
||||
"plt.title('Buy and Sell Signals')\n",
|
||||
"plt.legend()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"# Print the total profit or loss for all trades\n",
|
||||
"print('Total Profit/Loss: {:.2%}'.format(total_pnl))\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You need to replace the path/to/csv/file.csv with the actual path to the CSV file containing the historical stock data. You can also adjust the short_period, long_period, stop_loss_percent, and profit_target_percent variables to suit your needs.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI User (Stock Trader):\n",
|
||||
"\n",
|
||||
"<CAMEL_TASK_DONE>\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"AI Assistant (Python Programmer):\n",
|
||||
"\n",
|
||||
"Great! Let me know if you need any further assistance.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(f\"Original task prompt:\\n{task}\\n\")\n",
|
||||
"print(f\"Specified task prompt:\\n{specified_task}\\n\")\n",
|
||||
"\n",
|
||||
"chat_turn_limit, n = 30, 0\n",
|
||||
"while n < chat_turn_limit:\n",
|
||||
" n += 1\n",
|
||||
" user_ai_msg = user_agent.step(assistant_msg)\n",
|
||||
" user_msg = HumanMessage(content=user_ai_msg.content)\n",
|
||||
" print(f\"AI User ({user_role_name}):\\n\\n{user_msg.content}\\n\\n\")\n",
|
||||
" \n",
|
||||
" assistant_ai_msg = assistant_agent.step(user_msg)\n",
|
||||
" assistant_msg = HumanMessage(content=assistant_ai_msg.content)\n",
|
||||
" print(f\"AI Assistant ({assistant_role_name}):\\n\\n{assistant_msg.content}\\n\\n\")\n",
|
||||
" if \"<CAMEL_TASK_DONE>\" in user_msg.content:\n",
|
||||
" break"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "camel",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,538 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ba5f8741",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Custom Agent with PlugIn Retrieval\n",
|
||||
"\n",
|
||||
"This notebook combines two concepts in order to build a custom agent that can interact with AI Plugins:\n",
|
||||
"\n",
|
||||
"1. [Custom Agent with Retrieval](../../modules/agents/agents/custom_agent_with_plugin_retrieval.ipynb): This introduces the concept of retrieving many tools, which is useful when trying to work with arbitrarily many plugins.\n",
|
||||
"2. [Natural Language API Chains](../../modules/chains/examples/openapi.ipynb): This creates Natural Language wrappers around OpenAPI endpoints. This is useful because (1) plugins use OpenAPI endpoints under the hood, (2) wrapping them in an NLAChain allows the router agent to call it more easily.\n",
|
||||
"\n",
|
||||
"The novel idea introduced in this notebook is the idea of using retrieval to select not the tools explicitly, but the set of OpenAPI specs to use. We can then generate tools from those OpenAPI specs. The use case for this is when trying to get agents to use plugins. It may be more efficient to choose plugins first, then the endpoints, rather than the endpoints directly. This is because the plugins may contain more useful information for selection."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fea4812c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up environment\n",
|
||||
"\n",
|
||||
"Do necessary imports, etc."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "9af9734e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser\n",
|
||||
"from langchain.prompts import StringPromptTemplate\n",
|
||||
"from langchain import OpenAI, SerpAPIWrapper, LLMChain\n",
|
||||
"from typing import List, Union\n",
|
||||
"from langchain.schema import AgentAction, AgentFinish\n",
|
||||
"from langchain.agents.agent_toolkits import NLAToolkit\n",
|
||||
"from langchain.tools.plugin import AIPlugin\n",
|
||||
"import re"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2f91d8b4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup LLM"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a1a3b59c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6df0253f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up plugins\n",
|
||||
"\n",
|
||||
"Load and index plugins"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "becda2a1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"urls = [\n",
|
||||
" \"https://datasette.io/.well-known/ai-plugin.json\",\n",
|
||||
" \"https://api.speak.com/.well-known/ai-plugin.json\",\n",
|
||||
" \"https://www.wolframalpha.com/.well-known/ai-plugin.json\",\n",
|
||||
" \"https://www.zapier.com/.well-known/ai-plugin.json\",\n",
|
||||
" \"https://www.klarna.com/.well-known/ai-plugin.json\",\n",
|
||||
" \"https://www.joinmilo.com/.well-known/ai-plugin.json\",\n",
|
||||
" \"https://slack.com/.well-known/ai-plugin.json\",\n",
|
||||
" \"https://schooldigger.com/.well-known/ai-plugin.json\",\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"AI_PLUGINS = [AIPlugin.from_url(url) for url in urls]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "17362717",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tool Retriever\n",
|
||||
"\n",
|
||||
"We will use a vectorstore to create embeddings for each tool description. Then, for an incoming query we can create embeddings for that query and do a similarity search for relevant tools."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "77c4be4b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.vectorstores import FAISS\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.schema import Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9092a158",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.2 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load a Swagger 2.0 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"docs = [\n",
|
||||
" Document(page_content=plugin.description_for_model, \n",
|
||||
" metadata={\"plugin_name\": plugin.name_for_model}\n",
|
||||
" )\n",
|
||||
" for plugin in AI_PLUGINS\n",
|
||||
"]\n",
|
||||
"vector_store = FAISS.from_documents(docs, embeddings)\n",
|
||||
"toolkits_dict = {plugin.name_for_model: \n",
|
||||
" NLAToolkit.from_llm_and_ai_plugin(llm, plugin) \n",
|
||||
" for plugin in AI_PLUGINS}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "735a7566",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = vector_store.as_retriever()\n",
|
||||
"\n",
|
||||
"def get_tools(query):\n",
|
||||
" # Get documents, which contain the Plugins to use\n",
|
||||
" docs = retriever.get_relevant_documents(query)\n",
|
||||
" # Get the toolkits, one for each plugin\n",
|
||||
" tool_kits = [toolkits_dict[d.metadata[\"plugin_name\"]] for d in docs]\n",
|
||||
" # Get the tools: a separate NLAChain for each endpoint\n",
|
||||
" tools = []\n",
|
||||
" for tk in tool_kits:\n",
|
||||
" tools.extend(tk.nla_tools)\n",
|
||||
" return tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7699afd7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can now test this retriever to see if it seems to work."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "425f2886",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['Milo.askMilo',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.search_all_actions',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.preview_a_zap',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.get_configuration_link',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.list_exposed_actions',\n",
|
||||
" 'SchoolDigger_API_V2.0.Autocomplete_GetSchools',\n",
|
||||
" 'SchoolDigger_API_V2.0.Districts_GetAllDistricts2',\n",
|
||||
" 'SchoolDigger_API_V2.0.Districts_GetDistrict2',\n",
|
||||
" 'SchoolDigger_API_V2.0.Rankings_GetSchoolRank2',\n",
|
||||
" 'SchoolDigger_API_V2.0.Rankings_GetRank_District',\n",
|
||||
" 'SchoolDigger_API_V2.0.Schools_GetAllSchools20',\n",
|
||||
" 'SchoolDigger_API_V2.0.Schools_GetSchool20',\n",
|
||||
" 'Speak.translate',\n",
|
||||
" 'Speak.explainPhrase',\n",
|
||||
" 'Speak.explainTask']"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = get_tools(\"What could I do today with my kiddo\")\n",
|
||||
"[t.name for t in tools]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "3aa88768",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['Open_AI_Klarna_product_Api.productsUsingGET',\n",
|
||||
" 'Milo.askMilo',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.search_all_actions',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.preview_a_zap',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.get_configuration_link',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.list_exposed_actions',\n",
|
||||
" 'SchoolDigger_API_V2.0.Autocomplete_GetSchools',\n",
|
||||
" 'SchoolDigger_API_V2.0.Districts_GetAllDistricts2',\n",
|
||||
" 'SchoolDigger_API_V2.0.Districts_GetDistrict2',\n",
|
||||
" 'SchoolDigger_API_V2.0.Rankings_GetSchoolRank2',\n",
|
||||
" 'SchoolDigger_API_V2.0.Rankings_GetRank_District',\n",
|
||||
" 'SchoolDigger_API_V2.0.Schools_GetAllSchools20',\n",
|
||||
" 'SchoolDigger_API_V2.0.Schools_GetSchool20']"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = get_tools(\"what shirts can i buy?\")\n",
|
||||
"[t.name for t in tools]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2e7a075c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prompt Template\n",
|
||||
"\n",
|
||||
"The prompt template is pretty standard, because we're not actually changing that much logic in the actual prompt template, but rather we are just changing how retrieval is done."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "339b1bb8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set up the base template\n",
|
||||
"template = \"\"\"Answer the following questions as best you can, but speaking as a pirate might speak. You have access to the following tools:\n",
|
||||
"\n",
|
||||
"{tools}\n",
|
||||
"\n",
|
||||
"Use the following format:\n",
|
||||
"\n",
|
||||
"Question: the input question you must answer\n",
|
||||
"Thought: you should always think about what to do\n",
|
||||
"Action: the action to take, should be one of [{tool_names}]\n",
|
||||
"Action Input: the input to the action\n",
|
||||
"Observation: the result of the action\n",
|
||||
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
|
||||
"Thought: I now know the final answer\n",
|
||||
"Final Answer: the final answer to the original input question\n",
|
||||
"\n",
|
||||
"Begin! Remember to speak as a pirate when giving your final answer. Use lots of \"Arg\"s\n",
|
||||
"\n",
|
||||
"Question: {input}\n",
|
||||
"{agent_scratchpad}\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1583acdc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The custom prompt template now has the concept of a tools_getter, which we call on the input to select the tools to use"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "fd969d31",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Callable\n",
|
||||
"# Set up a prompt template\n",
|
||||
"class CustomPromptTemplate(StringPromptTemplate):\n",
|
||||
" # The template to use\n",
|
||||
" template: str\n",
|
||||
" ############## NEW ######################\n",
|
||||
" # The list of tools available\n",
|
||||
" tools_getter: Callable\n",
|
||||
" \n",
|
||||
" def format(self, **kwargs) -> str:\n",
|
||||
" # Get the intermediate steps (AgentAction, Observation tuples)\n",
|
||||
" # Format them in a particular way\n",
|
||||
" intermediate_steps = kwargs.pop(\"intermediate_steps\")\n",
|
||||
" thoughts = \"\"\n",
|
||||
" for action, observation in intermediate_steps:\n",
|
||||
" thoughts += action.log\n",
|
||||
" thoughts += f\"\\nObservation: {observation}\\nThought: \"\n",
|
||||
" # Set the agent_scratchpad variable to that value\n",
|
||||
" kwargs[\"agent_scratchpad\"] = thoughts\n",
|
||||
" ############## NEW ######################\n",
|
||||
" tools = self.tools_getter(kwargs[\"input\"])\n",
|
||||
" # Create a tools variable from the list of tools provided\n",
|
||||
" kwargs[\"tools\"] = \"\\n\".join([f\"{tool.name}: {tool.description}\" for tool in tools])\n",
|
||||
" # Create a list of tool names for the tools provided\n",
|
||||
" kwargs[\"tool_names\"] = \", \".join([tool.name for tool in tools])\n",
|
||||
" return self.template.format(**kwargs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "798ef9fb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt = CustomPromptTemplate(\n",
|
||||
" template=template,\n",
|
||||
" tools_getter=get_tools,\n",
|
||||
" # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically\n",
|
||||
" # This includes the `intermediate_steps` variable because that is needed\n",
|
||||
" input_variables=[\"input\", \"intermediate_steps\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ef3a1af3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Output Parser\n",
|
||||
"\n",
|
||||
"The output parser is unchanged from the previous notebook, since we are not changing anything about the output format."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "7c6fe0d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class CustomOutputParser(AgentOutputParser):\n",
|
||||
" \n",
|
||||
" def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:\n",
|
||||
" # Check if agent should finish\n",
|
||||
" if \"Final Answer:\" in llm_output:\n",
|
||||
" return AgentFinish(\n",
|
||||
" # Return values is generally always a dictionary with a single `output` key\n",
|
||||
" # It is not recommended to try anything else at the moment :)\n",
|
||||
" return_values={\"output\": llm_output.split(\"Final Answer:\")[-1].strip()},\n",
|
||||
" log=llm_output,\n",
|
||||
" )\n",
|
||||
" # Parse out the action and action input\n",
|
||||
" regex = r\"Action\\s*\\d*\\s*:(.*?)\\nAction\\s*\\d*\\s*Input\\s*\\d*\\s*:[\\s]*(.*)\"\n",
|
||||
" match = re.search(regex, llm_output, re.DOTALL)\n",
|
||||
" if not match:\n",
|
||||
" raise ValueError(f\"Could not parse LLM output: `{llm_output}`\")\n",
|
||||
" action = match.group(1).strip()\n",
|
||||
" action_input = match.group(2)\n",
|
||||
" # Return the action and action input\n",
|
||||
" return AgentAction(tool=action, tool_input=action_input.strip(\" \").strip('\"'), log=llm_output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "d278706a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"output_parser = CustomOutputParser()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "170587b1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up LLM, stop sequence, and the agent\n",
|
||||
"\n",
|
||||
"Also the same as the previous notebook"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "f9d4c374",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "9b1cc2a2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# LLM chain consisting of the LLM and a prompt\n",
|
||||
"llm_chain = LLMChain(llm=llm, prompt=prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "e4f5092f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tool_names = [tool.name for tool in tools]\n",
|
||||
"agent = LLMSingleActionAgent(\n",
|
||||
" llm_chain=llm_chain, \n",
|
||||
" output_parser=output_parser,\n",
|
||||
" stop=[\"\\nObservation:\"], \n",
|
||||
" allowed_tools=tool_names\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "aa8a5326",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use the Agent\n",
|
||||
"\n",
|
||||
"Now we can use it!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "490604e9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "653b1617",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find a product API\n",
|
||||
"Action: Open_AI_Klarna_product_Api.productsUsingGET\n",
|
||||
"Action Input: shirts\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[36;1m\u001b[1;3mI found 10 shirts from the API response. They range in price from $9.99 to $450.00 and come in a variety of materials, colors, and patterns.\u001b[0m\u001b[32;1m\u001b[1;3m I now know what shirts I can buy\n",
|
||||
"Final Answer: Arg, I found 10 shirts from the API response. They range in price from $9.99 to $450.00 and come in a variety of materials, colors, and patterns.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Arg, I found 10 shirts from the API response. They range in price from $9.99 to $450.00 and come in a variety of materials, colors, and patterns.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.run(\"what shirts can i buy?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2481ee76",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "18784188d7ecd866c0586ac068b02361a6896dc3a29b64f5cc957f09c590acef"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
# Code Understanding
|
||||
|
||||
Overview
|
||||
|
||||
LangChain is a useful tool designed to parse GitHub code repositories. By leveraging VectorStores, Conversational RetrieverChain, and GPT-4, it can answer questions in the context of an entire GitHub repository or generate new code. This documentation page outlines the essential components of the system and guides using LangChain for better code comprehension, contextual question answering, and code generation in GitHub repositories.
|
||||
|
||||
## Conversational Retriever Chain
|
||||
|
||||
Conversational RetrieverChain is a retrieval-focused system that interacts with the data stored in a VectorStore. Utilizing advanced techniques, like context-aware filtering and ranking, it retrieves the most relevant code snippets and information for a given user query. Conversational RetrieverChain is engineered to deliver high-quality, pertinent results while considering conversation history and context.
|
||||
|
||||
LangChain Workflow for Code Understanding and Generation
|
||||
|
||||
1. Index the code base: Clone the target repository, load all files within, chunk the files, and execute the indexing process. Optionally, you can skip this step and use an already indexed dataset.
|
||||
|
||||
2. Embedding and Code Store: Code snippets are embedded using a code-aware embedding model and stored in a VectorStore.
|
||||
Query Understanding: GPT-4 processes user queries, grasping the context and extracting relevant details.
|
||||
|
||||
3. Construct the Retriever: Conversational RetrieverChain searches the VectorStore to identify the most relevant code snippets for a given query.
|
||||
|
||||
4. Build the Conversational Chain: Customize the retriever settings and define any user-defined filters as needed.
|
||||
|
||||
5. Ask questions: Define a list of questions to ask about the codebase, and then use the ConversationalRetrievalChain to generate context-aware answers. The LLM (GPT-4) generates comprehensive, context-aware answers based on retrieved code snippets and conversation history.
|
||||
|
||||
The full tutorial is available below.
|
||||
- [Twitter the-algorithm codebase analysis with Deep Lake](code/twitter-the-algorithm-analysis-deeplake.ipynb): A notebook walking through how to parse github source code and run queries conversation.
|
||||
- [LangChain codebase analysis with Deep Lake](code/code-analysis-deeplake.ipynb): A notebook walking through how to analyze and do question answering over THIS code base.
|
||||
@@ -1,644 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Use LangChain, GPT and Deep Lake to work with code base\n",
|
||||
"In this tutorial, we are going to use Langchain + Deep Lake with GPT to analyze the code base of the LangChain itself. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Design"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"1. Prepare data:\n",
|
||||
" 1. Upload all python project files using the `langchain.document_loaders.TextLoader`. We will call these files the **documents**.\n",
|
||||
" 2. Split all documents to chunks using the `langchain.text_splitter.CharacterTextSplitter`.\n",
|
||||
" 3. Embed chunks and upload them into the DeepLake using `langchain.embeddings.openai.OpenAIEmbeddings` and `langchain.vectorstores.DeepLake`\n",
|
||||
"2. Question-Answering:\n",
|
||||
" 1. Build a chain from `langchain.chat_models.ChatOpenAI` and `langchain.chains.ConversationalRetrievalChain`\n",
|
||||
" 2. Prepare questions.\n",
|
||||
" 3. Get answers running the chain.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Implementation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"### Integration preparations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We need to set up keys for external services and install necessary python libraries."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!python3 -m pip install --upgrade langchain deeplake openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Set up OpenAI embeddings, Deep Lake multi-modal vector store api and authenticate. \n",
|
||||
"\n",
|
||||
"For full documentation of Deep Lake please follow https://docs.activeloop.ai/ and API reference https://docs.deeplake.ai/en/latest/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"os.environ['OPENAI_API_KEY'] = getpass()\n",
|
||||
"# Please manually enter OpenAI Key"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Authenticate into Deep Lake if you want to create your own dataset and publish it. You can get an API key from the platform at [app.activeloop.ai](https://app.activeloop.ai)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"os.environ['ACTIVELOOP_TOKEN'] = getpass.getpass('Activeloop Token:')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prepare data "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Load all repository files. Here we assume this notebook is downloaded as the part of the langchain fork and we work with the python files of the `langchain` repo.\n",
|
||||
"\n",
|
||||
"If you want to use files from different repo, change `root_dir` to the root dir of your repo."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1147\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"root_dir = '../../../..'\n",
|
||||
"\n",
|
||||
"docs = []\n",
|
||||
"for dirpath, dirnames, filenames in os.walk(root_dir):\n",
|
||||
" for file in filenames:\n",
|
||||
" if file.endswith('.py') and '/.venv/' not in dirpath:\n",
|
||||
" try: \n",
|
||||
" loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')\n",
|
||||
" docs.extend(loader.load_and_split())\n",
|
||||
" except Exception as e: \n",
|
||||
" pass\n",
|
||||
"print(f'{len(docs)}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then, chunk the files"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Created a chunk of size 1620, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1213, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1263, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1448, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1120, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1148, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1826, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1260, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1195, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2147, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1410, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1269, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1030, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1046, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1024, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1026, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1285, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1370, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1031, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1999, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1029, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1120, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1033, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1143, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1416, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2482, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1890, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1418, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1848, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1069, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2369, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1045, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1501, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1208, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1950, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1283, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1414, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1304, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1224, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1060, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2461, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1099, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1178, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1449, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1345, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 3359, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2248, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1589, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2104, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1505, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1387, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1215, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1240, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1635, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1075, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2180, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1791, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1555, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1082, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1225, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1287, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1085, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1117, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1966, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1150, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1285, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1150, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1585, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1208, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1267, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1542, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1183, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2424, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1017, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1304, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1379, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1324, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1205, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1056, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1195, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 3608, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1058, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1075, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1217, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1109, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1440, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1046, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1220, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1403, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1241, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1427, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1049, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1580, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1565, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1131, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1425, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1054, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1027, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2559, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1028, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1382, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1888, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1475, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1652, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1891, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1899, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1021, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1085, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1854, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1672, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2537, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1251, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1734, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1642, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1376, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1253, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1642, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1419, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1438, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1427, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1684, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1760, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1157, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2504, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1082, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2268, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1784, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1311, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2972, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1144, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1825, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1508, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2901, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1715, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1062, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1206, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1102, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1184, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1002, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1065, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1871, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1754, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2413, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1771, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2054, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2000, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2061, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1066, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1419, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1368, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1008, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1227, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1745, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2296, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1083, which is longer than the specified 1000\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"3477\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_documents(docs)\n",
|
||||
"print(f\"{len(texts)}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then embed chunks and upload them to the DeepLake.\n",
|
||||
"\n",
|
||||
"This can take several minutes. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', document_model_name='text-embedding-ada-002', query_model_name='text-embedding-ada-002', embedding_ctx_length=8191, openai_api_key=None, openai_organization=None, allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=6)"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.vectorstores import DeepLake\n",
|
||||
"\n",
|
||||
"db = DeepLake.from_documents(texts, embeddings, dataset_path=f\"hub://{DEEPLAKE_ACCOUNT_NAME}/langchain-code\")\n",
|
||||
"db"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Question Answering\n",
|
||||
"First load the dataset, construct the retriever, then construct the Conversational Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/user_name/langchain-code\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"hub://user_name/langchain-code loaded successfully.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Deep Lake Dataset in hub://user_name/langchain-code already exists, loading from the storage\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://user_name/langchain-code', read_only=True, tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (3477, 1536) float32 None \n",
|
||||
" ids text (3477, 1) str None \n",
|
||||
" metadata json (3477, 1) str None \n",
|
||||
" text text (3477, 1) str None \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db = DeepLake(dataset_path=f\"hub://{DEEPLAKE_ACCOUNT_NAME}/langchain-code\", read_only=True, embedding_function=embeddings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = db.as_retriever()\n",
|
||||
"retriever.search_kwargs['distance_metric'] = 'cos'\n",
|
||||
"retriever.search_kwargs['fetch_k'] = 20\n",
|
||||
"retriever.search_kwargs['maximal_marginal_relevance'] = True\n",
|
||||
"retriever.search_kwargs['k'] = 20"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also specify user defined functions using [Deep Lake filters](https://docs.deeplake.ai/en/latest/deeplake.core.dataset.html#deeplake.core.dataset.Dataset.filter)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def filter(x):\n",
|
||||
" # filter based on source code\n",
|
||||
" if 'something' in x['text'].data()['value']:\n",
|
||||
" return False\n",
|
||||
" \n",
|
||||
" # filter based on path e.g. extension\n",
|
||||
" metadata = x['metadata'].data()['value']\n",
|
||||
" return 'only_this' in metadata['source'] or 'also_that' in metadata['source']\n",
|
||||
"\n",
|
||||
"### turn on below for custom filtering\n",
|
||||
"# retriever.search_kwargs['filter'] = filter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||
"\n",
|
||||
"model = ChatOpenAI(model='gpt-3.5-turbo') # 'ada' 'gpt-3.5-turbo' 'gpt-4',\n",
|
||||
"qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"questions = [\n",
|
||||
" \"What is the class hierarchy?\",\n",
|
||||
" # \"What classes are derived from the Chain class?\",\n",
|
||||
" # \"What classes and functions in the ./langchain/utilities/ forlder are not covered by unit tests?\",\n",
|
||||
" # \"What one improvement do you propose in code in relation to the class herarchy for the Chain class?\",\n",
|
||||
"] \n",
|
||||
"chat_history = []\n",
|
||||
"\n",
|
||||
"for question in questions: \n",
|
||||
" result = qa({\"question\": question, \"chat_history\": chat_history})\n",
|
||||
" chat_history.append((question, result['answer']))\n",
|
||||
" print(f\"-> **Question**: {question} \\n\")\n",
|
||||
" print(f\"**Answer**: {result['answer']} \\n\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"-> **Question**: What is the class hierarchy? \n",
|
||||
"\n",
|
||||
"**Answer**: There are several class hierarchies in the provided code, so I'll list a few:\n",
|
||||
"\n",
|
||||
"1. `BaseModel` -> `ConstitutionalPrinciple`: `ConstitutionalPrinciple` is a subclass of `BaseModel`.\n",
|
||||
"2. `BasePromptTemplate` -> `StringPromptTemplate`, `AIMessagePromptTemplate`, `BaseChatPromptTemplate`, `ChatMessagePromptTemplate`, `ChatPromptTemplate`, `HumanMessagePromptTemplate`, `MessagesPlaceholder`, `SystemMessagePromptTemplate`, `FewShotPromptTemplate`, `FewShotPromptWithTemplates`, `Prompt`, `PromptTemplate`: All of these classes are subclasses of `BasePromptTemplate`.\n",
|
||||
"3. `APIChain`, `Chain`, `MapReduceDocumentsChain`, `MapRerankDocumentsChain`, `RefineDocumentsChain`, `StuffDocumentsChain`, `HypotheticalDocumentEmbedder`, `LLMChain`, `LLMBashChain`, `LLMCheckerChain`, `LLMMathChain`, `LLMRequestsChain`, `PALChain`, `QAWithSourcesChain`, `VectorDBQAWithSourcesChain`, `VectorDBQA`, `SQLDatabaseChain`: All of these classes are subclasses of `Chain`.\n",
|
||||
"4. `BaseLoader`: `BaseLoader` is a subclass of `ABC`.\n",
|
||||
"5. `BaseTracer` -> `ChainRun`, `LLMRun`, `SharedTracer`, `ToolRun`, `Tracer`, `TracerException`, `TracerSession`: All of these classes are subclasses of `BaseTracer`.\n",
|
||||
"6. `OpenAIEmbeddings`, `HuggingFaceEmbeddings`, `CohereEmbeddings`, `JinaEmbeddings`, `LlamaCppEmbeddings`, `HuggingFaceHubEmbeddings`, `TensorflowHubEmbeddings`, `SagemakerEndpointEmbeddings`, `HuggingFaceInstructEmbeddings`, `SelfHostedEmbeddings`, `SelfHostedHuggingFaceEmbeddings`, `SelfHostedHuggingFaceInstructEmbeddings`, `FakeEmbeddings`, `AlephAlphaAsymmetricSemanticEmbedding`, `AlephAlphaSymmetricSemanticEmbedding`: All of these classes are subclasses of `BaseLLM`. \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"-> **Question**: What classes are derived from the Chain class? \n",
|
||||
"\n",
|
||||
"**Answer**: There are multiple classes that are derived from the Chain class. Some of them are:\n",
|
||||
"- APIChain\n",
|
||||
"- AnalyzeDocumentChain\n",
|
||||
"- ChatVectorDBChain\n",
|
||||
"- CombineDocumentsChain\n",
|
||||
"- ConstitutionalChain\n",
|
||||
"- ConversationChain\n",
|
||||
"- GraphQAChain\n",
|
||||
"- HypotheticalDocumentEmbedder\n",
|
||||
"- LLMChain\n",
|
||||
"- LLMCheckerChain\n",
|
||||
"- LLMRequestsChain\n",
|
||||
"- LLMSummarizationCheckerChain\n",
|
||||
"- MapReduceChain\n",
|
||||
"- OpenAPIEndpointChain\n",
|
||||
"- PALChain\n",
|
||||
"- QAWithSourcesChain\n",
|
||||
"- RetrievalQA\n",
|
||||
"- RetrievalQAWithSourcesChain\n",
|
||||
"- SequentialChain\n",
|
||||
"- SQLDatabaseChain\n",
|
||||
"- TransformChain\n",
|
||||
"- VectorDBQA\n",
|
||||
"- VectorDBQAWithSourcesChain\n",
|
||||
"\n",
|
||||
"There might be more classes that are derived from the Chain class as it is possible to create custom classes that extend the Chain class.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"-> **Question**: What classes and functions in the ./langchain/utilities/ forlder are not covered by unit tests? \n",
|
||||
"\n",
|
||||
"**Answer**: All classes and functions in the `./langchain/utilities/` folder seem to have unit tests written for them. \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,446 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Analysis of Twitter the-algorithm source code with LangChain, GPT4 and Deep Lake\n",
|
||||
"In this tutorial, we are going to use Langchain + Deep Lake with GPT4 to analyze the code base of the twitter algorithm. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!python3 -m pip install --upgrade langchain deeplake openai tiktoken"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Define OpenAI embeddings, Deep Lake multi-modal vector store api and authenticate. For full documentation of Deep Lake please follow [docs](https://docs.activeloop.ai/) and [API reference](https://docs.deeplake.ai/en/latest/).\n",
|
||||
"\n",
|
||||
"Authenticate into Deep Lake if you want to create your own dataset and publish it. You can get an API key from the [platform](https://app.activeloop.ai)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores import DeepLake\n",
|
||||
"\n",
|
||||
"os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')\n",
|
||||
"os.environ['ACTIVELOOP_TOKEN'] = getpass.getpass('Activeloop Token:')\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 1. Index the code base (optional)\n",
|
||||
"You can directly skip this part and directly jump into using already indexed dataset. To begin with, first we will clone the repository, then parse and chunk the code base and use OpenAI indexing."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!git clone https://github.com/twitter/the-algorithm # replace any repository of your choice "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Load all files inside the repository"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"root_dir = './the-algorithm'\n",
|
||||
"docs = []\n",
|
||||
"for dirpath, dirnames, filenames in os.walk(root_dir):\n",
|
||||
" for file in filenames:\n",
|
||||
" try: \n",
|
||||
" loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')\n",
|
||||
" docs.extend(loader.load_and_split())\n",
|
||||
" except Exception as e: \n",
|
||||
" pass"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then, chunk the files"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_documents(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Execute the indexing. This will take about ~4 mins to compute embeddings and upload to Activeloop. You can then publish the dataset to be public."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = DeepLake.from_documents(texts, embeddings, dataset_path=\"hub://davitbun/twitter-algorithm\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2. Question Answering on Twitter algorithm codebase\n",
|
||||
"First load the dataset, construct the retriever, then construct the Conversational Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/twitter-algorithm\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"hub://davitbun/twitter-algorithm loaded successfully.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Deep Lake Dataset in hub://davitbun/twitter-algorithm already exists, loading from the storage\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://davitbun/twitter-algorithm', read_only=True, tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (23152, 1536) float32 None \n",
|
||||
" ids text (23152, 1) str None \n",
|
||||
" metadata json (23152, 1) str None \n",
|
||||
" text text (23152, 1) str None \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db = DeepLake(dataset_path=\"hub://davitbun/twitter-algorithm\", read_only=True, embedding_function=embeddings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"retriever = db.as_retriever()\n",
|
||||
"retriever.search_kwargs['distance_metric'] = 'cos'\n",
|
||||
"retriever.search_kwargs['fetch_k'] = 100\n",
|
||||
"retriever.search_kwargs['maximal_marginal_relevance'] = True\n",
|
||||
"retriever.search_kwargs['k'] = 20"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also specify user defined functions using [Deep Lake filters](https://docs.deeplake.ai/en/latest/deeplake.core.dataset.html#deeplake.core.dataset.Dataset.filter)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def filter(x):\n",
|
||||
" # filter based on source code\n",
|
||||
" if 'com.google' in x['text'].data()['value']:\n",
|
||||
" return False\n",
|
||||
" \n",
|
||||
" # filter based on path e.g. extension\n",
|
||||
" metadata = x['metadata'].data()['value']\n",
|
||||
" return 'scala' in metadata['source'] or 'py' in metadata['source']\n",
|
||||
"\n",
|
||||
"### turn on below for custom filtering\n",
|
||||
"# retriever.search_kwargs['filter'] = filter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||
"\n",
|
||||
"model = ChatOpenAI(model='gpt-4') # 'gpt-3.5-turbo',\n",
|
||||
"qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"questions = [\n",
|
||||
" \"What does favCountParams do?\",\n",
|
||||
" \"is it Likes + Bookmarks, or not clear from the code?\",\n",
|
||||
" \"What are the major negative modifiers that lower your linear ranking parameters?\", \n",
|
||||
" \"How do you get assigned to SimClusters?\",\n",
|
||||
" \"What is needed to migrate from one SimClusters to another SimClusters?\",\n",
|
||||
" \"How much do I get boosted within my cluster?\", \n",
|
||||
" \"How does Heavy ranker work. what are it’s main inputs?\",\n",
|
||||
" \"How can one influence Heavy ranker?\",\n",
|
||||
" \"why threads and long tweets do so well on the platform?\",\n",
|
||||
" \"Are thread and long tweet creators building a following that reacts to only threads?\",\n",
|
||||
" \"Do you need to follow different strategies to get most followers vs to get most likes and bookmarks per tweet?\",\n",
|
||||
" \"Content meta data and how it impacts virality (e.g. ALT in images).\",\n",
|
||||
" \"What are some unexpected fingerprints for spam factors?\",\n",
|
||||
" \"Is there any difference between company verified checkmarks and blue verified individual checkmarks?\",\n",
|
||||
"] \n",
|
||||
"chat_history = []\n",
|
||||
"\n",
|
||||
"for question in questions: \n",
|
||||
" result = qa({\"question\": question, \"chat_history\": chat_history})\n",
|
||||
" chat_history.append((question, result['answer']))\n",
|
||||
" print(f\"-> **Question**: {question} \\n\")\n",
|
||||
" print(f\"**Answer**: {result['answer']} \\n\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"-> **Question**: What does favCountParams do? \n",
|
||||
"\n",
|
||||
"**Answer**: `favCountParams` is an optional ThriftLinearFeatureRankingParams instance that represents the parameters related to the \"favorite count\" feature in the ranking process. It is used to control the weight of the favorite count feature while ranking tweets. The favorite count is the number of times a tweet has been marked as a favorite by users, and it is considered an important signal in the ranking of tweets. By using `favCountParams`, the system can adjust the importance of the favorite count while calculating the final ranking score of a tweet. \n",
|
||||
"\n",
|
||||
"-> **Question**: is it Likes + Bookmarks, or not clear from the code?\n",
|
||||
"\n",
|
||||
"**Answer**: From the provided code, it is not clear if the favorite count metric is determined by the sum of likes and bookmarks. The favorite count is mentioned in the code, but there is no explicit reference to how it is calculated in terms of likes and bookmarks. \n",
|
||||
"\n",
|
||||
"-> **Question**: What are the major negative modifiers that lower your linear ranking parameters?\n",
|
||||
"\n",
|
||||
"**Answer**: In the given code, major negative modifiers that lower the linear ranking parameters are:\n",
|
||||
"\n",
|
||||
"1. `scoringData.querySpecificScore`: This score adjustment is based on the query-specific information. If its value is negative, it will lower the linear ranking parameters.\n",
|
||||
"\n",
|
||||
"2. `scoringData.authorSpecificScore`: This score adjustment is based on the author-specific information. If its value is negative, it will also lower the linear ranking parameters.\n",
|
||||
"\n",
|
||||
"Please note that I cannot provide more information on the exact calculations of these negative modifiers, as the code for their determination is not provided. \n",
|
||||
"\n",
|
||||
"-> **Question**: How do you get assigned to SimClusters?\n",
|
||||
"\n",
|
||||
"**Answer**: The assignment to SimClusters occurs through a Metropolis-Hastings sampling-based community detection algorithm that is run on the Producer-Producer similarity graph. This graph is created by computing the cosine similarity scores between the users who follow each producer. The algorithm identifies communities or clusters of Producers with similar followers, and takes a parameter *k* for specifying the number of communities to be detected.\n",
|
||||
"\n",
|
||||
"After the community detection, different users and content are represented as sparse, interpretable vectors within these identified communities (SimClusters). The resulting SimClusters embeddings can be used for various recommendation tasks. \n",
|
||||
"\n",
|
||||
"-> **Question**: What is needed to migrate from one SimClusters to another SimClusters?\n",
|
||||
"\n",
|
||||
"**Answer**: To migrate from one SimClusters representation to another, you can follow these general steps:\n",
|
||||
"\n",
|
||||
"1. **Prepare the new representation**: Create the new SimClusters representation using any necessary updates or changes in the clustering algorithm, similarity measures, or other model parameters. Ensure that this new representation is properly stored and indexed as needed.\n",
|
||||
"\n",
|
||||
"2. **Update the relevant code and configurations**: Modify the relevant code and configuration files to reference the new SimClusters representation. This may involve updating paths or dataset names to point to the new representation, as well as changing code to use the new clustering method or similarity functions if applicable.\n",
|
||||
"\n",
|
||||
"3. **Test the new representation**: Before deploying the changes to production, thoroughly test the new SimClusters representation to ensure its effectiveness and stability. This may involve running offline jobs like candidate generation and label candidates, validating the output, as well as testing the new representation in the evaluation environment using evaluation tools like TweetSimilarityEvaluationAdhocApp.\n",
|
||||
"\n",
|
||||
"4. **Deploy the changes**: Once the new representation has been tested and validated, deploy the changes to production. This may involve creating a zip file, uploading it to the packer, and then scheduling it with Aurora. Be sure to monitor the system to ensure a smooth transition between representations and verify that the new representation is being used in recommendations as expected.\n",
|
||||
"\n",
|
||||
"5. **Monitor and assess the new representation**: After the new representation has been deployed, continue to monitor its performance and impact on recommendations. Take note of any improvements or issues that arise and be prepared to iterate on the new representation if needed. Always ensure that the results and performance metrics align with the system's goals and objectives. \n",
|
||||
"\n",
|
||||
"-> **Question**: How much do I get boosted within my cluster?\n",
|
||||
"\n",
|
||||
"**Answer**: It's not possible to determine the exact amount your content is boosted within your cluster in the SimClusters representation without specific data about your content and its engagement metrics. However, a combination of factors, such as the favorite score and follow score, alongside other engagement signals and SimCluster calculations, influence the boosting of content. \n",
|
||||
"\n",
|
||||
"-> **Question**: How does Heavy ranker work. what are it’s main inputs?\n",
|
||||
"\n",
|
||||
"**Answer**: The Heavy Ranker is a machine learning model that plays a crucial role in ranking and scoring candidates within the recommendation algorithm. Its primary purpose is to predict the likelihood of a user engaging with a tweet or connecting with another user on the platform.\n",
|
||||
"\n",
|
||||
"Main inputs to the Heavy Ranker consist of:\n",
|
||||
"\n",
|
||||
"1. Static Features: These are features that can be computed directly from a tweet at the time it's created, such as whether it has a URL, has cards, has quotes, etc. These features are produced by the Index Ingester as the tweets are generated and stored in the index.\n",
|
||||
"\n",
|
||||
"2. Real-time Features: These per-tweet features can change after the tweet has been indexed. They mostly consist of social engagements like retweet count, favorite count, reply count, and some spam signals that are computed with later activities. The Signal Ingester, which is part of a Heron topology, processes multiple event streams to collect and compute these real-time features.\n",
|
||||
"\n",
|
||||
"3. User Table Features: These per-user features are obtained from the User Table Updater that processes a stream written by the user service. This input is used to store sparse real-time user information, which is later propagated to the tweet being scored by looking up the author of the tweet.\n",
|
||||
"\n",
|
||||
"4. Search Context Features: These features represent the context of the current searcher, like their UI language, their content consumption, and the current time (implied). They are combined with Tweet Data to compute some of the features used in scoring.\n",
|
||||
"\n",
|
||||
"These inputs are then processed by the Heavy Ranker to score and rank candidates based on their relevance and likelihood of engagement by the user. \n",
|
||||
"\n",
|
||||
"-> **Question**: How can one influence Heavy ranker?\n",
|
||||
"\n",
|
||||
"**Answer**: To influence the Heavy Ranker's output or ranking of content, consider the following actions:\n",
|
||||
"\n",
|
||||
"1. Improve content quality: Create high-quality and engaging content that is relevant, informative, and valuable to users. High-quality content is more likely to receive positive user engagement, which the Heavy Ranker considers when ranking content.\n",
|
||||
"\n",
|
||||
"2. Increase user engagement: Encourage users to interact with content through likes, retweets, replies, and comments. Higher engagement levels can lead to better ranking in the Heavy Ranker's output.\n",
|
||||
"\n",
|
||||
"3. Optimize your user profile: A user's reputation, based on factors such as their follower count and follower-to-following ratio, may impact the ranking of their content. Maintain a good reputation by following relevant users, keeping a reasonable follower-to-following ratio and engaging with your followers.\n",
|
||||
"\n",
|
||||
"4. Enhance content discoverability: Use relevant keywords, hashtags, and mentions in your tweets, making it easier for users to find and engage with your content. This increased discoverability may help improve the ranking of your content by the Heavy Ranker.\n",
|
||||
"\n",
|
||||
"5. Leverage multimedia content: Experiment with different content formats, such as videos, images, and GIFs, which may capture users' attention and increase engagement, resulting in better ranking by the Heavy Ranker.\n",
|
||||
"\n",
|
||||
"6. User feedback: Monitor and respond to feedback for your content. Positive feedback may improve your ranking, while negative feedback provides an opportunity to learn and improve.\n",
|
||||
"\n",
|
||||
"Note that the Heavy Ranker uses a combination of machine learning models and various features to rank the content. While the above actions may help influence the ranking, there are no guarantees as the ranking process is determined by a complex algorithm, which evolves over time. \n",
|
||||
"\n",
|
||||
"-> **Question**: why threads and long tweets do so well on the platform?\n",
|
||||
"\n",
|
||||
"**Answer**: Threads and long tweets perform well on the platform for several reasons:\n",
|
||||
"\n",
|
||||
"1. **More content and context**: Threads and long tweets provide more information and context about a topic, which can make the content more engaging and informative for users. People tend to appreciate a well-structured and detailed explanation of a subject or a story, and threads and long tweets can do that effectively.\n",
|
||||
"\n",
|
||||
"2. **Increased user engagement**: As threads and long tweets provide more content, they also encourage users to engage with the tweets through replies, retweets, and likes. This increased engagement can lead to better visibility of the content, as the Twitter algorithm considers user engagement when ranking and surfacing tweets.\n",
|
||||
"\n",
|
||||
"3. **Narrative structure**: Threads enable users to tell stories or present arguments in a step-by-step manner, making the information more accessible and easier to follow. This narrative structure can capture users' attention and encourage them to read through the entire thread and interact with the content.\n",
|
||||
"\n",
|
||||
"4. **Expanded reach**: When users engage with a thread, their interactions can bring the content to the attention of their followers, helping to expand the reach of the thread. This increased visibility can lead to more interactions and higher performance for the threaded tweets.\n",
|
||||
"\n",
|
||||
"5. **Higher content quality**: Generally, threads and long tweets require more thought and effort to create, which may lead to higher quality content. Users are more likely to appreciate and interact with high-quality, well-reasoned content, further improving the performance of these tweets within the platform.\n",
|
||||
"\n",
|
||||
"Overall, threads and long tweets perform well on Twitter because they encourage user engagement and provide a richer, more informative experience that users find valuable. \n",
|
||||
"\n",
|
||||
"-> **Question**: Are thread and long tweet creators building a following that reacts to only threads?\n",
|
||||
"\n",
|
||||
"**Answer**: Based on the provided code and context, there isn't enough information to conclude if the creators of threads and long tweets primarily build a following that engages with only thread-based content. The code provided is focused on Twitter's recommendation and ranking algorithms, as well as infrastructure components like Kafka, partitions, and the Follow Recommendations Service (FRS). To answer your question, data analysis of user engagement and results of specific edge cases would be required. \n",
|
||||
"\n",
|
||||
"-> **Question**: Do you need to follow different strategies to get most followers vs to get most likes and bookmarks per tweet?\n",
|
||||
"\n",
|
||||
"**Answer**: Yes, different strategies need to be followed to maximize the number of followers compared to maximizing likes and bookmarks per tweet. While there may be some overlap in the approaches, they target different aspects of user engagement.\n",
|
||||
"\n",
|
||||
"Maximizing followers: The primary focus is on growing your audience on the platform. Strategies include:\n",
|
||||
"\n",
|
||||
"1. Consistently sharing high-quality content related to your niche or industry.\n",
|
||||
"2. Engaging with others on the platform by replying, retweeting, and mentioning other users.\n",
|
||||
"3. Using relevant hashtags and participating in trending conversations.\n",
|
||||
"4. Collaborating with influencers and other users with a large following.\n",
|
||||
"5. Posting at optimal times when your target audience is most active.\n",
|
||||
"6. Optimizing your profile by using a clear profile picture, catchy bio, and relevant links.\n",
|
||||
"\n",
|
||||
"Maximizing likes and bookmarks per tweet: The focus is on creating content that resonates with your existing audience and encourages engagement. Strategies include:\n",
|
||||
"\n",
|
||||
"1. Crafting engaging and well-written tweets that encourage users to like or save them.\n",
|
||||
"2. Incorporating visually appealing elements, such as images, GIFs, or videos, that capture attention.\n",
|
||||
"3. Asking questions, sharing opinions, or sparking conversations that encourage users to engage with your tweets.\n",
|
||||
"4. Using analytics to understand the type of content that resonates with your audience and tailoring your tweets accordingly.\n",
|
||||
"5. Posting a mix of educational, entertaining, and promotional content to maintain variety and interest.\n",
|
||||
"6. Timing your tweets strategically to maximize engagement, likes, and bookmarks per tweet.\n",
|
||||
"\n",
|
||||
"Both strategies can overlap, and you may need to adapt your approach by understanding your target audience's preferences and analyzing your account's performance. However, it's essential to recognize that maximizing followers and maximizing likes and bookmarks per tweet have different focuses and require specific strategies. \n",
|
||||
"\n",
|
||||
"-> **Question**: Content meta data and how it impacts virality (e.g. ALT in images).\n",
|
||||
"\n",
|
||||
"**Answer**: There is no direct information in the provided context about how content metadata, such as ALT text in images, impacts the virality of a tweet or post. However, it's worth noting that including ALT text can improve the accessibility of your content for users who rely on screen readers, which may lead to increased engagement for a broader audience. Additionally, metadata can be used in search engine optimization, which might improve the visibility of the content, but the context provided does not mention any specific correlation with virality. \n",
|
||||
"\n",
|
||||
"-> **Question**: What are some unexpected fingerprints for spam factors?\n",
|
||||
"\n",
|
||||
"**Answer**: In the provided context, an unusual indicator of spam factors is when a tweet contains a non-media, non-news link. If the tweet has a link but does not have an image URL, video URL, or news URL, it is considered a potential spam vector, and a threshold for user reputation (tweepCredThreshold) is set to MIN_TWEEPCRED_WITH_LINK.\n",
|
||||
"\n",
|
||||
"While this rule may not cover all possible unusual spam indicators, it is derived from the specific codebase and logic shared in the context. \n",
|
||||
"\n",
|
||||
"-> **Question**: Is there any difference between company verified checkmarks and blue verified individual checkmarks?\n",
|
||||
"\n",
|
||||
"**Answer**: Yes, there is a distinction between the verified checkmarks for companies and blue verified checkmarks for individuals. The code snippet provided mentions \"Blue-verified account boost\" which indicates that there is a separate category for blue verified accounts. Typically, blue verified checkmarks are used to indicate notable individuals, while verified checkmarks are for companies or organizations. \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
Evaluation
|
||||
==========
|
||||
==============
|
||||
|
||||
.. note::
|
||||
`Conceptual Guide <https://docs.langchain.com/docs/use-cases/evaluation>`_
|
||||
@@ -69,21 +69,19 @@ To facilitate that, we've included a `template notebook <./evaluation/benchmarki
|
||||
|
||||
The existing examples we have are:
|
||||
|
||||
`Question Answering (State of Union) <./evaluation/qa_benchmarking_sota.html>`_: A notebook showing evaluation of a question-answering task over a State-of-the-Union address.
|
||||
`Question Answering (State of Union) <./evaluation/qa_benchmarking_sota.html>`_: An notebook showing evaluation of a question-answering task over a State-of-the-Union address.
|
||||
|
||||
`Question Answering (Paul Graham Essay) <./evaluation/qa_benchmarking_pg.html>`_: A notebook showing evaluation of a question-answering task over a Paul Graham essay.
|
||||
`Question Answering (Paul Graham Essay) <./evaluation/qa_benchmarking_pg.html>`_: An notebook showing evaluation of a question-answering task over a Paul Graham essay.
|
||||
|
||||
`SQL Question Answering (Chinook) <./evaluation/sql_qa_benchmarking_chinook.html>`_: A notebook showing evaluation of a question-answering task over a SQL database (the Chinook database).
|
||||
`SQL Question Answering (Chinook) <./evaluation/sql_qa_benchmarking_chinook.html>`_: An notebook showing evaluation of a question-answering task over a SQL database (the Chinook database).
|
||||
|
||||
`Agent Vectorstore <./evaluation/agent_vectordb_sota_pg.html>`_: A notebook showing evaluation of an agent doing question answering while routing between two different vector databases.
|
||||
`Agent Vectorstore <./evaluation/agent_vectordb_sota_pg.html>`_: An notebook showing evaluation of an agent doing question answering while routing between two different vector databases.
|
||||
|
||||
`Agent Search + Calculator <./evaluation/agent_benchmarking.html>`_: A notebook showing evaluation of an agent doing question answering using a Search engine and a Calculator as tools.
|
||||
|
||||
`Evaluating an OpenAPI Chain <./evaluation/openapi_eval.html>`_: A notebook showing evaluation of an OpenAPI chain, including how to generate test data if you don't have any.
|
||||
`Agent Search + Calculator <./evaluation/agent_benchmarking.html>`_: An notebook showing evaluation of an agent doing question answering using a Search engine and a Calculator as tools.
|
||||
|
||||
|
||||
Other Examples
|
||||
--------------
|
||||
------------
|
||||
|
||||
In addition, we also have some more generic resources for evaluation.
|
||||
|
||||
|
||||
@@ -14,11 +14,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "46bf9205",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Comment this out if you are NOT using tracing\n",
|
||||
@@ -37,12 +35,32 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"id": "5b2d5e98",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Found cached dataset json (/Users/harrisonchase/.cache/huggingface/datasets/LangChainDatasets___json/LangChainDatasets--agent-search-calculator-8a025c0ce5fb99d2/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "3a275586643f4ccfba1a8d54be28c351",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0/1 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.evaluation.loading import load_dataset\n",
|
||||
"dataset = load_dataset(\"agent-search-calculator\")"
|
||||
@@ -59,11 +77,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 6,
|
||||
"id": "c18680b5",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
@@ -72,7 +88,7 @@
|
||||
"from langchain.agents import AgentType\n",
|
||||
"\n",
|
||||
"tools = load_tools(['serpapi', 'llm-math'], llm=OpenAI(temperature=0))\n",
|
||||
"agent = initialize_agent(tools, OpenAI(temperature=0), agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)"
|
||||
"agent = initialize_agent(tools, OpenAI(temperature=0), agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -87,14 +103,22 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 7,
|
||||
"id": "cbcafc92",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'38,630,316 people live in Canada as of 2023.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(dataset[0]['question'])\n",
|
||||
"agent.run(dataset[0]['question'])"
|
||||
]
|
||||
},
|
||||
@@ -109,24 +133,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bbbbb20e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.run(dataset[4]['question'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 8,
|
||||
"id": "24b4c66e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer')).\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"predictions = []\n",
|
||||
"predicted_dataset = []\n",
|
||||
@@ -136,8 +154,7 @@
|
||||
" try:\n",
|
||||
" predictions.append(agent(new_data))\n",
|
||||
" predicted_dataset.append(new_data)\n",
|
||||
" except Exception as e:\n",
|
||||
" predictions.append({\"output\": str(e), **new_data})\n",
|
||||
" except Exception:\n",
|
||||
" error_dataset.append(new_data)"
|
||||
]
|
||||
},
|
||||
@@ -152,12 +169,25 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 9,
|
||||
"id": "1d583f03",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'How many people live in canada as of 2023?',\n",
|
||||
" 'answer': 'approximately 38,625,801',\n",
|
||||
" 'output': '38,630,316 people live in Canada as of 2023.',\n",
|
||||
" 'intermediate_steps': [(AgentAction(tool='Search', tool_input='Population of Canada 2023', log=' I need to find population data\\nAction: Search\\nAction Input: Population of Canada 2023'),\n",
|
||||
" '38,630,316')]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"predictions[0]"
|
||||
]
|
||||
@@ -172,11 +202,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 10,
|
||||
"id": "d0a9341d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.evaluation.qa import QAEvalChain"
|
||||
@@ -184,11 +212,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 14,
|
||||
"id": "1612dec1",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
@@ -206,11 +232,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 15,
|
||||
"id": "2a689df5",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for i, prediction in enumerate(predictions):\n",
|
||||
@@ -219,12 +243,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 16,
|
||||
"id": "27b61215",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Counter({' CORRECT': 4, ' INCORRECT': 6})"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from collections import Counter\n",
|
||||
"Counter([pred['grade'] for pred in predictions])"
|
||||
@@ -240,7 +273,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 17,
|
||||
"id": "47c692a1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -250,18 +283,38 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 18,
|
||||
"id": "0ef976c1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': \"who is dua lipa's boyfriend? what is his age raised to the .43 power?\",\n",
|
||||
" 'answer': 'her boyfriend is Romain Gravas. his age raised to the .43 power is approximately 4.9373857399466665',\n",
|
||||
" 'output': \"Isaac Carew, Dua Lipa's boyfriend, is 36 years old and his age raised to the .43 power is 4.6688516567750975.\",\n",
|
||||
" 'intermediate_steps': [(AgentAction(tool='Search', tool_input=\"Dua Lipa's boyfriend\", log=' I need to find out who Dua Lipa\\'s boyfriend is and then calculate his age raised to the .43 power\\nAction: Search\\nAction Input: \"Dua Lipa\\'s boyfriend\"'),\n",
|
||||
" 'Dua and Isaac, a model and a chef, dated on and off from 2013 to 2019. The two first split in early 2017, which is when Dua went on to date LANY ...'),\n",
|
||||
" (AgentAction(tool='Search', tool_input='Isaac Carew age', log=' I need to find out Isaac\\'s age\\nAction: Search\\nAction Input: \"Isaac Carew age\"'),\n",
|
||||
" '36 years'),\n",
|
||||
" (AgentAction(tool='Calculator', tool_input='36^.43', log=' I need to calculate 36 raised to the .43 power\\nAction: Calculator\\nAction Input: 36^.43'),\n",
|
||||
" 'Answer: 4.6688516567750975\\n')],\n",
|
||||
" 'grade': ' INCORRECT'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"incorrect"
|
||||
"incorrect[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3eb948cf-f767-4c87-a12d-275b66eef407",
|
||||
"id": "7710401a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -283,7 +336,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"source": [
|
||||
"# Data Augmented Question Answering\n",
|
||||
"\n",
|
||||
"This notebook uses some generic prompts/language models to evaluate an question answering system that uses other sources of data besides what is in the model. For example, this can be used to evaluate a question answering system over your proprietary data.\n",
|
||||
"This notebook uses some generic prompts/language models to evaluate an question answering system that uses other sources of data besides what is in the model. For example, this can be used to evaluate a question answering system over your propritary data.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"Let's set up an example with our favorite example - the state of the union address."
|
||||
|
||||
@@ -1,955 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "692f3256",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Evaluating an OpenAPI Chain\n",
|
||||
"\n",
|
||||
"This notebook goes over ways to semantically evaluate an [OpenAPI Chain](openapi.ipynb), which calls an endpoint defined by the OpenAPI specification using purely natural language."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "a457106d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.tools import OpenAPISpec, APIOperation\n",
|
||||
"from langchain.chains import OpenAPIEndpointChain, LLMChain\n",
|
||||
"from langchain.requests import Requests\n",
|
||||
"from langchain.llms import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2c3b0954",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load the API Chain\n",
|
||||
"\n",
|
||||
"Load a wrapper of the spec (so we can work with it more easily). You can load from a url or from a local file."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "794142ba",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Load and parse the OpenAPI Spec\n",
|
||||
"spec = OpenAPISpec.from_url(\"https://www.klarna.com/us/shopping/public/openai/v0/api-docs/\")\n",
|
||||
"# Load a single endpoint operation\n",
|
||||
"operation = APIOperation.from_openapi_spec(spec, '/public/openai/v0/products', \"get\")\n",
|
||||
"verbose = False\n",
|
||||
"# Select any LangChain LLM\n",
|
||||
"llm = OpenAI(temperature=0, max_tokens=1000)\n",
|
||||
"# Create the endpoint chain\n",
|
||||
"api_chain = OpenAPIEndpointChain.from_api_operation(\n",
|
||||
" operation, \n",
|
||||
" llm, \n",
|
||||
" requests=Requests(), \n",
|
||||
" verbose=verbose,\n",
|
||||
" return_intermediate_steps=True # Return request and response text\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6c05ba5b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### *Optional*: Generate Input Questions and Request Ground Truth Queries\n",
|
||||
"\n",
|
||||
"See [Generating Test Datasets](#Generating-Test-Datasets) at the end of this notebook for more details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a0c0cb7e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import re\n",
|
||||
"# from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"# template = \"\"\"Below is a service description:\n",
|
||||
"\n",
|
||||
"# {spec}\n",
|
||||
"\n",
|
||||
"# Imagine you're a new user trying to use {operation} through a search bar. What are 10 different things you want to request?\n",
|
||||
"# Wants/Questions:\n",
|
||||
"# 1. \"\"\"\n",
|
||||
"\n",
|
||||
"# prompt = PromptTemplate.from_template(template)\n",
|
||||
"\n",
|
||||
"# generation_chain = LLMChain(llm=llm, prompt=prompt)\n",
|
||||
"\n",
|
||||
"# questions_ = generation_chain.run(spec=operation.to_typescript(), operation=operation.operation_id).split('\\n')\n",
|
||||
"# # Strip preceding numeric bullets\n",
|
||||
"# questions = [re.sub(r'^\\d+\\. ', '', q).strip() for q in questions_]\n",
|
||||
"# questions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "f3d767ef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# ground_truths = [\n",
|
||||
"# {\"q\": ...} # What are the best queries for each input?\n",
|
||||
"# ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "81098a05",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Run the API Chain\n",
|
||||
"\n",
|
||||
"The two simplest questions a user of the API Chain are:\n",
|
||||
"- Did the chain succesfully access the endpoint?\n",
|
||||
"- Did the action accomplish the correct result?\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "64bc7ed9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from collections import defaultdict\n",
|
||||
"# Collect metrics to report at completion\n",
|
||||
"scores = defaultdict(list)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "dfd2d09f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Found cached dataset json (/Users/harrisonchase/.cache/huggingface/datasets/LangChainDatasets___json/LangChainDatasets--openapi-chain-klarna-products-get-5d03362007667626/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "10932c9c139941d1a8be1a798f29e923",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0/1 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.evaluation.loading import load_dataset\n",
|
||||
"dataset = load_dataset(\"openapi-chain-klarna-products-get\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "e08191a7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'question': 'What iPhone models are available?',\n",
|
||||
" 'expected_query': {'max_price': None, 'q': 'iPhone'}},\n",
|
||||
" {'question': 'Are there any budget laptops?',\n",
|
||||
" 'expected_query': {'max_price': 300, 'q': 'laptop'}},\n",
|
||||
" {'question': 'Show me the cheapest gaming PC.',\n",
|
||||
" 'expected_query': {'max_price': 500, 'q': 'gaming pc'}},\n",
|
||||
" {'question': 'Are there any tablets under $400?',\n",
|
||||
" 'expected_query': {'max_price': 400, 'q': 'tablet'}},\n",
|
||||
" {'question': 'What are the best headphones?',\n",
|
||||
" 'expected_query': {'max_price': None, 'q': 'headphones'}},\n",
|
||||
" {'question': 'What are the top rated laptops?',\n",
|
||||
" 'expected_query': {'max_price': None, 'q': 'laptop'}},\n",
|
||||
" {'question': 'I want to buy some shoes. I like Adidas and Nike.',\n",
|
||||
" 'expected_query': {'max_price': None, 'q': 'shoe'}},\n",
|
||||
" {'question': 'I want to buy a new skirt',\n",
|
||||
" 'expected_query': {'max_price': None, 'q': 'skirt'}},\n",
|
||||
" {'question': 'My company is asking me to get a professional Deskopt PC - money is no object.',\n",
|
||||
" 'expected_query': {'max_price': 10000, 'q': 'professional desktop PC'}},\n",
|
||||
" {'question': 'What are the best budget cameras?',\n",
|
||||
" 'expected_query': {'max_price': 300, 'q': 'camera'}}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "7ee71384",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"questions = [d['question'] for d in dataset]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "00511f7a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Run the the API chain itself\n",
|
||||
"raise_error = False # Stop on first failed example - useful for development\n",
|
||||
"chain_outputs = []\n",
|
||||
"failed_examples = []\n",
|
||||
"for question in questions:\n",
|
||||
" try:\n",
|
||||
" chain_outputs.append(api_chain(question))\n",
|
||||
" scores[\"completed\"].append(1.0)\n",
|
||||
" except Exception as e:\n",
|
||||
" if raise_error:\n",
|
||||
" raise e\n",
|
||||
" failed_examples.append({'q': question, 'error': e})\n",
|
||||
" scores[\"completed\"].append(0.0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "f3c9729f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# If the chain failed to run, show the failing examples\n",
|
||||
"failed_examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "914e7587",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['There are currently 10 Apple iPhone models available: Apple iPhone 14 Pro Max 256GB, Apple iPhone 12 128GB, Apple iPhone 13 128GB, Apple iPhone 14 Pro 128GB, Apple iPhone 14 Pro 256GB, Apple iPhone 14 Pro Max 128GB, Apple iPhone 13 Pro Max 128GB, Apple iPhone 14 128GB, Apple iPhone 12 Pro 512GB, and Apple iPhone 12 mini 64GB.',\n",
|
||||
" 'Yes, there are several budget laptops in the API response. For example, the HP 14-dq0055dx and HP 15-dw0083wm are both priced at $199.99 and $244.99 respectively.',\n",
|
||||
" 'The cheapest gaming PC available is the Alarco Gaming PC (X_BLACK_GTX750) for $499.99. You can find more information about it here: https://www.klarna.com/us/shopping/pl/cl223/3203154750/Desktop-Computers/Alarco-Gaming-PC-%28X_BLACK_GTX750%29/?utm_source=openai&ref-site=openai_plugin',\n",
|
||||
" 'Yes, there are several tablets under $400. These include the Apple iPad 10.2\" 32GB (2019), Samsung Galaxy Tab A8 10.5 SM-X200 32GB, Samsung Galaxy Tab A7 Lite 8.7 SM-T220 32GB, Amazon Fire HD 8\" 32GB (10th Generation), and Amazon Fire HD 10 32GB.',\n",
|
||||
" 'It looks like you are looking for the best headphones. Based on the API response, it looks like the Apple AirPods Pro (2nd generation) 2022, Apple AirPods Max, and Bose Noise Cancelling Headphones 700 are the best options.',\n",
|
||||
" 'The top rated laptops based on the API response are the Apple MacBook Pro (2021) M1 Pro 8C CPU 14C GPU 16GB 512GB SSD 14\", Apple MacBook Pro (2022) M2 OC 10C GPU 8GB 256GB SSD 13.3\", Apple MacBook Air (2022) M2 OC 8C GPU 8GB 256GB SSD 13.6\", and Apple MacBook Pro (2023) M2 Pro OC 16C GPU 16GB 512GB SSD 14.2\".',\n",
|
||||
" \"I found several Nike and Adidas shoes in the API response. Here are the links to the products: Nike Dunk Low M - Black/White: https://www.klarna.com/us/shopping/pl/cl337/3200177969/Shoes/Nike-Dunk-Low-M-Black-White/?utm_source=openai&ref-site=openai_plugin, Nike Air Jordan 4 Retro M - Midnight Navy: https://www.klarna.com/us/shopping/pl/cl337/3202929835/Shoes/Nike-Air-Jordan-4-Retro-M-Midnight-Navy/?utm_source=openai&ref-site=openai_plugin, Nike Air Force 1 '07 M - White: https://www.klarna.com/us/shopping/pl/cl337/3979297/Shoes/Nike-Air-Force-1-07-M-White/?utm_source=openai&ref-site=openai_plugin, Nike Dunk Low W - White/Black: https://www.klarna.com/us/shopping/pl/cl337/3200134705/Shoes/Nike-Dunk-Low-W-White-Black/?utm_source=openai&ref-site=openai_plugin, Nike Air Jordan 1 Retro High M - White/University Blue/Black: https://www.klarna.com/us/shopping/pl/cl337/3200383658/Shoes/Nike-Air-Jordan-1-Retro-High-M-White-University-Blue-Black/?utm_source=openai&ref-site=openai_plugin, Nike Air Jordan 1 Retro High OG M - True Blue/Cement Grey/White: https://www.klarna.com/us/shopping/pl/cl337/3204655673/Shoes/Nike-Air-Jordan-1-Retro-High-OG-M-True-Blue-Cement-Grey-White/?utm_source=openai&ref-site=openai_plugin, Nike Air Jordan 11 Retro Cherry - White/Varsity Red/Black: https://www.klarna.com/us/shopping/pl/cl337/3202929696/Shoes/Nike-Air-Jordan-11-Retro-Cherry-White-Varsity-Red-Black/?utm_source=openai&ref-site=openai_plugin, Nike Dunk High W - White/Black: https://www.klarna.com/us/shopping/pl/cl337/3201956448/Shoes/Nike-Dunk-High-W-White-Black/?utm_source=openai&ref-site=openai_plugin, Nike Air Jordan 5 Retro M - Black/Taxi/Aquatone: https://www.klarna.com/us/shopping/pl/cl337/3204923084/Shoes/Nike-Air-Jordan-5-Retro-M-Black-Taxi-Aquatone/?utm_source=openai&ref-site=openai_plugin, Nike Court Legacy Lift W: https://www.klarna.com/us/shopping/pl/cl337/3202103728/Shoes/Nike-Court-Legacy-Lift-W/?utm_source=openai&ref-site=openai_plugin\",\n",
|
||||
" \"I found several skirts that may interest you. Please take a look at the following products: Avenue Plus Size Denim Stretch Skirt, LoveShackFancy Ruffled Mini Skirt - Antique White, Nike Dri-Fit Club Golf Skirt - Active Pink, Skims Soft Lounge Ruched Long Skirt, French Toast Girl's Front Pleated Skirt with Tabs, Alexia Admor Women's Harmonie Mini Skirt Pink Pink, Vero Moda Long Skirt, Nike Court Dri-FIT Victory Flouncy Tennis Skirt Women - White/Black, Haoyuan Mini Pleated Skirts W, and Zimmermann Lyre Midi Skirt.\",\n",
|
||||
" 'Based on the API response, you may want to consider the Skytech Archangel Gaming Computer PC Desktop, the CyberPowerPC Gamer Master Gaming Desktop, or the ASUS ROG Strix G10DK-RS756, as they all offer powerful processors and plenty of RAM.',\n",
|
||||
" 'Based on the API response, the best budget cameras are the DJI Mini 2 Dog Camera ($448.50), Insta360 Sphere with Landing Pad ($429.99), DJI FPV Gimbal Camera ($121.06), Parrot Camera & Body ($36.19), and DJI FPV Air Unit ($179.00).']"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"answers = [res['output'] for res in chain_outputs]\n",
|
||||
"answers"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "484f0587",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Evaluate the requests chain\n",
|
||||
"\n",
|
||||
"The API Chain has two main components:\n",
|
||||
"1. Translate the user query to an API request (request synthesizer)\n",
|
||||
"2. Translate the API response to a natural language response\n",
|
||||
"\n",
|
||||
"Here, we construct an evaluation chain to grade the request synthesizer against selected human queries "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "3ea5afd7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"truth_queries = [json.dumps(data[\"expected_query\"]) for data in dataset]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "e055f24b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Collect the API queries generated by the chain\n",
|
||||
"predicted_queries = [output[\"intermediate_steps\"][\"request_args\"] for output in chain_outputs]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "7d4f2b88",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"template = \"\"\"You are trying to answer the following question by querying an API:\n",
|
||||
"\n",
|
||||
"> Question: {question}\n",
|
||||
"\n",
|
||||
"The query you know you should be executing against the API is:\n",
|
||||
"\n",
|
||||
"> Query: {truth_query}\n",
|
||||
"\n",
|
||||
"Is the following predicted query semantically the same (eg likely to produce the same answer)?\n",
|
||||
"\n",
|
||||
"> Predicted Query: {predict_query}\n",
|
||||
"\n",
|
||||
"Please give the Predicted Query a grade of either an A, B, C, D, or F, along with an explanation of why. End the evaluation with 'Final Grade: <the letter>'\n",
|
||||
"\n",
|
||||
"> Explanation: Let's think step by step.\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate.from_template(template)\n",
|
||||
"\n",
|
||||
"eval_chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "8cc1b1db",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[' The original query is asking for all iPhone models, so the \"q\" parameter is correct. The \"max_price\" parameter is also correct, as it is set to null, meaning that no maximum price is set. The predicted query adds two additional parameters, \"size\" and \"min_price\". The \"size\" parameter is not necessary, as it is not relevant to the question being asked. The \"min_price\" parameter is also not necessary, as it is not relevant to the question being asked and it is set to 0, which is the default value. Therefore, the predicted query is not semantically the same as the original query and is not likely to produce the same answer. Final Grade: D',\n",
|
||||
" ' The original query is asking for laptops with a maximum price of 300. The predicted query is asking for laptops with a minimum price of 0 and a maximum price of 500. This means that the predicted query is likely to return more results than the original query, as it is asking for a wider range of prices. Therefore, the predicted query is not semantically the same as the original query, and it is not likely to produce the same answer. Final Grade: F',\n",
|
||||
" \" The first two parameters are the same, so that's good. The third parameter is different, but it's not necessary for the query, so that's not a problem. The fourth parameter is the problem. The original query specifies a maximum price of 500, while the predicted query specifies a maximum price of null. This means that the predicted query will not limit the results to the cheapest gaming PCs, so it is not semantically the same as the original query. Final Grade: F\",\n",
|
||||
" ' The original query is asking for tablets under $400, so the first two parameters are correct. The predicted query also includes the parameters \"size\" and \"min_price\", which are not necessary for the original query. The \"size\" parameter is not relevant to the question, and the \"min_price\" parameter is redundant since the original query already specifies a maximum price. Therefore, the predicted query is not semantically the same as the original query and is not likely to produce the same answer. Final Grade: D',\n",
|
||||
" ' The original query is asking for headphones with no maximum price, so the predicted query is not semantically the same because it has a maximum price of 500. The predicted query also has a size of 10, which is not specified in the original query. Therefore, the predicted query is not semantically the same as the original query. Final Grade: F',\n",
|
||||
" \" The original query is asking for the top rated laptops, so the 'size' parameter should be set to 10 to get the top 10 results. The 'min_price' parameter should be set to 0 to get results from all price ranges. The 'max_price' parameter should be set to null to get results from all price ranges. The 'q' parameter should be set to 'laptop' to get results related to laptops. All of these parameters are present in the predicted query, so it is semantically the same as the original query. Final Grade: A\",\n",
|
||||
" ' The original query is asking for shoes, so the predicted query is asking for the same thing. The original query does not specify a size, so the predicted query is not adding any additional information. The original query does not specify a price range, so the predicted query is adding additional information that is not necessary. Therefore, the predicted query is not semantically the same as the original query and is likely to produce different results. Final Grade: D',\n",
|
||||
" ' The original query is asking for a skirt, so the predicted query is asking for the same thing. The predicted query also adds additional parameters such as size and price range, which could help narrow down the results. However, the size parameter is not necessary for the query to be successful, and the price range is too narrow. Therefore, the predicted query is not as effective as the original query. Final Grade: C',\n",
|
||||
" ' The first part of the query is asking for a Desktop PC, which is the same as the original query. The second part of the query is asking for a size of 10, which is not relevant to the original query. The third part of the query is asking for a minimum price of 0, which is not relevant to the original query. The fourth part of the query is asking for a maximum price of null, which is not relevant to the original query. Therefore, the Predicted Query does not semantically match the original query and is not likely to produce the same answer. Final Grade: F',\n",
|
||||
" ' The original query is asking for cameras with a maximum price of 300. The predicted query is asking for cameras with a maximum price of 500. This means that the predicted query is likely to return more results than the original query, which may include cameras that are not within the budget range. Therefore, the predicted query is not semantically the same as the original query and does not answer the original question. Final Grade: F']"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"request_eval_results = []\n",
|
||||
"for question, predict_query, truth_query in list(zip(questions, predicted_queries, truth_queries)):\n",
|
||||
" eval_output = eval_chain.run(\n",
|
||||
" question=question,\n",
|
||||
" truth_query=truth_query,\n",
|
||||
" predict_query=predict_query,\n",
|
||||
" )\n",
|
||||
" request_eval_results.append(eval_output)\n",
|
||||
"request_eval_results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "0d76f8ba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"from typing import List\n",
|
||||
"# Parse the evaluation chain responses into a rubric\n",
|
||||
"def parse_eval_results(results: List[str]) -> List[float]:\n",
|
||||
" rubric = {\n",
|
||||
" \"A\": 1.0,\n",
|
||||
" \"B\": 0.75,\n",
|
||||
" \"C\": 0.5,\n",
|
||||
" \"D\": 0.25,\n",
|
||||
" \"F\": 0\n",
|
||||
" }\n",
|
||||
" return [rubric[re.search(r'Final Grade: (\\w+)', res).group(1)] for res in results]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"parsed_results = parse_eval_results(request_eval_results)\n",
|
||||
"# Collect the scores for a final evaluation table\n",
|
||||
"scores['request_synthesizer'].extend(parsed_results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6f3ee8ea",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Evaluate the Response Chain\n",
|
||||
"\n",
|
||||
"The second component translated the structured API response to a natural language response.\n",
|
||||
"Evaluate this against the user's original question."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "8b97847c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"template = \"\"\"You are trying to answer the following question by querying an API:\n",
|
||||
"\n",
|
||||
"> Question: {question}\n",
|
||||
"\n",
|
||||
"The API returned a response of:\n",
|
||||
"\n",
|
||||
"> API result: {api_response}\n",
|
||||
"\n",
|
||||
"Your response to the user: {answer}\n",
|
||||
"\n",
|
||||
"Please evaluate the accuracy and utility of your response to the user's original question, conditioned on the information available.\n",
|
||||
"Give a letter grade of either an A, B, C, D, or F, along with an explanation of why. End the evaluation with 'Final Grade: <the letter>'\n",
|
||||
"\n",
|
||||
"> Explanation: Let's think step by step.\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate.from_template(template)\n",
|
||||
"\n",
|
||||
"eval_chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "642852ce",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Extract the API responses from the chain\n",
|
||||
"api_responses = [output[\"intermediate_steps\"][\"response_text\"] for output in chain_outputs]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "08a5eb4f",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[' The original query is asking for all iPhone models, so the \"q\" parameter is correct. The \"max_price\" parameter is also correct, as it is set to null, meaning that no maximum price is set. The predicted query adds two additional parameters, \"size\" and \"min_price\". The \"size\" parameter is not necessary, as it is not relevant to the question being asked. The \"min_price\" parameter is also not necessary, as it is not relevant to the question being asked and it is set to 0, which is the default value. Therefore, the predicted query is not semantically the same as the original query and is not likely to produce the same answer. Final Grade: D',\n",
|
||||
" ' The original query is asking for laptops with a maximum price of 300. The predicted query is asking for laptops with a minimum price of 0 and a maximum price of 500. This means that the predicted query is likely to return more results than the original query, as it is asking for a wider range of prices. Therefore, the predicted query is not semantically the same as the original query, and it is not likely to produce the same answer. Final Grade: F',\n",
|
||||
" \" The first two parameters are the same, so that's good. The third parameter is different, but it's not necessary for the query, so that's not a problem. The fourth parameter is the problem. The original query specifies a maximum price of 500, while the predicted query specifies a maximum price of null. This means that the predicted query will not limit the results to the cheapest gaming PCs, so it is not semantically the same as the original query. Final Grade: F\",\n",
|
||||
" ' The original query is asking for tablets under $400, so the first two parameters are correct. The predicted query also includes the parameters \"size\" and \"min_price\", which are not necessary for the original query. The \"size\" parameter is not relevant to the question, and the \"min_price\" parameter is redundant since the original query already specifies a maximum price. Therefore, the predicted query is not semantically the same as the original query and is not likely to produce the same answer. Final Grade: D',\n",
|
||||
" ' The original query is asking for headphones with no maximum price, so the predicted query is not semantically the same because it has a maximum price of 500. The predicted query also has a size of 10, which is not specified in the original query. Therefore, the predicted query is not semantically the same as the original query. Final Grade: F',\n",
|
||||
" \" The original query is asking for the top rated laptops, so the 'size' parameter should be set to 10 to get the top 10 results. The 'min_price' parameter should be set to 0 to get results from all price ranges. The 'max_price' parameter should be set to null to get results from all price ranges. The 'q' parameter should be set to 'laptop' to get results related to laptops. All of these parameters are present in the predicted query, so it is semantically the same as the original query. Final Grade: A\",\n",
|
||||
" ' The original query is asking for shoes, so the predicted query is asking for the same thing. The original query does not specify a size, so the predicted query is not adding any additional information. The original query does not specify a price range, so the predicted query is adding additional information that is not necessary. Therefore, the predicted query is not semantically the same as the original query and is likely to produce different results. Final Grade: D',\n",
|
||||
" ' The original query is asking for a skirt, so the predicted query is asking for the same thing. The predicted query also adds additional parameters such as size and price range, which could help narrow down the results. However, the size parameter is not necessary for the query to be successful, and the price range is too narrow. Therefore, the predicted query is not as effective as the original query. Final Grade: C',\n",
|
||||
" ' The first part of the query is asking for a Desktop PC, which is the same as the original query. The second part of the query is asking for a size of 10, which is not relevant to the original query. The third part of the query is asking for a minimum price of 0, which is not relevant to the original query. The fourth part of the query is asking for a maximum price of null, which is not relevant to the original query. Therefore, the Predicted Query does not semantically match the original query and is not likely to produce the same answer. Final Grade: F',\n",
|
||||
" ' The original query is asking for cameras with a maximum price of 300. The predicted query is asking for cameras with a maximum price of 500. This means that the predicted query is likely to return more results than the original query, which may include cameras that are not within the budget range. Therefore, the predicted query is not semantically the same as the original query and does not answer the original question. Final Grade: F',\n",
|
||||
" ' The user asked a question about what iPhone models are available, and the API returned a response with 10 different models. The response provided by the user accurately listed all 10 models, so the accuracy of the response is A+. The utility of the response is also A+ since the user was able to get the exact information they were looking for. Final Grade: A+',\n",
|
||||
" \" The API response provided a list of laptops with their prices and attributes. The user asked if there were any budget laptops, and the response provided a list of laptops that are all priced under $500. Therefore, the response was accurate and useful in answering the user's question. Final Grade: A\",\n",
|
||||
" \" The API response provided the name, price, and URL of the product, which is exactly what the user asked for. The response also provided additional information about the product's attributes, which is useful for the user to make an informed decision. Therefore, the response is accurate and useful. Final Grade: A\",\n",
|
||||
" \" The API response provided a list of tablets that are under $400. The response accurately answered the user's question. Additionally, the response provided useful information such as the product name, price, and attributes. Therefore, the response was accurate and useful. Final Grade: A\",\n",
|
||||
" \" The API response provided a list of headphones with their respective prices and attributes. The user asked for the best headphones, so the response should include the best headphones based on the criteria provided. The response provided a list of headphones that are all from the same brand (Apple) and all have the same type of headphone (True Wireless, In-Ear). This does not provide the user with enough information to make an informed decision about which headphones are the best. Therefore, the response does not accurately answer the user's question. Final Grade: F\",\n",
|
||||
" ' The API response provided a list of laptops with their attributes, which is exactly what the user asked for. The response provided a comprehensive list of the top rated laptops, which is what the user was looking for. The response was accurate and useful, providing the user with the information they needed. Final Grade: A',\n",
|
||||
" ' The API response provided a list of shoes from both Adidas and Nike, which is exactly what the user asked for. The response also included the product name, price, and attributes for each shoe, which is useful information for the user to make an informed decision. The response also included links to the products, which is helpful for the user to purchase the shoes. Therefore, the response was accurate and useful. Final Grade: A',\n",
|
||||
" \" The API response provided a list of skirts that could potentially meet the user's needs. The response also included the name, price, and attributes of each skirt. This is a great start, as it provides the user with a variety of options to choose from. However, the response does not provide any images of the skirts, which would have been helpful for the user to make a decision. Additionally, the response does not provide any information about the availability of the skirts, which could be important for the user. \\n\\nFinal Grade: B\",\n",
|
||||
" ' The user asked for a professional desktop PC with no budget constraints. The API response provided a list of products that fit the criteria, including the Skytech Archangel Gaming Computer PC Desktop, the CyberPowerPC Gamer Master Gaming Desktop, and the ASUS ROG Strix G10DK-RS756. The response accurately suggested these three products as they all offer powerful processors and plenty of RAM. Therefore, the response is accurate and useful. Final Grade: A',\n",
|
||||
" \" The API response provided a list of cameras with their prices, which is exactly what the user asked for. The response also included additional information such as features and memory cards, which is not necessary for the user's question but could be useful for further research. The response was accurate and provided the user with the information they needed. Final Grade: A\"]"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Run the grader chain\n",
|
||||
"response_eval_results = []\n",
|
||||
"for question, api_response, answer in list(zip(questions, api_responses, answers)):\n",
|
||||
" request_eval_results.append(eval_chain.run(question=question, api_response=api_response, answer=answer))\n",
|
||||
"request_eval_results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "a144aa9d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Reusing the rubric from above, parse the evaluation chain responses\n",
|
||||
"parsed_response_results = parse_eval_results(request_eval_results)\n",
|
||||
"# Collect the scores for a final evaluation table\n",
|
||||
"scores['result_synthesizer'].extend(parsed_response_results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "e95042bc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Metric \tMin \tMean \tMax \n",
|
||||
"completed \t1.00 \t1.00 \t1.00 \n",
|
||||
"request_synthesizer \t0.00 \t0.23 \t1.00 \n",
|
||||
"result_synthesizer \t0.00 \t0.55 \t1.00 \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Print out Score statistics for the evaluation session\n",
|
||||
"header = \"{:<20}\\t{:<10}\\t{:<10}\\t{:<10}\".format(\"Metric\", \"Min\", \"Mean\", \"Max\")\n",
|
||||
"print(header)\n",
|
||||
"for metric, metric_scores in scores.items():\n",
|
||||
" mean_scores = sum(metric_scores) / len(metric_scores) if len(metric_scores) > 0 else float('nan')\n",
|
||||
" row = \"{:<20}\\t{:<10.2f}\\t{:<10.2f}\\t{:<10.2f}\".format(metric, min(metric_scores), mean_scores, max(metric_scores))\n",
|
||||
" print(row)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "03fe96af",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Re-show the examples for which the chain failed to complete\n",
|
||||
"failed_examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2bb3636d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Generating Test Datasets\n",
|
||||
"\n",
|
||||
"To evaluate a chain against your own endpoint, you'll want to generate a test dataset that's conforms to the API.\n",
|
||||
"\n",
|
||||
"This section provides an overview of how to bootstrap the process.\n",
|
||||
"\n",
|
||||
"First, we'll parse the OpenAPI Spec. For this example, we'll [Speak](https://www.speak.com/)'s OpenAPI specification."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "a453eb93",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Load and parse the OpenAPI Spec\n",
|
||||
"spec = OpenAPISpec.from_url(\"https://api.speak.com/openapi.yaml\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "bb65ffe8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['/v1/public/openai/explain-phrase',\n",
|
||||
" '/v1/public/openai/explain-task',\n",
|
||||
" '/v1/public/openai/translate']"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# List the paths in the OpenAPI Spec\n",
|
||||
"paths = sorted(spec.paths.keys())\n",
|
||||
"paths"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "0988f01b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['post']"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# See which HTTP Methods are available for a given path\n",
|
||||
"methods = spec.get_methods_for_path('/v1/public/openai/explain-task')\n",
|
||||
"methods"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "e9ef0a77",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"type explainTask = (_: {\n",
|
||||
"/* Description of the task that the user wants to accomplish or do. For example, \"tell the waiter they messed up my order\" or \"compliment someone on their shirt\" */\n",
|
||||
" task_description?: string,\n",
|
||||
"/* The foreign language that the user is learning and asking about. The value can be inferred from question - for example, if the user asks \"how do i ask a girl out in mexico city\", the value should be \"Spanish\" because of Mexico City. Always use the full name of the language (e.g. Spanish, French). */\n",
|
||||
" learning_language?: string,\n",
|
||||
"/* The user's native language. Infer this value from the language the user asked their question in. Always use the full name of the language (e.g. Spanish, French). */\n",
|
||||
" native_language?: string,\n",
|
||||
"/* A description of any additional context in the user's question that could affect the explanation - e.g. setting, scenario, situation, tone, speaking style and formality, usage notes, or any other qualifiers. */\n",
|
||||
" additional_context?: string,\n",
|
||||
"/* Full text of the user's question. */\n",
|
||||
" full_query?: string,\n",
|
||||
"}) => any;\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Load a single endpoint operation\n",
|
||||
"operation = APIOperation.from_openapi_spec(spec, '/v1/public/openai/explain-task', 'post')\n",
|
||||
"\n",
|
||||
"# The operation can be serialized as typescript\n",
|
||||
"print(operation.to_typescript())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "f1186b6d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Compress the service definition to avoid leaking too much input structure to the sample data\n",
|
||||
"template = \"\"\"In 20 words or less, what does this service accomplish?\n",
|
||||
"{spec}\n",
|
||||
"\n",
|
||||
"Function: It's designed to \"\"\"\n",
|
||||
"prompt = PromptTemplate.from_template(template)\n",
|
||||
"generation_chain = LLMChain(llm=llm, prompt=prompt)\n",
|
||||
"purpose = generation_chain.run(spec=operation.to_typescript())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "a594406a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[\"Can you explain how to say 'hello' in Spanish?\",\n",
|
||||
" \"I need help understanding the French word for 'goodbye'.\",\n",
|
||||
" \"Can you tell me how to say 'thank you' in German?\",\n",
|
||||
" \"I'm trying to learn the Italian word for 'please'.\",\n",
|
||||
" \"Can you help me with the pronunciation of 'yes' in Portuguese?\",\n",
|
||||
" \"I'm looking for the Dutch word for 'no'.\",\n",
|
||||
" \"Can you explain the meaning of 'hello' in Japanese?\",\n",
|
||||
" \"I need help understanding the Russian word for 'thank you'.\",\n",
|
||||
" \"Can you tell me how to say 'goodbye' in Chinese?\",\n",
|
||||
" \"I'm trying to learn the Arabic word for 'please'.\"]"
|
||||
]
|
||||
},
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"template = \"\"\"Write a list of {num_to_generate} unique messages users might send to a service designed to{purpose} They must each be completely unique.\n",
|
||||
"\n",
|
||||
"1.\"\"\"\n",
|
||||
"def parse_list(text: str) -> List[str]:\n",
|
||||
" # Match lines starting with a number then period\n",
|
||||
" # Strip leading and trailing whitespace\n",
|
||||
" matches = re.findall(r'^\\d+\\. ', text)\n",
|
||||
" return [re.sub(r'^\\d+\\. ', '', q).strip().strip('\"') for q in text.split('\\n')]\n",
|
||||
"\n",
|
||||
"num_to_generate = 10 # How many examples to use for this test set.\n",
|
||||
"prompt = PromptTemplate.from_template(template)\n",
|
||||
"generation_chain = LLMChain(llm=llm, prompt=prompt)\n",
|
||||
"text = generation_chain.run(purpose=purpose,\n",
|
||||
" num_to_generate=num_to_generate)\n",
|
||||
"# Strip preceding numeric bullets\n",
|
||||
"queries = parse_list(text)\n",
|
||||
"queries\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "8dc60f43",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['{\"task_description\": \"say \\'hello\\'\", \"learning_language\": \"Spanish\", \"native_language\": \"English\", \"full_query\": \"Can you explain how to say \\'hello\\' in Spanish?\"}',\n",
|
||||
" '{\"task_description\": \"understanding the French word for \\'goodbye\\'\", \"learning_language\": \"French\", \"native_language\": \"English\", \"full_query\": \"I need help understanding the French word for \\'goodbye\\'.\"}',\n",
|
||||
" '{\"task_description\": \"say \\'thank you\\'\", \"learning_language\": \"German\", \"native_language\": \"English\", \"full_query\": \"Can you tell me how to say \\'thank you\\' in German?\"}',\n",
|
||||
" '{\"task_description\": \"Learn the Italian word for \\'please\\'\", \"learning_language\": \"Italian\", \"native_language\": \"English\", \"full_query\": \"I\\'m trying to learn the Italian word for \\'please\\'.\"}',\n",
|
||||
" '{\"task_description\": \"Help with pronunciation of \\'yes\\' in Portuguese\", \"learning_language\": \"Portuguese\", \"native_language\": \"English\", \"full_query\": \"Can you help me with the pronunciation of \\'yes\\' in Portuguese?\"}',\n",
|
||||
" '{\"task_description\": \"Find the Dutch word for \\'no\\'\", \"learning_language\": \"Dutch\", \"native_language\": \"English\", \"full_query\": \"I\\'m looking for the Dutch word for \\'no\\'.\"}',\n",
|
||||
" '{\"task_description\": \"Explain the meaning of \\'hello\\' in Japanese\", \"learning_language\": \"Japanese\", \"native_language\": \"English\", \"full_query\": \"Can you explain the meaning of \\'hello\\' in Japanese?\"}',\n",
|
||||
" '{\"task_description\": \"understanding the Russian word for \\'thank you\\'\", \"learning_language\": \"Russian\", \"native_language\": \"English\", \"full_query\": \"I need help understanding the Russian word for \\'thank you\\'.\"}',\n",
|
||||
" '{\"task_description\": \"say goodbye\", \"learning_language\": \"Chinese\", \"native_language\": \"English\", \"full_query\": \"Can you tell me how to say \\'goodbye\\' in Chinese?\"}',\n",
|
||||
" '{\"task_description\": \"Learn the Arabic word for \\'please\\'\", \"learning_language\": \"Arabic\", \"native_language\": \"English\", \"full_query\": \"I\\'m trying to learn the Arabic word for \\'please\\'.\"}']"
|
||||
]
|
||||
},
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Define the generation chain to get hypotheses\n",
|
||||
"api_chain = OpenAPIEndpointChain.from_api_operation(\n",
|
||||
" operation, \n",
|
||||
" llm, \n",
|
||||
" requests=Requests(), \n",
|
||||
" verbose=verbose,\n",
|
||||
" return_intermediate_steps=True # Return request and response text\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"predicted_outputs =[api_chain(query) for query in queries]\n",
|
||||
"request_args = [output[\"intermediate_steps\"][\"request_args\"] for output in predicted_outputs]\n",
|
||||
"\n",
|
||||
"# Show the generated request\n",
|
||||
"request_args"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "b727e28e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## AI Assisted Correction\n",
|
||||
"correction_template = \"\"\"Correct the following API request based on the user's feedback. If the user indicates no changes are needed, output the original without making any changes.\n",
|
||||
"\n",
|
||||
"REQUEST: {request}\n",
|
||||
"\n",
|
||||
"User Feedback / requested changes: {user_feedback}\n",
|
||||
"\n",
|
||||
"Finalized Request: \"\"\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate.from_template(correction_template)\n",
|
||||
"correction_chain = LLMChain(llm=llm, prompt=prompt)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "c1f4d71f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Query: Can you explain how to say 'hello' in Spanish?\n",
|
||||
"Request: {\"task_description\": \"say 'hello'\", \"learning_language\": \"Spanish\", \"native_language\": \"English\", \"full_query\": \"Can you explain how to say 'hello' in Spanish?\"}\n",
|
||||
"Requested changes: \n",
|
||||
"Query: I need help understanding the French word for 'goodbye'.\n",
|
||||
"Request: {\"task_description\": \"understanding the French word for 'goodbye'\", \"learning_language\": \"French\", \"native_language\": \"English\", \"full_query\": \"I need help understanding the French word for 'goodbye'.\"}\n",
|
||||
"Requested changes: \n",
|
||||
"Query: Can you tell me how to say 'thank you' in German?\n",
|
||||
"Request: {\"task_description\": \"say 'thank you'\", \"learning_language\": \"German\", \"native_language\": \"English\", \"full_query\": \"Can you tell me how to say 'thank you' in German?\"}\n",
|
||||
"Requested changes: \n",
|
||||
"Query: I'm trying to learn the Italian word for 'please'.\n",
|
||||
"Request: {\"task_description\": \"Learn the Italian word for 'please'\", \"learning_language\": \"Italian\", \"native_language\": \"English\", \"full_query\": \"I'm trying to learn the Italian word for 'please'.\"}\n",
|
||||
"Requested changes: \n",
|
||||
"Query: Can you help me with the pronunciation of 'yes' in Portuguese?\n",
|
||||
"Request: {\"task_description\": \"Help with pronunciation of 'yes' in Portuguese\", \"learning_language\": \"Portuguese\", \"native_language\": \"English\", \"full_query\": \"Can you help me with the pronunciation of 'yes' in Portuguese?\"}\n",
|
||||
"Requested changes: \n",
|
||||
"Query: I'm looking for the Dutch word for 'no'.\n",
|
||||
"Request: {\"task_description\": \"Find the Dutch word for 'no'\", \"learning_language\": \"Dutch\", \"native_language\": \"English\", \"full_query\": \"I'm looking for the Dutch word for 'no'.\"}\n",
|
||||
"Requested changes: \n",
|
||||
"Query: Can you explain the meaning of 'hello' in Japanese?\n",
|
||||
"Request: {\"task_description\": \"Explain the meaning of 'hello' in Japanese\", \"learning_language\": \"Japanese\", \"native_language\": \"English\", \"full_query\": \"Can you explain the meaning of 'hello' in Japanese?\"}\n",
|
||||
"Requested changes: \n",
|
||||
"Query: I need help understanding the Russian word for 'thank you'.\n",
|
||||
"Request: {\"task_description\": \"understanding the Russian word for 'thank you'\", \"learning_language\": \"Russian\", \"native_language\": \"English\", \"full_query\": \"I need help understanding the Russian word for 'thank you'.\"}\n",
|
||||
"Requested changes: \n",
|
||||
"Query: Can you tell me how to say 'goodbye' in Chinese?\n",
|
||||
"Request: {\"task_description\": \"say goodbye\", \"learning_language\": \"Chinese\", \"native_language\": \"English\", \"full_query\": \"Can you tell me how to say 'goodbye' in Chinese?\"}\n",
|
||||
"Requested changes: \n",
|
||||
"Query: I'm trying to learn the Arabic word for 'please'.\n",
|
||||
"Request: {\"task_description\": \"Learn the Arabic word for 'please'\", \"learning_language\": \"Arabic\", \"native_language\": \"English\", \"full_query\": \"I'm trying to learn the Arabic word for 'please'.\"}\n",
|
||||
"Requested changes: \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ground_truth = []\n",
|
||||
"for query, request_arg in list(zip(queries, request_args)):\n",
|
||||
" feedback = input(f\"Query: {query}\\nRequest: {request_arg}\\nRequested changes: \")\n",
|
||||
" if feedback == 'n' or feedback == 'none' or not feedback:\n",
|
||||
" ground_truth.append(request_arg)\n",
|
||||
" continue\n",
|
||||
" resolved = correction_chain.run(request=request_arg,\n",
|
||||
" user_feedback=feedback)\n",
|
||||
" ground_truth.append(resolved.strip())\n",
|
||||
" print(\"Updated request:\", resolved)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "19d68882",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Now you can use the `ground_truth` as shown above in [Evaluate the Requests Chain](#Evaluate-the-requests-chain)!**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "5a596176",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['{\"task_description\": \"say \\'hello\\'\", \"learning_language\": \"Spanish\", \"native_language\": \"English\", \"full_query\": \"Can you explain how to say \\'hello\\' in Spanish?\"}',\n",
|
||||
" '{\"task_description\": \"understanding the French word for \\'goodbye\\'\", \"learning_language\": \"French\", \"native_language\": \"English\", \"full_query\": \"I need help understanding the French word for \\'goodbye\\'.\"}',\n",
|
||||
" '{\"task_description\": \"say \\'thank you\\'\", \"learning_language\": \"German\", \"native_language\": \"English\", \"full_query\": \"Can you tell me how to say \\'thank you\\' in German?\"}',\n",
|
||||
" '{\"task_description\": \"Learn the Italian word for \\'please\\'\", \"learning_language\": \"Italian\", \"native_language\": \"English\", \"full_query\": \"I\\'m trying to learn the Italian word for \\'please\\'.\"}',\n",
|
||||
" '{\"task_description\": \"Help with pronunciation of \\'yes\\' in Portuguese\", \"learning_language\": \"Portuguese\", \"native_language\": \"English\", \"full_query\": \"Can you help me with the pronunciation of \\'yes\\' in Portuguese?\"}',\n",
|
||||
" '{\"task_description\": \"Find the Dutch word for \\'no\\'\", \"learning_language\": \"Dutch\", \"native_language\": \"English\", \"full_query\": \"I\\'m looking for the Dutch word for \\'no\\'.\"}',\n",
|
||||
" '{\"task_description\": \"Explain the meaning of \\'hello\\' in Japanese\", \"learning_language\": \"Japanese\", \"native_language\": \"English\", \"full_query\": \"Can you explain the meaning of \\'hello\\' in Japanese?\"}',\n",
|
||||
" '{\"task_description\": \"understanding the Russian word for \\'thank you\\'\", \"learning_language\": \"Russian\", \"native_language\": \"English\", \"full_query\": \"I need help understanding the Russian word for \\'thank you\\'.\"}',\n",
|
||||
" '{\"task_description\": \"say goodbye\", \"learning_language\": \"Chinese\", \"native_language\": \"English\", \"full_query\": \"Can you tell me how to say \\'goodbye\\' in Chinese?\"}',\n",
|
||||
" '{\"task_description\": \"Learn the Arabic word for \\'please\\'\", \"learning_language\": \"Arabic\", \"native_language\": \"English\", \"full_query\": \"I\\'m trying to learn the Arabic word for \\'please\\'.\"}']"
|
||||
]
|
||||
},
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Now you have a new ground truth set to use as shown above!\n",
|
||||
"ground_truth"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b7fe9dfa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -130,7 +130,7 @@
|
||||
"source": [
|
||||
"## Evaluation\n",
|
||||
"\n",
|
||||
"We can see that if we tried to just do exact match on the answer answers (`11` and `No`) they would not match what the language model answered. However, semantically the language model is correct in both cases. In order to account for this, we can use a language model itself to evaluate the answers."
|
||||
"We can see that if we tried to just do exact match on the answer answers (`11` and `No`) they would not match what the lanuage model answered. However, semantically the language model is correct in both cases. In order to account for this, we can use a language model itself to evaluate the answers."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -234,93 +234,6 @@
|
||||
"evalchain.evaluate(examples, predictions, question_key=\"question\", answer_key=\"answer\", prediction_key=\"text\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cb1cf335",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Evaluation without Ground Truth\n",
|
||||
"Its possible to evaluate question answering systems without ground truth. You would need a `\"context\"` input that reflects what the information the LLM uses to answer the question. This context can be obtained by any retreival system. Here's an example of how it works:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6c59293f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"context_examples = [\n",
|
||||
" {\n",
|
||||
" \"question\": \"How old am I?\",\n",
|
||||
" \"context\": \"I am 30 years old. I live in New York and take the train to work everyday.\",\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"question\": 'Who won the NFC championship game in 2023?\"',\n",
|
||||
" \"context\": \"NFC Championship Game 2023: Philadelphia Eagles 31, San Francisco 49ers 7\"\n",
|
||||
" }\n",
|
||||
"]\n",
|
||||
"QA_PROMPT = \"Answer the question based on the context\\nContext:{context}\\nQuestion:{question}\\nAnswer:\"\n",
|
||||
"template = PromptTemplate(input_variables=[\"context\", \"question\"], template=QA_PROMPT)\n",
|
||||
"qa_chain = LLMChain(llm=llm, prompt=template)\n",
|
||||
"predictions = qa_chain.apply(context_examples)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "e500d0cc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'text': 'You are 30 years old.'},\n",
|
||||
" {'text': ' The Philadelphia Eagles won the NFC championship game in 2023.'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"predictions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "6d8cbc1d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.evaluation.qa import ContextQAEvalChain\n",
|
||||
"eval_chain = ContextQAEvalChain.from_llm(llm)\n",
|
||||
"graded_outputs = eval_chain.evaluate(context_examples, predictions, question_key=\"question\", prediction_key=\"text\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "6c5262d0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'text': ' CORRECT'}, {'text': ' CORRECT'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"graded_outputs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "aaa61f0c",
|
||||
@@ -416,7 +329,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
"version": "3.9.1"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Personal Assistants (Agents)
|
||||
# Personal Assistants
|
||||
|
||||
> [Conceptual Guide](https://docs.langchain.com/docs/use-cases/personal-assistants)
|
||||
|
||||
@@ -16,10 +16,3 @@ Highlighting specific parts:
|
||||
- [Agent Documentation](../modules/agents.rst) (for interacting with the outside world)
|
||||
- [Index Documentation](../modules/indexes.rst) (for giving them knowledge of your data)
|
||||
- [Memory](../modules/memory.rst) (for helping them remember interactions)
|
||||
|
||||
Specific examples of this include:
|
||||
|
||||
- [Baby AGI](agents/baby_agi.ipynb): a notebook implementing [BabyAGI](https://github.com/yoheinakajima/babyagi) by Yohei Nakajima as LLM Chains
|
||||
- [Baby AGI with Tools](agents/baby_agi_with_agent.ipynb): building off the above notebook, this example substitutes in an agent with tools as the execution tools, allowing it to actually take actions.
|
||||
- [CAMEL](agents/camel_role_playing.ipynb): an implementation of the CAMEL (Communicative Agents for “Mind” Exploration of Large Scale Language Model Society) paper, where two agents communicate with eachother.
|
||||
- [AI Plugins](agents/custom_agent_with_plugin_retrieval.ipynb): an implementation of an agent that is designed to be able to use all AI Plugins.
|
||||
|
||||
@@ -51,8 +51,8 @@ chain.run(input_documents=docs, question=query)
|
||||
```
|
||||
|
||||
The following resources exist:
|
||||
- [Question Answering Notebook](../modules/chains/index_examples/question_answering.ipynb): A notebook walking through how to accomplish this task.
|
||||
- [VectorDB Question Answering Notebook](../modules/chains/index_examples/vector_db_qa.ipynb): A notebook walking through how to do question answering over a vector database. This can often be useful for when you have a LOT of documents, and you don't want to pass them all to the LLM, but rather first want to do some semantic search over embeddings.
|
||||
- [Question Answering Notebook](/modules/indexes/chain_examples/question_answering.ipynb): A notebook walking through how to accomplish this task.
|
||||
- [VectorDB Question Answering Notebook](/modules/indexes/chain_examples/vector_db_qa.ipynb): A notebook walking through how to do question answering over a vector database. This can often be useful for when you have a LOT of documents, and you don't want to pass them all to the LLM, but rather first want to do some semantic search over embeddings.
|
||||
|
||||
## Adding in sources
|
||||
|
||||
@@ -67,8 +67,8 @@ chain({"input_documents": docs, "question": query}, return_only_outputs=True)
|
||||
```
|
||||
|
||||
The following resources exist:
|
||||
- [QA With Sources Notebook](../modules/chains/index_examples/qa_with_sources.ipynb): A notebook walking through how to accomplish this task.
|
||||
- [VectorDB QA With Sources Notebook](../modules/chains/index_examples/vector_db_qa_with_sources.ipynb): A notebook walking through how to do question answering with sources over a vector database. This can often be useful for when you have a LOT of documents, and you don't want to pass them all to the LLM, but rather first want to do some semantic search over embeddings.
|
||||
- [QA With Sources Notebook](/modules/indexes/chain_examples/qa_with_sources.ipynb): A notebook walking through how to accomplish this task.
|
||||
- [VectorDB QA With Sources Notebook](/modules/indexes/chain_examples/vector_db_qa_with_sources.ipynb): A notebook walking through how to do question answering with sources over a vector database. This can often be useful for when you have a LOT of documents, and you don't want to pass them all to the LLM, but rather first want to do some semantic search over embeddings.
|
||||
|
||||
## Additional Related Resources
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
"""Chain that takes in an input and produces an action and action input."""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
@@ -30,7 +29,7 @@ from langchain.schema import (
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.utilities.asyncio import asyncio_timeout
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
class BaseSingleActionAgent(BaseModel):
|
||||
@@ -99,16 +98,6 @@ class BaseSingleActionAgent(BaseModel):
|
||||
f"Got unsupported early_stopping_method `{early_stopping_method}`"
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_llm_and_tools(
|
||||
cls,
|
||||
llm: BaseLanguageModel,
|
||||
tools: Sequence[BaseTool],
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
**kwargs: Any,
|
||||
) -> BaseSingleActionAgent:
|
||||
raise NotImplementedError
|
||||
|
||||
@property
|
||||
def _agent_type(self) -> str:
|
||||
"""Return Identifier of agent type."""
|
||||
@@ -336,7 +325,6 @@ class Agent(BaseSingleActionAgent):
|
||||
"""
|
||||
|
||||
llm_chain: LLMChain
|
||||
output_parser: AgentOutputParser
|
||||
allowed_tools: Optional[List[str]] = None
|
||||
|
||||
def get_allowed_tools(self) -> Optional[List[str]]:
|
||||
@@ -346,6 +334,10 @@ class Agent(BaseSingleActionAgent):
|
||||
def return_values(self) -> List[str]:
|
||||
return ["output"]
|
||||
|
||||
@abstractmethod
|
||||
def _extract_tool_and_input(self, text: str) -> Optional[Tuple[str, str]]:
|
||||
"""Extract tool and tool input from llm output."""
|
||||
|
||||
def _fix_text(self, text: str) -> str:
|
||||
"""Fix the text."""
|
||||
raise ValueError("fix_text not implemented for this agent.")
|
||||
@@ -367,6 +359,32 @@ class Agent(BaseSingleActionAgent):
|
||||
thoughts += f"\n{self.observation_prefix}{observation}\n{self.llm_prefix}"
|
||||
return thoughts
|
||||
|
||||
def _get_next_action(self, full_inputs: Dict[str, str]) -> AgentAction:
|
||||
full_output = self.llm_chain.predict(**full_inputs)
|
||||
parsed_output = self._extract_tool_and_input(full_output)
|
||||
while parsed_output is None:
|
||||
full_output = self._fix_text(full_output)
|
||||
full_inputs["agent_scratchpad"] += full_output
|
||||
output = self.llm_chain.predict(**full_inputs)
|
||||
full_output += output
|
||||
parsed_output = self._extract_tool_and_input(full_output)
|
||||
return AgentAction(
|
||||
tool=parsed_output[0], tool_input=parsed_output[1], log=full_output
|
||||
)
|
||||
|
||||
async def _aget_next_action(self, full_inputs: Dict[str, str]) -> AgentAction:
|
||||
full_output = await self.llm_chain.apredict(**full_inputs)
|
||||
parsed_output = self._extract_tool_and_input(full_output)
|
||||
while parsed_output is None:
|
||||
full_output = self._fix_text(full_output)
|
||||
full_inputs["agent_scratchpad"] += full_output
|
||||
output = await self.llm_chain.apredict(**full_inputs)
|
||||
full_output += output
|
||||
parsed_output = self._extract_tool_and_input(full_output)
|
||||
return AgentAction(
|
||||
tool=parsed_output[0], tool_input=parsed_output[1], log=full_output
|
||||
)
|
||||
|
||||
def plan(
|
||||
self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any
|
||||
) -> Union[AgentAction, AgentFinish]:
|
||||
@@ -381,8 +399,10 @@ class Agent(BaseSingleActionAgent):
|
||||
Action specifying what tool to use.
|
||||
"""
|
||||
full_inputs = self.get_full_inputs(intermediate_steps, **kwargs)
|
||||
full_output = self.llm_chain.predict(**full_inputs)
|
||||
return self.output_parser.parse(full_output)
|
||||
action = self._get_next_action(full_inputs)
|
||||
if action.tool == self.finish_tool_name:
|
||||
return AgentFinish({"output": action.tool_input}, action.log)
|
||||
return action
|
||||
|
||||
async def aplan(
|
||||
self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any
|
||||
@@ -398,8 +418,10 @@ class Agent(BaseSingleActionAgent):
|
||||
Action specifying what tool to use.
|
||||
"""
|
||||
full_inputs = self.get_full_inputs(intermediate_steps, **kwargs)
|
||||
full_output = await self.llm_chain.apredict(**full_inputs)
|
||||
return self.output_parser.parse(full_output)
|
||||
action = await self._aget_next_action(full_inputs)
|
||||
if action.tool == self.finish_tool_name:
|
||||
return AgentFinish({"output": action.tool_input}, action.log)
|
||||
return action
|
||||
|
||||
def get_full_inputs(
|
||||
self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any
|
||||
@@ -410,6 +432,11 @@ class Agent(BaseSingleActionAgent):
|
||||
full_inputs = {**kwargs, **new_inputs}
|
||||
return full_inputs
|
||||
|
||||
@property
|
||||
def finish_tool_name(self) -> str:
|
||||
"""Name of the tool to use to finish the chain."""
|
||||
return "Final Answer"
|
||||
|
||||
@property
|
||||
def input_keys(self) -> List[str]:
|
||||
"""Return the input keys.
|
||||
@@ -456,18 +483,12 @@ class Agent(BaseSingleActionAgent):
|
||||
"""Validate that appropriate tools are passed in."""
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
@abstractmethod
|
||||
def _get_default_output_parser(cls, **kwargs: Any) -> AgentOutputParser:
|
||||
"""Get default output parser for this class."""
|
||||
|
||||
@classmethod
|
||||
def from_llm_and_tools(
|
||||
cls,
|
||||
llm: BaseLanguageModel,
|
||||
tools: Sequence[BaseTool],
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
output_parser: Optional[AgentOutputParser] = None,
|
||||
**kwargs: Any,
|
||||
) -> Agent:
|
||||
"""Construct an agent from an LLM and tools."""
|
||||
@@ -478,13 +499,7 @@ class Agent(BaseSingleActionAgent):
|
||||
callback_manager=callback_manager,
|
||||
)
|
||||
tool_names = [tool.name for tool in tools]
|
||||
_output_parser = output_parser or cls._get_default_output_parser()
|
||||
return cls(
|
||||
llm_chain=llm_chain,
|
||||
allowed_tools=tool_names,
|
||||
output_parser=_output_parser,
|
||||
**kwargs,
|
||||
)
|
||||
return cls(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
|
||||
|
||||
def return_stopped_response(
|
||||
self,
|
||||
@@ -514,10 +529,14 @@ class Agent(BaseSingleActionAgent):
|
||||
full_inputs = {**kwargs, **new_inputs}
|
||||
full_output = self.llm_chain.predict(**full_inputs)
|
||||
# We try to extract a final answer
|
||||
parsed_output = self.output_parser.parse(full_output)
|
||||
if isinstance(parsed_output, AgentFinish):
|
||||
parsed_output = self._extract_tool_and_input(full_output)
|
||||
if parsed_output is None:
|
||||
# If we cannot extract, we just return the full output
|
||||
return AgentFinish({"output": full_output}, full_output)
|
||||
tool, tool_input = parsed_output
|
||||
if tool == self.finish_tool_name:
|
||||
# If we can extract, we send the correct stuff
|
||||
return parsed_output
|
||||
return AgentFinish({"output": tool_input}, full_output)
|
||||
else:
|
||||
# If we can extract, but the tool is not the final tool,
|
||||
# we just return the full output
|
||||
@@ -730,10 +749,8 @@ class AgentExecutor(Chain):
|
||||
actions = [output]
|
||||
else:
|
||||
actions = output
|
||||
|
||||
async def _aperform_agent_action(
|
||||
agent_action: AgentAction,
|
||||
) -> Tuple[AgentAction, str]:
|
||||
result = []
|
||||
for agent_action in actions:
|
||||
if self.callback_manager.is_async:
|
||||
await self.callback_manager.on_agent_action(
|
||||
agent_action, verbose=self.verbose, color="green"
|
||||
@@ -765,14 +782,9 @@ class AgentExecutor(Chain):
|
||||
color=None,
|
||||
**tool_run_kwargs,
|
||||
)
|
||||
return agent_action, observation
|
||||
result.append((agent_action, observation))
|
||||
|
||||
# Use asyncio.gather to run multiple tool.arun() calls concurrently
|
||||
result = await asyncio.gather(
|
||||
*[_aperform_agent_action(agent_action) for agent_action in actions]
|
||||
)
|
||||
|
||||
return list(result)
|
||||
return result
|
||||
|
||||
def _call(self, inputs: Dict[str, str]) -> Dict[str, Any]:
|
||||
"""Run text through and get agent response."""
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
from langchain.agents.agent_toolkits.csv.base import create_csv_agent
|
||||
from langchain.agents.agent_toolkits.json.base import create_json_agent
|
||||
from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit
|
||||
from langchain.agents.agent_toolkits.nla.toolkit import NLAToolkit
|
||||
from langchain.agents.agent_toolkits.openapi.base import create_openapi_agent
|
||||
from langchain.agents.agent_toolkits.openapi.toolkit import OpenAPIToolkit
|
||||
from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
|
||||
@@ -29,7 +28,6 @@ __all__ = [
|
||||
"create_vectorstore_agent",
|
||||
"JsonToolkit",
|
||||
"SQLDatabaseToolkit",
|
||||
"NLAToolkit",
|
||||
"OpenAPIToolkit",
|
||||
"VectorStoreToolkit",
|
||||
"create_vectorstore_router_agent",
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
"""Tool for interacting with a single API with natural language efinition."""
|
||||
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
from langchain.agents.tools import Tool
|
||||
from langchain.chains.api.openapi.chain import OpenAPIEndpointChain
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.requests import Requests
|
||||
from langchain.tools.openapi.utils.api_models import APIOperation
|
||||
from langchain.tools.openapi.utils.openapi_utils import OpenAPISpec
|
||||
|
||||
|
||||
class NLATool(Tool):
|
||||
"""Natural Language API Tool."""
|
||||
|
||||
@classmethod
|
||||
def from_open_api_endpoint_chain(
|
||||
cls, chain: OpenAPIEndpointChain, api_title: str
|
||||
) -> "NLATool":
|
||||
"""Convert an endpoint chain to an API endpoint tool."""
|
||||
expanded_name = (
|
||||
f'{api_title.replace(" ", "_")}.{chain.api_operation.operation_id}'
|
||||
)
|
||||
description = (
|
||||
f"I'm an AI from {api_title}. Instruct what you want,"
|
||||
" and I'll assist via an API with description:"
|
||||
f" {chain.api_operation.description}"
|
||||
)
|
||||
return cls(name=expanded_name, func=chain.run, description=description)
|
||||
|
||||
@classmethod
|
||||
def from_llm_and_method(
|
||||
cls,
|
||||
llm: BaseLLM,
|
||||
path: str,
|
||||
method: str,
|
||||
spec: OpenAPISpec,
|
||||
requests: Optional[Requests] = None,
|
||||
verbose: bool = False,
|
||||
return_intermediate_steps: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> "NLATool":
|
||||
"""Instantiate the tool from the specified path and method."""
|
||||
api_operation = APIOperation.from_openapi_spec(spec, path, method)
|
||||
chain = OpenAPIEndpointChain.from_api_operation(
|
||||
api_operation,
|
||||
llm,
|
||||
requests=requests,
|
||||
verbose=verbose,
|
||||
return_intermediate_steps=return_intermediate_steps,
|
||||
**kwargs,
|
||||
)
|
||||
return cls.from_open_api_endpoint_chain(chain, spec.info.title)
|
||||
@@ -1,116 +0,0 @@
|
||||
"""Toolkit for interacting with API's using natural language."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, List, Optional, Sequence
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from langchain.agents.agent_toolkits.base import BaseToolkit
|
||||
from langchain.agents.agent_toolkits.nla.tool import NLATool
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.requests import Requests
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.tools.openapi.utils.openapi_utils import OpenAPISpec
|
||||
from langchain.tools.plugin import AIPlugin
|
||||
|
||||
|
||||
class NLAToolkit(BaseToolkit):
|
||||
"""Natural Language API Toolkit Definition."""
|
||||
|
||||
nla_tools: Sequence[NLATool] = Field(...)
|
||||
"""List of API Endpoint Tools."""
|
||||
|
||||
def get_tools(self) -> List[BaseTool]:
|
||||
"""Get the tools for all the API operations."""
|
||||
return list(self.nla_tools)
|
||||
|
||||
@staticmethod
|
||||
def _get_http_operation_tools(
|
||||
llm: BaseLLM,
|
||||
spec: OpenAPISpec,
|
||||
requests: Optional[Requests] = None,
|
||||
verbose: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> List[NLATool]:
|
||||
"""Get the tools for all the API operations."""
|
||||
if not spec.paths:
|
||||
return []
|
||||
http_operation_tools = []
|
||||
for path in spec.paths:
|
||||
for method in spec.get_methods_for_path(path):
|
||||
endpoint_tool = NLATool.from_llm_and_method(
|
||||
llm=llm,
|
||||
path=path,
|
||||
method=method,
|
||||
spec=spec,
|
||||
requests=requests,
|
||||
verbose=verbose,
|
||||
**kwargs,
|
||||
)
|
||||
http_operation_tools.append(endpoint_tool)
|
||||
return http_operation_tools
|
||||
|
||||
@classmethod
|
||||
def from_llm_and_spec(
|
||||
cls,
|
||||
llm: BaseLLM,
|
||||
spec: OpenAPISpec,
|
||||
requests: Optional[Requests] = None,
|
||||
verbose: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> NLAToolkit:
|
||||
"""Instantiate the toolkit by creating tools for each operation."""
|
||||
http_operation_tools = cls._get_http_operation_tools(
|
||||
llm=llm, spec=spec, requests=requests, verbose=verbose, **kwargs
|
||||
)
|
||||
return cls(nla_tools=http_operation_tools)
|
||||
|
||||
@classmethod
|
||||
def from_llm_and_url(
|
||||
cls,
|
||||
llm: BaseLLM,
|
||||
open_api_url: str,
|
||||
requests: Optional[Requests] = None,
|
||||
verbose: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> NLAToolkit:
|
||||
"""Instantiate the toolkit from an OpenAPI Spec URL"""
|
||||
spec = OpenAPISpec.from_url(open_api_url)
|
||||
return cls.from_llm_and_spec(
|
||||
llm=llm, spec=spec, requests=requests, verbose=verbose, **kwargs
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_llm_and_ai_plugin(
|
||||
cls,
|
||||
llm: BaseLLM,
|
||||
ai_plugin: AIPlugin,
|
||||
requests: Optional[Requests] = None,
|
||||
verbose: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> NLAToolkit:
|
||||
"""Instantiate the toolkit from an OpenAPI Spec URL"""
|
||||
spec = OpenAPISpec.from_url(ai_plugin.api.url)
|
||||
# TODO: Merge optional Auth information with the `requests` argument
|
||||
return cls.from_llm_and_spec(
|
||||
llm=llm,
|
||||
spec=spec,
|
||||
requests=requests,
|
||||
verbose=verbose,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_llm_and_ai_plugin_url(
|
||||
cls,
|
||||
llm: BaseLLM,
|
||||
ai_plugin_url: str,
|
||||
requests: Optional[Requests] = None,
|
||||
verbose: bool = False,
|
||||
**kwargs: Any,
|
||||
) -> NLAToolkit:
|
||||
"""Instantiate the toolkit from an OpenAPI Spec URL"""
|
||||
plugin = AIPlugin.from_url(ai_plugin_url)
|
||||
return cls.from_llm_and_ai_plugin(
|
||||
llm=llm, ai_plugin=plugin, requests=requests, verbose=verbose, **kwargs
|
||||
)
|
||||
@@ -22,9 +22,6 @@ def create_openapi_agent(
|
||||
suffix: str = OPENAPI_SUFFIX,
|
||||
format_instructions: str = FORMAT_INSTRUCTIONS,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
max_iterations: Optional[int] = 15,
|
||||
max_execution_time: Optional[float] = None,
|
||||
early_stopping_method: str = "force",
|
||||
verbose: bool = False,
|
||||
return_intermediate_steps: bool = False,
|
||||
**kwargs: Any,
|
||||
@@ -50,7 +47,4 @@ def create_openapi_agent(
|
||||
tools=toolkit.get_tools(),
|
||||
verbose=verbose,
|
||||
return_intermediate_steps=return_intermediate_steps,
|
||||
max_iterations=max_iterations,
|
||||
max_execution_time=max_execution_time,
|
||||
early_stopping_method=early_stopping_method,
|
||||
)
|
||||
|
||||
@@ -14,13 +14,9 @@ from langchain.agents.agent_toolkits.openapi.planner_prompt import (
|
||||
API_PLANNER_PROMPT,
|
||||
API_PLANNER_TOOL_DESCRIPTION,
|
||||
API_PLANNER_TOOL_NAME,
|
||||
PARSING_DELETE_PROMPT,
|
||||
PARSING_GET_PROMPT,
|
||||
PARSING_PATCH_PROMPT,
|
||||
PARSING_POST_PROMPT,
|
||||
REQUESTS_DELETE_TOOL_DESCRIPTION,
|
||||
REQUESTS_GET_TOOL_DESCRIPTION,
|
||||
REQUESTS_PATCH_TOOL_DESCRIPTION,
|
||||
REQUESTS_POST_TOOL_DESCRIPTION,
|
||||
)
|
||||
from langchain.agents.agent_toolkits.openapi.spec import ReducedOpenAPISpec
|
||||
@@ -28,7 +24,6 @@ from langchain.agents.mrkl.base import ZeroShotAgent
|
||||
from langchain.agents.tools import Tool
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.llms.openai import OpenAI
|
||||
from langchain.memory import ReadOnlySharedMemory
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.requests import RequestsWrapper
|
||||
from langchain.schema import BaseLanguageModel
|
||||
@@ -58,8 +53,7 @@ class RequestsGetToolWithParsing(BaseRequestsTool, BaseTool):
|
||||
data = json.loads(text)
|
||||
except json.JSONDecodeError as e:
|
||||
raise e
|
||||
data_params = data.get("params")
|
||||
response = self.requests_wrapper.get(data["url"], params=data_params)
|
||||
response = self.requests_wrapper.get(data["url"])
|
||||
response = response[: self.response_length]
|
||||
return self.llm_chain.predict(
|
||||
response=response, instructions=data["output_instructions"]
|
||||
@@ -94,56 +88,6 @@ class RequestsPostToolWithParsing(BaseRequestsTool, BaseTool):
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class RequestsPatchToolWithParsing(BaseRequestsTool, BaseTool):
|
||||
name = "requests_patch"
|
||||
description = REQUESTS_PATCH_TOOL_DESCRIPTION
|
||||
|
||||
response_length: Optional[int] = MAX_RESPONSE_LENGTH
|
||||
llm_chain = LLMChain(
|
||||
llm=OpenAI(),
|
||||
prompt=PARSING_PATCH_PROMPT,
|
||||
)
|
||||
|
||||
def _run(self, text: str) -> str:
|
||||
try:
|
||||
data = json.loads(text)
|
||||
except json.JSONDecodeError as e:
|
||||
raise e
|
||||
response = self.requests_wrapper.patch(data["url"], data["data"])
|
||||
response = response[: self.response_length]
|
||||
return self.llm_chain.predict(
|
||||
response=response, instructions=data["output_instructions"]
|
||||
).strip()
|
||||
|
||||
async def _arun(self, text: str) -> str:
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
class RequestsDeleteToolWithParsing(BaseRequestsTool, BaseTool):
|
||||
name = "requests_delete"
|
||||
description = REQUESTS_DELETE_TOOL_DESCRIPTION
|
||||
|
||||
response_length: Optional[int] = MAX_RESPONSE_LENGTH
|
||||
llm_chain = LLMChain(
|
||||
llm=OpenAI(),
|
||||
prompt=PARSING_DELETE_PROMPT,
|
||||
)
|
||||
|
||||
def _run(self, text: str) -> str:
|
||||
try:
|
||||
data = json.loads(text)
|
||||
except json.JSONDecodeError as e:
|
||||
raise e
|
||||
response = self.requests_wrapper.delete(data["url"])
|
||||
response = response[: self.response_length]
|
||||
return self.llm_chain.predict(
|
||||
response=response, instructions=data["output_instructions"]
|
||||
).strip()
|
||||
|
||||
async def _arun(self, text: str) -> str:
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
#
|
||||
# Orchestrator, planner, controller.
|
||||
#
|
||||
@@ -211,7 +155,7 @@ def _create_api_controller_tool(
|
||||
base_url = api_spec.servers[0]["url"] # TODO: do better.
|
||||
|
||||
def _create_and_run_api_controller_agent(plan_str: str) -> str:
|
||||
pattern = r"\b(GET|POST|PATCH|DELETE)\s+(/\S+)*"
|
||||
pattern = r"\b(GET|POST)\s+(/\S+)*"
|
||||
matches = re.findall(pattern, plan_str)
|
||||
endpoint_names = [
|
||||
"{method} {route}".format(method=method, route=route.split("?")[0])
|
||||
@@ -239,8 +183,6 @@ def create_openapi_agent(
|
||||
api_spec: ReducedOpenAPISpec,
|
||||
requests_wrapper: RequestsWrapper,
|
||||
llm: BaseLanguageModel,
|
||||
shared_memory: Optional[ReadOnlySharedMemory] = None,
|
||||
verbose: bool = True,
|
||||
) -> AgentExecutor:
|
||||
"""Instantiate API planner and controller for a given spec.
|
||||
|
||||
@@ -265,7 +207,7 @@ def create_openapi_agent(
|
||||
},
|
||||
)
|
||||
agent = ZeroShotAgent(
|
||||
llm_chain=LLMChain(llm=llm, prompt=prompt, memory=shared_memory),
|
||||
llm_chain=LLMChain(llm=llm, prompt=prompt),
|
||||
allowed_tools=[tool.name for tool in tools],
|
||||
)
|
||||
return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=verbose)
|
||||
return AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user