Mirror of https://github.com/hwchase17/langchain.git, synced 2026-02-04 16:20:16 +00:00

Compare commits: vwp/openap... → harrison/a... (124 commits)
Commits (SHA1):

f8bd49021e, 5af1da7b38, d3c92ed203, e438969ab7, e12e00df12, 8b9e02da9d, 4c02f4bc30, 7302787a7b,
69698be3e6, 32db2a2c2f, 1e655d5ffd, 88d3ce12b8, db0a9c14cf, 21a1ac36b5, 5ca7ce77cd, 2a0f65f7af,
aead062a70, 51894ddd98, 706ebd8f9c, 9a03f00e6c, 9d8ab28837, 4ffc58e07b, 57f4309fa8, b9db20481f,
fea5619ce9, f7bf917baf, 94c83fa5d1, 961ce77f8d, b634489b2e, 274b25c010, baf350e32b, a38c992703,
36aa7f30e4, 7c73e9df5d, b3a5b51728, c4ae8c1d24, ad3973a3b8, cf2789d86d, 0aa828b1dc, 3564568b4a,
ec59e9d886, 13a0ed064b, 9860c09fa2, 392f1b3218, 66bef1d7ed, 7ee87eb0c8, 634358db5e, 30573b2e30,
a508afa91c, 7e525a3b91, ccacf804a8, 86189cdcf9, 8fef69296d, 0a38bbc750, 203c0eb2ae, 1a44b71ddf,
3c7204d604, 1e9378d0a8, 07d7096de6, 5565f56273, 9907cb0485, 1cc7ea333c, 705596b46a, 8a98e5b50b,
dcb17503f2, 74abeb8c53, 0226b375d9, 04c458a270, 016738e676, 8cfec2c5fe, bf0887c486, ed2ef5cbe4,
6be5d7c612, c26a259ba6, f3180f05f9, ecc1a0c051, 70ffe470aa, be4fb24b32, 82d1d5f24e, 53dc157145,
1609950597, 7688bf9182, 2db9b7a45d, 802363eb6a, 2a89dc8c1c, a6f767ae7a, 4f231b46ee, 414dc803b6,
61858c5a08, 9a96691803, 324e9c83d5, ed03e965de, 64596b23b9, 1bb0706955, b2bc5ef56a, abfca72c0b,
f0be3b0689, e081c62aac, a094b7f807, 1c7fb31bba, 0e763677e4, e49f1e628c, 425c437cd3, a2d729e537,
7adbc4fbb4, 1bea9ea4be, 819d72614a, fa0c9390c2, 59d054308c, 789cc314c5, b92a89e29f, 94a92abf24,
b5bbe601fb, b38a6ea7df, dd59193757, 933dfac583, 507cee5ee5, 744c25cd0a, 0ab364404e, 4bdcedab54,
c1521ddbdb, 0806951c07, 446c3d586c, 8073bc849f
@@ -2,7 +2,7 @@

 ⚡ Building applications with LLMs through composability ⚡

-[](https://github.com/hwchase17/langchain/actions/workflows/lint.yml) [](https://github.com/hwchase17/langchain/actions/workflows/test.yml) [](https://github.com/hwchase17/langchain/actions/workflows/linkcheck.yml) [](https://opensource.org/licenses/MIT) [](https://twitter.com/langchainai) [](https://discord.gg/6adMQxSpJS)
+[](https://github.com/hwchase17/langchain/actions/workflows/lint.yml) [](https://github.com/hwchase17/langchain/actions/workflows/test.yml) [](https://github.com/hwchase17/langchain/actions/workflows/linkcheck.yml) [](https://pepy.tech/project/langchain) [](https://opensource.org/licenses/MIT) [](https://twitter.com/langchainai) [](https://discord.gg/6adMQxSpJS)

 **Production Support:** As you move your LangChains into production, we'd love to offer more comprehensive support.
 Please fill out [this form](https://forms.gle/57d8AmXBYp8PP8tZA) and we'll set up a dedicated support Slack channel.
docs/_static/css/custom.css (vendored, 4)
@@ -11,3 +11,7 @@ pre {
   max-width: 2560px !important;
 }
 }
+
+#my-component-root *, #headlessui-portal-root * {
+  z-index: 1000000000000;
+}
docs/_static/js/mendablesearch.js (vendored, new file, 58 lines)
@@ -0,0 +1,58 @@
document.addEventListener('DOMContentLoaded', () => {
  // Load the external dependencies
  function loadScript(src, onLoadCallback) {
    const script = document.createElement('script');
    script.src = src;
    script.onload = onLoadCallback;
    document.head.appendChild(script);
  }

  function createRootElement() {
    const rootElement = document.createElement('div');
    rootElement.id = 'my-component-root';
    document.body.appendChild(rootElement);
    return rootElement;
  }

  function initializeMendable() {
    const rootElement = createRootElement();
    const { MendableFloatingButton } = Mendable;

    const iconSpan1 = React.createElement('span', {}, '🦜');
    const iconSpan2 = React.createElement('span', {}, '🔗');

    const icon = React.createElement('p', {
      style: { color: '#ffffff', fontSize: '22px', width: '48px', height: '48px', margin: '0px', padding: '0px', display: 'flex', alignItems: 'center', justifyContent: 'center', textAlign: 'center' },
    }, [iconSpan1, iconSpan2]);

    const mendableFloatingButton = React.createElement(
      MendableFloatingButton,
      {
        style: { darkMode: false, accentColor: '#010810' },
        floatingButtonStyle: { color: '#ffffff', backgroundColor: '#010810' },
        anon_key: '82842b36-3ea6-49b2-9fb8-52cfc4bde6bf', // Mendable Search public ANON key, OK to be public
        messageSettings: {
          openSourcesInNewTab: false,
        },
        icon: icon,
      }
    );

    ReactDOM.render(mendableFloatingButton, rootElement);
  }

  loadScript('https://unpkg.com/react@17/umd/react.production.min.js', () => {
    loadScript('https://unpkg.com/react-dom@17/umd/react-dom.production.min.js', () => {
      loadScript('https://unpkg.com/@mendable/search@0.0.83/dist/umd/mendable.min.js', initializeMendable);
    });
  });
});
@@ -103,5 +103,10 @@ html_static_path = ["_static"]
 html_css_files = [
     "css/custom.css",
 ]
+
+html_js_files = [
+    "js/mendablesearch.js",
+]

 nb_execution_mode = "off"
 myst_enable_extensions = ["colon_fence"]
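For context, a minimal sketch of how these settings fit together in a Sphinx `conf.py`; only the `html_js_files` block is what this hunk adds, the rest is standard Sphinx configuration shown for orientation:

```python
# Sphinx configuration (sketch). Files listed below must live under a
# directory in html_static_path; Sphinx copies them into the build output
# and injects matching <link>/<script> tags on every rendered page.
html_static_path = ["_static"]

html_css_files = [
    "css/custom.css",
]

html_js_files = [
    "js/mendablesearch.js",  # loads the Mendable search widget added above
]
```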
@@ -33,12 +33,17 @@ It implements a Question Answering app and contains instructions for deploying t

A minimal example of how to run LangChain on Vercel using Flask.

## [Digitalocean App Platform](https://github.com/homanp/digitalocean-langchain)

A minimal example of how to deploy LangChain to DigitalOcean App Platform.

## [Steamship](https://github.com/steamship-core/steamship-langchain/)

This repository contains LangChain adapters for Steamship, enabling LangChain developers to rapidly deploy their apps on Steamship.
This includes: production-ready endpoints, horizontal scaling across dependencies, persistent storage of app state, multi-tenancy support, etc.

## [Langchain-serve](https://github.com/jina-ai/langchain-serve)

This repository allows users to serve local chains and agents as RESTful, gRPC, or WebSocket APIs, thanks to [Jina](https://docs.jina.ai/). Deploy your chains & agents with ease and enjoy independent scaling, serverless and autoscaling APIs, as well as a Streamlit playground on Jina AI Cloud.

## [BentoML](https://github.com/ssheng/BentoChain)
@@ -19,7 +19,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"# Getting API Credentials\n",
+"## Getting API Credentials\n",
 "\n",
 "We'll be using quite a few APIs in this notebook; here is a list and where to get them:\n",
 "\n",
@@ -47,7 +47,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"# Setting Up"
+"## Setting Up"
 ]
 },
 {
@@ -103,7 +103,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"# Scenario 1: Just an LLM\n",
+"## Scenario 1: Just an LLM\n",
 "\n",
 "First, let's just run a single LLM a few times and capture the resulting prompt-answer conversation in ClearML"
 ]
@@ -361,7 +361,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"# Scenario 2: Creating a agent with tools\n",
+"## Scenario 2: Creating an agent with tools\n",
 "\n",
 "To show a more advanced workflow, let's create an agent with access to tools. The way ClearML tracks the results is no different, though; only the table will look slightly different, as there are other types of actions taken when compared to the earlier, simpler example.\n",
 "\n",
@@ -542,7 +542,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"# Tips and Next Steps\n",
+"## Tips and Next Steps\n",
 "\n",
 "- Make sure you always use a unique `name` argument for the `clearml_callback.flush_tracker` function. If not, the model parameters used for a run will override the previous run!\n",
 "\n",
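That tip is easy to trip over; a minimal sketch, assuming the `clearml_callback` handler configured earlier in this notebook (the run names here are illustrative):

```python
# Give every flush a unique `name`; reusing a name makes the new run's
# parameters overwrite the previous run in ClearML.
clearml_callback.flush_tracker(langchain_asset=llm, name="scenario-1-llm")
# ... run the agent ...
clearml_callback.flush_tracker(langchain_asset=agent, name="scenario-2-agent", finish=True)
```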
docs/ecosystem/comet_tracking.ipynb (new file, 352 lines)

@@ -0,0 +1,352 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Comet"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    ""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this guide we will demonstrate how to track your LangChain Experiments, Evaluation Metrics, and LLM Sessions with [Comet](https://www.comet.com/site/?utm_source=langchain&utm_medium=referral&utm_campaign=comet_notebook).\n",
    "\n",
    "<a target=\"_blank\" href=\"https://colab.research.google.com/github/hwchase17/langchain/blob/master/docs/ecosystem/comet_tracking.ipynb\">\n",
    "  <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>\n",
    "</a>\n",
    "\n",
    "**Example Project:** [Comet with LangChain](https://www.comet.com/examples/comet-example-langchain/view/b5ZThK6OFdhKWVSP3fDfRtrNF/panels?utm_source=langchain&utm_medium=referral&utm_campaign=comet_notebook)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<img width=\"1280\" alt=\"comet-langchain\" src=\"https://user-images.githubusercontent.com/7529846/230326720-a9711435-9c6f-4edb-a707-94b67271ab25.png\">\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Install Comet and Dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install comet_ml langchain openai google-search-results spacy textstat pandas\n",
    "\n",
    "import sys\n",
    "!{sys.executable} -m spacy download en_core_web_sm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Initialize Comet and Set your Credentials"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can grab your [Comet API Key here](https://www.comet.com/signup?utm_source=langchain&utm_medium=referral&utm_campaign=comet_notebook) or click the link after initializing Comet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import comet_ml\n",
    "\n",
    "comet_ml.init(project_name=\"comet-example-langchain\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Set OpenAI and SerpAPI credentials"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You will need an [OpenAI API Key](https://platform.openai.com/account/api-keys) and a [SerpAPI API Key](https://serpapi.com/dashboard) to run the following examples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"...\"\n",
    "# os.environ[\"OPENAI_ORGANIZATION\"] = \"...\"\n",
    "os.environ[\"SERPAPI_API_KEY\"] = \"...\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Scenario 1: Using just an LLM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "\n",
    "from langchain.callbacks import CometCallbackHandler, StdOutCallbackHandler\n",
    "from langchain.callbacks.base import CallbackManager\n",
    "from langchain.llms import OpenAI\n",
    "\n",
    "comet_callback = CometCallbackHandler(\n",
    "    project_name=\"comet-example-langchain\",\n",
    "    complexity_metrics=True,\n",
    "    stream_logs=True,\n",
    "    tags=[\"llm\"],\n",
    "    visualizations=[\"dep\"],\n",
    ")\n",
    "manager = CallbackManager([StdOutCallbackHandler(), comet_callback])\n",
    "llm = OpenAI(temperature=0.9, callback_manager=manager, verbose=True)\n",
    "\n",
    "llm_result = llm.generate([\"Tell me a joke\", \"Tell me a poem\", \"Tell me a fact\"] * 3)\n",
    "print(\"LLM result\", llm_result)\n",
    "comet_callback.flush_tracker(llm, finish=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Scenario 2: Using an LLM in a Chain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.callbacks import CometCallbackHandler, StdOutCallbackHandler\n",
    "from langchain.callbacks.base import CallbackManager\n",
    "from langchain.chains import LLMChain\n",
    "from langchain.llms import OpenAI\n",
    "from langchain.prompts import PromptTemplate\n",
    "\n",
    "comet_callback = CometCallbackHandler(\n",
    "    complexity_metrics=True,\n",
    "    project_name=\"comet-example-langchain\",\n",
    "    stream_logs=True,\n",
    "    tags=[\"synopsis-chain\"],\n",
    ")\n",
    "manager = CallbackManager([StdOutCallbackHandler(), comet_callback])\n",
    "\n",
    "llm = OpenAI(temperature=0.9, callback_manager=manager, verbose=True)\n",
    "\n",
    "template = \"\"\"You are a playwright. Given the title of a play, it is your job to write a synopsis for that title.\n",
    "Title: {title}\n",
    "Playwright: This is a synopsis for the above play:\"\"\"\n",
    "prompt_template = PromptTemplate(input_variables=[\"title\"], template=template)\n",
    "synopsis_chain = LLMChain(llm=llm, prompt=prompt_template, callback_manager=manager)\n",
    "\n",
    "test_prompts = [{\"title\": \"Documentary about Bigfoot in Paris\"}]\n",
    "print(synopsis_chain.apply(test_prompts))\n",
    "comet_callback.flush_tracker(synopsis_chain, finish=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Scenario 3: Using an Agent with Tools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.agents import initialize_agent, load_tools\n",
    "from langchain.callbacks import CometCallbackHandler, StdOutCallbackHandler\n",
    "from langchain.callbacks.base import CallbackManager\n",
    "from langchain.llms import OpenAI\n",
    "\n",
    "comet_callback = CometCallbackHandler(\n",
    "    project_name=\"comet-example-langchain\",\n",
    "    complexity_metrics=True,\n",
    "    stream_logs=True,\n",
    "    tags=[\"agent\"],\n",
    ")\n",
    "manager = CallbackManager([StdOutCallbackHandler(), comet_callback])\n",
    "llm = OpenAI(temperature=0.9, callback_manager=manager, verbose=True)\n",
    "\n",
    "tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm, callback_manager=manager)\n",
    "agent = initialize_agent(\n",
    "    tools,\n",
    "    llm,\n",
    "    agent=\"zero-shot-react-description\",\n",
    "    callback_manager=manager,\n",
    "    verbose=True,\n",
    ")\n",
    "agent.run(\n",
    "    \"Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\"\n",
    ")\n",
    "comet_callback.flush_tracker(agent, finish=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Scenario 4: Using Custom Evaluation Metrics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `CometCallbackHandler` also allows you to define and use Custom Evaluation Metrics to assess generated outputs from your model. Let's take a look at how this works.\n",
    "\n",
    "\n",
    "In the snippet below, we will use the [ROUGE](https://huggingface.co/spaces/evaluate-metric/rouge) metric to evaluate the quality of a generated summary of an input prompt."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install rouge-score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from rouge_score import rouge_scorer\n",
    "\n",
    "from langchain.callbacks import CometCallbackHandler, StdOutCallbackHandler\n",
    "from langchain.callbacks.base import CallbackManager\n",
    "from langchain.chains import LLMChain\n",
    "from langchain.llms import OpenAI\n",
    "from langchain.prompts import PromptTemplate\n",
    "\n",
    "\n",
    "class Rouge:\n",
    "    def __init__(self, reference):\n",
    "        self.reference = reference\n",
    "        self.scorer = rouge_scorer.RougeScorer([\"rougeLsum\"], use_stemmer=True)\n",
    "\n",
    "    def compute_metric(self, generation, prompt_idx, gen_idx):\n",
    "        prediction = generation.text\n",
    "        results = self.scorer.score(target=self.reference, prediction=prediction)\n",
    "\n",
    "        return {\n",
    "            \"rougeLsum_score\": results[\"rougeLsum\"].fmeasure,\n",
    "            \"reference\": self.reference,\n",
    "        }\n",
    "\n",
    "\n",
    "reference = \"\"\"\n",
    "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building.\n",
    "It was the first structure to reach a height of 300 metres.\n",
    "\n",
    "It is now taller than the Chrysler Building in New York City by 5.2 metres (17 ft)\n",
    "Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France.\n",
    "\"\"\"\n",
    "rouge_score = Rouge(reference=reference)\n",
    "\n",
    "template = \"\"\"Given the following article, it is your job to write a summary.\n",
    "Article:\n",
    "{article}\n",
    "Summary: This is the summary for the above article:\"\"\"\n",
    "prompt_template = PromptTemplate(input_variables=[\"article\"], template=template)\n",
    "\n",
    "comet_callback = CometCallbackHandler(\n",
    "    project_name=\"comet-example-langchain\",\n",
    "    complexity_metrics=False,\n",
    "    stream_logs=True,\n",
    "    tags=[\"custom_metrics\"],\n",
    "    custom_metrics=rouge_score.compute_metric,\n",
    ")\n",
    "manager = CallbackManager([StdOutCallbackHandler(), comet_callback])\n",
    "llm = OpenAI(temperature=0.9, callback_manager=manager, verbose=True)\n",
    "\n",
    "synopsis_chain = LLMChain(llm=llm, prompt=prompt_template, callback_manager=manager)\n",
    "\n",
    "test_prompts = [\n",
    "    {\n",
    "        \"article\": \"\"\"\n",
    "            The tower is 324 metres (1,063 ft) tall, about the same height as\n",
    "            an 81-storey building, and the tallest structure in Paris. Its base is square,\n",
    "            measuring 125 metres (410 ft) on each side.\n",
    "            During its construction, the Eiffel Tower surpassed the\n",
    "            Washington Monument to become the tallest man-made structure in the world,\n",
    "            a title it held for 41 years until the Chrysler Building\n",
    "            in New York City was finished in 1930.\n",
    "\n",
    "            It was the first structure to reach a height of 300 metres.\n",
    "            Due to the addition of a broadcasting aerial at the top of the tower in 1957,\n",
    "            it is now taller than the Chrysler Building by 5.2 metres (17 ft).\n",
    "\n",
    "            Excluding transmitters, the Eiffel Tower is the second tallest\n",
    "            free-standing structure in France after the Millau Viaduct.\n",
    "            \"\"\"\n",
    "    }\n",
    "]\n",
    "print(synopsis_chain.apply(test_prompts))\n",
    "comet_callback.flush_tracker(synopsis_chain, finish=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
@@ -36,7 +36,7 @@ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
 model = GPT4All(model="./models/gpt4all-model.bin", n_ctx=512, n_threads=8, callback_manager=callback_manager, verbose=True)

-# Generate text. Tokens are streamed throught the callback manager.
+# Generate text. Tokens are streamed through the callback manager.
 model("Once upon a time, ")
 ```

@@ -44,4 +44,4 @@ model("Once upon a time, ")

 You can find links to model file downloads in the [pyllamacpp](https://github.com/nomic-ai/pyllamacpp) repository.

 For a more detailed walkthrough of this, see [this notebook](../modules/models/llms/integrations/gpt4all.ipynb)
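The snippet above streams tokens to stdout, but the same hook supports any handler; a minimal sketch of a custom collector (the class name and buffering behaviour are illustrative, not part of this diff):

```python
from langchain.callbacks.base import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


class CollectTokensHandler(StreamingStdOutCallbackHandler):
    """Illustrative handler that buffers streamed tokens instead of printing them."""

    def __init__(self):
        super().__init__()
        self.tokens = []

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        # Called once per generated token when streaming is enabled; keep it
        # in memory instead of writing to stdout.
        self.tokens.append(token)


callback_manager = CallbackManager([CollectTokensHandler()])
```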
@@ -1,5 +1,5 @@
 LangChain Gallery
-=============
+=================

 Lots of people have built some pretty awesome stuff with LangChain.
 This is a collection of our favorites.
@@ -223,7 +223,7 @@ Open Source
 Answer questions about the documentation of any project

 Misc. Colab Notebooks
-~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~

 .. panels::
     :body: text-center
@@ -77,7 +77,7 @@
 "        Returns:\n",
 "            Action specifying what tool to use.\n",
 "        \"\"\"\n",
-"        return AgentAction(tool=\"Search\", tool_input=\"foo\", log=\"\")\n",
+"        return AgentAction(tool=\"Search\", tool_input=kwargs[\"input\"], log=\"\")\n",
 "\n",
 "    async def aplan(\n",
 "        self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any\n",
@@ -92,7 +92,7 @@
 "        Returns:\n",
 "            Action specifying what tool to use.\n",
 "        \"\"\"\n",
-"        return AgentAction(tool=\"Search\", tool_input=\"foo\", log=\"\")"
+"        return AgentAction(tool=\"Search\", tool_input=kwargs[\"input\"], log=\"\")"
 ]
 },
 {
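The substance of this change: `plan` (and `aplan`) previously sent the hardcoded string "foo" to the Search tool; it now forwards the user's actual question via `kwargs["input"]`. A minimal illustration (the question string is made up):

```python
from langchain.schema import AgentAction

# Before: the Search tool always received the literal string "foo".
# After: it receives whatever the user actually asked.
kwargs = {"input": "How many people live in Canada as of 2023?"}
action = AgentAction(tool="Search", tool_input=kwargs["input"], log="")
print(action.tool, "->", action.tool_input)
```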
@@ -315,7 +315,7 @@
 "            log=llm_output,\n",
 "        )\n",
 "    # Parse out the action and action input\n",
-"    regex = r\"Action: (.*?)[\\n]*Action Input:[\\s]*(.*)\"\n",
+"    regex = r\"Action\\s*\\d*\\s*:(.*?)\\nAction\\s*\\d*\\s*Input\\s*\\d*\\s*:[\\s]*(.*)\"\n",
 "    match = re.search(regex, llm_output, re.DOTALL)\n",
 "    if not match:\n",
 "        raise ValueError(f\"Could not parse LLM output: `{llm_output}`\")\n",
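The broadened pattern also accepts numbered steps such as "Action 1:" and stray whitespace around the colons; a quick sanity check of the old pattern against the new one:

```python
import re

old_regex = r"Action: (.*?)[\n]*Action Input:[\s]*(.*)"
new_regex = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"

llm_output = "I should search.\nAction 1: Search\nAction 1 Input: population of Canada"

print(re.search(old_regex, llm_output, re.DOTALL))  # None: "Action 1:" never matches
match = re.search(new_regex, llm_output, re.DOTALL)
print(match.group(1).strip(), "|", match.group(2).strip())  # Search | population of Canada
```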
@@ -42,7 +42,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 11,
+"execution_count": 1,
 "id": "9af9734e",
 "metadata": {},
 "outputs": [],
@@ -67,7 +67,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 28,
+"execution_count": 2,
 "id": "becda2a1",
 "metadata": {},
 "outputs": [],
@@ -99,7 +99,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 7,
+"execution_count": 3,
 "id": "339b1bb8",
 "metadata": {},
 "outputs": [],
@@ -128,7 +128,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 22,
+"execution_count": 4,
 "id": "fd969d31",
 "metadata": {},
 "outputs": [],
@@ -159,7 +159,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 23,
+"execution_count": 5,
 "id": "798ef9fb",
 "metadata": {},
 "outputs": [],
@@ -187,7 +187,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 13,
+"execution_count": 6,
 "id": "7c6fe0d3",
 "metadata": {},
 "outputs": [],
@@ -204,7 +204,7 @@
 "            log=llm_output,\n",
 "        )\n",
 "    # Parse out the action and action input\n",
-"    regex = r\"Action: (.*?)[\\n]*Action Input:[\\s]*(.*)\"\n",
+"    regex = r\"Action\\s*\\d*\\s*:(.*?)\\nAction\\s*\\d*\\s*Input\\s*\\d*\\s*:[\\s]*(.*)\"\n",
 "    match = re.search(regex, llm_output, re.DOTALL)\n",
 "    if not match:\n",
 "        raise ValueError(f\"Could not parse LLM output: `{llm_output}`\")\n",
@@ -216,7 +216,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 14,
+"execution_count": 7,
 "id": "d278706a",
 "metadata": {},
 "outputs": [],
@@ -236,7 +236,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 16,
+"execution_count": 8,
 "id": "f9d4c374",
 "metadata": {},
 "outputs": [],
@@ -268,7 +268,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 24,
+"execution_count": 9,
 "id": "9b1cc2a2",
 "metadata": {},
 "outputs": [],
@@ -279,7 +279,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 25,
+"execution_count": 10,
 "id": "e4f5092f",
 "metadata": {},
 "outputs": [],
@@ -305,7 +305,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 26,
+"execution_count": 11,
 "id": "490604e9",
 "metadata": {},
 "outputs": [],
@@ -315,7 +315,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 27,
+"execution_count": 12,
 "id": "653b1617",
 "metadata": {},
 "outputs": [
@@ -326,11 +326,12 @@
 "\n",
 "\n",
 "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-"\u001b[32;1m\u001b[1;3mAction: Search\n",
+"\u001b[32;1m\u001b[1;3mThought: I need to find out the population of Canada in 2023\n",
+"Action: Search\n",
 "Action Input: Population of Canada in 2023\u001b[0m\n",
 "\n",
-"Observation:\u001b[36;1m\u001b[1;3m38,648,380\u001b[0m\u001b[32;1m\u001b[1;3m That's a lot of people!\n",
-"Final Answer: Arrr, there be 38,648,380 people livin' in Canada come 2023!\u001b[0m\n",
+"Observation:\u001b[36;1m\u001b[1;3mThe current population of Canada is 38,658,314 as of Wednesday, April 12, 2023, based on Worldometer elaboration of the latest United Nations data.\u001b[0m\u001b[32;1m\u001b[1;3m I now know the final answer\n",
+"Final Answer: Arrr, there be 38,658,314 people livin' in Canada as of 2023!\u001b[0m\n",
 "\n",
 "\u001b[1m> Finished chain.\u001b[0m\n"
 ]
@@ -338,10 +339,165 @@
 {
 "data": {
 "text/plain": [
-"\"Arrr, there be 38,648,380 people livin' in Canada come 2023!\""
+"\"Arrr, there be 38,658,314 people livin' in Canada as of 2023!\""
 ]
 },
-"execution_count": 27,
+"execution_count": 12,
 "metadata": {},
 "output_type": "execute_result"
 }
 ],
 "source": [
 "agent_executor.run(\"How many people live in canada as of 2023?\")"
 ]
 },
+{
+"cell_type": "markdown",
+"id": "d5b4a078",
+"metadata": {},
+"source": [
+"## Adding Memory\n",
+"\n",
+"If you want to add memory to the agent, you'll need to:\n",
+"\n",
+"1. Add a place in the custom prompt for the chat_history\n",
+"2. Add a memory object to the agent executor."
+]
+},
+{
+"cell_type": "code",
+"execution_count": 29,
+"id": "94fffda1",
+"metadata": {},
+"outputs": [],
+"source": [
+"# Set up the base template\n",
+"template_with_history = \"\"\"Answer the following questions as best you can, but speaking as a pirate might speak. You have access to the following tools:\n",
+"\n",
+"{tools}\n",
+"\n",
+"Use the following format:\n",
+"\n",
+"Question: the input question you must answer\n",
+"Thought: you should always think about what to do\n",
+"Action: the action to take, should be one of [{tool_names}]\n",
+"Action Input: the input to the action\n",
+"Observation: the result of the action\n",
+"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
+"Thought: I now know the final answer\n",
+"Final Answer: the final answer to the original input question\n",
+"\n",
+"Begin! Remember to speak as a pirate when giving your final answer. Use lots of \"Arg\"s\n",
+"\n",
+"Previous conversation history:\n",
+"{history}\n",
+"\n",
+"New question: {input}\n",
+"{agent_scratchpad}\"\"\""
+]
+},
+{
+"cell_type": "code",
+"execution_count": 30,
+"id": "f58488d7",
+"metadata": {},
+"outputs": [],
+"source": [
+"prompt_with_history = CustomPromptTemplate(\n",
+"    template=template_with_history,\n",
+"    tools=tools,\n",
+"    # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically\n",
+"    # This includes the `intermediate_steps` variable because that is needed\n",
+"    input_variables=[\"input\", \"intermediate_steps\", \"history\"]\n",
+")"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 31,
+"id": "d28d4b5a",
+"metadata": {},
+"outputs": [],
+"source": [
+"llm_chain = LLMChain(llm=llm, prompt=prompt_with_history)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 32,
+"id": "3e37b32a",
+"metadata": {},
+"outputs": [],
+"source": [
+"tool_names = [tool.name for tool in tools]\n",
+"agent = LLMSingleActionAgent(\n",
+"    llm_chain=llm_chain,\n",
+"    output_parser=output_parser,\n",
+"    stop=[\"\\nObservation:\"],\n",
+"    allowed_tools=tool_names\n",
+")"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 33,
+"id": "97ea1bce",
+"metadata": {},
+"outputs": [],
+"source": [
+"from langchain.memory import ConversationBufferWindowMemory"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 42,
+"id": "b5ad69ce",
+"metadata": {},
+"outputs": [],
+"source": [
+"memory = ConversationBufferWindowMemory(k=2)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 43,
+"id": "b7b5c9b1",
+"metadata": {},
+"outputs": [],
+"source": [
+"agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 44,
+"id": "5ec4c39b",
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"\n",
+"\n",
+"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+"\u001b[32;1m\u001b[1;3mThought: I need to find out the population of Canada in 2023\n",
+"Action: Search\n",
+"Action Input: Population of Canada in 2023\u001b[0m\n",
+"\n",
+"Observation:\u001b[36;1m\u001b[1;3mThe current population of Canada is 38,658,314 as of Wednesday, April 12, 2023, based on Worldometer elaboration of the latest United Nations data.\u001b[0m\u001b[32;1m\u001b[1;3m I now know the final answer\n",
+"Final Answer: Arrr, there be 38,658,314 people livin' in Canada as of 2023!\u001b[0m\n",
+"\n",
+"\u001b[1m> Finished chain.\u001b[0m\n"
+]
+},
+{
+"data": {
+"text/plain": [
+"\"Arrr, there be 38,658,314 people livin' in Canada as of 2023!\""
+]
+},
+"execution_count": 44,
+"metadata": {},
+"output_type": "execute_result"
+}
@@ -350,10 +506,48 @@
 "agent_executor.run(\"How many people live in canada as of 2023?\")"
 ]
 },
+{
+"cell_type": "code",
+"execution_count": 45,
+"id": "b2ba45bb",
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"\n",
+"\n",
+"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+"\u001b[32;1m\u001b[1;3mThought: I need to find out how many people live in Mexico.\n",
+"Action: Search\n",
+"Action Input: How many people live in Mexico as of 2023?\u001b[0m\n",
+"\n",
+"Observation:\u001b[36;1m\u001b[1;3mThe current population of Mexico is 132,679,922 as of Tuesday, April 11, 2023, based on Worldometer elaboration of the latest United Nations data. Mexico 2020 ...\u001b[0m\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
+"Final Answer: Arrr, there be 132,679,922 people livin' in Mexico as of 2023!\u001b[0m\n",
+"\n",
+"\u001b[1m> Finished chain.\u001b[0m\n"
+]
+},
+{
+"data": {
+"text/plain": [
+"\"Arrr, there be 132,679,922 people livin' in Mexico as of 2023!\""
+]
+},
+"execution_count": 45,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"agent_executor.run(\"how about in mexico?\")"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
-"id": "adefb4c2",
+"id": "bd820a7a",
 "metadata": {},
 "outputs": [],
 "source": []
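`ConversationBufferWindowMemory(k=2)` keeps only the last two exchanges in the rendered `{history}`, which bounds the prompt size as the conversation grows; a standalone sketch of the windowing behaviour (the conversation strings are illustrative):

```python
from langchain.memory import ConversationBufferWindowMemory

memory = ConversationBufferWindowMemory(k=2)
memory.save_context({"input": "hi"}, {"output": "hello"})
memory.save_context({"input": "how many people live in canada?"}, {"output": "about 38.7 million"})
memory.save_context({"input": "how about in mexico?"}, {"output": "about 132.7 million"})

# Only the two most recent exchanges survive in the rendered history;
# the "hi"/"hello" turn has been dropped from the window.
print(memory.load_memory_variables({})["history"])
```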
@@ -206,7 +206,7 @@
 "            log=llm_output,\n",
 "        )\n",
 "    # Parse out the action and action input\n",
-"    regex = r\"Action: (.*?)[\\n]*Action Input:[\\s]*(.*)\"\n",
+"    regex = r\"Action\\s*\\d*\\s*:(.*?)\\nAction\\s*\\d*\\s*Input\\s*\\d*\\s*:[\\s]*(.*)\"\n",
 "    match = re.search(regex, llm_output, re.DOTALL)\n",
 "    if not match:\n",
 "        raise ValueError(f\"Could not parse LLM output: `{llm_output}`\")\n",
@@ -42,7 +42,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 23,
+"execution_count": 1,
 "id": "9af9734e",
 "metadata": {},
 "outputs": [],
@@ -53,7 +53,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 24,
+"execution_count": 2,
 "id": "becda2a1",
 "metadata": {},
 "outputs": [],
@@ -70,7 +70,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 25,
+"execution_count": 3,
 "id": "339b1bb8",
 "metadata": {},
 "outputs": [],
@@ -99,7 +99,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 26,
+"execution_count": 4,
 "id": "e21d2098",
 "metadata": {},
 "outputs": [
@@ -145,7 +145,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 27,
+"execution_count": 5,
 "id": "9b1cc2a2",
 "metadata": {},
 "outputs": [],
@@ -155,7 +155,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 28,
+"execution_count": 6,
 "id": "e4f5092f",
 "metadata": {},
 "outputs": [],
@@ -166,7 +166,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 29,
+"execution_count": 7,
 "id": "490604e9",
 "metadata": {},
 "outputs": [],
@@ -176,7 +176,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 31,
+"execution_count": 8,
 "id": "653b1617",
 "metadata": {},
 "outputs": [
@@ -190,9 +190,9 @@
 "\u001b[32;1m\u001b[1;3mThought: I need to find out the population of Canada\n",
 "Action: Search\n",
 "Action Input: Population of Canada 2023\u001b[0m\n",
-"Observation: \u001b[36;1m\u001b[1;3mThe current population of Canada is 38,610,447 as of Saturday, February 18, 2023, based on Worldometer elaboration of the latest United Nations data. Canada 2020 population is estimated at 37,742,154 people at mid year according to UN data.\u001b[0m\n",
+"Observation: \u001b[36;1m\u001b[1;3mThe current population of Canada is 38,661,927 as of Sunday, April 16, 2023, based on Worldometer elaboration of the latest United Nations data.\u001b[0m\n",
 "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
-"Final Answer: Arrr, Canada be havin' 38,610,447 scallywags livin' there as of 2023!\u001b[0m\n",
+"Final Answer: Arrr, Canada be havin' 38,661,927 people livin' there as of 2023!\u001b[0m\n",
 "\n",
 "\u001b[1m> Finished chain.\u001b[0m\n"
 ]
@@ -200,10 +200,10 @@
 {
 "data": {
 "text/plain": [
-"\"Arrr, Canada be havin' 38,610,447 scallywags livin' there as of 2023!\""
+"\"Arrr, Canada be havin' 38,661,927 people livin' there as of 2023!\""
 ]
 },
-"execution_count": 31,
+"execution_count": 8,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -223,7 +223,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 32,
+"execution_count": 9,
 "id": "43dbfa2f",
 "metadata": {},
 "outputs": [],
@@ -244,7 +244,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 33,
+"execution_count": 10,
 "id": "0f087313",
 "metadata": {},
 "outputs": [],
@@ -254,7 +254,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 34,
+"execution_count": 11,
 "id": "92c75a10",
 "metadata": {},
 "outputs": [],
@@ -264,7 +264,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 35,
+"execution_count": 12,
 "id": "ac5b83bf",
 "metadata": {},
 "outputs": [],
@@ -274,7 +274,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 36,
+"execution_count": 13,
 "id": "c960e4ff",
 "metadata": {},
 "outputs": [
@@ -285,12 +285,16 @@
 "\n",
 "\n",
 "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-"\u001b[32;1m\u001b[1;3mThought: I need to find out the population of Canada in 2023.\n",
+"\u001b[32;1m\u001b[1;3mThought: I should look for recent population estimates.\n",
 "Action: Search\n",
-"Action Input: Population of Canada in 2023\u001b[0m\n",
-"Observation: \u001b[36;1m\u001b[1;3mThe current population of Canada is 38,610,447 as of Saturday, February 18, 2023, based on Worldometer elaboration of the latest United Nations data. Canada 2020 population is estimated at 37,742,154 people at mid year according to UN data.\u001b[0m\n",
+"Action Input: Canada population 2023\u001b[0m\n",
+"Observation: \u001b[36;1m\u001b[1;3m39,566,248\u001b[0m\n",
+"Thought:\u001b[32;1m\u001b[1;3m I should double check this number.\n",
+"Action: Search\n",
+"Action Input: Canada population estimates 2023\u001b[0m\n",
+"Observation: \u001b[36;1m\u001b[1;3mCanada's population was estimated at 39,566,248 on January 1, 2023, after a record population growth of 1,050,110 people from January 1, 2022, to January 1, 2023.\u001b[0m\n",
 "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
-"Final Answer: La popolazione del Canada nel 2023 è stimata in 38.610.447 persone.\u001b[0m\n",
+"Final Answer: La popolazione del Canada è stata stimata a 39.566.248 il 1° gennaio 2023, dopo un record di crescita demografica di 1.050.110 persone dal 1° gennaio 2022 al 1° gennaio 2023.\u001b[0m\n",
 "\n",
 "\u001b[1m> Finished chain.\u001b[0m\n"
 ]
@@ -298,10 +302,10 @@
 {
 "data": {
 "text/plain": [
-"'La popolazione del Canada nel 2023 è stimata in 38.610.447 persone.'"
+"'La popolazione del Canada è stata stimata a 39.566.248 il 1° gennaio 2023, dopo un record di crescita demografica di 1.050.110 persone dal 1° gennaio 2022 al 1° gennaio 2023.'"
 ]
 },
-"execution_count": 36,
+"execution_count": 13,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -28,7 +28,15 @@
 "execution_count": 2,
 "id": "f65308ab",
 "metadata": {},
-"outputs": [],
+"outputs": [
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"WARNING:root:Failed to default session, using empty session: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /sessions (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x10a1767c0>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
+]
+}
+],
 "source": [
 "from langchain.agents import Tool\n",
 "from langchain.memory import ConversationBufferMemory\n",
@@ -88,7 +96,20 @@
 "text": [
 "\n",
 "\n",
-"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n"
+]
+},
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"WARNING:root:Failed to persist run: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /chain-runs (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x13fab40d0>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
+]
+},
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
 "\u001b[32;1m\u001b[1;3m{\n",
 " \"action\": \"Final Answer\",\n",
 " \"action_input\": \"Hello Bob! How can I assist you today?\"\n",
@@ -124,7 +145,20 @@
 "text": [
 "\n",
 "\n",
-"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n"
+]
+},
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"WARNING:root:Failed to persist run: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /chain-runs (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x13fab44f0>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
+]
+},
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
 "\u001b[32;1m\u001b[1;3m{\n",
 " \"action\": \"Final Answer\",\n",
 " \"action_input\": \"Your name is Bob.\"\n",
@@ -167,10 +201,24 @@
 " \"action\": \"Current Search\",\n",
 " \"action_input\": \"Thai food dinner recipes\"\n",
 "}\u001b[0m\n",
-"Observation: \u001b[36;1m\u001b[1;3m59 easy Thai recipes for any night of the week · Marion Grasby's Thai spicy chilli and basil fried rice · Thai curry noodle soup · Marion Grasby's ...\u001b[0m\n",
-"Thought:\u001b[32;1m\u001b[1;3m{\n",
+"Observation: \u001b[36;1m\u001b[1;3m59 easy Thai recipes for any night of the week · Marion Grasby's Thai spicy chilli and basil fried rice · Thai curry noodle soup · Marion Grasby's Thai Spicy ...\u001b[0m\n",
+"Thought:"
+]
+},
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"WARNING:root:Failed to persist run: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /chain-runs (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x13fae8be0>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
+]
+},
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"\u001b[32;1m\u001b[1;3m{\n",
 " \"action\": \"Final Answer\",\n",
-" \"action_input\": \"Here are some Thai food dinner recipes you can make this week: Thai spicy chilli and basil fried rice, Thai curry noodle soup, and many more. You can find 59 easy Thai recipes for any night of the week on Marion Grasby's website.\"\n",
+" \"action_input\": \"Here are some Thai food dinner recipes you can make this week: Thai spicy chilli and basil fried rice, Thai curry noodle soup, and Thai Spicy ... (59 recipes in total).\"\n",
 "}\u001b[0m\n",
 "\n",
 "\u001b[1m> Finished chain.\u001b[0m\n"
@@ -179,7 +227,7 @@
 {
 "data": {
 "text/plain": [
-"\"Here are some Thai food dinner recipes you can make this week: Thai spicy chilli and basil fried rice, Thai curry noodle soup, and many more. You can find 59 easy Thai recipes for any night of the week on Marion Grasby's website.\""
+"'Here are some Thai food dinner recipes you can make this week: Thai spicy chilli and basil fried rice, Thai curry noodle soup, and Thai Spicy ... (59 recipes in total).'"
 ]
 },
 "execution_count": 8,
@@ -210,11 +258,25 @@
 " \"action_input\": \"who won the world cup in 1978\"\n",
 "}\n",
 "```\u001b[0m\n",
-"Observation: \u001b[36;1m\u001b[1;3mThe Argentina national football team represents Argentina in men's international football and is administered by the Argentine Football Association, the governing body for football in Argentina. Nicknamed La Albiceleste, they are the reigning world champions, having won the most recent World Cup in 2022.\u001b[0m\n",
-"Thought:\u001b[32;1m\u001b[1;3m```json\n",
+"Observation: \u001b[36;1m\u001b[1;3mArgentina national football team\u001b[0m\n",
+"Thought:"
+]
+},
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"WARNING:root:Failed to persist run: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /chain-runs (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x13fae86d0>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
+]
+},
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"\u001b[32;1m\u001b[1;3m```json\n",
 "{\n",
 " \"action\": \"Final Answer\",\n",
-" \"action_input\": \"The last letter in your name is 'b'. The Argentina national football team won the World Cup in 1978.\"\n",
+" \"action_input\": \"The last letter in your name is 'b', and the winner of the 1978 World Cup was the Argentina national football team.\"\n",
 "}\n",
 "```\u001b[0m\n",
 "\n",
@@ -224,7 +286,7 @@
 {
 "data": {
 "text/plain": [
-"\"The last letter in your name is 'b'. The Argentina national football team won the World Cup in 1978.\""
+"\"The last letter in your name is 'b', and the winner of the 1978 World Cup was the Argentina national football team.\""
 ]
 },
 "execution_count": 9,
@@ -253,10 +315,24 @@
 " \"action\": \"Current Search\",\n",
 " \"action_input\": \"weather in pomfret\"\n",
 "}\u001b[0m\n",
-"Observation: \u001b[36;1m\u001b[1;3mMostly cloudy with gusty winds developing during the afternoon. A few flurries or snow showers possible. High near 40F. Winds NNW at 20 to 30 mph.\u001b[0m\n",
-"Thought:\u001b[32;1m\u001b[1;3m{\n",
+"Observation: \u001b[36;1m\u001b[1;3m10 Day Weather-Pomfret, CT ; Sun 16. 64° · 50°. 24% · NE 7 mph ; Mon 17. 58° · 45°. 70% · ESE 8 mph ; Tue 18. 57° · 37°. 8% · WSW 15 mph.\u001b[0m\n",
+"Thought:"
+]
+},
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"WARNING:root:Failed to persist run: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /chain-runs (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x13fa9d7f0>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
+]
+},
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"\u001b[32;1m\u001b[1;3m{\n",
 " \"action\": \"Final Answer\",\n",
-" \"action_input\": \"The weather in Pomfret is mostly cloudy with gusty winds developing during the afternoon. A few flurries or snow showers are possible. High near 40F. Winds NNW at 20 to 30 mph.\"\n",
+" \"action_input\": \"The weather in Pomfret, CT for the next 10 days is as follows: Sun 16. 64° · 50°. 24% · NE 7 mph ; Mon 17. 58° · 45°. 70% · ESE 8 mph ; Tue 18. 57° · 37°. 8% · WSW 15 mph.\"\n",
 "}\u001b[0m\n",
 "\n",
 "\u001b[1m> Finished chain.\u001b[0m\n"
@@ -265,7 +341,7 @@
 {
 "data": {
 "text/plain": [
-"'The weather in Pomfret is mostly cloudy with gusty winds developing during the afternoon. A few flurries or snow showers are possible. High near 40F. Winds NNW at 20 to 30 mph.'"
+"'The weather in Pomfret, CT for the next 10 days is as follows: Sun 16. 64° · 50°. 24% · NE 7 mph ; Mon 17. 58° · 45°. 70% · ESE 8 mph ; Tue 18. 57° · 37°. 8% · WSW 15 mph.'"
 ]
 },
 "execution_count": 10,
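The `WARNING:root:Failed to persist run ... localhost:8000` lines captured in these outputs come from LangChain's optional tracing backend: the notebook was run with tracing enabled but with no tracing server listening on port 8000, so each run logs a warning and continues. Assuming this era's tracing toggle (the `LANGCHAIN_TRACING` environment variable is an assumption on my part, not something this diff shows), a hedged sketch of how to avoid the warnings:

```python
import os

# Assumption: tracing is switched on via this environment variable; unset it
# (or run the local tracing server on http://localhost:8000) so runs persist
# and the "Failed to persist run" warnings disappear.
os.environ.pop("LANGCHAIN_TRACING", None)
```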
@@ -23,7 +23,7 @@
|
||||
"from langchain.agents import AgentType\n",
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain import OpenAI\n",
|
||||
"from langchain.utilities import GoogleSearchAPIWrapper\n",
|
||||
"from langchain.utilities import SerpAPIWrapper\n",
|
||||
"from langchain.agents import initialize_agent"
|
||||
]
|
||||
},
|
||||
@@ -34,7 +34,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"search = GoogleSearchAPIWrapper()\n",
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"tools = [\n",
|
||||
" Tool(\n",
|
||||
" name = \"Current Search\",\n",
|
||||
@@ -149,8 +149,12 @@
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Thought: Do I need to use a tool? No\n",
|
||||
"AI: If you like Thai food, some great dinner options this week could include Thai green curry, Pad Thai, or a Thai-style stir-fry. You could also try making a Thai-style soup or salad. Enjoy!\u001b[0m\n",
|
||||
"Thought: Do I need to use a tool? Yes\n",
|
||||
"Action: Current Search\n",
|
||||
"Action Input: Thai food dinner recipes\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m59 easy Thai recipes for any night of the week · Marion Grasby's Thai spicy chilli and basil fried rice · Thai curry noodle soup · Marion Grasby's Thai Spicy ...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m Do I need to use a tool? No\n",
|
||||
"AI: Here are some great Thai dinner recipes you can try this week: Marion Grasby's Thai Spicy Chilli and Basil Fried Rice, Thai Curry Noodle Soup, Thai Green Curry with Coconut Rice, Thai Red Curry with Vegetables, and Thai Coconut Soup. I hope you enjoy them!\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -158,7 +162,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'If you like Thai food, some great dinner options this week could include Thai green curry, Pad Thai, or a Thai-style stir-fry. You could also try making a Thai-style soup or salad. Enjoy!'"
|
||||
"\"Here are some great Thai dinner recipes you can try this week: Marion Grasby's Thai Spicy Chilli and Basil Fried Rice, Thai Curry Noodle Soup, Thai Green Curry with Coconut Rice, Thai Red Curry with Vegetables, and Thai Coconut Soup. I hope you enjoy them!\""
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
@@ -187,9 +191,9 @@
|
||||
"Thought: Do I need to use a tool? Yes\n",
|
||||
"Action: Current Search\n",
|
||||
"Action Input: Who won the World Cup in 1978\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe Cup was won by the host nation, Argentina, who defeated the Netherlands 3–1 in the final, after extra time. The final was held at River Plate's home stadium ... Amid Argentina's celebrations, there was sympathy for the Netherlands, runners-up for the second tournament running, following a 3-1 final defeat at the Estadio ... The match was won by the Argentine squad in extra time by a score of 3–1. Mario Kempes, who finished as the tournament's top scorer, was named the man of the ... May 21, 2022 ... Argentina won the World Cup for the first time in their history, beating Netherlands 3-1 in the final. This edition of the World Cup was full of ... The adidas Golden Ball is presented to the best player at each FIFA World Cup finals. Those who finish as runners-up in the vote receive the adidas Silver ... Holders West Germany failed to beat Holland and Italy and were eliminated when Berti Vogts' own goal gave Austria a 3-2 victory. Holland thrashed the Austrians ... Jun 14, 2018 ... On a clear afternoon on 1 June 1978 at the revamped El Monumental stadium in Buenos Aires' Belgrano barrio, several hundred children in white ... Dec 15, 2022 ... The tournament couldn't have gone better for the ruling junta. Argentina went on to win the championship, defeating the Netherlands, 3-1, in the ... Nov 9, 2022 ... Host: Argentina Teams: 16. Format: Group stage, second round, third-place playoff, final. Matches: 38. Goals: 102. Winner: Argentina Feb 19, 2009 ... Argentina sealed their first World Cup win on home soil when they defeated the Netherlands in an exciting final that went to extra-time. For the ...\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mArgentina national football team\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m Do I need to use a tool? No\n",
|
||||
"AI: The last letter in your name is 'b'. Argentina won the World Cup in 1978.\u001b[0m\n",
|
||||
"AI: The last letter in your name is \"b\" and the winner of the 1978 World Cup was the Argentina national football team.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -197,7 +201,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"The last letter in your name is 'b'. Argentina won the World Cup in 1978.\""
|
||||
"'The last letter in your name is \"b\" and the winner of the 1978 World Cup was the Argentina national football team.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
@@ -226,9 +230,9 @@
|
||||
"Thought: Do I need to use a tool? Yes\n",
|
||||
"Action: Current Search\n",
|
||||
"Action Input: Current temperature in Pomfret\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mA mixture of rain and snow showers. High 39F. Winds NNW at 5 to 10 mph. Chance of precip 50%. Snow accumulations less than one inch. Pomfret, CT Weather Forecast, with current conditions, wind, air quality, and what to expect for the next 3 days. Pomfret Center Weather Forecasts. ... Pomfret Center, CT Weather Conditionsstar_ratehome ... Tomorrow's temperature is forecast to be COOLER than today. It is 46 degrees fahrenheit, or 8 degrees celsius and feels like 46 degrees fahrenheit. The barometric pressure is 29.78 - measured by inch of mercury units - ... Pomfret Weather Forecasts. ... Pomfret, MD Weather Conditionsstar_ratehome ... Tomorrow's temperature is forecast to be MUCH COOLER than today. Additional Headlines. En Español · Share |. Current conditions at ... Pomfret CT. Tonight ... Past Weather Information · Interactive Forecast Map. Pomfret MD detailed current weather report for 20675 in Charles county, Maryland. ... Pomfret, MD weather condition is Mostly Cloudy and 43°F. Mostly Cloudy. Hazardous Weather Conditions. Hazardous Weather Outlook · En Español · Share |. Current conditions at ... South Pomfret VT. Tonight. Pomfret Center, CT Weather. Current Report for Thu Jan 5 2023. As of 2:00 PM EST. 5-Day Forecast | Road Conditions. 45°F 7°c. Feels Like 44°F. Pomfret Center CT. Today. Today: Areas of fog before 9am. Otherwise, cloudy, with a ... Otherwise, cloudy, with a temperature falling to around 33 by 5pm.\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mPartly cloudy skies. High around 70F. Winds W at 5 to 10 mph. Humidity41%.\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m Do I need to use a tool? No\n",
"AI: The current temperature in Pomfret is 45°F (7°C) and it feels like 44°F.\u001b[0m\n",
"AI: The current temperature in Pomfret is around 70F with partly cloudy skies and winds W at 5 to 10 mph. The humidity is 41%.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
@@ -236,7 +240,7 @@
{
"data": {
"text/plain": [
"'The current temperature in Pomfret is 45°F (7°C) and it feels like 44°F.'"
"'The current temperature in Pomfret is around 70F with partly cloudy skies and winds W at 5 to 10 mph. The humidity is 41%.'"
]
},
"execution_count": 9,
@@ -33,7 +33,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "07e96d99",
"metadata": {},
"outputs": [],
@@ -41,7 +41,7 @@
"llm = OpenAI(temperature=0)\n",
"search = SerpAPIWrapper()\n",
"llm_math_chain = LLMMathChain(llm=llm, verbose=True)\n",
"db = SQLDatabase.from_uri(\"sqlite:///../../../../notebooks/Chinook.db\")\n",
"db = SQLDatabase.from_uri(\"sqlite:///../../../../../notebooks/Chinook.db\")\n",
"db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)\n",
"tools = [\n",
" Tool(\n",
@@ -64,7 +64,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "a069c4b6",
"metadata": {},
"outputs": [],
@@ -74,7 +74,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "e603cd7d",
"metadata": {},
"outputs": [
@@ -88,30 +88,24 @@
|
||||
"\u001b[32;1m\u001b[1;3m I need to find out who Leo DiCaprio's girlfriend is and then calculate her age raised to the 0.43 power.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"Who is Leo DiCaprio's girlfriend?\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mCamila Morrone\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to find out Camila Morrone's age\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"How old is Camila Morrone?\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m25 years\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to calculate 25 raised to the 0.43 power\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mDiCaprio met actor Camila Morrone in December 2017, when she was 20 and he was 43. They were spotted at Coachella and went on multiple vacations together. Some reports suggested that DiCaprio was ready to ask Morrone to marry him. The couple made their red carpet debut at the 2020 Academy Awards.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to calculate Camila Morrone's age raised to the 0.43 power.\n",
|
||||
"Action: Calculator\n",
|
||||
"Action Input: 25^0.43\u001b[0m\n",
|
||||
"Action Input: 21^0.43\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMMathChain chain...\u001b[0m\n",
|
||||
"25^0.43\u001b[32;1m\u001b[1;3m\n",
|
||||
"```python\n",
|
||||
"import math\n",
|
||||
"print(math.pow(25, 0.43))\n",
|
||||
"21^0.43\u001b[32;1m\u001b[1;3m\n",
|
||||
"```text\n",
|
||||
"21**0.43\n",
|
||||
"```\n",
|
||||
"...numexpr.evaluate(\"21**0.43\")...\n",
|
||||
"\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m3.991298452658078\n",
|
||||
"\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m3.7030049853137306\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 3.991298452658078\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: Camila Morrone is 25 years old and her age raised to the 0.43 power is 3.991298452658078.\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 3.7030049853137306\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
|
||||
"Final Answer: Camila Morrone is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is 3.7030049853137306.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -119,10 +113,10 @@
{
"data": {
"text/plain": [
"'Camila Morrone is 25 years old and her age raised to the 0.43 power is 3.991298452658078.'"
"\"Camila Morrone is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is 3.7030049853137306.\""
]
},
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
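The hunk above captures LLMMathChain's new behavior: rather than generating and running Python, it now asks the LLM for a bare expression and evaluates it with numexpr. A minimal sketch of that evaluation step, assuming only that the numexpr package is installed:

```python
# Hedged sketch: reproduce the evaluation step shown in the trace above.
import numexpr

result = numexpr.evaluate("21**0.43")  # the expression the LLM emitted
print(float(result))  # 3.7030049853137306, matching the Answer line in the log
```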
@@ -133,7 +127,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "a5c07010",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -147,21 +141,36 @@
|
||||
"\u001b[32;1m\u001b[1;3m I need to find out the artist's full name and then search the FooBar database for their albums.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"The Storm Before the Calm\" artist\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe Storm Before the Calm (stylized in all lowercase) is the tenth (and eighth international) studio album by Canadian-American singer-songwriter Alanis ...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now need to search the FooBar database for Alanis Morissette's albums\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe Storm Before the Calm (stylized in all lowercase) is the tenth (and eighth international) studio album by Canadian-American singer-songwriter Alanis Morissette, released June 17, 2022, via Epiphany Music and Thirty Tigers, as well as by RCA Records in Europe.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now need to search the FooBar database for Alanis Morissette's albums.\n",
|
||||
"Action: FooBar DB\n",
|
||||
"Action Input: What albums by Alanis Morissette are in the FooBar database?\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
|
||||
"What albums by Alanis Morissette are in the FooBar database? \n",
|
||||
"SQLQuery:\u001b[32;1m\u001b[1;3m SELECT Title FROM Album INNER JOIN Artist ON Album.ArtistId = Artist.ArtistId WHERE Artist.Name = 'Alanis Morissette' LIMIT 5;\u001b[0m\n",
|
||||
"What albums by Alanis Morissette are in the FooBar database?\n",
|
||||
"SQLQuery:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/harrisonchase/workplace/langchain/langchain/sql_database.py:191: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.\n",
|
||||
" sample_rows = connection.execute(command)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m SELECT \"Title\" FROM \"Album\" INNER JOIN \"Artist\" ON \"Album\".\"ArtistId\" = \"Artist\".\"ArtistId\" WHERE \"Name\" = 'Alanis Morissette' LIMIT 5;\u001b[0m\n",
|
||||
"SQLResult: \u001b[33;1m\u001b[1;3m[('Jagged Little Pill',)]\u001b[0m\n",
|
||||
"Answer:\u001b[32;1m\u001b[1;3m The albums by Alanis Morissette in the FooBar database are Jagged Little Pill.\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3m The albums by Alanis Morissette in the FooBar database are Jagged Little Pill.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The artist who released the album The Storm Before the Calm is Alanis Morissette and the albums of theirs in the FooBar database are Jagged Little Pill.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
|
||||
"Final Answer: The artist who released the album 'The Storm Before the Calm' is Alanis Morissette and the albums of hers in the FooBar database are Jagged Little Pill.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -169,10 +178,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The artist who released the album The Storm Before the Calm is Alanis Morissette and the albums of theirs in the FooBar database are Jagged Little Pill.'"
|
||||
"\"The artist who released the album 'The Storm Before the Calm' is Alanis Morissette and the albums of hers in the FooBar database are Jagged Little Pill.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
@@ -21,7 +21,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 8,
"id": "ac561cc4",
"metadata": {},
"outputs": [],
@@ -34,7 +34,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 10,
"id": "07e96d99",
"metadata": {},
"outputs": [],
@@ -43,7 +43,7 @@
"llm1 = OpenAI(temperature=0)\n",
"search = SerpAPIWrapper()\n",
"llm_math_chain = LLMMathChain(llm=llm1, verbose=True)\n",
"db = SQLDatabase.from_uri(\"sqlite:///../../../../notebooks/Chinook.db\")\n",
"db = SQLDatabase.from_uri(\"sqlite:///../../../../../notebooks/Chinook.db\")\n",
"db_chain = SQLDatabaseChain(llm=llm1, database=db, verbose=True)\n",
"tools = [\n",
" Tool(\n",
@@ -66,7 +66,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 11,
"id": "a069c4b6",
"metadata": {},
"outputs": [],
@@ -76,7 +76,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 12,
"id": "e603cd7d",
"metadata": {},
"outputs": [
@@ -92,37 +92,34 @@
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Search\",\n",
|
||||
" \"action_input\": \"Who is Leo DiCaprio's girlfriend?\"\n",
|
||||
" \"action_input\": \"Leo DiCaprio girlfriend\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mCamila Morrone\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mFor the second question, I need to use the calculator tool to raise her current age to the 0.43 power.\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mGigi Hadid: 2022 Leo and Gigi were first linked back in September 2022, when a source told Us Weekly that Leo had his “sights set\" on her (alarming way to put it, but okay).\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mFor the second question, I need to calculate the age raised to the 0.43 power. I will use the calculator tool.\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Calculator\",\n",
|
||||
" \"action_input\": \"22.0^(0.43)\"\n",
|
||||
" \"action_input\": \"((2022-1995)^0.43)\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMMathChain chain...\u001b[0m\n",
|
||||
"22.0^(0.43)\u001b[32;1m\u001b[1;3m\n",
|
||||
"```python\n",
|
||||
"import math\n",
|
||||
"print(math.pow(22.0, 0.43))\n",
|
||||
"((2022-1995)^0.43)\u001b[32;1m\u001b[1;3m\n",
|
||||
"```text\n",
|
||||
"(2022-1995)**0.43\n",
|
||||
"```\n",
|
||||
"...numexpr.evaluate(\"(2022-1995)**0.43\")...\n",
|
||||
"\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m3.777824273683966\n",
|
||||
"\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m4.125593352125936\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 3.777824273683966\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 4.125593352125936\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI now know the final answer.\n",
|
||||
"Final Answer: Camila Morrone, 3.777824273683966.\u001b[0m\n",
|
||||
"Final Answer: Gigi Hadid is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is approximately 4.13.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -130,10 +127,10 @@
{
"data": {
"text/plain": [
"'Camila Morrone, 3.777824273683966.'"
"\"Gigi Hadid is Leo DiCaprio's girlfriend and her current age raised to the 0.43 power is approximately 4.13.\""
]
},
"execution_count": 4,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -144,7 +141,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 13,
|
||||
"id": "a5c07010",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -156,7 +153,7 @@
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mQuestion: What is the full name of the artist who recently released an album called 'The Storm Before the Calm' and are they in the FooBar database? If so, what albums of theirs are in the FooBar database?\n",
|
||||
"Thought: I should use the Search tool to find the answer to the first part of the question and then use the FooBar DB tool to find the answer to the second part of the question.\n",
|
||||
"Thought: I should use the Search tool to find the answer to the first part of the question and then use the FooBar DB tool to find the answer to the second part.\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
@@ -166,7 +163,7 @@
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mAlanis Morissette\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mNow that I have the name of the artist, I can use the FooBar DB tool to find their albums in the database.\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mNow that I know the artist's name, I can use the FooBar DB tool to find out if they are in the database and what albums of theirs are in it.\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
@@ -178,7 +175,7 @@
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
|
||||
"What albums does Alanis Morissette have in the database? \n",
|
||||
"What albums does Alanis Morissette have in the database?\n",
|
||||
"SQLQuery:"
|
||||
]
|
||||
},
|
||||
@@ -186,7 +183,7 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/harrisonchase/workplace/langchain/langchain/sql_database.py:141: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.\n",
|
||||
"/Users/harrisonchase/workplace/langchain/langchain/sql_database.py:191: SAWarning: Dialect sqlite+pysqlite does *not* support Decimal objects natively, and SQLAlchemy must convert from floating point - rounding errors and other issues may occur. Please consider storing Decimal numbers as strings or integers on this platform for lossless storage.\n",
|
||||
" sample_rows = connection.execute(command)\n"
|
||||
]
|
||||
},
|
||||
@@ -194,14 +191,14 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m SELECT Title FROM Album WHERE ArtistId IN (SELECT ArtistId FROM Artist WHERE Name = 'Alanis Morissette') LIMIT 5;\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m SELECT \"Title\" FROM \"Album\" WHERE \"ArtistId\" IN (SELECT \"ArtistId\" FROM \"Artist\" WHERE \"Name\" = 'Alanis Morissette') LIMIT 5;\u001b[0m\n",
|
||||
"SQLResult: \u001b[33;1m\u001b[1;3m[('Jagged Little Pill',)]\u001b[0m\n",
|
||||
"Answer:\u001b[32;1m\u001b[1;3m Alanis Morissette has the album 'Jagged Little Pill' in the database.\u001b[0m\n",
|
||||
"Answer:\u001b[32;1m\u001b[1;3m Alanis Morissette has the album Jagged Little Pill in the database.\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3m Alanis Morissette has the album 'Jagged Little Pill' in the database.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI have found the answer to both parts of the question.\n",
|
||||
"Final Answer: The artist who recently released an album called 'The Storm Before the Calm' is Alanis Morissette. The album 'Jagged Little Pill' is in the FooBar database.\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3m Alanis Morissette has the album Jagged Little Pill in the database.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe artist Alanis Morissette is in the FooBar database and has the album Jagged Little Pill in it.\n",
|
||||
"Final Answer: Alanis Morissette is in the FooBar database and has the album Jagged Little Pill in it.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -209,10 +206,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"The artist who recently released an album called 'The Storm Before the Calm' is Alanis Morissette. The album 'Jagged Little Pill' is in the FooBar database.\""
|
||||
"'Alanis Morissette is in the FooBar database and has the album Jagged Little Pill in it.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"id": "7e3b513e",
"metadata": {},
"outputs": [
@@ -25,11 +25,12 @@
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m Yes.\n",
"Follow up: Who is the reigning men's U.S. Open champion?\u001b[0m\n",
"Intermediate answer: \u001b[36;1m\u001b[1;3mCarlos Alcaraz won the 2022 Men's single title while Poland's Iga Swiatek won the Women's single title defeating Tunisian's Ons Jabeur.\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mFollow up: Where is Carlos Alcaraz from?\u001b[0m\n",
"Intermediate answer: \u001b[36;1m\u001b[1;3mCarlos Alcaraz Garfia\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mFollow up: Where is Carlos Alcaraz Garfia from?\u001b[0m\n",
"Intermediate answer: \u001b[36;1m\u001b[1;3mEl Palmar, Spain\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mSo the final answer is: El Palmar, Spain\u001b[0m\n",
"\u001b[1m> Finished AgentExecutor chain.\u001b[0m\n"
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
@@ -38,7 +39,7 @@
"'El Palmar, Spain'"
]
},
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
@@ -61,6 +62,14 @@
"self_ask_with_search = initialize_agent(tools, llm, agent=AgentType.SELF_ASK_WITH_SEARCH, verbose=True)\n",
"self_ask_with_search.run(\"What is the hometown of the reigning men's U.S. Open champion?\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b2e4d6bc",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -79,7 +88,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.9.1"
},
"vscode": {
"interpreter": {
484
docs/modules/agents/auto_agents/examples/autogpt.ipynb
Normal file
@@ -0,0 +1,484 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "14f8b67b",
"metadata": {},
"source": [
"# AutoGPT\n",
"\n",
"Implementation of https://github.com/Significant-Gravitas/Auto-GPT but with LangChain primitives (LLMs, PromptTemplates, VectorStores, Embeddings, Tools)"
]
},
{
"cell_type": "markdown",
"id": "192496a7",
"metadata": {},
"source": [
"## Set up tools\n",
"\n",
"We'll set up an AutoGPT with a search tool, and write-file tool, and a read-file tool"
|
||||
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "7c2c9b54",
"metadata": {},
"outputs": [],
"source": [
"from langchain.utilities import SerpAPIWrapper\n",
"from langchain.agents import Tool\n",
"from langchain.tools.file_management.write import WriteFileTool\n",
"from langchain.tools.file_management.read import ReadFileTool\n",
"\n",
"search = SerpAPIWrapper()\n",
"tools = [\n",
"    Tool(\n",
"        name = \"search\",\n",
"        func=search.run,\n",
"        description=\"useful for when you need to answer questions about current events. You should ask targeted questions\"\n",
"    ),\n",
"    WriteFileTool(),\n",
"    ReadFileTool(),\n",
"]"
]
},
{
"cell_type": "markdown",
"id": "8e39ee28",
"metadata": {},
"source": [
"## Set up memory\n",
"\n",
"The memory here is used for the agents intermediate steps"
|
||||
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "72bc204d",
"metadata": {},
"outputs": [],
"source": [
"from langchain.vectorstores import FAISS\n",
"from langchain.docstore import InMemoryDocstore\n",
"from langchain.embeddings import OpenAIEmbeddings"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "1df7b724",
"metadata": {},
"outputs": [],
"source": [
"# Define your embedding model\n",
"embeddings_model = OpenAIEmbeddings()\n",
"# Initialize the vectorstore as empty\n",
"import faiss\n",
"\n",
"embedding_size = 1536\n",
"index = faiss.IndexFlatL2(embedding_size)\n",
"vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})"
]
},
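As a quick sanity check of the memory wiring above, the vectorstore can be exercised directly. A hedged sketch reusing the notebook's own names (the sample text is invented for illustration):

```python
# Hedged sketch: the retriever interface that AutoGPT receives as `memory`.
vectorstore.add_texts(["AutoGPT saved a weather report to weather_report_sf.txt"])
retriever = vectorstore.as_retriever()
docs = retriever.get_relevant_documents("weather report")  # FAISS similarity lookup
print(docs[0].page_content)
```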
{
"cell_type": "markdown",
"id": "e40fd657",
"metadata": {},
"source": [
"## Setup model and AutoGPT\n",
|
||||
"\n",
|
||||
"Initialize everything! We will use ChatOpenAI model"
|
||||
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3393bc23",
"metadata": {},
"outputs": [],
"source": [
"from langchain.auto_agents.autogpt.agent import AutoGPT\n",
"from langchain.chat_models import ChatOpenAI"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "709c08c2",
"metadata": {},
"outputs": [],
"source": [
"agent = AutoGPT.from_llm_and_tools(\n",
"    ai_name=\"Tom\",\n",
"    ai_role=\"Assistant\",\n",
"    tools=tools,\n",
"    llm=ChatOpenAI(temperature=0),\n",
"    memory=vectorstore.as_retriever()\n",
")\n",
"# Set verbose to be true\n",
"agent.chain.verbose = True"
]
},
{
"cell_type": "markdown",
"id": "fc9b51ba",
"metadata": {},
"source": [
"## Run an example\n",
"\n",
"Here we will make it write a weather report for SF"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c032b182",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are Tom, Assistant\n",
|
||||
"Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications. If you have completed all your tasks, make sure to use the \"finish\" command.\n",
|
||||
"\n",
|
||||
"GOALS:\n",
|
||||
"\n",
|
||||
"1. write a weather report for SF today\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Constraints:\n",
|
||||
"1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.\n",
|
||||
"2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.\n",
|
||||
"3. No user assistance\n",
|
||||
"4. Exclusively use the commands listed in double quotes e.g. \"command name\"\n",
|
||||
"\n",
|
||||
"Commands:\n",
|
||||
"1. search: useful for when you need to answer questions about current events. You should ask targeted questions, args: \"tool_input\": \"\"\n",
|
||||
"2. write_file: Write file to disk, args: \"file_path\": \"name of file\", \"text\": \"text to write to file\"\n",
|
||||
"3. read_file: Read file from disk, args: \"file_path\": \"name of file\"\n",
|
||||
"4. finish: use this to signal that you have finished all your objectives, args: \"response\": \"final response to let people know you have finished your objectives\"\n",
|
||||
"\n",
|
||||
"Resources:\n",
|
||||
"1. Internet access for searches and information gathering.\n",
|
||||
"2. Long Term memory management.\n",
|
||||
"3. GPT-3.5 powered Agents for delegation of simple tasks.\n",
|
||||
"4. File output.\n",
|
||||
"\n",
|
||||
"Performance Evaluation:\n",
|
||||
"1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\n",
|
||||
"2. Constructively self-criticize your big-picture behavior constantly.\n",
|
||||
"3. Reflect on past decisions and strategies to refine your approach.\n",
|
||||
"4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.\n",
|
||||
"\n",
|
||||
"You should only respond in JSON format as described below \n",
|
||||
"Response Format: \n",
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"thought\",\n",
|
||||
" \"reasoning\": \"reasoning\",\n",
|
||||
" \"plan\": \"- short bulleted\\n- list that conveys\\n- long-term plan\",\n",
|
||||
" \"criticism\": \"constructive self-criticism\",\n",
|
||||
" \"speak\": \"thoughts summary to say to user\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"command name\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"arg name\": \"value\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"} \n",
|
||||
"Ensure the response can be parsed by Python json.loads\n",
|
||||
"System: The current time and date is Sun Apr 16 14:07:39 2023\n",
|
||||
"System: This reminds you of these events from your past:\n",
|
||||
"[]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Human: Determine which next command to use, and respond using the format specified above:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I will start by writing a weather report for San Francisco today. I will use the 'search' command to find the current weather conditions.\",\n",
|
||||
" \"reasoning\": \"I need to gather information about the current weather conditions in San Francisco to write an accurate weather report.\",\n",
|
||||
" \"plan\": \"- Use the 'search' command to find the current weather conditions in San Francisco\\n- Write a weather report based on the information gathered\",\n",
|
||||
" \"criticism\": \"I need to make sure that the information I gather is accurate and up-to-date.\",\n",
|
||||
" \"speak\": \"I will use the 'search' command to find the current weather conditions in San Francisco.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"search\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"tool_input\": \"current weather conditions in San Francisco\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are Tom, Assistant\n",
|
||||
"Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications. If you have completed all your tasks, make sure to use the \"finish\" command.\n",
|
||||
"\n",
|
||||
"GOALS:\n",
|
||||
"\n",
|
||||
"1. write a weather report for SF today\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Constraints:\n",
|
||||
"1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.\n",
|
||||
"2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.\n",
|
||||
"3. No user assistance\n",
|
||||
"4. Exclusively use the commands listed in double quotes e.g. \"command name\"\n",
|
||||
"\n",
|
||||
"Commands:\n",
|
||||
"1. search: useful for when you need to answer questions about current events. You should ask targeted questions, args: \"tool_input\": \"\"\n",
|
||||
"2. write_file: Write file to disk, args: \"file_path\": \"name of file\", \"text\": \"text to write to file\"\n",
|
||||
"3. read_file: Read file from disk, args: \"file_path\": \"name of file\"\n",
|
||||
"4. finish: use this to signal that you have finished all your objectives, args: \"response\": \"final response to let people know you have finished your objectives\"\n",
|
||||
"\n",
|
||||
"Resources:\n",
|
||||
"1. Internet access for searches and information gathering.\n",
|
||||
"2. Long Term memory management.\n",
|
||||
"3. GPT-3.5 powered Agents for delegation of simple tasks.\n",
|
||||
"4. File output.\n",
|
||||
"\n",
|
||||
"Performance Evaluation:\n",
|
||||
"1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\n",
|
||||
"2. Constructively self-criticize your big-picture behavior constantly.\n",
|
||||
"3. Reflect on past decisions and strategies to refine your approach.\n",
|
||||
"4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.\n",
|
||||
"\n",
|
||||
"You should only respond in JSON format as described below \n",
|
||||
"Response Format: \n",
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"thought\",\n",
|
||||
" \"reasoning\": \"reasoning\",\n",
|
||||
" \"plan\": \"- short bulleted\\n- list that conveys\\n- long-term plan\",\n",
|
||||
" \"criticism\": \"constructive self-criticism\",\n",
|
||||
" \"speak\": \"thoughts summary to say to user\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"command name\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"arg name\": \"value\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"} \n",
|
||||
"Ensure the response can be parsed by Python json.loads\n",
|
||||
"System: The current time and date is Sun Apr 16 14:07:48 2023\n",
|
||||
"System: This reminds you of these events from your past:\n",
|
||||
"['Assistant Reply: {\\n \"thoughts\": {\\n \"text\": \"I will start by writing a weather report for San Francisco today. I will use the \\'search\\' command to find the current weather conditions.\",\\n \"reasoning\": \"I need to gather information about the current weather conditions in San Francisco to write an accurate weather report.\",\\n \"plan\": \"- Use the \\'search\\' command to find the current weather conditions in San Francisco\\\\n- Write a weather report based on the information gathered\",\\n \"criticism\": \"I need to make sure that the information I gather is accurate and up-to-date.\",\\n \"speak\": \"I will use the \\'search\\' command to find the current weather conditions in San Francisco.\"\\n },\\n \"command\": {\\n \"name\": \"search\",\\n \"args\": {\\n \"tool_input\": \"current weather conditions in San Francisco\"\\n }\\n }\\n} \\nResult: Command search returned: Cloudy skies early, followed by partial clearing. High 56F. Winds W at 10 to 20 mph. PRECIPITATION. ']\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Human: Determine which next command to use, and respond using the format specified above:\n",
|
||||
"AI: {\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I will start by writing a weather report for San Francisco today. I will use the 'search' command to find the current weather conditions.\",\n",
|
||||
" \"reasoning\": \"I need to gather information about the current weather conditions in San Francisco to write an accurate weather report.\",\n",
|
||||
" \"plan\": \"- Use the 'search' command to find the current weather conditions in San Francisco\\n- Write a weather report based on the information gathered\",\n",
|
||||
" \"criticism\": \"I need to make sure that the information I gather is accurate and up-to-date.\",\n",
|
||||
" \"speak\": \"I will use the 'search' command to find the current weather conditions in San Francisco.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"search\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"tool_input\": \"current weather conditions in San Francisco\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"System: Command search returned: Cloudy skies early, followed by partial clearing. High 56F. Winds W at 10 to 20 mph. PRECIPITATION.\n",
|
||||
"Human: Determine which next command to use, and respond using the format specified above:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I will now write a weather report for San Francisco based on the information gathered. I will use the 'write_file' command to save the report to a file.\",\n",
|
||||
" \"reasoning\": \"I need to write a weather report based on the information gathered from the 'search' command.\",\n",
|
||||
" \"plan\": \"- Use the 'write_file' command to save the weather report to a file\",\n",
|
||||
" \"criticism\": \"I need to make sure that the weather report is accurate and well-written.\",\n",
|
||||
" \"speak\": \"I will use the 'write_file' command to save the weather report to a file.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"write_file\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"file_path\": \"weather_report_sf.txt\",\n",
|
||||
" \"text\": \"San Francisco Weather Report:\\n\\nCloudy skies early, followed by partial clearing. High 56F. Winds W at 10 to 20 mph. PRECIPITATION.\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mSystem: You are Tom, Assistant\n",
|
||||
"Your decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications. If you have completed all your tasks, make sure to use the \"finish\" command.\n",
|
||||
"\n",
|
||||
"GOALS:\n",
|
||||
"\n",
|
||||
"1. write a weather report for SF today\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Constraints:\n",
|
||||
"1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.\n",
|
||||
"2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.\n",
|
||||
"3. No user assistance\n",
|
||||
"4. Exclusively use the commands listed in double quotes e.g. \"command name\"\n",
|
||||
"\n",
|
||||
"Commands:\n",
|
||||
"1. search: useful for when you need to answer questions about current events. You should ask targeted questions, args: \"tool_input\": \"\"\n",
|
||||
"2. write_file: Write file to disk, args: \"file_path\": \"name of file\", \"text\": \"text to write to file\"\n",
|
||||
"3. read_file: Read file from disk, args: \"file_path\": \"name of file\"\n",
|
||||
"4. finish: use this to signal that you have finished all your objectives, args: \"response\": \"final response to let people know you have finished your objectives\"\n",
|
||||
"\n",
|
||||
"Resources:\n",
|
||||
"1. Internet access for searches and information gathering.\n",
|
||||
"2. Long Term memory management.\n",
|
||||
"3. GPT-3.5 powered Agents for delegation of simple tasks.\n",
|
||||
"4. File output.\n",
|
||||
"\n",
|
||||
"Performance Evaluation:\n",
|
||||
"1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\n",
|
||||
"2. Constructively self-criticize your big-picture behavior constantly.\n",
|
||||
"3. Reflect on past decisions and strategies to refine your approach.\n",
|
||||
"4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.\n",
|
||||
"\n",
|
||||
"You should only respond in JSON format as described below \n",
|
||||
"Response Format: \n",
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"thought\",\n",
|
||||
" \"reasoning\": \"reasoning\",\n",
|
||||
" \"plan\": \"- short bulleted\\n- list that conveys\\n- long-term plan\",\n",
|
||||
" \"criticism\": \"constructive self-criticism\",\n",
|
||||
" \"speak\": \"thoughts summary to say to user\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"command name\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"arg name\": \"value\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"} \n",
|
||||
"Ensure the response can be parsed by Python json.loads\n",
|
||||
"System: The current time and date is Sun Apr 16 14:07:57 2023\n",
|
||||
"System: This reminds you of these events from your past:\n",
|
||||
"['Assistant Reply: {\\n \"thoughts\": {\\n \"text\": \"I will now write a weather report for San Francisco based on the information gathered. I will use the \\'write_file\\' command to save the report to a file.\",\\n \"reasoning\": \"I need to write a weather report based on the information gathered from the \\'search\\' command.\",\\n \"plan\": \"- Use the \\'write_file\\' command to save the weather report to a file\",\\n \"criticism\": \"I need to make sure that the weather report is accurate and well-written.\",\\n \"speak\": \"I will use the \\'write_file\\' command to save the weather report to a file.\"\\n },\\n \"command\": {\\n \"name\": \"write_file\",\\n \"args\": {\\n \"file_path\": \"weather_report_sf.txt\",\\n \"text\": \"San Francisco Weather Report:\\\\n\\\\nCloudy skies early, followed by partial clearing. High 56F. Winds W at 10 to 20 mph. PRECIPITATION.\"\\n }\\n }\\n} \\nResult: Command write_file returned: File written to successfully. ', 'Assistant Reply: {\\n \"thoughts\": {\\n \"text\": \"I will start by writing a weather report for San Francisco today. I will use the \\'search\\' command to find the current weather conditions.\",\\n \"reasoning\": \"I need to gather information about the current weather conditions in San Francisco to write an accurate weather report.\",\\n \"plan\": \"- Use the \\'search\\' command to find the current weather conditions in San Francisco\\\\n- Write a weather report based on the information gathered\",\\n \"criticism\": \"I need to make sure that the information I gather is accurate and up-to-date.\",\\n \"speak\": \"I will use the \\'search\\' command to find the current weather conditions in San Francisco.\"\\n },\\n \"command\": {\\n \"name\": \"search\",\\n \"args\": {\\n \"tool_input\": \"current weather conditions in San Francisco\"\\n }\\n }\\n} \\nResult: Command search returned: Cloudy skies early, followed by partial clearing. High 56F. Winds W at 10 to 20 mph. PRECIPITATION. ']\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Human: Determine which next command to use, and respond using the format specified above:\n",
|
||||
"AI: {\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I will start by writing a weather report for San Francisco today. I will use the 'search' command to find the current weather conditions.\",\n",
|
||||
" \"reasoning\": \"I need to gather information about the current weather conditions in San Francisco to write an accurate weather report.\",\n",
|
||||
" \"plan\": \"- Use the 'search' command to find the current weather conditions in San Francisco\\n- Write a weather report based on the information gathered\",\n",
|
||||
" \"criticism\": \"I need to make sure that the information I gather is accurate and up-to-date.\",\n",
|
||||
" \"speak\": \"I will use the 'search' command to find the current weather conditions in San Francisco.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"search\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"tool_input\": \"current weather conditions in San Francisco\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"System: Command search returned: Cloudy skies early, followed by partial clearing. High 56F. Winds W at 10 to 20 mph. PRECIPITATION.\n",
|
||||
"Human: Determine which next command to use, and respond using the format specified above:\n",
|
||||
"AI: {\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I will now write a weather report for San Francisco based on the information gathered. I will use the 'write_file' command to save the report to a file.\",\n",
|
||||
" \"reasoning\": \"I need to write a weather report based on the information gathered from the 'search' command.\",\n",
|
||||
" \"plan\": \"- Use the 'write_file' command to save the weather report to a file\",\n",
|
||||
" \"criticism\": \"I need to make sure that the weather report is accurate and well-written.\",\n",
|
||||
" \"speak\": \"I will use the 'write_file' command to save the weather report to a file.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"write_file\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"file_path\": \"weather_report_sf.txt\",\n",
|
||||
" \"text\": \"San Francisco Weather Report:\\n\\nCloudy skies early, followed by partial clearing. High 56F. Winds W at 10 to 20 mph. PRECIPITATION.\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"System: Command write_file returned: File written to successfully.\n",
|
||||
"Human: Determine which next command to use, and respond using the format specified above:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I have completed all my tasks. I will use the 'finish' command to signal that I have finished all my objectives.\",\n",
|
||||
" \"reasoning\": \"I have completed the task of writing a weather report for San Francisco and there are no other tasks assigned to me.\",\n",
|
||||
" \"plan\": \"- Use the 'finish' command to signal that I have finished all my objectives\",\n",
|
||||
" \"criticism\": \"I need to make sure that I have completed all my tasks before using the 'finish' command.\",\n",
|
||||
" \"speak\": \"I will use the 'finish' command to signal that I have finished all my objectives.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"finish\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"response\": \"I have completed all my objectives.\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
]
},
{
"data": {
"text/plain": [
"'I have completed all my objectives.'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.run([\"write a weather report for SF today\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa264f26",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
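Pieced together, the new notebook reduces to roughly the following end-to-end sketch (it assumes SERPAPI_API_KEY and OPENAI_API_KEY are set in the environment; everything else mirrors the cells above):

```python
# Consolidated sketch of the autogpt.ipynb flow above; not a drop-in module.
import faiss
from langchain.agents import Tool
from langchain.auto_agents.autogpt.agent import AutoGPT
from langchain.chat_models import ChatOpenAI
from langchain.docstore import InMemoryDocstore
from langchain.embeddings import OpenAIEmbeddings
from langchain.tools.file_management.read import ReadFileTool
from langchain.tools.file_management.write import WriteFileTool
from langchain.utilities import SerpAPIWrapper
from langchain.vectorstores import FAISS

search = SerpAPIWrapper()  # requires SERPAPI_API_KEY
tools = [
    Tool(
        name="search",
        func=search.run,
        description="useful for when you need to answer questions about current events",
    ),
    WriteFileTool(),
    ReadFileTool(),
]

embeddings_model = OpenAIEmbeddings()
index = faiss.IndexFlatL2(1536)  # dimensionality of OpenAI embeddings
vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})

agent = AutoGPT.from_llm_and_tools(
    ai_name="Tom",
    ai_role="Assistant",
    tools=tools,
    llm=ChatOpenAI(temperature=0),
    memory=vectorstore.as_retriever(),
)
agent.run(["write a weather report for SF today"])
```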
@@ -35,7 +35,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 3,
"id": "16c4dc59",
"metadata": {},
"outputs": [],
@@ -45,7 +45,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 4,
"id": "46b9489d",
"metadata": {},
"outputs": [
@@ -72,7 +72,7 @@
"'There are 891 rows in the dataframe.'"
]
},
"execution_count": 12,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -83,7 +83,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "a96309be",
"metadata": {},
"outputs": [
@@ -110,7 +110,7 @@
"'30 people have more than 3 siblings.'"
]
},
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -121,7 +121,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"id": "964a09f7",
"metadata": {},
"outputs": [
@@ -143,7 +143,7 @@
"Thought:\u001b[32;1m\u001b[1;3m I need to import the math library\n",
"Action: python_repl_ast\n",
"Action Input: import math\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3mNone\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I can now calculate the square root\n",
"Action: python_repl_ast\n",
"Action Input: math.sqrt(df['Age'].mean())\u001b[0m\n",
@@ -160,7 +160,7 @@
"'5.449689683556195'"
]
},
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
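For context, the agent's final step in this hunk is ordinary pandas plus math. A hedged re-check outside the agent (the notebook's df is the Titanic dataset; the CSV path is an assumption):

```python
# Hedged sketch: verify the agent's square-root-of-mean-age computation.
import math

import pandas as pd

df = pd.read_csv("titanic.csv")      # path is an assumption
print(math.sqrt(df["Age"].mean()))   # ~5.449689683556195 on the usual Kaggle data
```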
@@ -80,8 +80,8 @@
}
],
"source": [
"llm = ChatOpenAI(temperature=0,)\n",
"tools = load_tools([\"requests\"] )\n",
"llm = ChatOpenAI(temperature=0)\n",
"tools = load_tools([\"requests_all\"] )\n",
"tools += [tool]\n",
"\n",
"agent_chain = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)\n",
@@ -144,6 +144,160 @@
|
||||
"\u001b[32;1m\u001b[1;3mYou are a helpful AI Assistant. Please provide JSON arguments to agentFunc() based on the user's instructions.\n",
|
||||
"\n",
|
||||
"API_SCHEMA: ```typescript\n",
|
||||
"/* API for fetching Klarna product information */\n",
|
||||
"type productsUsingGET = (_: {\n",
|
||||
"/* A precise query that matches one very small category or product that needs to be searched for to find the products the user is looking for. If the user explicitly stated what they want, use that as a query. The query is as specific as possible to the product name or category mentioned by the user in its singular form, and don't contain any clarifiers like latest, newest, cheapest, budget, premium, expensive or similar. The query is always taken from the latest topic, if there is a new topic a new query is started. */\n",
|
||||
"\t\tq: string,\n",
|
||||
"/* number of products returned */\n",
|
||||
"\t\tsize?: number,\n",
|
||||
"/* (Optional) Minimum price in local currency for the product searched for. Either explicitly stated by the user or implicitly inferred from a combination of the user's request and the kind of product searched for. */\n",
|
||||
"\t\tmin_price?: number,\n",
|
||||
"/* (Optional) Maximum price in local currency for the product searched for. Either explicitly stated by the user or implicitly inferred from a combination of the user's request and the kind of product searched for. */\n",
|
||||
"\t\tmax_price?: number,\n",
|
||||
"}) => any;\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"USER_INSTRUCTIONS: \"whats the most expensive shirt?\"\n",
|
||||
"\n",
|
||||
"Your arguments must be plain json provided in a markdown block:\n",
|
||||
"\n",
|
||||
"ARGS: ```json\n",
|
||||
"{valid json conforming to API_SCHEMA}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Example\n",
|
||||
"-----\n",
|
||||
"\n",
|
||||
"ARGS: ```json\n",
|
||||
"{\"foo\": \"bar\", \"baz\": {\"qux\": \"quux\"}}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"The block must be no more than 1 line long, and all arguments must be valid JSON. All string arguments must be wrapped in double quotes.\n",
|
||||
"You MUST strictly comply to the types indicated by the provided schema, including all required args.\n",
|
||||
"\n",
|
||||
"If you don't have sufficient information to call the function due to things like requiring specific uuid's, you can reply with the following message:\n",
|
||||
"\n",
|
||||
"Message: ```text\n",
|
||||
"Concise response requesting the additional information that would make calling the function successful.\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Begin\n",
|
||||
"-----\n",
|
||||
"ARGS:\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m{\"q\": \"shirt\", \"size\": 1, \"max_price\": null}\u001b[0m\n",
|
||||
"\u001b[36;1m\u001b[1;3m{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]}]}\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new APIResponderChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mYou are a helpful AI assistant trained to answer user queries from API responses.\n",
|
||||
"You attempted to call an API, which resulted in:\n",
|
||||
"API_RESPONSE: {\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]}]}\n",
|
||||
"\n",
|
||||
"USER_COMMENT: \"whats the most expensive shirt?\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"If the API_RESPONSE can answer the USER_COMMENT respond with the following markdown json block:\n",
|
||||
"Response: ```json\n",
|
||||
"{\"response\": \"Human-understandable synthesis of the API_RESPONSE\"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Otherwise respond with the following markdown json block:\n",
|
||||
"Response Error: ```json\n",
|
||||
"{\"response\": \"What you did and a concise statement of the resulting error. If it can be easily fixed, provide a suggestion.\"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You MUST respond as a markdown json code block. The person you are responding to CANNOT see the API_RESPONSE, so if there is any relevant information there you must include it in your response.\n",
|
||||
"\n",
|
||||
"Begin:\n",
|
||||
"---\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\u001b[33;1m\u001b[1;3mThe most expensive shirt in the API response is the Burberry Check Poplin Shirt, which costs $360.00.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = chain(\"whats the most expensive shirt?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "c000295e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'request_args': '{\"q\": \"shirt\", \"size\": 1, \"max_price\": null}',\n",
|
||||
" 'response_text': '{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]}]}'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# View intermediate steps\n",
|
||||
"output[\"intermediate_steps\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "092bdb4d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Return raw response\n",
|
||||
"\n",
|
||||
"We can also run this chain without synthesizing the response. This will have the effect of just returning the raw API output."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "4dff3849",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = OpenAPIEndpointChain.from_api_operation(\n",
|
||||
" operation, \n",
|
||||
" llm, \n",
|
||||
" requests=Requests(), \n",
|
||||
" verbose=True,\n",
|
||||
" return_intermediate_steps=True, # Return request and response text\n",
|
||||
" raw_response=True # Return raw response\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "762499a9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new OpenAPIEndpointChain chain...\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new APIRequesterChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mYou are a helpful AI Assistant. Please provide JSON arguments to agentFunc() based on the user's instructions.\n",
|
||||
"\n",
|
||||
"API_SCHEMA: ```typescript\n",
|
||||
"/* API for fetching Klarna product information */\n",
|
||||
"type productsUsingGET = (_: {\n",
|
||||
"/* A precise query that matches one very small category or product that needs to be searched for to find the products the user is looking for. If the user explicitly stated what they want, use that as a query. The query is as specific as possible to the product name or category mentioned by the user in its singular form, and don't contain any clarifiers like latest, newest, cheapest, budget, premium, expensive or similar. The query is always taken from the latest topic, if there is a new topic a new query is started. */\n",
|
||||
"\t\tq: string,\n",
|
||||
@@ -187,36 +341,7 @@
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m{\"q\": \"shirt\", \"max_price\": null}\u001b[0m\n",
|
||||
"\u001b[36;1m\u001b[1;3m{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Cotton Shirt - Beige\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl359/3200280807/Children-s-Clothing/Burberry-Vintage-Check-Cotton-Shirt-Beige/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$196.30\",\"attributes\":[\"Material:Cotton,Elastane\",\"Color:Beige\",\"Model:Boy\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Somerton Check Shirt - Camel\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201112728/Clothing/Burberry-Somerton-Check-Shirt-Camel/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$450.00\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Man\",\"Color:Beige\"]},{\"name\":\"Calvin Klein Slim Fit Oxford Dress Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201839169/Clothing/Calvin-Klein-Slim-Fit-Oxford-Dress-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$24.91\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,White,Blue,Black\",\"Pattern:Solid Color\"]},{\"name\":\"Magellan Outdoors Laguna Madre Solid Short Sleeve Fishing Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3203102142/Clothing/Magellan-Outdoors-Laguna-Madre-Solid-Short-Sleeve-Fishing-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$19.99\",\"attributes\":[\"Material:Polyester,Nylon\",\"Target Group:Man\",\"Color:Red,Pink,White,Blue,Purple,Beige,Black,Green\",\"Properties:Pockets\",\"Pattern:Solid Color\"]}]}\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new APIResponderChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mYou are a helpful AI assistant trained to answer user queries from API responses.\n",
|
||||
"You attempted to call an API, which resulted in:\n",
|
||||
"API_RESPONSE: {\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Cotton Shirt - Beige\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl359/3200280807/Children-s-Clothing/Burberry-Vintage-Check-Cotton-Shirt-Beige/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$196.30\",\"attributes\":[\"Material:Cotton,Elastane\",\"Color:Beige\",\"Model:Boy\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Somerton Check Shirt - Camel\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201112728/Clothing/Burberry-Somerton-Check-Shirt-Camel/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$450.00\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Man\",\"Color:Beige\"]},{\"name\":\"Calvin Klein Slim Fit Oxford Dress Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201839169/Clothing/Calvin-Klein-Slim-Fit-Oxford-Dress-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$24.91\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,White,Blue,Black\",\"Pattern:Solid Color\"]},{\"name\":\"Magellan Outdoors Laguna Madre Solid Short Sleeve Fishing Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3203102142/Clothing/Magellan-Outdoors-Laguna-Madre-Solid-Short-Sleeve-Fishing-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$19.99\",\"attributes\":[\"Material:Polyester,Nylon\",\"Target Group:Man\",\"Color:Red,Pink,White,Blue,Purple,Beige,Black,Green\",\"Properties:Pockets\",\"Pattern:Solid Color\"]}]}\n",
|
||||
"\n",
|
||||
"USER_COMMENT: \"whats the most expensive shirt?\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"If the API_RESPONSE can answer the USER_COMMENT respond with the following markdown json block:\n",
|
||||
"Response: ```json\n",
|
||||
"{\"response\": \"Concise response to USER_COMMENT based on API_RESPONSE.\"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Otherwise respond with the following markdown json block:\n",
|
||||
"Response Error: ```json\n",
|
||||
"{\"response\": \"What you did and a concise statement of the resulting error. If it can be easily fixed, provide a suggestion.\"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You MUST respond as a markdown json code block.\n",
|
||||
"\n",
|
||||
"Begin:\n",
|
||||
"---\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\u001b[33;1m\u001b[1;3mThe most expensive shirt in this list is the 'Burberry Somerton Check Shirt - Camel' which is priced at $450.00\u001b[0m\n",
|
||||
"\u001b[36;1m\u001b[1;3m{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Cotton Shirt - Beige\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl359/3200280807/Children-s-Clothing/Burberry-Vintage-Check-Cotton-Shirt-Beige/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$229.02\",\"attributes\":[\"Material:Cotton,Elastane\",\"Color:Beige\",\"Model:Boy\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Stretch Cotton Twill Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3202342515/Clothing/Burberry-Vintage-Check-Stretch-Cotton-Twill-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$309.99\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Woman\",\"Color:Beige\",\"Properties:Stretch\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Somerton Check Shirt - Camel\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201112728/Clothing/Burberry-Somerton-Check-Shirt-Camel/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$450.00\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Man\",\"Color:Beige\"]},{\"name\":\"Magellan Outdoors Laguna Madre Solid Short Sleeve Fishing Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3203102142/Clothing/Magellan-Outdoors-Laguna-Madre-Solid-Short-Sleeve-Fishing-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$19.99\",\"attributes\":[\"Material:Polyester,Nylon\",\"Target Group:Man\",\"Color:Red,Pink,White,Blue,Purple,Beige,Black,Green\",\"Properties:Pockets\",\"Pattern:Solid Color\"]}]}\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -228,25 +353,26 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "c000295e",
|
||||
"execution_count": 12,
|
||||
"id": "4afc021a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['{\"q\": \"shirt\", \"max_price\": null}',\n",
|
||||
" '{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Cotton Shirt - Beige\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl359/3200280807/Children-s-Clothing/Burberry-Vintage-Check-Cotton-Shirt-Beige/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$196.30\",\"attributes\":[\"Material:Cotton,Elastane\",\"Color:Beige\",\"Model:Boy\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Somerton Check Shirt - Camel\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201112728/Clothing/Burberry-Somerton-Check-Shirt-Camel/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$450.00\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Man\",\"Color:Beige\"]},{\"name\":\"Calvin Klein Slim Fit Oxford Dress Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201839169/Clothing/Calvin-Klein-Slim-Fit-Oxford-Dress-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$24.91\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,White,Blue,Black\",\"Pattern:Solid Color\"]},{\"name\":\"Magellan Outdoors Laguna Madre Solid Short Sleeve Fishing Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3203102142/Clothing/Magellan-Outdoors-Laguna-Madre-Solid-Short-Sleeve-Fishing-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$19.99\",\"attributes\":[\"Material:Polyester,Nylon\",\"Target Group:Man\",\"Color:Red,Pink,White,Blue,Purple,Beige,Black,Green\",\"Properties:Pockets\",\"Pattern:Solid Color\"]}]}']"
|
||||
"{'instructions': 'whats the most expensive shirt?',\n",
|
||||
" 'output': '{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Cotton Shirt - Beige\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl359/3200280807/Children-s-Clothing/Burberry-Vintage-Check-Cotton-Shirt-Beige/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$229.02\",\"attributes\":[\"Material:Cotton,Elastane\",\"Color:Beige\",\"Model:Boy\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Stretch Cotton Twill Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3202342515/Clothing/Burberry-Vintage-Check-Stretch-Cotton-Twill-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$309.99\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Woman\",\"Color:Beige\",\"Properties:Stretch\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Somerton Check Shirt - Camel\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201112728/Clothing/Burberry-Somerton-Check-Shirt-Camel/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$450.00\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Man\",\"Color:Beige\"]},{\"name\":\"Magellan Outdoors Laguna Madre Solid Short Sleeve Fishing Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3203102142/Clothing/Magellan-Outdoors-Laguna-Madre-Solid-Short-Sleeve-Fishing-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$19.99\",\"attributes\":[\"Material:Polyester,Nylon\",\"Target Group:Man\",\"Color:Red,Pink,White,Blue,Purple,Beige,Black,Green\",\"Properties:Pockets\",\"Pattern:Solid Color\"]}]}',\n",
|
||||
" 'intermediate_steps': {'request_args': '{\"q\": \"shirt\", \"max_price\": null}',\n",
|
||||
" 'response_text': '{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Cotton Shirt - Beige\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl359/3200280807/Children-s-Clothing/Burberry-Vintage-Check-Cotton-Shirt-Beige/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$229.02\",\"attributes\":[\"Material:Cotton,Elastane\",\"Color:Beige\",\"Model:Boy\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Stretch Cotton Twill Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3202342515/Clothing/Burberry-Vintage-Check-Stretch-Cotton-Twill-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$309.99\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Woman\",\"Color:Beige\",\"Properties:Stretch\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Somerton Check Shirt - Camel\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201112728/Clothing/Burberry-Somerton-Check-Shirt-Camel/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$450.00\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Man\",\"Color:Beige\"]},{\"name\":\"Magellan Outdoors Laguna Madre Solid Short Sleeve Fishing Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3203102142/Clothing/Magellan-Outdoors-Laguna-Madre-Solid-Short-Sleeve-Fishing-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$19.99\",\"attributes\":[\"Material:Polyester,Nylon\",\"Target Group:Man\",\"Color:Red,Pink,White,Blue,Purple,Beige,Black,Green\",\"Properties:Pockets\",\"Pattern:Solid Color\"]}]}'}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# View intermediate steps\n",
|
||||
"output[\"intermediate_steps\"]"
|
||||
"output"
|
||||
]
|
||||
},
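The dictionary above carries the original `instructions`, the final `output`, and an `intermediate_steps` mapping with the raw request arguments and response text. A minimal sketch of pulling those fields back out (key names taken from the output shown above):

```python
# keys as observed in the output dict above
request_args = output["intermediate_steps"]["request_args"]
response_text = output["intermediate_steps"]["response_text"]

print(request_args)  # '{"q": "shirt", "max_price": null}'
```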
|
||||
{
|
||||
@@ -448,7 +574,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -9,9 +9,9 @@
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"# SQLite example\n",
|
||||
"# SQL Chain example\n",
|
||||
"\n",
|
||||
"This example showcases hooking up an LLM to answer questions over a database."
|
||||
"This example demonstrates the use of the `SQLDatabaseChain` for answering questions over a database."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -23,8 +23,10 @@
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"This uses the example Chinook database.\n",
|
||||
"To set it up follow the instructions on https://database.guide/2-sample-databases-sqlite/, placing the `.db` file in a notebooks folder at the root of this repository."
|
||||
"Under the hood, LangChain uses SQLAlchemy to connect to SQL databases. The `SQLDatabaseChain` can therefore be used with any SQL dialect supported by SQLAlchemy, such as MS SQL, MySQL, MariaDB, PostgreSQL, Oracle SQL, and SQLite. Please refer to the SQLAlchemy documentation for more information about requirements for connecting to your database. For example, a connection to MySQL requires an appropriate connector such as PyMySQL. A URI for a MySQL connection might look like: `mysql+pymysql://user:pass@some_mysql_db_address/db_name`\n",
|
||||
"\n",
|
||||
"This demonstration uses SQLite and the example Chinook database.\n",
|
||||
"To set it up, follow the instructions on https://database.guide/2-sample-databases-sqlite/, placing the `.db` file in a notebooks folder at the root of this repository."
|
||||
]
|
||||
},
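A minimal sketch of wiring the chain up through a SQLAlchemy URI (assuming a local SQLite copy of Chinook at `../notebooks/Chinook.db`; swap in a URI like the `mysql+pymysql://...` example above for other dialects):

```python
from langchain import OpenAI, SQLDatabase, SQLDatabaseChain

# any SQLAlchemy-compatible connection URI works here
db = SQLDatabase.from_uri("sqlite:///../notebooks/Chinook.db")
db_chain = SQLDatabaseChain(llm=OpenAI(temperature=0), database=db, verbose=True)

db_chain.run("How many employees are there?")
```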
|
||||
{
|
||||
@@ -679,7 +681,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -106,7 +106,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Specify a column to be used identify the document source\n",
|
||||
"## Specify a column to be used identify the document source\n",
|
||||
"\n",
|
||||
"Use the `source_column` argument to specify a column to be set as the source for the document created from each row. Otherwise `file_path` will be used as the source for all documents created from the csv file.\n",
|
||||
"\n",
|
||||
|
||||
docs/modules/indexes/document_loaders/examples/diffbot.ipynb (new file, 99 lines)
File diff suppressed because one or more lines are too long
Submodule docs/modules/indexes/document_loaders/examples/example_data/test_repo1 added at 7e525a3b91
@@ -8,4 +8,5 @@
|
||||
1/23/23, 3:02 AM - User 1: I thought you were selling the blue one!
|
||||
1/23/23, 3:18 AM - User 2: No Im sorry it was my mistake, the blue one is not for sale
|
||||
1/23/23, 3:19 AM - User 1: Oh no worries! Bye
|
||||
1/23/23, 3:19 AM - User 2: Bye!
|
||||
1/23/23, 3:22_AM - User 1: And let me know if anything changes
|
||||
docs/modules/indexes/document_loaders/examples/git.ipynb (new file, 192 lines)
@@ -0,0 +1,192 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Git\n",
|
||||
"\n",
|
||||
"This notebook shows how to load text files from Git repository."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load existing repository from disk"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from git import Repo\n",
|
||||
"\n",
|
||||
"repo = Repo.clone_from(\n",
|
||||
" \"https://github.com/hwchase17/langchain\", to_path=\"./example_data/test_repo1\"\n",
|
||||
")\n",
|
||||
"branch = repo.head.reference"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import GitLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = GitLoader(repo_path=\"./example_data/test_repo1/\", branch=branch)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"len(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='.venv\\n.github\\n.git\\n.mypy_cache\\n.pytest_cache\\nDockerfile' metadata={'file_path': '.dockerignore', 'file_name': '.dockerignore', 'file_type': ''}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(data[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Clone repository from url"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import GitLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = GitLoader(\n",
|
||||
" clone_url=\"https://github.com/hwchase17/langchain\",\n",
|
||||
" repo_path=\"./example_data/test_repo2/\",\n",
|
||||
" branch=\"master\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"1074"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Filtering files to load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import GitLoader\n",
|
||||
"\n",
|
||||
"# eg. loading only python files\n",
|
||||
"loader = GitLoader(repo_path=\"./example_data/test_repo1/\", file_filter=lambda file_path: file_path.endswith(\".py\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -376,7 +376,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"id": "a5525fb0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -386,12 +386,115 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"id": "dac7ff68",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
"data = loader.load()[0] # entire pdf is loaded as a single Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "0ba9f645",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"soup = BeautifulSoup(data.page_content,'html.parser')\n",
|
||||
"content = soup.find_all('div')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "35304e21",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"cur_fs = None\n",
|
||||
"cur_text = ''\n",
|
||||
"snippets = [] # first collect all snippets that have the same font size\n",
|
||||
"for c in content:\n",
|
||||
" sp = c.find('span')\n",
|
||||
" if not sp:\n",
|
||||
" continue\n",
|
||||
" st = sp.get('style')\n",
|
||||
" if not st:\n",
|
||||
" continue\n",
|
||||
" fs = re.findall('font-size:(\\d+)px',st)\n",
|
||||
" if not fs:\n",
|
||||
" continue\n",
|
||||
" fs = int(fs[0])\n",
|
||||
" if not cur_fs:\n",
|
||||
" cur_fs = fs\n",
|
||||
" if fs == cur_fs:\n",
|
||||
" cur_text += c.text\n",
|
||||
" else:\n",
|
||||
" snippets.append((cur_text,cur_fs))\n",
|
||||
" cur_fs = fs\n",
|
||||
" cur_text = c.text\n",
|
||||
"snippets.append((cur_text,cur_fs))\n",
|
||||
"# Note: The above logic is very straightforward. One can also add more strategies such as removing duplicate snippets (as\n",
|
||||
"# headers/footers in a PDF appear on multiple pages so if we find duplicatess safe to assume that it is redundant info)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "af8adf2f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"cur_idx = -1\n",
|
||||
"semantic_snippets = []\n",
|
||||
"# Assumption: headings have higher font size than their respective content\n",
|
||||
"for s in snippets:\n",
|
||||
" # if current snippet's font size > previous section's heading => it is a new heading\n",
|
||||
" if not semantic_snippets or s[1] > semantic_snippets[cur_idx].metadata['heading_font']:\n",
|
||||
" metadata={'heading':s[0], 'content_font': 0, 'heading_font': s[1]}\n",
|
||||
" metadata.update(data.metadata)\n",
|
||||
" semantic_snippets.append(Document(page_content='',metadata=metadata))\n",
|
||||
" cur_idx += 1\n",
|
||||
" continue\n",
|
||||
" \n",
|
||||
" # if current snippet's font size <= previous section's content => content belongs to the same section (one can also create\n",
|
||||
" # a tree like structure for sub sections if needed but that may require some more thinking and may be data specific)\n",
|
||||
" if not semantic_snippets[cur_idx].metadata['content_font'] or s[1] <= semantic_snippets[cur_idx].metadata['content_font']:\n",
|
||||
" semantic_snippets[cur_idx].page_content += s[0]\n",
|
||||
" semantic_snippets[cur_idx].metadata['content_font'] = max(s[1], semantic_snippets[cur_idx].metadata['content_font'])\n",
|
||||
" continue\n",
|
||||
" \n",
|
||||
" # if current snippet's font size > previous section's content but less tha previous section's heading than also make a new \n",
|
||||
" # section (e.g. title of a pdf will have the highest font size but we don't want it to subsume all sections)\n",
|
||||
" metadata={'heading':s[0], 'content_font': 0, 'heading_font': s[1]}\n",
|
||||
" metadata.update(data.metadata)\n",
|
||||
" semantic_snippets.append(Document(page_content='',metadata=metadata))\n",
|
||||
" cur_idx += 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "db7f6674",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='Recently, various DL models and datasets have been developed for layout analysis\\ntasks. The dhSegment [22] utilizes fully convolutional networks [20] for segmen-\\ntation tasks on historical documents. Object detection-based methods like Faster\\nR-CNN [28] and Mask R-CNN [12] are used for identifying document elements [38]\\nand detecting tables [30, 26]. Most recently, Graph Neural Networks [29] have also\\nbeen used in table detection [27]. However, these models are usually implemented\\nindividually and there is no unified framework to load and use such models.\\nThere has been a surge of interest in creating open-source tools for document\\nimage processing: a search of document image analysis in Github leads to 5M\\nrelevant code pieces 6; yet most of them rely on traditional rule-based methods\\nor provide limited functionalities. The closest prior research to our work is the\\nOCR-D project7, which also tries to build a complete toolkit for DIA. However,\\nsimilar to the platform developed by Neudecker et al. [21], it is designed for\\nanalyzing historical documents, and provides no supports for recent DL models.\\nThe DocumentLayoutAnalysis project8 focuses on processing born-digital PDF\\ndocuments via analyzing the stored PDF data. Repositories like DeepLayout9\\nand Detectron2-PubLayNet10 are individual deep learning models trained on\\nlayout analysis datasets without support for the full DIA pipeline. The Document\\nAnalysis and Exploitation (DAE) platform [15] and the DeepDIVA project [2]\\naim to improve the reproducibility of DIA methods (or DL models), yet they\\nare not actively maintained. OCR engines like Tesseract [14], easyOCR11 and\\npaddleOCR12 usually do not come with comprehensive functionalities for other\\nDIA tasks like layout analysis.\\nRecent years have also seen numerous efforts to create libraries for promoting\\nreproducibility and reusability in the field of DL. Libraries like Dectectron2 [35],\\n6 The number shown is obtained by specifying the search type as ‘code’.\\n7 https://ocr-d.de/en/about\\n8 https://github.com/BobLd/DocumentLayoutAnalysis\\n9 https://github.com/leonlulu/DeepLayout\\n10 https://github.com/hpanwar08/detectron2\\n11 https://github.com/JaidedAI/EasyOCR\\n12 https://github.com/PaddlePaddle/PaddleOCR\\n4\\nZ. Shen et al.\\nFig. 1: The overall architecture of LayoutParser. For an input document image,\\nthe core LayoutParser library provides a set of off-the-shelf tools for layout\\ndetection, OCR, visualization, and storage, backed by a carefully designed layout\\ndata structure. LayoutParser also supports high level customization via efficient\\nlayout annotation and model training functions. These improve model accuracy\\non the target samples. The community platform enables the easy sharing of DIA\\nmodels and whole digitization pipelines to promote reusability and reproducibility.\\nA collection of detailed documentation, tutorials and exemplar projects make\\nLayoutParser easy to learn and use.\\nAllenNLP [8] and transformers [34] have provided the community with complete\\nDL-based support for developing and deploying models for general computer\\nvision and natural language processing problems. LayoutParser, on the other\\nhand, specializes specifically in DIA tasks. LayoutParser is also equipped with a\\ncommunity platform inspired by established model hubs such as Torch Hub [23]\\nand TensorFlow Hub [1]. 
It enables the sharing of pretrained models as well as\\nfull document processing pipelines that are unique to DIA tasks.\\nThere have been a variety of document data collections to facilitate the\\ndevelopment of DL models. Some examples include PRImA [3](magazine layouts),\\nPubLayNet [38](academic paper layouts), Table Bank [18](tables in academic\\npapers), Newspaper Navigator Dataset [16, 17](newspaper figure layouts) and\\nHJDataset [31](historical Japanese document layouts). A spectrum of models\\ntrained on these datasets are currently available in the LayoutParser model zoo\\nto support different use cases.\\n', metadata={'heading': '2 Related Work\\n', 'content_font': 9, 'heading_font': 11, 'source': 'example_data/layout-parser-paper.pdf'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"semantic_snippets[4]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -474,9 +577,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "langchain_dev",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "langchain_dev"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -0,0 +1,81 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1dc7df1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Slack (Local Exported Zipfile)\n",
|
||||
"\n",
|
||||
"This notebook covers how to load documents from a Zipfile generated from a Slack export.\n",
|
||||
"\n",
|
||||
"In order to get this Slack export, follow these instructions:\n",
|
||||
"\n",
|
||||
"## 🧑 Instructions for ingesting your own dataset\n",
|
||||
"\n",
|
||||
"Export your Slack data. You can do this by going to your Workspace Management page and clicking the Import/Export option ({your_slack_domain}.slack.com/services/export). Then, choose the right date range and click `Start export`. Slack will send you an email and a DM when the export is ready.\n",
|
||||
"\n",
|
||||
"The download will produce a `.zip` file in your Downloads folder (or wherever your downloads can be found, depending on your OS configuration).\n",
|
||||
"\n",
|
||||
"Copy the path to the `.zip` file, and assign it as `LOCAL_ZIPFILE` below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "007c5cbf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import SlackDirectoryLoader "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a1caec59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Optionally set your Slack URL. This will give you proper URLs in the docs sources.\n",
|
||||
"SLACK_WORKSPACE_URL = \"https://xxx.slack.com\"\n",
|
||||
"LOCAL_ZIPFILE = \"\" # Paste the local paty to your Slack zip file here.\n",
|
||||
"\n",
|
||||
"loader = SlackDirectoryLoader(LOCAL_ZIPFILE, SLACK_WORKSPACE_URL)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b1c30ff7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -112,6 +112,79 @@
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "a2c1c79f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Playwright URL Loader\n",
|
||||
"\n",
|
||||
"This covers how to load HTML documents from a list of URLs using the `PlaywrightURLLoader`.\n",
|
||||
"\n",
|
||||
"As in the Selenium case, Playwright allows us to load pages that need JavaScript to render.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To use the `PlaywrightURLLoader`, you will need to install `playwright` and `unstructured`. Additionally, you will need to install the Playwright Chromium browser:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "53158417",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install playwright\n",
|
||||
"!pip install \"playwright\"\n",
|
||||
"!pip install \"unstructured\"\n",
|
||||
"!playwright install"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0ab4e115",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PlaywrightURLLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ce5a9a0a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"urls = [\n",
|
||||
" \"https://www.youtube.com/watch?v=dQw4w9WgXcQ\",\n",
|
||||
" \"https://goo.gl/maps/NDSHwePEyaHMFGwh8\"\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2dc3e0bc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = PlaywrightURLLoader(urls=urls, remove_selectors=[\"header\", \"footer\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "10b79f80",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -130,7 +203,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.13"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -89,7 +89,7 @@
|
||||
"id": "150988e6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Loading multiple webpages\n",
|
||||
"## Loading multiple webpages\n",
|
||||
"\n",
|
||||
"You can also load multiple webpages at once by passing in a list of urls to the loader. This will return a list of documents in the same order as the urls passed in."
|
||||
]
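A minimal sketch, assuming `WebBaseLoader` accepts a list of URLs (the URLs are placeholders):

```python
from langchain.document_loaders import WebBaseLoader

# documents come back in the same order as the input URLs
loader = WebBaseLoader(["https://www.espn.com/", "https://google.com"])
docs = loader.load()
```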
|
||||
@@ -123,7 +123,7 @@
|
||||
"id": "641be294",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load multiple urls concurrently\n",
|
||||
"### Load multiple urls concurrently\n",
|
||||
"\n",
|
||||
"You can speed up the scraping process by scraping and parsing multiple urls concurrently.\n",
|
||||
"\n",
|
||||
|
||||
@@ -99,7 +99,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"loader = TextLoader('../state_of_the_union.txt')"
|
||||
"loader = TextLoader('../state_of_the_union.txt', encoding='utf8')"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "ab66dd43",
|
||||
"metadata": {},
|
||||
@@ -9,12 +10,12 @@
|
||||
"\n",
|
||||
"This notebook goes over how to use a retriever that under the hood uses Pinecone and Hybrid Search.\n",
|
||||
"\n",
|
||||
"The logic of this retriever is largely taken from [this blog post](https://www.pinecone.io/learn/hybrid-search-intro/)"
|
||||
"The logic of this retriever is taken from [this documentaion](https://docs.pinecone.io/docs/hybrid-search)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 75,
|
||||
"id": "393ac030",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -31,43 +32,61 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "15390796",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pinecone # !pip install pinecone-client\n",
|
||||
"\n",
|
||||
"pinecone.init(\n",
|
||||
" api_key=\"...\", # API key here\n",
|
||||
" environment=\"...\" # find next to api key in console\n",
|
||||
")\n",
|
||||
"# choose a name for your index\n",
|
||||
"index_name = \"...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "95d5d7f9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You should only have to do this part once."
|
||||
"You should only have to do this part once.\n",
|
||||
"\n",
|
||||
"Note: it's important to make sure that the \"context\" field that holds the document text in the metadata is not indexed. Currently you need to specify explicitly the fields you do want to index. For more information checkout Pinecone's [docs](https://docs.pinecone.io/docs/manage-indexes#selective-metadata-indexing)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 76,
|
||||
"id": "3b8f7697",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"WhoAmIResponse(username='load', user_label='label', projectname='load-test')"
|
||||
]
|
||||
},
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import pinecone\n",
|
||||
"\n",
|
||||
"api_key = os.getenv(\"PINECONE_API_KEY\") or \"PINECONE_API_KEY\"\n",
|
||||
"# find environment next to your API key in the Pinecone console\n",
|
||||
"env = os.getenv(\"PINECONE_ENVIRONMENT\") or \"PINECONE_ENVIRONMENT\"\n",
|
||||
"\n",
|
||||
"index_name = \"langchain-pinecone-hybrid-search\"\n",
|
||||
"\n",
|
||||
"pinecone.init(api_key=api_key, enviroment=env)\n",
|
||||
"pinecone.whoami()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 77,
|
||||
"id": "cfa3a8d8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# create the index\n",
|
||||
" # create the index\n",
|
||||
"pinecone.create_index(\n",
|
||||
" name = index_name,\n",
|
||||
" dimension = 1536, # dimensionality of dense model\n",
|
||||
" metric = \"dotproduct\",\n",
|
||||
" pod_type = \"s1\"\n",
|
||||
" metric = \"dotproduct\", # sparse values supported only for dotproduct\n",
|
||||
" pod_type = \"s1\",\n",
|
||||
" metadata_config={\"indexed\": []} # see explaination above\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -81,7 +100,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 78,
|
||||
"id": "bcb3c8c2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -90,18 +109,19 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "dbc025d6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get embeddings and tokenizers\n",
|
||||
"## Get embeddings and sparse encoders\n",
|
||||
"\n",
|
||||
"Embeddings are used for the dense vectors, tokenizer is used for the sparse vector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 79,
|
||||
"id": "2f63c911",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -110,19 +130,51 @@
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "96bf8879",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To encode the text to sparse values you can either choose SPLADE or BM25. For out of domain tasks we recommend using BM25.\n",
|
||||
"\n",
|
||||
"For more information about the sparse encoders you can checkout pinecone-text library [docs](https://pinecone-io.github.io/pinecone-text/pinecone_text.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 80,
|
||||
"id": "c3f030e5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from transformers import BertTokenizerFast # !pip install transformers\n",
|
||||
"from pinecone_text.sparse import BM25Encoder\n",
|
||||
"# or from pinecone_text.sparse import SpladeEncoder if you wish to work with SPLADE\n",
|
||||
"\n",
|
||||
"# load bert tokenizer from huggingface\n",
|
||||
"tokenizer = BertTokenizerFast.from_pretrained(\n",
|
||||
" 'bert-base-uncased'\n",
|
||||
")"
|
||||
"# use default tf-idf values\n",
|
||||
"bm25_encoder = BM25Encoder().default()"
|
||||
]
|
||||
},
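If you prefer SPLADE for in-domain tasks, a minimal sketch using the import mentioned in the comment above (assuming `SpladeEncoder`'s default constructor; the retriever construction mirrors the one later in this notebook):

```python
from pinecone_text.sparse import SpladeEncoder

# drop-in replacement for the BM25 encoder
splade_encoder = SpladeEncoder()
retriever = PineconeHybridSearchRetriever(
    embeddings=embeddings, sparse_encoder=splade_encoder, index=index
)
```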
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "23601ddb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The above code is using default tfids values. It's highly recommended to fit the tf-idf values to your own corpus. You can do it as follow:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"corpus = [\"foo\", \"bar\", \"world\", \"hello\"]\n",
|
||||
"\n",
|
||||
"# fit tf-idf values on your corpus\n",
|
||||
"bm25_encoder.fit(corpus)\n",
|
||||
"\n",
|
||||
"# store the values to a json file\n",
|
||||
"bm25_encoder.dump(\"bm25_values.json\")\n",
|
||||
"\n",
|
||||
"# load to your BM25Encoder object\n",
|
||||
"bm25_encoder = BM25Encoder().load(\"bm25_values.json\")\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -137,12 +189,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 81,
|
||||
"id": "ac77d835",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = PineconeHybridSearchRetriever(embeddings=embeddings, index=index, tokenizer=tokenizer)"
|
||||
"retriever = PineconeHybridSearchRetriever(embeddings=embeddings, sparse_encoder=bm25_encoder, index=index)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -157,23 +209,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 82,
|
||||
"id": "98b1c017",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "4d6f3ee7ca754d07a1a18d100d99e0cd",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0/1 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████| 1/1 [00:02<00:00, 2.27s/it]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -192,7 +237,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 83,
|
||||
"id": "c0455218",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -202,7 +247,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 84,
|
||||
"id": "7dfa5c29",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -212,7 +257,7 @@
|
||||
"Document(page_content='foo', metadata={})"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 84,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -220,19 +265,11 @@
|
||||
"source": [
|
||||
"result[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "74bd9256",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -246,7 +283,12 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.9.13"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "7ec0d8babd8cabf695a1d94b1e586d626e046c9df609f6bad065d15d49f67f54"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
docs/modules/indexes/retrievers/examples/svm_retriever.ipynb (new file, 128 lines)
@@ -0,0 +1,128 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ab66dd43",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SVM Retriever\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use a retriever that under the hood uses an SVM using scikit-learn.\n",
|
||||
"\n",
|
||||
"Largely based on https://github.com/karpathy/randomfun/blob/master/knn_vs_svm.ipynb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "393ac030",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.retrievers import SVMRetriever\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a801b57c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip install scikit-learn"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "aaf80e7f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create New Retriever with Texts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "98b1c017",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = SVMRetriever.from_texts([\"foo\", \"bar\", \"world\", \"hello\", \"foo bar\"], OpenAIEmbeddings())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "08437fa2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use Retriever\n",
|
||||
"\n",
|
||||
"We can now use the retriever!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "c0455218",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = retriever.get_relevant_documents(\"foo\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "7dfa5c29",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='foo', metadata={}),\n",
|
||||
" Document(page_content='foo bar', metadata={}),\n",
|
||||
" Document(page_content='hello', metadata={}),\n",
|
||||
" Document(page_content='world', metadata={})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "74bd9256",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -17,34 +17,36 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!python3 -m pip install openai deeplake"
|
||||
"!python3 -m pip install openai deeplake tiktoken"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import DeepLake\n",
|
||||
"from langchain.document_loaders import TextLoader"
|
||||
"from langchain.vectorstores import DeepLake"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"os.environ['OPENAI_API_KEY'] = 'sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'"
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -60,9 +62,38 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"mem://langchain loaded successfully.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Evaluating ingest: 100%|██████████| 1/1 [00:04<00:00\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='mem://langchain', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (4, 1536) float32 None \n",
|
||||
" ids text (4, 1) str None \n",
|
||||
" metadata json (4, 1) str None \n",
|
||||
" text text (4, 1) str None \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db = DeepLake.from_documents(docs, embeddings)\n",
|
||||
"\n",
|
||||
@@ -72,9 +103,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
@@ -89,9 +134,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/media/sdb/davit/.local/lib/python3.10/site-packages/langchain/llms/openai.py:624: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI`\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains import RetrievalQA\n",
|
||||
"from langchain.llms import OpenAIChat\n",
|
||||
@@ -101,9 +155,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The president nominated Circuit Court of Appeals Judge Ketanji Brown Jackson for the United States Supreme Court and praised her qualifications and broad support from both Democrats and Republicans.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = 'What did the president say about Ketanji Brown Jackson'\n",
|
||||
"qa.run(query)"
|
||||
@@ -119,9 +184,43 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"mem://langchain loaded successfully.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Evaluating ingest: 100%|██████████| 1/1 [00:04<00:00\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='mem://langchain', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (42, 1536) float32 None \n",
|
||||
" ids text (42, 1) str None \n",
|
||||
" metadata json (42, 1) str None \n",
|
||||
" text text (42, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": []
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import random\n",
|
||||
"\n",
|
||||
@@ -133,9 +232,30 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████| 42/42 [00:00<00:00, 3456.17it/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='Vice President Harris and I ran for office with a new economic vision for America. \\n\\nInvest in America. Educate Americans. Grow the workforce. Build the economy from the bottom up \\nand the middle out, not from the top down. \\n\\nBecause we know that when the middle class grows, the poor have a ladder up and the wealthy do very well. \\n\\nAmerica used to have the best roads, bridges, and airports on Earth. \\n\\nNow our infrastructure is ranked 13th in the world. \\n\\nWe won’t be able to compete for the jobs of the 21st Century if we don’t fix that. \\n\\nThat’s why it was so important to pass the Bipartisan Infrastructure Law—the most sweeping investment to rebuild America in history. \\n\\nThis was a bipartisan effort, and I want to thank the members of both parties who worked to make it happen. \\n\\nWe’re done talking about infrastructure weeks. \\n\\nWe’re going to have an infrastructure decade.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='It is going to transform America and put us on a path to win the economic competition of the 21st Century that we face with the rest of the world—particularly with China. \\n\\nAs I’ve told Xi Jinping, it is never a good bet to bet against the American people. \\n\\nWe’ll create good jobs for millions of Americans, modernizing roads, airports, ports, and waterways all across America. \\n\\nAnd we’ll do it all to withstand the devastating effects of the climate crisis and promote environmental justice. \\n\\nWe’ll build a national network of 500,000 electric vehicle charging stations, begin to replace poisonous lead pipes—so every child—and every American—has clean water to drink at home and at school, provide affordable high-speed internet for every American—urban, suburban, rural, and tribal communities. \\n\\n4,000 projects have already been announced. \\n\\nAnd tonight, I’m announcing that this year we will start fixing over 65,000 miles of highway and 1,500 bridges in disrepair.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db.similarity_search('What did the president say about Ketanji Brown Jackson', filter={'year': 2013})"
|
||||
]
|
||||
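The filtered search above only considers documents whose metadata matches. A minimal sketch of the same pattern, assuming the populated `db` from earlier cells and that the standard `k` result-count argument is accepted alongside `filter`:

```python
# Restrict the search to documents tagged year == 2013, returning at most 2 hits.
docs = db.similarity_search(
    "What did the president say about Ketanji Brown Jackson",
    k=2,  # standard LangChain result-count argument (assumed accepted here)
    filter={"year": 2013},
)
for d in docs:
    print(d.metadata["year"], d.page_content[:80])
```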
@@ -151,9 +271,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
|
||||
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='Tonight, I’m announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWe’ll also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLet’s pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLet’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2014})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db.similarity_search('What did the president say about Ketanji Brown Jackson?', distance_metric='cos')"
|
||||
]
|
||||
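The `distance_metric` argument selects how similarity is scored. A short sketch; only `'cos'` appears in this notebook, so other metric names such as `'L2'` are assumptions to verify against the Deep Lake docs:

```python
query = "What did the president say about Ketanji Brown Jackson?"

# Cosine distance, as used in the cell above.
docs_cos = db.similarity_search(query, distance_metric="cos")

# Euclidean distance -- 'L2' is an assumed metric name, not shown in this notebook.
docs_l2 = db.similarity_search(query, distance_metric="L2")
```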
@@ -169,9 +303,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
|
||||
" Document(page_content='One was stationed at bases and breathing in toxic smoke from “burn pits” that incinerated wastes of war—medical and hazard material, jet fuel, and more. \\n\\nWhen they came home, many of the world’s fittest and best trained warriors were never the same. \\n\\nHeadaches. Numbness. Dizziness. \\n\\nA cancer that would put them in a flag-draped coffin. \\n\\nI know. \\n\\nOne of those soldiers was my son Major Beau Biden. \\n\\nWe don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. \\n\\nBut I’m committed to finding out everything we can. \\n\\nCommitted to military families like Danielle Robinson from Ohio. \\n\\nThe widow of Sergeant First Class Heath Robinson. \\n\\nHe was born a soldier. Army National Guard. Combat medic in Kosovo and Iraq. \\n\\nStationed near Baghdad, just yards from burn pits the size of football fields. \\n\\nHeath’s widow Danielle is here with us tonight. They loved going to Ohio State football games. He loved building Legos with their daughter.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2014}),\n",
|
||||
" Document(page_content='As Ohio Senator Sherrod Brown says, “It’s time to bury the label “Rust Belt.” \\n\\nIt’s time. \\n\\nBut with all the bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up with the bills. \\n\\nInflation is robbing them of the gains they might otherwise feel. \\n\\nI get it. That’s why my top priority is getting prices under control. \\n\\nLook, our economy roared back faster than most predicted, but the pandemic meant that businesses had a hard time hiring enough workers to keep up production in their factories. \\n\\nThe pandemic also disrupted global supply chains. \\n\\nWhen factories close, it takes longer to make goods and get them from the warehouse to the store, and prices go up. \\n\\nLook at cars. \\n\\nLast year, there weren’t enough semiconductors to make all the cars that people wanted to buy. \\n\\nAnd guess what, prices of automobiles went up. \\n\\nSo—we have a choice. \\n\\nOne way to fight inflation is to drive down wages and make Americans poorer.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
|
||||
" Document(page_content='We can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \\n\\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \\n\\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \\n\\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \\n\\nI’ve worked on these issues a long time. \\n\\nI know what works: Investing in crime preventionand community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db.max_marginal_relevance_search('What did the president say about Ketanji Brown Jackson?')"
|
||||
]
|
||||
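Max marginal relevance re-ranks candidates to balance relevance against diversity, which is why the four results above span different topics. A sketch of the usual knobs, assuming the standard LangChain signature with `k` and `fetch_k`:

```python
docs = db.max_marginal_relevance_search(
    "What did the president say about Ketanji Brown Jackson?",
    k=4,         # number of diverse results to return
    fetch_k=20,  # candidate pool re-ranked for diversity (assumed knob)
)
```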
@@ -187,21 +335,87 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!activeloop login -t <token>"
|
||||
"os.environ['ACTIVELOOP_TOKEN'] = getpass.getpass('Activeloop Token:')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Your Deep Lake dataset has been successfully created!\n",
|
||||
"The dataset is private so make sure you are logged in!\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\\"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/linkedin\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"hub://davitbun/linkedin loaded successfully.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Evaluating ingest: 100%|██████████| 1/1 [00:23<00:00\n",
|
||||
"/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://davitbun/linkedin', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (42, 1536) float32 None \n",
|
||||
" ids text (42, 1) str None \n",
|
||||
" metadata json (42, 1) str None \n",
|
||||
" text text (42, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Embed and store the texts\n",
|
||||
"dataset_path = \"hub://{username}/{dataset_name}\" # could be also ./local/path (much faster locally), s3://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
|
||||
"dataset_path = f\"hub://{USERNAME}/{DATASET_NAME}\" # could be also ./local/path (much faster locally), s3://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
|
||||
"\n",
|
||||
"embedding = OpenAIEmbeddings()\n",
|
||||
"vectordb = DeepLake.from_documents(documents=docs, embedding=embedding, dataset_path=dataset_path)"
|
||||
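As the comment in the cell notes, the same `from_documents` call accepts a local path, which is much faster for iteration. A minimal sketch using the `docs` and `embedding` defined above (the directory name is a placeholder):

```python
# Local Deep Lake dataset instead of hub:// storage -- no network round-trips.
local_db = DeepLake.from_documents(
    documents=docs,
    embedding=embedding,
    dataset_path="./my_deeplake/",  # placeholder local directory
)
```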
@@ -209,9 +423,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = db.similarity_search(query)\n",
|
||||
@@ -220,11 +448,35 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://davitbun/linkedin', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (42, 1536) float32 None \n",
|
||||
" ids text (42, 1) str None \n",
|
||||
" metadata json (42, 1) str None \n",
|
||||
" text text (42, 1) str None \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vectordb.ds.summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vectordb.ds.summary()"
|
||||
"embeddings = vectordb.ds.embedding.numpy()"
|
||||
]
|
||||
},
|
||||
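Because `vectordb.ds` exposes the underlying Deep Lake dataset, the stored vectors can be pulled into NumPy for inspection, as the next cell does. A short sketch, assuming the (42, 1536) embedding tensor shown in the summary above:

```python
import numpy as np

embeddings = vectordb.ds.embedding.numpy()
print(embeddings.shape)                        # (42, 1536) per the summary above
print(np.linalg.norm(embeddings, axis=1)[:5])  # spot-check a few vector norms
```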
{
|
||||
@@ -232,9 +484,7 @@
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = vectordb.ds.embedding.numpy()"
|
||||
]
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
"In order to add a memory with an external message store to an agent we are going to do the following steps:\n",
|
||||
"\n",
|
||||
"1. We are going to create a `RedisChatMessageHistory` to connect to an external database to store the messages in.\n",
|
||||
"2. We are going to create an `LLMChain` useing that chat history as memory.\n",
|
||||
"2. We are going to create an `LLMChain` using that chat history as memory.\n",
|
||||
"3. We are going to use that `LLMChain` to create a custom Agent.\n",
|
||||
"\n",
|
||||
"For the purposes of this exercise, we are going to create a simple custom Agent that has access to a search tool and utilizes the `ConversationBufferMemory` class."
|
||||
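A minimal sketch of steps 1 and 2 from the list above, assuming a local Redis instance; the import path, URL, and session id are placeholders to verify against the LangChain Redis integration:

```python
from langchain.memory import ConversationBufferMemory, RedisChatMessageHistory

# Step 1: an external message store backed by Redis (placeholder URL / session id).
message_history = RedisChatMessageHistory(
    url="redis://localhost:6379/0", session_id="my-session"
)

# Step 2: buffer memory that reads from and writes to that store,
# ready to be passed to an LLMChain as `memory=memory`.
memory = ConversationBufferMemory(
    memory_key="chat_history", chat_memory=message_history
)
```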
|
||||
368
docs/modules/memory/types/vectorstore_retriever_memory.ipynb
Normal file
368
docs/modules/memory/types/vectorstore_retriever_memory.ipynb
Normal file
@@ -0,0 +1,368 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ff4be5f3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# VectorStore-Backed Memory\n",
|
||||
"\n",
|
||||
"`VectorStoreRetrieverMemory` stores memories in a VectorDB and queries the top-K most \"salient\" docs every time it is called.\n",
|
||||
"\n",
|
||||
"This differs from most of the other Memory classes in that it doesn't explicitly track the order of interactions.\n",
|
||||
"\n",
|
||||
"In this case, the \"docs\" are previous conversation snippets. This can be useful to refer to relevant pieces of information that the AI was told earlier in the conversation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "da3384db",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.memory import VectorStoreRetrieverMemory\n",
|
||||
"from langchain.chains import ConversationChain\n",
|
||||
"from langchain.prompts import PromptTemplate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c2e7abdf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Initialize your VectorStore\n",
|
||||
"\n",
|
||||
"Depending on the store you choose, this step may look different. Consult the relevant VectorStore documentation for more details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "eef56f65",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import faiss\n",
|
||||
"\n",
|
||||
"from langchain.docstore import InMemoryDocstore\n",
|
||||
"from langchain.vectorstores import FAISS\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"embedding_size = 1536 # Dimensions of the OpenAIEmbeddings\n",
|
||||
"index = faiss.IndexFlatL2(embedding_size)\n",
|
||||
"embedding_fn = OpenAIEmbeddings().embed_query\n",
|
||||
"vectorstore = FAISS(embedding_fn, index, InMemoryDocstore({}), {})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8f4bdf92",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create your the VectorStoreRetrieverMemory\n",
|
||||
"\n",
|
||||
"The memory object is instantiated from any VectorStoreRetriever."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "e00d4938",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# In actual usage, you would set `k` to be a higher value, but we use k=1 to show that\n",
|
||||
"# the vector lookup still returns the semantically relevant information\n",
|
||||
"retriever = vectorstore.as_retriever(search_kwargs=dict(k=1))\n",
|
||||
"memory = VectorStoreRetrieverMemory(retriever=retriever)\n",
|
||||
"\n",
|
||||
"# When added to an agent, the memory object can save pertinent information from conversations or used tools\n",
|
||||
"memory.save_context({\"input\": \"My favorite food is pizza\"}, {\"output\": \"thats good to know\"})\n",
|
||||
"memory.save_context({\"input\": \"My favorite sport is soccer\"}, {\"output\": \"...\"})\n",
|
||||
"memory.save_context({\"input\": \"I don't the Celtics\"}, {\"output\": \"ok\"}) # "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "2fe28a28",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"input: My favorite sport is soccer\n",
|
||||
"output: ...\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Notice the first result returned is the memory pertaining to tax help, which the language model deems more semantically relevant\n",
|
||||
"# to a 1099 than the other documents, despite them both containing numbers.\n",
|
||||
"print(memory.load_memory_variables({\"prompt\": \"what sport should i watch?\"})[\"history\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a6d2569f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using in a chain\n",
|
||||
"Let's walk through an example, again setting `verbose=True` so we can see the prompt."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "ebd68c10",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Relevant pieces of previous conversation:\n",
|
||||
"input: My favorite food is pizza\n",
|
||||
"output: thats good to know\n",
|
||||
"\n",
|
||||
"(You do not need to use these pieces of information if not relevant)\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"Human: Hi, my name is Perry, what's up?\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" Hi Perry, I'm doing well. How about you?\""
|
||||
]
|
||||
},
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0) # Can be any valid LLM\n",
|
||||
"_DEFAULT_TEMPLATE = \"\"\"The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Relevant pieces of previous conversation:\n",
|
||||
"{history}\n",
|
||||
"\n",
|
||||
"(You do not need to use these pieces of information if not relevant)\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"Human: {input}\n",
|
||||
"AI:\"\"\"\n",
|
||||
"PROMPT = PromptTemplate(\n",
|
||||
" input_variables=[\"history\", \"input\"], template=_DEFAULT_TEMPLATE\n",
|
||||
")\n",
|
||||
"conversation_with_summary = ConversationChain(\n",
|
||||
" llm=llm, \n",
|
||||
" prompt=PROMPT,\n",
|
||||
" # We set a very low max_token_limit for the purposes of testing.\n",
|
||||
" memory=memory,\n",
|
||||
" verbose=True\n",
|
||||
")\n",
|
||||
"conversation_with_summary.predict(input=\"Hi, my name is Perry, what's up?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "86207a61",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Relevant pieces of previous conversation:\n",
|
||||
"input: My favorite sport is soccer\n",
|
||||
"output: ...\n",
|
||||
"\n",
|
||||
"(You do not need to use these pieces of information if not relevant)\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"Human: what's my favorite sport?\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' You told me earlier that your favorite sport is soccer.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Here, the basketball related content is surfaced\n",
|
||||
"conversation_with_summary.predict(input=\"what's my favorite sport?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"id": "8c669db1",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Relevant pieces of previous conversation:\n",
|
||||
"input: My favorite food is pizza\n",
|
||||
"output: thats good to know\n",
|
||||
"\n",
|
||||
"(You do not need to use these pieces of information if not relevant)\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"Human: Whats my favorite food\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' You said your favorite food is pizza.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Even though the language model is stateless, since relavent memory is fetched, it can \"reason\" about the time.\n",
|
||||
"# Timestamping memories and data is useful in general to let the agent determine temporal relevance\n",
|
||||
"conversation_with_summary.predict(input=\"Whats my favorite food\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"id": "8c09a239",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Relevant pieces of previous conversation:\n",
|
||||
"input: Hi, my name is Perry, what's up?\n",
|
||||
"response: Hi Perry, I'm doing well. How about you?\n",
|
||||
"\n",
|
||||
"(You do not need to use these pieces of information if not relevant)\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"Human: What's my name?\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' Your name is Perry.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# The memories from the conversation are automatically stored,\n",
|
||||
"# since this query best matches the introduction chat above,\n",
|
||||
"# the agent is able to 'remember' the user's name.\n",
|
||||
"conversation_with_summary.predict(input=\"What's my name?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "df27c7dc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -115,7 +115,7 @@
|
||||
"id": "a2d76826",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**The above request should now appear on your [PromptLayer dashboard](https://ww.promptlayer.com).**"
|
||||
"**The above request should now appear on your [PromptLayer dashboard](https://www.promptlayer.com).**"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -60,14 +60,14 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 30.7 ms, sys: 18.6 ms, total: 49.3 ms\n",
|
||||
"Wall time: 791 ms\n"
|
||||
"CPU times: user 14.2 ms, sys: 4.9 ms, total: 19.1 ms\n",
|
||||
"Wall time: 1.1 s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was...two tired!\""
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
@@ -91,14 +91,14 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 80 µs, sys: 0 ns, total: 80 µs\n",
|
||||
"Wall time: 83.9 µs\n"
|
||||
"CPU times: user 162 µs, sys: 7 µs, total: 169 µs\n",
|
||||
"Wall time: 175 µs\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was...two tired!\""
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
@@ -252,6 +252,249 @@
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "684eab55",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## GPTCache\n",
|
||||
"\n",
|
||||
"We can use [GPTCache](https://github.com/zilliztech/GPTCache) for exact match caching OR to cache results based on semantic similarity\n",
|
||||
"\n",
|
||||
"Let's first start with an example of exact match"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "14a82124",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import gptcache\n",
|
||||
"from gptcache.processor.pre import get_prompt\n",
|
||||
"from gptcache.manager.factory import get_data_manager\n",
|
||||
"from langchain.cache import GPTCache\n",
|
||||
"\n",
|
||||
"# Avoid multiple caches using the same file, causing different llm model caches to affect each other\n",
|
||||
"i = 0\n",
|
||||
"file_prefix = \"data_map\"\n",
|
||||
"\n",
|
||||
"def init_gptcache_map(cache_obj: gptcache.Cache):\n",
|
||||
" global i\n",
|
||||
" cache_path = f'{file_prefix}_{i}.txt'\n",
|
||||
" cache_obj.init(\n",
|
||||
" pre_embedding_func=get_prompt,\n",
|
||||
" data_manager=get_data_manager(data_path=cache_path),\n",
|
||||
" )\n",
|
||||
" i += 1\n",
|
||||
"\n",
|
||||
"langchain.llm_cache = GPTCache(init_gptcache_map)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "9e4ecfd1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 8.6 ms, sys: 3.82 ms, total: 12.4 ms\n",
|
||||
"Wall time: 881 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# The first time, it is not yet in cache, so it should take longer\n",
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "c98bbe3b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 286 µs, sys: 21 µs, total: 307 µs\n",
|
||||
"Wall time: 316 µs\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# The second time it is, so it goes faster\n",
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "502b6076",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's now show an example of similarity caching"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "b3c663bb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import gptcache\n",
|
||||
"from gptcache.processor.pre import get_prompt\n",
|
||||
"from gptcache.manager.factory import get_data_manager\n",
|
||||
"from langchain.cache import GPTCache\n",
|
||||
"from gptcache.manager import get_data_manager, CacheBase, VectorBase\n",
|
||||
"from gptcache import Cache\n",
|
||||
"from gptcache.embedding import Onnx\n",
|
||||
"from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation\n",
|
||||
"\n",
|
||||
"# Avoid multiple caches using the same file, causing different llm model caches to affect each other\n",
|
||||
"i = 0\n",
|
||||
"file_prefix = \"data_map\"\n",
|
||||
"llm_cache = Cache()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def init_gptcache_map(cache_obj: gptcache.Cache):\n",
|
||||
" global i\n",
|
||||
" cache_path = f'{file_prefix}_{i}.txt'\n",
|
||||
" onnx = Onnx()\n",
|
||||
" cache_base = CacheBase('sqlite')\n",
|
||||
" vector_base = VectorBase('faiss', dimension=onnx.dimension)\n",
|
||||
" data_manager = get_data_manager(cache_base, vector_base, max_size=10, clean_size=2)\n",
|
||||
" cache_obj.init(\n",
|
||||
" pre_embedding_func=get_prompt,\n",
|
||||
" embedding_func=onnx.to_embeddings,\n",
|
||||
" data_manager=data_manager,\n",
|
||||
" similarity_evaluation=SearchDistanceEvaluation(),\n",
|
||||
" )\n",
|
||||
" i += 1\n",
|
||||
"\n",
|
||||
"langchain.llm_cache = GPTCache(init_gptcache_map)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "8c273ced",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 1.01 s, sys: 153 ms, total: 1.16 s\n",
|
||||
"Wall time: 2.49 s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# The first time, it is not yet in cache, so it should take longer\n",
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "93e21a5f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 745 ms, sys: 13.2 ms, total: 758 ms\n",
|
||||
"Wall time: 136 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# This is an exact match, so it finds it in the cache\n",
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "c4bb024b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 737 ms, sys: 7.79 ms, total: 745 ms\n",
|
||||
"Wall time: 135 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# This is not an exact match, but semantically within distance so it hits!\n",
|
||||
"llm(\"Tell me joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "934943dc",
|
||||
|
||||
@@ -43,22 +43,18 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Total Tokens: 39\n",
|
||||
"Prompt Tokens: 4\n",
|
||||
"Completion Tokens: 35\n",
|
||||
"Tokens Used: 42\n",
|
||||
"\tPrompt Tokens: 4\n",
|
||||
"\tCompletion Tokens: 38\n",
|
||||
"Successful Requests: 1\n",
|
||||
"Total Cost (USD): $0.0007800000000000001\n"
|
||||
"Total Cost (USD): $0.00084\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with get_openai_callback() as cb:\n",
|
||||
" result = llm(\"Tell me a joke\")\n",
|
||||
" print(f\"Total Tokens: {cb.total_tokens}\")\n",
|
||||
" print(f\"Prompt Tokens: {cb.prompt_tokens}\")\n",
|
||||
" print(f\"Completion Tokens: {cb.completion_tokens}\")\n",
|
||||
" print(f\"Successful Requests: {cb.successful_requests}\")\n",
|
||||
" print(f\"Total Cost (USD): ${cb.total_cost}\")"
|
||||
" print(cb)"
|
||||
]
|
||||
},
|
||||
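The callback also aggregates everything that happens inside the `with` block, so multiple calls (or a whole chain run) can be costed together. A sketch, assuming the same `llm` and `get_openai_callback` as above:

```python
with get_openai_callback() as cb:
    llm("Tell me a joke")
    llm("Tell me another joke")

# Totals cover both calls made inside the block.
print(cb.total_tokens, cb.successful_requests, cb.total_cost)
```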
{
|
||||
|
||||
@@ -186,7 +186,7 @@
|
||||
"source": [
|
||||
"**Number of Tokens:** You can also estimate how many tokens a piece of text will be in that model. This is useful because models have a context length (and cost more for more tokens), which means you need to be aware of how long the text you are passing in is.\n",
|
||||
"\n",
|
||||
"Notice that by default the tokens are estimated using [tiktoken](https://github.com/openai/tiktoken) (except for legacy version <3.8, where a HuggingFace tokenizer is used)"
|
||||
"Notice that by default the tokens are estimated using [tiktoken](https://github.com/openai/tiktoken) (except for legacy version <3.8, where a Hugging Face tokenizer is used)"
|
||||
]
|
||||
},
|
||||
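A one-line sketch of the token estimate itself, using the `get_num_tokens` helper on LangChain LLM wrappers:

```python
# Counts tokens with tiktoken on Python >= 3.8, per the note above.
n = llm.get_num_tokens("what a joke")
print(n)  # the prompt's token count for this model
```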
{
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 41,
|
||||
"id": "3acf0069",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -20,7 +20,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The Seattle Seahawks won the Super Bowl in 2010. Justin Beiber was born in 2010. The final answer: Seattle Seahawks.\n"
|
||||
"The FIFA World Cup is a football tournament that is played every 4 years. The year 1994 was the 44th FIFA World Cup. The final answer: Brazil.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -33,7 +33,7 @@
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=HuggingFaceHub(repo_id=\"google/flan-t5-xl\", model_kwargs={\"temperature\":0, \"max_length\":64}))\n",
|
||||
"\n",
|
||||
"question = \"What NFL team won the Super Bowl in the year Justin Beiber was born?\"\n",
|
||||
"question = \"Who won the FIFA World Cup in the year 1994? \"\n",
|
||||
"\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
@@ -41,7 +41,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ae4559c7",
|
||||
"id": "843a3837",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -63,7 +63,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.8.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
"\n",
|
||||
"And then one optional one:\n",
|
||||
"\n",
|
||||
"- `parse_with_prompt(str) -> Any`: A method which takes in a string (assumed to be the response from a language model) and a prompt (assumed to the prompt that generated such a response) and parses it into some structure. The prompt is largely provided in the event the OutputParser wants to retry or fix the output in some way, and needs information from the prompt to do so.\n",
|
||||
"- `parse_with_prompt(str, PromptValue) -> Any`: A method which takes in a string (assumed to be the response from a language model) and a prompt (assumed to the prompt that generated such a response) and parses it into some structure. The prompt is largely provided in the event the OutputParser wants to retry or fix the output in some way, and needs information from the prompt to do so.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Below we go over the main type of output parser, the `PydanticOutputParser`. See the `examples` folder for other options."
|
||||
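A toy sketch of the two methods' shapes, assuming a custom parser subclassing LangChain's `BaseOutputParser`; the fallback behavior here is illustrative, not the library's:

```python
from langchain.schema import BaseOutputParser, PromptValue

class ShoutingParser(BaseOutputParser):
    """Toy parser that upper-cases whatever the model returns."""

    def parse(self, text: str) -> str:
        return text.upper()

    def parse_with_prompt(self, completion: str, prompt: PromptValue) -> str:
        # The prompt is available here in case a retry or fix-up needs it;
        # this toy version just falls back to plain parse().
        return self.parse(completion)
```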
|
||||
@@ -38,7 +38,7 @@
|
||||
"from langchain.llms import BaseLLM\n",
|
||||
"from langchain.vectorstores.base import VectorStore\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"from langchain.chains.base import Chain\n"
|
||||
"from langchain.chains.base import Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -73,6 +73,7 @@
|
||||
"embeddings_model = OpenAIEmbeddings()\n",
|
||||
"# Initialize the vectorstore as empty\n",
|
||||
"import faiss\n",
|
||||
"\n",
|
||||
"embedding_size = 1536\n",
|
||||
"index = faiss.IndexFlatL2(embedding_size)\n",
|
||||
"vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})"
|
||||
@@ -105,7 +106,7 @@
|
||||
" def from_llm(cls, llm: BaseLLM, verbose: bool = True) -> LLMChain:\n",
|
||||
" \"\"\"Get the response parser.\"\"\"\n",
|
||||
" task_creation_template = (\n",
|
||||
" \"You are an task creation AI that uses the result of an execution agent\"\n",
|
||||
" \"You are a task creation AI that uses the result of an execution agent\"\n",
|
||||
" \" to create new tasks with the following objective: {objective},\"\n",
|
||||
" \" The last completed task has the result: {result}.\"\n",
|
||||
" \" This result was based on this task description: {task_description}.\"\n",
|
||||
@@ -116,7 +117,12 @@
|
||||
" )\n",
|
||||
" prompt = PromptTemplate(\n",
|
||||
" template=task_creation_template,\n",
|
||||
" input_variables=[\"result\", \"task_description\", \"incomplete_tasks\", \"objective\"],\n",
|
||||
" input_variables=[\n",
|
||||
" \"result\",\n",
|
||||
" \"task_description\",\n",
|
||||
" \"incomplete_tasks\",\n",
|
||||
" \"objective\",\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)"
|
||||
]
|
||||
@@ -135,7 +141,7 @@
|
||||
" def from_llm(cls, llm: BaseLLM, verbose: bool = True) -> LLMChain:\n",
|
||||
" \"\"\"Get the response parser.\"\"\"\n",
|
||||
" task_prioritization_template = (\n",
|
||||
" \"You are an task prioritization AI tasked with cleaning the formatting of and reprioritizing\"\n",
|
||||
" \"You are a task prioritization AI tasked with cleaning the formatting of and reprioritizing\"\n",
|
||||
" \" the following tasks: {task_names}.\"\n",
|
||||
" \" Consider the ultimate objective of your team: {objective}.\"\n",
|
||||
" \" Do not remove any tasks. Return the result as a numbered list, like:\"\n",
|
||||
@@ -147,7 +153,7 @@
|
||||
" template=task_prioritization_template,\n",
|
||||
" input_variables=[\"task_names\", \"next_task_id\", \"objective\"],\n",
|
||||
" )\n",
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)\n"
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -173,7 +179,7 @@
|
||||
" template=execution_template,\n",
|
||||
" input_variables=[\"objective\", \"context\", \"task\"],\n",
|
||||
" )\n",
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)\n"
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -193,11 +199,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_next_task(task_creation_chain: LLMChain, result: Dict, task_description: str, task_list: List[str], objective: str) -> List[Dict]:\n",
|
||||
"def get_next_task(\n",
|
||||
" task_creation_chain: LLMChain,\n",
|
||||
" result: Dict,\n",
|
||||
" task_description: str,\n",
|
||||
" task_list: List[str],\n",
|
||||
" objective: str,\n",
|
||||
") -> List[Dict]:\n",
|
||||
" \"\"\"Get the next task.\"\"\"\n",
|
||||
" incomplete_tasks = \", \".join(task_list)\n",
|
||||
" response = task_creation_chain.run(result=result, task_description=task_description, incomplete_tasks=incomplete_tasks, objective=objective)\n",
|
||||
" new_tasks = response.split('\\n')\n",
|
||||
" response = task_creation_chain.run(\n",
|
||||
" result=result,\n",
|
||||
" task_description=task_description,\n",
|
||||
" incomplete_tasks=incomplete_tasks,\n",
|
||||
" objective=objective,\n",
|
||||
" )\n",
|
||||
" new_tasks = response.split(\"\\n\")\n",
|
||||
" return [{\"task_name\": task_name} for task_name in new_tasks if task_name.strip()]"
|
||||
]
|
||||
},
|
||||
@@ -208,12 +225,19 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def prioritize_tasks(task_prioritization_chain: LLMChain, this_task_id: int, task_list: List[Dict], objective: str) -> List[Dict]:\n",
|
||||
"def prioritize_tasks(\n",
|
||||
" task_prioritization_chain: LLMChain,\n",
|
||||
" this_task_id: int,\n",
|
||||
" task_list: List[Dict],\n",
|
||||
" objective: str,\n",
|
||||
") -> List[Dict]:\n",
|
||||
" \"\"\"Prioritize tasks.\"\"\"\n",
|
||||
" task_names = [t[\"task_name\"] for t in task_list]\n",
|
||||
" next_task_id = int(this_task_id) + 1\n",
|
||||
" response = task_prioritization_chain.run(task_names=task_names, next_task_id=next_task_id, objective=objective)\n",
|
||||
" new_tasks = response.split('\\n')\n",
|
||||
" response = task_prioritization_chain.run(\n",
|
||||
" task_names=task_names, next_task_id=next_task_id, objective=objective\n",
|
||||
" )\n",
|
||||
" new_tasks = response.split(\"\\n\")\n",
|
||||
" prioritized_task_list = []\n",
|
||||
" for task_string in new_tasks:\n",
|
||||
" if not task_string.strip():\n",
|
||||
@@ -239,9 +263,12 @@
|
||||
" if not results:\n",
|
||||
" return []\n",
|
||||
" sorted_results, _ = zip(*sorted(results, key=lambda x: x[1], reverse=True))\n",
|
||||
" return [str(item.metadata['task']) for item in sorted_results]\n",
|
||||
" return [str(item.metadata[\"task\"]) for item in sorted_results]\n",
|
||||
"\n",
|
||||
"def execute_task(vectorstore, execution_chain: LLMChain, objective: str, task: str, k: int = 5) -> str:\n",
|
||||
"\n",
|
||||
"def execute_task(\n",
|
||||
" vectorstore, execution_chain: LLMChain, objective: str, task: str, k: int = 5\n",
|
||||
") -> str:\n",
|
||||
" \"\"\"Execute a task.\"\"\"\n",
|
||||
" context = _get_top_tasks(vectorstore, query=objective, k=k)\n",
|
||||
" return execution_chain.run(objective=objective, context=context, task=task)"
|
||||
@@ -254,7 +281,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"class BabyAGI(Chain, BaseModel):\n",
|
||||
" \"\"\"Controller model for the BabyAGI agent.\"\"\"\n",
|
||||
"\n",
|
||||
@@ -265,9 +291,10 @@
|
||||
" task_id_counter: int = Field(1)\n",
|
||||
" vectorstore: VectorStore = Field(init=False)\n",
|
||||
" max_iterations: Optional[int] = None\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" class Config:\n",
|
||||
" \"\"\"Configuration for this pydantic object.\"\"\"\n",
|
||||
"\n",
|
||||
" arbitrary_types_allowed = True\n",
|
||||
"\n",
|
||||
" def add_task(self, task: Dict):\n",
|
||||
@@ -285,18 +312,18 @@
|
||||
" def print_task_result(self, result: str):\n",
|
||||
" print(\"\\033[93m\\033[1m\" + \"\\n*****TASK RESULT*****\\n\" + \"\\033[0m\\033[0m\")\n",
|
||||
" print(result)\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def input_keys(self) -> List[str]:\n",
|
||||
" return [\"objective\"]\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def output_keys(self) -> List[str]:\n",
|
||||
" return []\n",
|
||||
"\n",
|
||||
" def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]:\n",
|
||||
" \"\"\"Run the agent.\"\"\"\n",
|
||||
" objective = inputs['objective']\n",
|
||||
" objective = inputs[\"objective\"]\n",
|
||||
" first_task = inputs.get(\"first_task\", \"Make a todo list\")\n",
|
||||
" self.add_task({\"task_id\": 1, \"task_name\": first_task})\n",
|
||||
" num_iters = 0\n",
|
||||
@@ -325,7 +352,11 @@
|
||||
"\n",
|
||||
" # Step 4: Create new tasks and reprioritize task list\n",
|
||||
" new_tasks = get_next_task(\n",
|
||||
" self.task_creation_chain, result, task[\"task_name\"], [t[\"task_name\"] for t in self.task_list], objective\n",
|
||||
" self.task_creation_chain,\n",
|
||||
" result,\n",
|
||||
" task[\"task_name\"],\n",
|
||||
" [t[\"task_name\"] for t in self.task_list],\n",
|
||||
" objective,\n",
|
||||
" )\n",
|
||||
" for new_task in new_tasks:\n",
|
||||
" self.task_id_counter += 1\n",
|
||||
@@ -333,27 +364,26 @@
|
||||
" self.add_task(new_task)\n",
|
||||
" self.task_list = deque(\n",
|
||||
" prioritize_tasks(\n",
|
||||
" self.task_prioritization_chain, this_task_id, list(self.task_list), objective\n",
|
||||
" self.task_prioritization_chain,\n",
|
||||
" this_task_id,\n",
|
||||
" list(self.task_list),\n",
|
||||
" objective,\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" num_iters += 1\n",
|
||||
" if self.max_iterations is not None and num_iters == self.max_iterations:\n",
|
||||
" print(\"\\033[91m\\033[1m\" + \"\\n*****TASK ENDING*****\\n\" + \"\\033[0m\\033[0m\")\n",
|
||||
" print(\n",
|
||||
" \"\\033[91m\\033[1m\" + \"\\n*****TASK ENDING*****\\n\" + \"\\033[0m\\033[0m\"\n",
|
||||
" )\n",
|
||||
" break\n",
|
||||
" return {}\n",
|
||||
"\n",
|
||||
" @classmethod\n",
|
||||
" def from_llm(\n",
|
||||
" cls,\n",
|
||||
" llm: BaseLLM,\n",
|
||||
" vectorstore: VectorStore,\n",
|
||||
" verbose: bool = False,\n",
|
||||
" **kwargs\n",
|
||||
" cls, llm: BaseLLM, vectorstore: VectorStore, verbose: bool = False, **kwargs\n",
|
||||
" ) -> \"BabyAGI\":\n",
|
||||
" \"\"\"Initialize the BabyAGI Controller.\"\"\"\n",
|
||||
" task_creation_chain = TaskCreationChain.from_llm(\n",
|
||||
" llm, verbose=verbose\n",
|
||||
" )\n",
|
||||
" task_creation_chain = TaskCreationChain.from_llm(llm, verbose=verbose)\n",
|
||||
" task_prioritization_chain = TaskPrioritizationChain.from_llm(\n",
|
||||
" llm, verbose=verbose\n",
|
||||
" )\n",
|
||||
@@ -363,7 +393,7 @@
|
||||
" task_prioritization_chain=task_prioritization_chain,\n",
|
||||
" execution_chain=execution_chain,\n",
|
||||
" vectorstore=vectorstore,\n",
|
||||
" **kwargs\n",
|
||||
" **kwargs,\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
@@ -405,14 +435,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Logging of LLMChains\n",
|
||||
"verbose=False\n",
|
||||
"verbose = False\n",
|
||||
"# If None, will keep on going forever\n",
|
||||
"max_iterations: Optional[int] = 3\n",
|
||||
"baby_agi = BabyAGI.from_llm(\n",
|
||||
" llm=llm,\n",
|
||||
" vectorstore=vectorstore,\n",
|
||||
" verbose=verbose,\n",
|
||||
" max_iterations=max_iterations\n",
|
||||
" llm=llm, vectorstore=vectorstore, verbose=verbose, max_iterations=max_iterations\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
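Running the controller is then a single call keyed on `objective` (matching `input_keys` above):

```python
OBJECTIVE = "Write a weather report for SF today"  # placeholder objective
baby_agi({"objective": OBJECTIVE})  # loops until max_iterations is hit
```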
|
||||
@@ -34,7 +34,7 @@
|
||||
"from langchain.llms import BaseLLM\n",
|
||||
"from langchain.vectorstores.base import VectorStore\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"from langchain.chains.base import Chain\n"
|
||||
"from langchain.chains.base import Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -54,7 +54,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install faiss-cpu > /dev/null%pip install google-search-results > /dev/nullfrom langchain.vectorstores import FAISS\n",
|
||||
"%pip install faiss-cpu > /dev/null\n",
|
||||
"%pip install google-search-results > /dev/null\n",
|
||||
"from langchain.vectorstores import FAISS\n",
|
||||
"from langchain.docstore import InMemoryDocstore"
|
||||
]
|
||||
},
|
||||
@@ -69,6 +71,7 @@
|
||||
"embeddings_model = OpenAIEmbeddings()\n",
|
||||
"# Initialize the vectorstore as empty\n",
|
||||
"import faiss\n",
|
||||
"\n",
|
||||
"embedding_size = 1536\n",
|
||||
"index = faiss.IndexFlatL2(embedding_size)\n",
|
||||
"vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})"
|
||||
@@ -115,7 +118,12 @@
|
||||
" )\n",
|
||||
" prompt = PromptTemplate(\n",
|
||||
" template=task_creation_template,\n",
|
||||
" input_variables=[\"result\", \"task_description\", \"incomplete_tasks\", \"objective\"],\n",
|
||||
" input_variables=[\n",
|
||||
" \"result\",\n",
|
||||
" \"task_description\",\n",
|
||||
" \"incomplete_tasks\",\n",
|
||||
" \"objective\",\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)"
|
||||
]
|
||||
@@ -146,7 +154,7 @@
|
||||
" template=task_prioritization_template,\n",
|
||||
" input_variables=[\"task_names\", \"next_task_id\", \"objective\"],\n",
|
||||
" )\n",
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)\n"
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -158,20 +166,23 @@
|
||||
"source": [
|
||||
"from langchain.agents import ZeroShotAgent, Tool, AgentExecutor\n",
|
||||
"from langchain import OpenAI, SerpAPIWrapper, LLMChain\n",
|
||||
"todo_prompt = PromptTemplate.from_template(\"You are a planner who is an expert at coming up with a todo list for a given objective. Come up with a todo list for this objective: {objective}\")\n",
|
||||
"\n",
|
||||
"todo_prompt = PromptTemplate.from_template(\n",
|
||||
" \"You are a planner who is an expert at coming up with a todo list for a given objective. Come up with a todo list for this objective: {objective}\"\n",
|
||||
")\n",
|
||||
"todo_chain = LLMChain(llm=OpenAI(temperature=0), prompt=todo_prompt)\n",
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"tools = [\n",
|
||||
" Tool(\n",
|
||||
" name = \"Search\",\n",
|
||||
" name=\"Search\",\n",
|
||||
" func=search.run,\n",
|
||||
" description=\"useful for when you need to answer questions about current events\"\n",
|
||||
" description=\"useful for when you need to answer questions about current events\",\n",
|
||||
" ),\n",
|
||||
" Tool(\n",
|
||||
" name = \"TODO\",\n",
|
||||
" name=\"TODO\",\n",
|
||||
" func=todo_chain.run,\n",
|
||||
" description=\"useful for when you need to come up with todo lists. Input: an objective to create a todo list for. Output: a todo list for that objective. Please be very clear what the objective is!\"\n",
|
||||
" )\n",
|
||||
" description=\"useful for when you need to come up with todo lists. Input: an objective to create a todo list for. Output: a todo list for that objective. Please be very clear what the objective is!\",\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -179,10 +190,10 @@
|
||||
"suffix = \"\"\"Question: {task}\n",
|
||||
"{agent_scratchpad}\"\"\"\n",
|
||||
"prompt = ZeroShotAgent.create_prompt(\n",
|
||||
" tools, \n",
|
||||
" prefix=prefix, \n",
|
||||
" suffix=suffix, \n",
|
||||
" input_variables=[\"objective\", \"task\", \"context\",\"agent_scratchpad\"]\n",
|
||||
" tools,\n",
|
||||
" prefix=prefix,\n",
|
||||
" suffix=suffix,\n",
|
||||
" input_variables=[\"objective\", \"task\", \"context\", \"agent_scratchpad\"],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -203,11 +214,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_next_task(task_creation_chain: LLMChain, result: Dict, task_description: str, task_list: List[str], objective: str) -> List[Dict]:\n",
|
||||
"def get_next_task(\n",
|
||||
" task_creation_chain: LLMChain,\n",
|
||||
" result: Dict,\n",
|
||||
" task_description: str,\n",
|
||||
" task_list: List[str],\n",
|
||||
" objective: str,\n",
|
||||
") -> List[Dict]:\n",
|
||||
" \"\"\"Get the next task.\"\"\"\n",
|
||||
" incomplete_tasks = \", \".join(task_list)\n",
|
||||
" response = task_creation_chain.run(result=result, task_description=task_description, incomplete_tasks=incomplete_tasks, objective=objective)\n",
|
||||
" new_tasks = response.split('\\n')\n",
|
||||
" response = task_creation_chain.run(\n",
|
||||
" result=result,\n",
|
||||
" task_description=task_description,\n",
|
||||
" incomplete_tasks=incomplete_tasks,\n",
|
||||
" objective=objective,\n",
|
||||
" )\n",
|
||||
" new_tasks = response.split(\"\\n\")\n",
|
||||
" return [{\"task_name\": task_name} for task_name in new_tasks if task_name.strip()]"
|
||||
]
|
||||
},
|
||||
@@ -218,12 +240,19 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def prioritize_tasks(task_prioritization_chain: LLMChain, this_task_id: int, task_list: List[Dict], objective: str) -> List[Dict]:\n",
|
||||
"def prioritize_tasks(\n",
|
||||
" task_prioritization_chain: LLMChain,\n",
|
||||
" this_task_id: int,\n",
|
||||
" task_list: List[Dict],\n",
|
||||
" objective: str,\n",
|
||||
") -> List[Dict]:\n",
|
||||
" \"\"\"Prioritize tasks.\"\"\"\n",
|
||||
" task_names = [t[\"task_name\"] for t in task_list]\n",
|
||||
" next_task_id = int(this_task_id) + 1\n",
|
||||
" response = task_prioritization_chain.run(task_names=task_names, next_task_id=next_task_id, objective=objective)\n",
|
||||
" new_tasks = response.split('\\n')\n",
|
||||
" response = task_prioritization_chain.run(\n",
|
||||
" task_names=task_names, next_task_id=next_task_id, objective=objective\n",
|
||||
" )\n",
|
||||
" new_tasks = response.split(\"\\n\")\n",
|
||||
" prioritized_task_list = []\n",
|
||||
" for task_string in new_tasks:\n",
|
||||
" if not task_string.strip():\n",
|
||||
@@ -249,9 +278,12 @@
|
||||
" if not results:\n",
|
||||
" return []\n",
|
||||
" sorted_results, _ = zip(*sorted(results, key=lambda x: x[1], reverse=True))\n",
|
||||
" return [str(item.metadata['task']) for item in sorted_results]\n",
|
||||
" return [str(item.metadata[\"task\"]) for item in sorted_results]\n",
|
||||
"\n",
|
||||
"def execute_task(vectorstore, execution_chain: LLMChain, objective: str, task: str, k: int = 5) -> str:\n",
|
||||
"\n",
|
||||
"def execute_task(\n",
|
||||
" vectorstore, execution_chain: LLMChain, objective: str, task: str, k: int = 5\n",
|
||||
") -> str:\n",
|
||||
" \"\"\"Execute a task.\"\"\"\n",
|
||||
" context = _get_top_tasks(vectorstore, query=objective, k=k)\n",
|
||||
" return execution_chain.run(objective=objective, context=context, task=task)"
|
||||
@@ -264,7 +296,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"class BabyAGI(Chain, BaseModel):\n",
|
||||
" \"\"\"Controller model for the BabyAGI agent.\"\"\"\n",
|
||||
"\n",
|
||||
@@ -275,9 +306,10 @@
|
||||
" task_id_counter: int = Field(1)\n",
|
||||
" vectorstore: VectorStore = Field(init=False)\n",
|
||||
" max_iterations: Optional[int] = None\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" class Config:\n",
|
||||
" \"\"\"Configuration for this pydantic object.\"\"\"\n",
|
||||
"\n",
|
||||
" arbitrary_types_allowed = True\n",
|
||||
"\n",
|
||||
" def add_task(self, task: Dict):\n",
|
||||
@@ -295,18 +327,18 @@
|
||||
" def print_task_result(self, result: str):\n",
|
||||
" print(\"\\033[93m\\033[1m\" + \"\\n*****TASK RESULT*****\\n\" + \"\\033[0m\\033[0m\")\n",
|
||||
" print(result)\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def input_keys(self) -> List[str]:\n",
|
||||
" return [\"objective\"]\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def output_keys(self) -> List[str]:\n",
|
||||
" return []\n",
|
||||
"\n",
|
||||
" def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]:\n",
|
||||
" \"\"\"Run the agent.\"\"\"\n",
|
||||
" objective = inputs['objective']\n",
|
||||
" objective = inputs[\"objective\"]\n",
|
||||
" first_task = inputs.get(\"first_task\", \"Make a todo list\")\n",
|
||||
" self.add_task({\"task_id\": 1, \"task_name\": first_task})\n",
|
||||
" num_iters = 0\n",
|
||||
@@ -335,7 +367,11 @@
|
||||
"\n",
|
||||
" # Step 4: Create new tasks and reprioritize task list\n",
|
||||
" new_tasks = get_next_task(\n",
|
||||
" self.task_creation_chain, result, task[\"task_name\"], [t[\"task_name\"] for t in self.task_list], objective\n",
|
||||
" self.task_creation_chain,\n",
|
||||
" result,\n",
|
||||
" task[\"task_name\"],\n",
|
||||
" [t[\"task_name\"] for t in self.task_list],\n",
|
||||
" objective,\n",
|
||||
" )\n",
|
||||
" for new_task in new_tasks:\n",
|
||||
" self.task_id_counter += 1\n",
|
||||
@@ -343,40 +379,41 @@
|
||||
" self.add_task(new_task)\n",
|
||||
" self.task_list = deque(\n",
|
||||
" prioritize_tasks(\n",
|
||||
" self.task_prioritization_chain, this_task_id, list(self.task_list), objective\n",
|
||||
" self.task_prioritization_chain,\n",
|
||||
" this_task_id,\n",
|
||||
" list(self.task_list),\n",
|
||||
" objective,\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" num_iters += 1\n",
|
||||
" if self.max_iterations is not None and num_iters == self.max_iterations:\n",
|
||||
" print(\"\\033[91m\\033[1m\" + \"\\n*****TASK ENDING*****\\n\" + \"\\033[0m\\033[0m\")\n",
|
||||
" print(\n",
|
||||
" \"\\033[91m\\033[1m\" + \"\\n*****TASK ENDING*****\\n\" + \"\\033[0m\\033[0m\"\n",
|
||||
" )\n",
|
||||
" break\n",
|
||||
" return {}\n",
|
||||
"\n",
|
||||
" @classmethod\n",
|
||||
" def from_llm(\n",
|
||||
" cls,\n",
|
||||
" llm: BaseLLM,\n",
|
||||
" vectorstore: VectorStore,\n",
|
||||
" verbose: bool = False,\n",
|
||||
" **kwargs\n",
|
||||
" cls, llm: BaseLLM, vectorstore: VectorStore, verbose: bool = False, **kwargs\n",
|
||||
" ) -> \"BabyAGI\":\n",
|
||||
" \"\"\"Initialize the BabyAGI Controller.\"\"\"\n",
|
||||
" task_creation_chain = TaskCreationChain.from_llm(\n",
|
||||
" llm, verbose=verbose\n",
|
||||
" )\n",
|
||||
" task_creation_chain = TaskCreationChain.from_llm(llm, verbose=verbose)\n",
|
||||
" task_prioritization_chain = TaskPrioritizationChain.from_llm(\n",
|
||||
" llm, verbose=verbose\n",
|
||||
" )\n",
|
||||
" llm_chain = LLMChain(llm=llm, prompt=prompt)\n",
|
||||
" tool_names = [tool.name for tool in tools]\n",
|
||||
" agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names)\n",
|
||||
" agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)\n",
|
||||
" agent_executor = AgentExecutor.from_agent_and_tools(\n",
|
||||
" agent=agent, tools=tools, verbose=True\n",
|
||||
" )\n",
|
||||
" return cls(\n",
|
||||
" task_creation_chain=task_creation_chain,\n",
|
||||
" task_prioritization_chain=task_prioritization_chain,\n",
|
||||
" execution_chain=agent_executor,\n",
|
||||
" vectorstore=vectorstore,\n",
|
||||
" **kwargs\n",
|
||||
" **kwargs,\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
@@ -418,14 +455,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Logging of LLMChains\n",
|
||||
"verbose=False\n",
|
||||
"verbose = False\n",
|
||||
"# If None, will keep on going forever\n",
|
||||
"max_iterations: Optional[int] = 3\n",
|
||||
"baby_agi = BabyAGI.from_llm(\n",
|
||||
" llm=llm,\n",
|
||||
" vectorstore=vectorstore,\n",
|
||||
" verbose=verbose,\n",
|
||||
" max_iterations=max_iterations\n",
|
||||
" llm=llm, vectorstore=vectorstore, verbose=verbose, max_iterations=max_iterations\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
|
||||
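A quick usage sketch, not part of the diff itself: `BabyAGI` subclasses `Chain` with the single input key `objective` (see `input_keys` above), so, assuming the `llm` and `vectorstore` objects the notebook constructs, a run looks like this. The objective string is illustrative.

```python
# Minimal sketch: invoke the BabyAGI controller defined above.
# "Write a weather report for SF today" is an illustrative objective.
baby_agi({"objective": "Write a weather report for SF today"})
```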
docs/use_cases/agents/custom_agent_with_plugin_retrieval.ipynb (new file, +538 lines)
@@ -0,0 +1,538 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ba5f8741",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Custom Agent with PlugIn Retrieval\n",
|
||||
"\n",
|
||||
"This notebook combines two concepts in order to build a custom agent that can interact with AI Plugins:\n",
|
||||
"\n",
|
||||
"1. [Custom Agent with Retrieval](../../modules/agents/agents/custom_agent_with_plugin_retrieval.ipynb): This introduces the concept of retrieving many tools, which is useful when trying to work with arbitrarily many plugins.\n",
|
||||
"2. [Natural Language API Chains](../../modules/chains/examples/openapi.ipynb): This creates Natural Language wrappers around OpenAPI endpoints. This is useful because (1) plugins use OpenAPI endpoints under the hood, (2) wrapping them in an NLAChain allows the router agent to call it more easily.\n",
|
||||
"\n",
|
||||
"The novel idea introduced in this notebook is the idea of using retrieval to select not the tools explicitly, but the set of OpenAPI specs to use. We can then generate tools from those OpenAPI specs. The use case for this is when trying to get agents to use plugins. It may be more efficient to choose plugins first, then the endpoints, rather than the endpoints directly. This is because the plugins may contain more useful information for selection."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fea4812c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up environment\n",
|
||||
"\n",
|
||||
"Do necessary imports, etc."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "9af9734e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser\n",
|
||||
"from langchain.prompts import StringPromptTemplate\n",
|
||||
"from langchain import OpenAI, SerpAPIWrapper, LLMChain\n",
|
||||
"from typing import List, Union\n",
|
||||
"from langchain.schema import AgentAction, AgentFinish\n",
|
||||
"from langchain.agents.agent_toolkits import NLAToolkit\n",
|
||||
"from langchain.tools.plugin import AIPlugin\n",
|
||||
"import re"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2f91d8b4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup LLM"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a1a3b59c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6df0253f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up plugins\n",
|
||||
"\n",
|
||||
"Load and index plugins"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "becda2a1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"urls = [\n",
|
||||
" \"https://datasette.io/.well-known/ai-plugin.json\",\n",
|
||||
" \"https://api.speak.com/.well-known/ai-plugin.json\",\n",
|
||||
" \"https://www.wolframalpha.com/.well-known/ai-plugin.json\",\n",
|
||||
" \"https://www.zapier.com/.well-known/ai-plugin.json\",\n",
|
||||
" \"https://www.klarna.com/.well-known/ai-plugin.json\",\n",
|
||||
" \"https://www.joinmilo.com/.well-known/ai-plugin.json\",\n",
|
||||
" \"https://slack.com/.well-known/ai-plugin.json\",\n",
|
||||
" \"https://schooldigger.com/.well-known/ai-plugin.json\",\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"AI_PLUGINS = [AIPlugin.from_url(url) for url in urls]"
|
||||
]
|
||||
},
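Each `AIPlugin` parses the plugin's `ai-plugin.json` manifest; the two fields this notebook relies on are `name_for_model` and `description_for_model`. A quick sanity check (output depends on the live manifests):

```python
# Inspect one loaded plugin manifest; these fields drive the indexing below
plugin = AI_PLUGINS[0]
print(plugin.name_for_model)
print(plugin.description_for_model[:100])
```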
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "17362717",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tool Retriever\n",
|
||||
"\n",
|
||||
"We will use a vectorstore to create embeddings for each tool description. Then, for an incoming query we can create embeddings for that query and do a similarity search for relevant tools."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "77c4be4b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.vectorstores import FAISS\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.schema import Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9092a158",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.2 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
|
||||
"Attempting to load a Swagger 2.0 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"docs = [\n",
|
||||
" Document(page_content=plugin.description_for_model, \n",
|
||||
" metadata={\"plugin_name\": plugin.name_for_model}\n",
|
||||
" )\n",
|
||||
" for plugin in AI_PLUGINS\n",
|
||||
"]\n",
|
||||
"vector_store = FAISS.from_documents(docs, embeddings)\n",
|
||||
"toolkits_dict = {plugin.name_for_model: \n",
|
||||
" NLAToolkit.from_llm_and_ai_plugin(llm, plugin) \n",
|
||||
" for plugin in AI_PLUGINS}"
|
||||
]
|
||||
},
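Each value in `toolkits_dict` is an `NLAToolkit` whose `nla_tools` attribute holds one natural-language tool per OpenAPI endpoint. For example, using one of the plugin names loaded above (it appears as the prefix of the tool names in the outputs below):

```python
# One toolkit per plugin; each toolkit expands into endpoint-level tools
speak_toolkit = toolkits_dict["Speak"]
print([tool.name for tool in speak_toolkit.nla_tools])
# e.g. ['Speak.translate', 'Speak.explainPhrase', 'Speak.explainTask']
```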
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "735a7566",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = vector_store.as_retriever()\n",
|
||||
"\n",
|
||||
"def get_tools(query):\n",
|
||||
" # Get documents, which contain the Plugins to use\n",
|
||||
" docs = retriever.get_relevant_documents(query)\n",
|
||||
" # Get the toolkits, one for each plugin\n",
|
||||
" tool_kits = [toolkits_dict[d.metadata[\"plugin_name\"]] for d in docs]\n",
|
||||
" # Get the tools: a separate NLAChain for each endpoint\n",
|
||||
" tools = []\n",
|
||||
" for tk in tool_kits:\n",
|
||||
" tools.extend(tk.nla_tools)\n",
|
||||
" return tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7699afd7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can now test this retriever to see if it seems to work."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "425f2886",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['Milo.askMilo',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.search_all_actions',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.preview_a_zap',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.get_configuration_link',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.list_exposed_actions',\n",
|
||||
" 'SchoolDigger_API_V2.0.Autocomplete_GetSchools',\n",
|
||||
" 'SchoolDigger_API_V2.0.Districts_GetAllDistricts2',\n",
|
||||
" 'SchoolDigger_API_V2.0.Districts_GetDistrict2',\n",
|
||||
" 'SchoolDigger_API_V2.0.Rankings_GetSchoolRank2',\n",
|
||||
" 'SchoolDigger_API_V2.0.Rankings_GetRank_District',\n",
|
||||
" 'SchoolDigger_API_V2.0.Schools_GetAllSchools20',\n",
|
||||
" 'SchoolDigger_API_V2.0.Schools_GetSchool20',\n",
|
||||
" 'Speak.translate',\n",
|
||||
" 'Speak.explainPhrase',\n",
|
||||
" 'Speak.explainTask']"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = get_tools(\"What could I do today with my kiddo\")\n",
|
||||
"[t.name for t in tools]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "3aa88768",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['Open_AI_Klarna_product_Api.productsUsingGET',\n",
|
||||
" 'Milo.askMilo',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.search_all_actions',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.preview_a_zap',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.get_configuration_link',\n",
|
||||
" 'Zapier_Natural_Language_Actions_(NLA)_API_(Dynamic)_-_Beta.list_exposed_actions',\n",
|
||||
" 'SchoolDigger_API_V2.0.Autocomplete_GetSchools',\n",
|
||||
" 'SchoolDigger_API_V2.0.Districts_GetAllDistricts2',\n",
|
||||
" 'SchoolDigger_API_V2.0.Districts_GetDistrict2',\n",
|
||||
" 'SchoolDigger_API_V2.0.Rankings_GetSchoolRank2',\n",
|
||||
" 'SchoolDigger_API_V2.0.Rankings_GetRank_District',\n",
|
||||
" 'SchoolDigger_API_V2.0.Schools_GetAllSchools20',\n",
|
||||
" 'SchoolDigger_API_V2.0.Schools_GetSchool20']"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = get_tools(\"what shirts can i buy?\")\n",
|
||||
"[t.name for t in tools]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2e7a075c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prompt Template\n",
|
||||
"\n",
|
||||
"The prompt template is pretty standard, because we're not actually changing that much logic in the actual prompt template, but rather we are just changing how retrieval is done."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "339b1bb8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set up the base template\n",
|
||||
"template = \"\"\"Answer the following questions as best you can, but speaking as a pirate might speak. You have access to the following tools:\n",
|
||||
"\n",
|
||||
"{tools}\n",
|
||||
"\n",
|
||||
"Use the following format:\n",
|
||||
"\n",
|
||||
"Question: the input question you must answer\n",
|
||||
"Thought: you should always think about what to do\n",
|
||||
"Action: the action to take, should be one of [{tool_names}]\n",
|
||||
"Action Input: the input to the action\n",
|
||||
"Observation: the result of the action\n",
|
||||
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
|
||||
"Thought: I now know the final answer\n",
|
||||
"Final Answer: the final answer to the original input question\n",
|
||||
"\n",
|
||||
"Begin! Remember to speak as a pirate when giving your final answer. Use lots of \"Arg\"s\n",
|
||||
"\n",
|
||||
"Question: {input}\n",
|
||||
"{agent_scratchpad}\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1583acdc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The custom prompt template now has the concept of a tools_getter, which we call on the input to select the tools to use"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "fd969d31",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Callable\n",
|
||||
"# Set up a prompt template\n",
|
||||
"class CustomPromptTemplate(StringPromptTemplate):\n",
|
||||
" # The template to use\n",
|
||||
" template: str\n",
|
||||
" ############## NEW ######################\n",
|
||||
" # The list of tools available\n",
|
||||
" tools_getter: Callable\n",
|
||||
" \n",
|
||||
" def format(self, **kwargs) -> str:\n",
|
||||
" # Get the intermediate steps (AgentAction, Observation tuples)\n",
|
||||
" # Format them in a particular way\n",
|
||||
" intermediate_steps = kwargs.pop(\"intermediate_steps\")\n",
|
||||
" thoughts = \"\"\n",
|
||||
" for action, observation in intermediate_steps:\n",
|
||||
" thoughts += action.log\n",
|
||||
" thoughts += f\"\\nObservation: {observation}\\nThought: \"\n",
|
||||
" # Set the agent_scratchpad variable to that value\n",
|
||||
" kwargs[\"agent_scratchpad\"] = thoughts\n",
|
||||
" ############## NEW ######################\n",
|
||||
" tools = self.tools_getter(kwargs[\"input\"])\n",
|
||||
" # Create a tools variable from the list of tools provided\n",
|
||||
" kwargs[\"tools\"] = \"\\n\".join([f\"{tool.name}: {tool.description}\" for tool in tools])\n",
|
||||
" # Create a list of tool names for the tools provided\n",
|
||||
" kwargs[\"tool_names\"] = \", \".join([tool.name for tool in tools])\n",
|
||||
" return self.template.format(**kwargs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "798ef9fb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt = CustomPromptTemplate(\n",
|
||||
" template=template,\n",
|
||||
" tools_getter=get_tools,\n",
|
||||
" # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically\n",
|
||||
" # This includes the `intermediate_steps` variable because that is needed\n",
|
||||
" input_variables=[\"input\", \"intermediate_steps\"]\n",
|
||||
")"
|
||||
]
|
||||
},
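A quick way to see the retrieval-backed template in action is to format it directly; note that this triggers `tools_getter`, i.e. a similarity search against the plugin vector store. The query string is illustrative.

```python
# Render the prompt for a fresh question; no intermediate steps yet
print(prompt.format(input="what shirts can i buy?", intermediate_steps=[]))
```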
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ef3a1af3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Output Parser\n",
|
||||
"\n",
|
||||
"The output parser is unchanged from the previous notebook, since we are not changing anything about the output format."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "7c6fe0d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class CustomOutputParser(AgentOutputParser):\n",
|
||||
" \n",
|
||||
" def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:\n",
|
||||
" # Check if agent should finish\n",
|
||||
" if \"Final Answer:\" in llm_output:\n",
|
||||
" return AgentFinish(\n",
|
||||
" # Return values is generally always a dictionary with a single `output` key\n",
|
||||
" # It is not recommended to try anything else at the moment :)\n",
|
||||
" return_values={\"output\": llm_output.split(\"Final Answer:\")[-1].strip()},\n",
|
||||
" log=llm_output,\n",
|
||||
" )\n",
|
||||
" # Parse out the action and action input\n",
|
||||
" regex = r\"Action\\s*\\d*\\s*:(.*?)\\nAction\\s*\\d*\\s*Input\\s*\\d*\\s*:[\\s]*(.*)\"\n",
|
||||
" match = re.search(regex, llm_output, re.DOTALL)\n",
|
||||
" if not match:\n",
|
||||
" raise ValueError(f\"Could not parse LLM output: `{llm_output}`\")\n",
|
||||
" action = match.group(1).strip()\n",
|
||||
" action_input = match.group(2)\n",
|
||||
" # Return the action and action input\n",
|
||||
" return AgentAction(tool=action, tool_input=action_input.strip(\" \").strip('\"'), log=llm_output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "d278706a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"output_parser = CustomOutputParser()"
|
||||
]
|
||||
},
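To see what the parser does on each branch, it can be exercised on hand-written completions (the strings below are illustrative, not real LLM output):

```python
# An intermediate step: parsed into an AgentAction
print(output_parser.parse(
    "Thought: I should search\nAction: Search\nAction Input: current events"
))
# A terminating step: parsed into an AgentFinish
print(output_parser.parse("Thought: I know the answer\nFinal Answer: Arg, 42"))
```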
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "170587b1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up LLM, stop sequence, and the agent\n",
|
||||
"\n",
|
||||
"Also the same as the previous notebook"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "f9d4c374",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "9b1cc2a2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# LLM chain consisting of the LLM and a prompt\n",
|
||||
"llm_chain = LLMChain(llm=llm, prompt=prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "e4f5092f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tool_names = [tool.name for tool in tools]\n",
|
||||
"agent = LLMSingleActionAgent(\n",
|
||||
" llm_chain=llm_chain, \n",
|
||||
" output_parser=output_parser,\n",
|
||||
" stop=[\"\\nObservation:\"], \n",
|
||||
" allowed_tools=tool_names\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "aa8a5326",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use the Agent\n",
|
||||
"\n",
|
||||
"Now we can use it!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "490604e9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "653b1617",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find a product API\n",
|
||||
"Action: Open_AI_Klarna_product_Api.productsUsingGET\n",
|
||||
"Action Input: shirts\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[36;1m\u001b[1;3mI found 10 shirts from the API response. They range in price from $9.99 to $450.00 and come in a variety of materials, colors, and patterns.\u001b[0m\u001b[32;1m\u001b[1;3m I now know what shirts I can buy\n",
|
||||
"Final Answer: Arg, I found 10 shirts from the API response. They range in price from $9.99 to $450.00 and come in a variety of materials, colors, and patterns.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Arg, I found 10 shirts from the API response. They range in price from $9.99 to $450.00 and come in a variety of materials, colors, and patterns.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.run(\"what shirts can i buy?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2481ee76",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "18784188d7ecd866c0586ac068b02361a6896dc3a29b64f5cc957f09c590acef"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -23,3 +23,4 @@ Query Understanding: GPT-4 processes user queries, grasping the context and extr
|
||||
|
||||
The full tutorial is available below.
|
||||
- [Twitter the-algorithm codebase analysis with Deep Lake](code/twitter-the-algorithm-analysis-deeplake.ipynb): A notebook walking through how to parse GitHub source code and run conversational queries over it.
|
||||
- [LangChain codebase analysis with Deep Lake](code/code-analysis-deeplake.ipynb): A notebook walking through how to analyze and do question answering over THIS code base.
docs/use_cases/code/code-analysis-deeplake.ipynb (new file, +644 lines)
@@ -0,0 +1,644 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Use LangChain, GPT and Deep Lake to work with code base\n",
|
||||
"In this tutorial, we are going to use Langchain + Deep Lake with GPT to analyze the code base of the LangChain itself. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Design"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"1. Prepare data:\n",
|
||||
" 1. Upload all python project files using the `langchain.document_loaders.TextLoader`. We will call these files the **documents**.\n",
|
||||
" 2. Split all documents to chunks using the `langchain.text_splitter.CharacterTextSplitter`.\n",
|
||||
" 3. Embed chunks and upload them into the DeepLake using `langchain.embeddings.openai.OpenAIEmbeddings` and `langchain.vectorstores.DeepLake`\n",
|
||||
"2. Question-Answering:\n",
|
||||
" 1. Build a chain from `langchain.chat_models.ChatOpenAI` and `langchain.chains.ConversationalRetrievalChain`\n",
|
||||
" 2. Prepare questions.\n",
|
||||
" 3. Get answers running the chain.\n"
|
||||
]
|
||||
},
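A condensed sketch of the whole pipeline, assuming the same components the notebook uses below (`TextLoader`, `CharacterTextSplitter`, `OpenAIEmbeddings`, `DeepLake`, `ChatOpenAI`, `ConversationalRetrievalChain`); the repo path and dataset path are illustrative placeholders.

```python
import os
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain

# 1. Prepare data: load .py files, split into chunks, embed, upload
docs = []
for dirpath, _, filenames in os.walk("path/to/repo"):  # illustrative root dir
    for f in filenames:
        if f.endswith(".py"):
            loader = TextLoader(os.path.join(dirpath, f), encoding="utf-8")
            docs.extend(loader.load_and_split())
texts = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0).split_documents(docs)
db = DeepLake.from_documents(
    texts, OpenAIEmbeddings(), dataset_path="hub://<user>/repo-code"  # illustrative
)

# 2. Question-answering over the indexed code
qa = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(model="gpt-3.5-turbo"), retriever=db.as_retriever()
)
result = qa({"question": "What is the class hierarchy?", "chat_history": []})
print(result["answer"])
```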
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Implementation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"### Integration preparations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We need to set up keys for external services and install necessary python libraries."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!python3 -m pip install --upgrade langchain deeplake openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Set up OpenAI embeddings, Deep Lake multi-modal vector store api and authenticate. \n",
|
||||
"\n",
|
||||
"For full documentation of Deep Lake please follow https://docs.activeloop.ai/ and API reference https://docs.deeplake.ai/en/latest/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"os.environ['OPENAI_API_KEY'] = getpass()\n",
|
||||
"# Please manually enter OpenAI Key"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Authenticate into Deep Lake if you want to create your own dataset and publish it. You can get an API key from the platform at [app.activeloop.ai](https://app.activeloop.ai)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"os.environ['ACTIVELOOP_TOKEN'] = getpass.getpass('Activeloop Token:')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prepare data "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Load all repository files. Here we assume this notebook is downloaded as the part of the langchain fork and we work with the python files of the `langchain` repo.\n",
|
||||
"\n",
|
||||
"If you want to use files from different repo, change `root_dir` to the root dir of your repo."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1147\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"root_dir = '../../../..'\n",
|
||||
"\n",
|
||||
"docs = []\n",
|
||||
"for dirpath, dirnames, filenames in os.walk(root_dir):\n",
|
||||
" for file in filenames:\n",
|
||||
" if file.endswith('.py') and '/.venv/' not in dirpath:\n",
|
||||
" try: \n",
|
||||
" loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')\n",
|
||||
" docs.extend(loader.load_and_split())\n",
|
||||
" except Exception as e: \n",
|
||||
" pass\n",
|
||||
"print(f'{len(docs)}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then, chunk the files"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Created a chunk of size 1620, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1213, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1263, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1448, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1120, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1148, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1826, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1260, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1195, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2147, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1410, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1269, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1030, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1046, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1024, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1026, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1285, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1370, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1031, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1999, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1029, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1120, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1033, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1143, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1416, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2482, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1890, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1418, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1848, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1069, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2369, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1045, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1501, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1208, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1950, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1283, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1414, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1304, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1224, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1060, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2461, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1099, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1178, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1449, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1345, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 3359, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2248, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1589, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2104, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1505, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1387, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1215, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1240, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1635, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1075, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2180, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1791, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1555, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1082, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1225, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1287, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1085, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1117, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1966, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1150, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1285, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1150, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1585, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1208, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1267, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1542, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1183, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2424, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1017, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1304, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1379, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1324, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1205, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1056, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1195, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 3608, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1058, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1075, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1217, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1109, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1440, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1046, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1220, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1403, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1241, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1427, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1049, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1580, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1565, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1131, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1425, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1054, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1027, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2559, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1028, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1382, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1888, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1475, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1652, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1891, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1899, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1021, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1085, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1854, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1672, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2537, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1251, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1734, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1642, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1376, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1253, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1642, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1419, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1438, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1427, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1684, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1760, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1157, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2504, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1082, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2268, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1784, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1311, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2972, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1144, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1825, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1508, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2901, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1715, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1062, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1206, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1102, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1184, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1002, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1065, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1871, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1754, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2413, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1771, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2054, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2000, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2061, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1066, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1419, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1368, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1008, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1227, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1745, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2296, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1083, which is longer than the specified 1000\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"3477\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_documents(docs)\n",
|
||||
"print(f\"{len(texts)}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then embed chunks and upload them to the DeepLake.\n",
|
||||
"\n",
|
||||
"This can take several minutes. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', document_model_name='text-embedding-ada-002', query_model_name='text-embedding-ada-002', embedding_ctx_length=8191, openai_api_key=None, openai_organization=None, allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=6)"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.vectorstores import DeepLake\n",
|
||||
"\n",
|
||||
"db = DeepLake.from_documents(texts, embeddings, dataset_path=f\"hub://{DEEPLAKE_ACCOUNT_NAME}/langchain-code\")\n",
|
||||
"db"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Question Answering\n",
|
||||
"First load the dataset, construct the retriever, then construct the Conversational Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/user_name/langchain-code\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"hub://user_name/langchain-code loaded successfully.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Deep Lake Dataset in hub://user_name/langchain-code already exists, loading from the storage\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://user_name/langchain-code', read_only=True, tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (3477, 1536) float32 None \n",
|
||||
" ids text (3477, 1) str None \n",
|
||||
" metadata json (3477, 1) str None \n",
|
||||
" text text (3477, 1) str None \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db = DeepLake(dataset_path=f\"hub://{DEEPLAKE_ACCOUNT_NAME}/langchain-code\", read_only=True, embedding_function=embeddings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = db.as_retriever()\n",
|
||||
"retriever.search_kwargs['distance_metric'] = 'cos'\n",
|
||||
"retriever.search_kwargs['fetch_k'] = 20\n",
|
||||
"retriever.search_kwargs['maximal_marginal_relevance'] = True\n",
|
||||
"retriever.search_kwargs['k'] = 20"
|
||||
]
|
||||
},
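With these settings, each query embeds the question, fetches `fetch_k` candidate chunks by cosine distance, and applies maximal marginal relevance to keep `k` diverse ones. The retriever can also be probed directly, outside any chain; the question below is illustrative.

```python
# Probe the configured retriever on its own
docs = retriever.get_relevant_documents("What does a Chain do?")
print(len(docs))         # up to 20, per search_kwargs['k']
print(docs[0].metadata)  # includes the source file path
```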
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also specify user defined functions using [Deep Lake filters](https://docs.deeplake.ai/en/latest/deeplake.core.dataset.html#deeplake.core.dataset.Dataset.filter)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def filter(x):\n",
|
||||
" # filter based on source code\n",
|
||||
" if 'something' in x['text'].data()['value']:\n",
|
||||
" return False\n",
|
||||
" \n",
|
||||
" # filter based on path e.g. extension\n",
|
||||
" metadata = x['metadata'].data()['value']\n",
|
||||
" return 'only_this' in metadata['source'] or 'also_that' in metadata['source']\n",
|
||||
"\n",
|
||||
"### turn on below for custom filtering\n",
|
||||
"# retriever.search_kwargs['filter'] = filter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||
"\n",
|
||||
"model = ChatOpenAI(model='gpt-3.5-turbo') # 'ada' 'gpt-3.5-turbo' 'gpt-4',\n",
|
||||
"qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"questions = [\n",
|
||||
" \"What is the class hierarchy?\",\n",
|
||||
" # \"What classes are derived from the Chain class?\",\n",
|
||||
" # \"What classes and functions in the ./langchain/utilities/ forlder are not covered by unit tests?\",\n",
|
||||
" # \"What one improvement do you propose in code in relation to the class herarchy for the Chain class?\",\n",
|
||||
"] \n",
|
||||
"chat_history = []\n",
|
||||
"\n",
|
||||
"for question in questions: \n",
|
||||
" result = qa({\"question\": question, \"chat_history\": chat_history})\n",
|
||||
" chat_history.append((question, result['answer']))\n",
|
||||
" print(f\"-> **Question**: {question} \\n\")\n",
|
||||
" print(f\"**Answer**: {result['answer']} \\n\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"-> **Question**: What is the class hierarchy? \n",
|
||||
"\n",
|
||||
"**Answer**: There are several class hierarchies in the provided code, so I'll list a few:\n",
|
||||
"\n",
|
||||
"1. `BaseModel` -> `ConstitutionalPrinciple`: `ConstitutionalPrinciple` is a subclass of `BaseModel`.\n",
|
||||
"2. `BasePromptTemplate` -> `StringPromptTemplate`, `AIMessagePromptTemplate`, `BaseChatPromptTemplate`, `ChatMessagePromptTemplate`, `ChatPromptTemplate`, `HumanMessagePromptTemplate`, `MessagesPlaceholder`, `SystemMessagePromptTemplate`, `FewShotPromptTemplate`, `FewShotPromptWithTemplates`, `Prompt`, `PromptTemplate`: All of these classes are subclasses of `BasePromptTemplate`.\n",
|
||||
"3. `APIChain`, `Chain`, `MapReduceDocumentsChain`, `MapRerankDocumentsChain`, `RefineDocumentsChain`, `StuffDocumentsChain`, `HypotheticalDocumentEmbedder`, `LLMChain`, `LLMBashChain`, `LLMCheckerChain`, `LLMMathChain`, `LLMRequestsChain`, `PALChain`, `QAWithSourcesChain`, `VectorDBQAWithSourcesChain`, `VectorDBQA`, `SQLDatabaseChain`: All of these classes are subclasses of `Chain`.\n",
|
||||
"4. `BaseLoader`: `BaseLoader` is a subclass of `ABC`.\n",
|
||||
"5. `BaseTracer` -> `ChainRun`, `LLMRun`, `SharedTracer`, `ToolRun`, `Tracer`, `TracerException`, `TracerSession`: All of these classes are subclasses of `BaseTracer`.\n",
|
||||
"6. `OpenAIEmbeddings`, `HuggingFaceEmbeddings`, `CohereEmbeddings`, `JinaEmbeddings`, `LlamaCppEmbeddings`, `HuggingFaceHubEmbeddings`, `TensorflowHubEmbeddings`, `SagemakerEndpointEmbeddings`, `HuggingFaceInstructEmbeddings`, `SelfHostedEmbeddings`, `SelfHostedHuggingFaceEmbeddings`, `SelfHostedHuggingFaceInstructEmbeddings`, `FakeEmbeddings`, `AlephAlphaAsymmetricSemanticEmbedding`, `AlephAlphaSymmetricSemanticEmbedding`: All of these classes are subclasses of `BaseLLM`. \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"-> **Question**: What classes are derived from the Chain class? \n",
|
||||
"\n",
|
||||
"**Answer**: There are multiple classes that are derived from the Chain class. Some of them are:\n",
|
||||
"- APIChain\n",
|
||||
"- AnalyzeDocumentChain\n",
|
||||
"- ChatVectorDBChain\n",
|
||||
"- CombineDocumentsChain\n",
|
||||
"- ConstitutionalChain\n",
|
||||
"- ConversationChain\n",
|
||||
"- GraphQAChain\n",
|
||||
"- HypotheticalDocumentEmbedder\n",
|
||||
"- LLMChain\n",
|
||||
"- LLMCheckerChain\n",
|
||||
"- LLMRequestsChain\n",
|
||||
"- LLMSummarizationCheckerChain\n",
|
||||
"- MapReduceChain\n",
|
||||
"- OpenAPIEndpointChain\n",
|
||||
"- PALChain\n",
|
||||
"- QAWithSourcesChain\n",
|
||||
"- RetrievalQA\n",
|
||||
"- RetrievalQAWithSourcesChain\n",
|
||||
"- SequentialChain\n",
|
||||
"- SQLDatabaseChain\n",
|
||||
"- TransformChain\n",
|
||||
"- VectorDBQA\n",
|
||||
"- VectorDBQAWithSourcesChain\n",
|
||||
"\n",
|
||||
"There might be more classes that are derived from the Chain class as it is possible to create custom classes that extend the Chain class.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"-> **Question**: What classes and functions in the ./langchain/utilities/ forlder are not covered by unit tests? \n",
|
||||
"\n",
|
||||
"**Answer**: All classes and functions in the `./langchain/utilities/` folder seem to have unit tests written for them. \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -18,31 +18,13 @@
 ]
 },
 {
+"attachments": {},
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Define OpenAI embeddings, Deep Lake multi-modal vector store api and authenticate. For full documentation of Deep Lake please follow https://docs.activeloop.ai/ and API reference https://docs.deeplake.ai/en/latest/"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 1,
-"metadata": {},
-"outputs": [],
-"source": [
-"import os\n",
-"from langchain.embeddings.openai import OpenAIEmbeddings\n",
-"from langchain.vectorstores import DeepLake\n",
-"\n",
-"os.environ['OPENAI_API_KEY']='sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'\n",
-"embeddings = OpenAIEmbeddings()"
+"Define OpenAI embeddings, Deep Lake multi-modal vector store api and authenticate. For full documentation of Deep Lake please follow [docs](https://docs.activeloop.ai/) and [API reference](https://docs.deeplake.ai/en/latest/).\n",
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Authenticate into Deep Lake if you want to create your own dataset and publish it. You can get an API key from the platform at https://app.activeloop.ai"
+"Authenticate into Deep Lake if you want to create your own dataset and publish it. You can get an API key from the [platform](https://app.activeloop.ai)"
 ]
 },
 {
@@ -51,7 +33,15 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"!activeloop login -t <TOKEN>"
+"import os\n",
+"import getpass\n",
+"\n",
+"from langchain.embeddings.openai import OpenAIEmbeddings\n",
+"from langchain.vectorstores import DeepLake\n",
+"\n",
+"os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')\n",
+"os.environ['ACTIVELOOP_TOKEN'] = getpass.getpass('Activeloop Token:')\n",
+"embeddings = OpenAIEmbeddings()"
 ]
 },
 {
@@ -143,15 +133,35 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/twitter-algorithm\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"hub://davitbun/twitter-algorithm loaded successfully.\n",
|
||||
"\n"
|
||||
]
|
||||
@@ -184,7 +194,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -205,7 +215,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -224,7 +234,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -267,9 +277,14 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"-> **Question**: What does favCountParams do? \n",
|
||||
"\n",
|
||||
"**Answer**: `favCountParams` is an optional ThriftLinearFeatureRankingParams instance that represents the parameters related to the \"favorite count\" feature in the ranking process. It is used to control the weight of the favorite count feature while ranking tweets. The favorite count is the number of times a tweet has been marked as a favorite by users, and it is considered an important signal in the ranking of tweets. By using `favCountParams`, the system can adjust the importance of the favorite count while calculating the final ranking score of a tweet. \n",
|
||||
"\n",
|
||||
"-> **Question**: is it Likes + Bookmarks, or not clear from the code?\n",
|
||||
"\n",
|
||||
"**Answer**: From the provided code, it is not clear if the favorite count metric is determined by the sum of likes and bookmarks. The favorite count is mentioned in the code, but there is no explicit reference to how it is calculated in terms of likes and bookmarks. \n",
|
||||
@@ -423,7 +438,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
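The cells elided by the hunk markers above load the published dataset and query it. A condensed sketch of that flow, assuming the hub://davitbun/twitter-algorithm dataset shown in the output; the chain and model choices below are assumptions for illustration, not part of this diff:

import getpass
import os

from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake

os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')

# Open the published dataset read-only and expose it as a retriever.
embeddings = OpenAIEmbeddings()
db = DeepLake(
    dataset_path="hub://davitbun/twitter-algorithm",
    read_only=True,
    embedding_function=embeddings,
)
retriever = db.as_retriever()

qa = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(temperature=0), retriever=retriever
)
result = qa({"question": "What does favCountParams do?", "chat_history": []})
print(result["answer"])
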
@@ -1,5 +1,5 @@
Evaluation
==============
==========

.. note::
   `Conceptual Guide <https://docs.langchain.com/docs/use-cases/evaluation>`_
@@ -83,7 +83,7 @@ The existing examples we have are:


Other Examples
------------
--------------

In addition, we also have some more generic resources for evaluation.

@@ -14,9 +14,11 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "46bf9205",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Comment this out if you are NOT using tracing\n",
@@ -35,32 +37,12 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "5b2d5e98",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Found cached dataset json (/Users/harrisonchase/.cache/huggingface/datasets/LangChainDatasets___json/LangChainDatasets--agent-search-calculator-8a025c0ce5fb99d2/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3a275586643f4ccfba1a8d54be28c351",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.evaluation.loading import load_dataset\n",
"dataset = load_dataset(\"agent-search-calculator\")"
@@ -77,9 +59,11 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"id": "c18680b5",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.llms import OpenAI\n",
@@ -88,7 +72,7 @@
"from langchain.agents import AgentType\n",
"\n",
"tools = load_tools(['serpapi', 'llm-math'], llm=OpenAI(temperature=0))\n",
"agent = initialize_agent(tools, OpenAI(temperature=0), agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)\n"
"agent = initialize_agent(tools, OpenAI(temperature=0), agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)"
]
},
{
@@ -103,22 +87,14 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"id": "cbcafc92",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'38,630,316 people live in Canada as of 2023.'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"print(dataset[0]['question'])\n",
"agent.run(dataset[0]['question'])"
]
},
@@ -133,18 +109,24 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"id": "bbbbb20e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent.run(dataset[4]['question'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24b4c66e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer')).\n"
]
}
],
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"predictions = []\n",
"predicted_dataset = []\n",
@@ -154,7 +136,8 @@
"    try:\n",
"        predictions.append(agent(new_data))\n",
"        predicted_dataset.append(new_data)\n",
"    except Exception:\n",
"    except Exception as e:\n",
"        predictions.append({\"output\": str(e), **new_data})\n",
"        error_dataset.append(new_data)"
]
},
@@ -169,25 +152,12 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"id": "1d583f03",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'input': 'How many people live in canada as of 2023?',\n",
" 'answer': 'approximately 38,625,801',\n",
" 'output': '38,630,316 people live in Canada as of 2023.',\n",
" 'intermediate_steps': [(AgentAction(tool='Search', tool_input='Population of Canada 2023', log=' I need to find population data\\nAction: Search\\nAction Input: Population of Canada 2023'),\n",
"   '38,630,316')]}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"predictions[0]"
]
@@ -202,9 +172,11 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"id": "d0a9341d",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.evaluation.qa import QAEvalChain"
@@ -212,9 +184,11 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"id": "1612dec1",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"llm = OpenAI(temperature=0)\n",
@@ -232,9 +206,11 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"id": "2a689df5",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"for i, prediction in enumerate(predictions):\n",
@@ -243,21 +219,12 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"id": "27b61215",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Counter({' CORRECT': 4, ' INCORRECT': 6})"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from collections import Counter\n",
"Counter([pred['grade'] for pred in predictions])"
@@ -273,7 +240,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": null,
"id": "47c692a1",
"metadata": {},
"outputs": [],
@@ -283,38 +250,18 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"id": "0ef976c1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'input': \"who is dua lipa's boyfriend? what is his age raised to the .43 power?\",\n",
" 'answer': 'her boyfriend is Romain Gravas. his age raised to the .43 power is approximately 4.9373857399466665',\n",
" 'output': \"Isaac Carew, Dua Lipa's boyfriend, is 36 years old and his age raised to the .43 power is 4.6688516567750975.\",\n",
" 'intermediate_steps': [(AgentAction(tool='Search', tool_input=\"Dua Lipa's boyfriend\", log=' I need to find out who Dua Lipa\\'s boyfriend is and then calculate his age raised to the .43 power\\nAction: Search\\nAction Input: \"Dua Lipa\\'s boyfriend\"'),\n",
"   'Dua and Isaac, a model and a chef, dated on and off from 2013 to 2019. The two first split in early 2017, which is when Dua went on to date LANY ...'),\n",
"  (AgentAction(tool='Search', tool_input='Isaac Carew age', log=' I need to find out Isaac\\'s age\\nAction: Search\\nAction Input: \"Isaac Carew age\"'),\n",
"   '36 years'),\n",
"  (AgentAction(tool='Calculator', tool_input='36^.43', log=' I need to calculate 36 raised to the .43 power\\nAction: Calculator\\nAction Input: 36^.43'),\n",
"   'Answer: 4.6688516567750975\\n')],\n",
" 'grade': ' INCORRECT'}"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"incorrect[0]"
"incorrect"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7710401a",
"id": "3eb948cf-f767-4c87-a12d-275b66eef407",
"metadata": {},
"outputs": [],
"source": []
@@ -336,7 +283,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.11.2"
}
},
"nbformat": 4,

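Pieced together, the grading flow these cells implement looks like the sketch below; it assumes the predictions and predicted_dataset lists built in the try/except loop above and the key names this notebook uses:

from collections import Counter

from langchain.evaluation.qa import QAEvalChain
from langchain.llms import OpenAI

llm = OpenAI(temperature=0)
eval_chain = QAEvalChain.from_llm(llm)

# Grade each agent answer against the dataset's reference answer.
graded_outputs = eval_chain.evaluate(
    predicted_dataset,
    predictions,
    question_key="input",
    answer_key="answer",
    prediction_key="output",
)
for prediction, graded in zip(predictions, graded_outputs):
    prediction["grade"] = graded["text"]

# e.g. Counter({' CORRECT': 4, ' INCORRECT': 6}) in the run shown above
print(Counter(pred["grade"] for pred in predictions))
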
@@ -22,3 +22,4 @@ Specific examples of this include:
- [Baby AGI](agents/baby_agi.ipynb): a notebook implementing [BabyAGI](https://github.com/yoheinakajima/babyagi) by Yohei Nakajima as LLM Chains
- [Baby AGI with Tools](agents/baby_agi_with_agent.ipynb): building off the above notebook, this example substitutes in an agent with tools as the execution tools, allowing it to actually take actions.
- [CAMEL](agents/camel_role_playing.ipynb): an implementation of the CAMEL (Communicative Agents for “Mind” Exploration of Large Scale Language Model Society) paper, where two agents communicate with each other.
- [AI Plugins](agents/custom_agent_with_plugin_retrieval.ipynb): an implementation of an agent that is designed to be able to use all AI Plugins.

@@ -16,6 +16,7 @@ from langchain.agents.agent_toolkits import (
    create_vectorstore_agent,
    create_vectorstore_router_agent,
)
from langchain.agents.autogpt.agent import AutoGPT
from langchain.agents.agent_types import AgentType
from langchain.agents.conversational.base import ConversationalAgent
from langchain.agents.conversational_chat.base import ConversationalChatAgent
@@ -28,6 +29,7 @@ from langchain.agents.self_ask_with_search.base import SelfAskWithSearchChain
from langchain.agents.tools import Tool, tool

__all__ = [
    "AutoGPT",
    "MRKLChain",
    "SelfAskWithSearchChain",
    "ReActChain",

@@ -30,7 +30,7 @@ from langchain.schema import (
from langchain.tools.base import BaseTool
from langchain.utilities.asyncio import asyncio_timeout

logger = logging.getLogger()
logger = logging.getLogger(__name__)


class BaseSingleActionAgent(BaseModel):
@@ -99,6 +99,16 @@ class BaseSingleActionAgent(BaseModel):
                f"Got unsupported early_stopping_method `{early_stopping_method}`"
            )

    @classmethod
    def from_llm_and_tools(
        cls,
        llm: BaseLanguageModel,
        tools: Sequence[BaseTool],
        callback_manager: Optional[BaseCallbackManager] = None,
        **kwargs: Any,
    ) -> BaseSingleActionAgent:
        raise NotImplementedError

    @property
    def _agent_type(self) -> str:
        """Return Identifier of agent type."""
@@ -326,6 +336,7 @@ class Agent(BaseSingleActionAgent):
    """

    llm_chain: LLMChain
    output_parser: AgentOutputParser
    allowed_tools: Optional[List[str]] = None

    def get_allowed_tools(self) -> Optional[List[str]]:
@@ -335,10 +346,6 @@ class Agent(BaseSingleActionAgent):
    def return_values(self) -> List[str]:
        return ["output"]

    @abstractmethod
    def _extract_tool_and_input(self, text: str) -> Optional[Tuple[str, str]]:
        """Extract tool and tool input from llm output."""

    def _fix_text(self, text: str) -> str:
        """Fix the text."""
        raise ValueError("fix_text not implemented for this agent.")
@@ -360,32 +367,6 @@ class Agent(BaseSingleActionAgent):
        thoughts += f"\n{self.observation_prefix}{observation}\n{self.llm_prefix}"
        return thoughts

    def _get_next_action(self, full_inputs: Dict[str, str]) -> AgentAction:
        full_output = self.llm_chain.predict(**full_inputs)
        parsed_output = self._extract_tool_and_input(full_output)
        while parsed_output is None:
            full_output = self._fix_text(full_output)
            full_inputs["agent_scratchpad"] += full_output
            output = self.llm_chain.predict(**full_inputs)
            full_output += output
            parsed_output = self._extract_tool_and_input(full_output)
        return AgentAction(
            tool=parsed_output[0], tool_input=parsed_output[1], log=full_output
        )

    async def _aget_next_action(self, full_inputs: Dict[str, str]) -> AgentAction:
        full_output = await self.llm_chain.apredict(**full_inputs)
        parsed_output = self._extract_tool_and_input(full_output)
        while parsed_output is None:
            full_output = self._fix_text(full_output)
            full_inputs["agent_scratchpad"] += full_output
            output = await self.llm_chain.apredict(**full_inputs)
            full_output += output
            parsed_output = self._extract_tool_and_input(full_output)
        return AgentAction(
            tool=parsed_output[0], tool_input=parsed_output[1], log=full_output
        )

    def plan(
        self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any
    ) -> Union[AgentAction, AgentFinish]:
@@ -400,10 +381,8 @@ class Agent(BaseSingleActionAgent):
            Action specifying what tool to use.
        """
        full_inputs = self.get_full_inputs(intermediate_steps, **kwargs)
        action = self._get_next_action(full_inputs)
        if action.tool == self.finish_tool_name:
            return AgentFinish({"output": action.tool_input}, action.log)
        return action
        full_output = self.llm_chain.predict(**full_inputs)
        return self.output_parser.parse(full_output)

    async def aplan(
        self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any
@@ -419,10 +398,8 @@ class Agent(BaseSingleActionAgent):
            Action specifying what tool to use.
        """
        full_inputs = self.get_full_inputs(intermediate_steps, **kwargs)
        action = await self._aget_next_action(full_inputs)
        if action.tool == self.finish_tool_name:
            return AgentFinish({"output": action.tool_input}, action.log)
        return action
        full_output = await self.llm_chain.apredict(**full_inputs)
        return self.output_parser.parse(full_output)

    def get_full_inputs(
        self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any
@@ -433,11 +410,6 @@ class Agent(BaseSingleActionAgent):
        full_inputs = {**kwargs, **new_inputs}
        return full_inputs

    @property
    def finish_tool_name(self) -> str:
        """Name of the tool to use to finish the chain."""
        return "Final Answer"

    @property
    def input_keys(self) -> List[str]:
        """Return the input keys.
@@ -484,12 +456,18 @@ class Agent(BaseSingleActionAgent):
        """Validate that appropriate tools are passed in."""
        pass

    @classmethod
    @abstractmethod
    def _get_default_output_parser(cls, **kwargs: Any) -> AgentOutputParser:
        """Get default output parser for this class."""

    @classmethod
    def from_llm_and_tools(
        cls,
        llm: BaseLanguageModel,
        tools: Sequence[BaseTool],
        callback_manager: Optional[BaseCallbackManager] = None,
        output_parser: Optional[AgentOutputParser] = None,
        **kwargs: Any,
    ) -> Agent:
        """Construct an agent from an LLM and tools."""
@@ -500,7 +478,13 @@ class Agent(BaseSingleActionAgent):
            callback_manager=callback_manager,
        )
        tool_names = [tool.name for tool in tools]
        return cls(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
        _output_parser = output_parser or cls._get_default_output_parser()
        return cls(
            llm_chain=llm_chain,
            allowed_tools=tool_names,
            output_parser=_output_parser,
            **kwargs,
        )

    def return_stopped_response(
        self,
@@ -530,14 +514,10 @@ class Agent(BaseSingleActionAgent):
        full_inputs = {**kwargs, **new_inputs}
        full_output = self.llm_chain.predict(**full_inputs)
        # We try to extract a final answer
        parsed_output = self._extract_tool_and_input(full_output)
        if parsed_output is None:
            # If we cannot extract, we just return the full output
            return AgentFinish({"output": full_output}, full_output)
        tool, tool_input = parsed_output
        if tool == self.finish_tool_name:
        parsed_output = self.output_parser.parse(full_output)
        if isinstance(parsed_output, AgentFinish):
            # If we can extract, we send the correct stuff
            return AgentFinish({"output": tool_input}, full_output)
            return parsed_output
        else:
            # If we can extract, but the tool is not the final tool,
            # we just return the full output

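The hunks above replace the abstract _extract_tool_and_input hook with a pluggable AgentOutputParser, so plan/aplan reduce to one LLM call plus output_parser.parse. A rough sketch of a custom parser against this new seam; the prefix and regex below mirror the conversational agent later in this diff and are illustrative, not part of this file:

import re
from typing import Union

from langchain.agents.agent import AgentOutputParser
from langchain.schema import AgentAction, AgentFinish


class SimpleOutputParser(AgentOutputParser):
    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
        # A final answer ends the run instead of dispatching a tool.
        if "Final Answer:" in text:
            return AgentFinish(
                {"output": text.split("Final Answer:")[-1].strip()}, text
            )
        match = re.search(r"Action: (.*?)[\n]*Action Input: (.*)", text, re.DOTALL)
        if not match:
            raise ValueError(f"Could not parse LLM output: `{text}`")
        return AgentAction(match.group(1).strip(), match.group(2).strip(), text)
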
@@ -1,5 +1,5 @@
"""Toolkit for interacting with APIs using natural language."""

from __future__ import annotations

from typing import Any, List, Optional, Sequence

@@ -11,6 +11,7 @@ from langchain.llms.base import BaseLLM
from langchain.requests import Requests
from langchain.tools.base import BaseTool
from langchain.tools.openapi.utils.openapi_utils import OpenAPISpec
from langchain.tools.plugin import AIPlugin


class NLAToolkit(BaseToolkit):
@@ -23,19 +24,18 @@ class NLAToolkit(BaseToolkit):
        """Get the tools for all the API operations."""
        return list(self.nla_tools)

    @classmethod
    def from_llm_and_spec(
        cls,
    @staticmethod
    def _get_http_operation_tools(
        llm: BaseLLM,
        spec: OpenAPISpec,
        requests: Optional[Requests] = None,
        verbose: bool = False,
        **kwargs: Any
    ) -> "NLAToolkit":
        """Instantiate the toolkit by creating tools for each operation."""
        http_operation_tools: List[NLATool] = []
        **kwargs: Any,
    ) -> List[NLATool]:
        """Get the tools for all the API operations."""
        if not spec.paths:
            return cls(nla_tools=http_operation_tools)
            return []
        http_operation_tools = []
        for path in spec.paths:
            for method in spec.get_methods_for_path(path):
                endpoint_tool = NLATool.from_llm_and_method(
@@ -45,9 +45,24 @@ class NLAToolkit(BaseToolkit):
                    spec=spec,
                    requests=requests,
                    verbose=verbose,
                    **kwargs
                    **kwargs,
                )
                http_operation_tools.append(endpoint_tool)
        return http_operation_tools

    @classmethod
    def from_llm_and_spec(
        cls,
        llm: BaseLLM,
        spec: OpenAPISpec,
        requests: Optional[Requests] = None,
        verbose: bool = False,
        **kwargs: Any,
    ) -> NLAToolkit:
        """Instantiate the toolkit by creating tools for each operation."""
        http_operation_tools = cls._get_http_operation_tools(
            llm=llm, spec=spec, requests=requests, verbose=verbose, **kwargs
        )
        return cls(nla_tools=http_operation_tools)

    @classmethod
@@ -57,10 +72,45 @@ class NLAToolkit(BaseToolkit):
        open_api_url: str,
        requests: Optional[Requests] = None,
        verbose: bool = False,
        **kwargs: Any
    ) -> "NLAToolkit":
        **kwargs: Any,
    ) -> NLAToolkit:
        """Instantiate the toolkit from an OpenAPI Spec URL"""
        spec = OpenAPISpec.from_url(open_api_url)
        return cls.from_llm_and_spec(
            llm=llm, spec=spec, requests=requests, verbose=verbose, **kwargs
        )

    @classmethod
    def from_llm_and_ai_plugin(
        cls,
        llm: BaseLLM,
        ai_plugin: AIPlugin,
        requests: Optional[Requests] = None,
        verbose: bool = False,
        **kwargs: Any,
    ) -> NLAToolkit:
        """Instantiate the toolkit from an AI Plugin definition"""
        spec = OpenAPISpec.from_url(ai_plugin.api.url)
        # TODO: Merge optional Auth information with the `requests` argument
        return cls.from_llm_and_spec(
            llm=llm,
            spec=spec,
            requests=requests,
            verbose=verbose,
            **kwargs,
        )

    @classmethod
    def from_llm_and_ai_plugin_url(
        cls,
        llm: BaseLLM,
        ai_plugin_url: str,
        requests: Optional[Requests] = None,
        verbose: bool = False,
        **kwargs: Any,
    ) -> NLAToolkit:
        """Instantiate the toolkit from an AI Plugin URL"""
        plugin = AIPlugin.from_url(ai_plugin_url)
        return cls.from_llm_and_ai_plugin(
            llm=llm, ai_plugin=plugin, requests=requests, verbose=verbose, **kwargs
        )

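A short usage sketch of these constructors; the spec URL below is illustrative, and any reachable OpenAPI spec (or AI plugin manifest, via from_llm_and_ai_plugin_url) should work the same way:

from langchain.agents.agent_toolkits import NLAToolkit
from langchain.llms import OpenAI

llm = OpenAI(temperature=0)
toolkit = NLAToolkit.from_llm_and_url(llm, "https://api.speak.com/openapi.yaml")

# One NLATool per documented HTTP operation in the spec.
print([tool.name for tool in toolkit.get_tools()])
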
@@ -22,6 +22,9 @@ def create_openapi_agent(
    suffix: str = OPENAPI_SUFFIX,
    format_instructions: str = FORMAT_INSTRUCTIONS,
    input_variables: Optional[List[str]] = None,
    max_iterations: Optional[int] = 15,
    max_execution_time: Optional[float] = None,
    early_stopping_method: str = "force",
    verbose: bool = False,
    return_intermediate_steps: bool = False,
    **kwargs: Any,
@@ -47,4 +50,7 @@ def create_openapi_agent(
        tools=toolkit.get_tools(),
        verbose=verbose,
        return_intermediate_steps=return_intermediate_steps,
        max_iterations=max_iterations,
        max_execution_time=max_execution_time,
        early_stopping_method=early_stopping_method,
    )

@@ -14,9 +14,13 @@ from langchain.agents.agent_toolkits.openapi.planner_prompt import (
    API_PLANNER_PROMPT,
    API_PLANNER_TOOL_DESCRIPTION,
    API_PLANNER_TOOL_NAME,
    PARSING_DELETE_PROMPT,
    PARSING_GET_PROMPT,
    PARSING_PATCH_PROMPT,
    PARSING_POST_PROMPT,
    REQUESTS_DELETE_TOOL_DESCRIPTION,
    REQUESTS_GET_TOOL_DESCRIPTION,
    REQUESTS_PATCH_TOOL_DESCRIPTION,
    REQUESTS_POST_TOOL_DESCRIPTION,
)
from langchain.agents.agent_toolkits.openapi.spec import ReducedOpenAPISpec
@@ -90,6 +94,56 @@ class RequestsPostToolWithParsing(BaseRequestsTool, BaseTool):
        raise NotImplementedError()


class RequestsPatchToolWithParsing(BaseRequestsTool, BaseTool):
    name = "requests_patch"
    description = REQUESTS_PATCH_TOOL_DESCRIPTION

    response_length: Optional[int] = MAX_RESPONSE_LENGTH
    llm_chain = LLMChain(
        llm=OpenAI(),
        prompt=PARSING_PATCH_PROMPT,
    )

    def _run(self, text: str) -> str:
        try:
            data = json.loads(text)
        except json.JSONDecodeError as e:
            raise e
        response = self.requests_wrapper.patch(data["url"], data["data"])
        response = response[: self.response_length]
        return self.llm_chain.predict(
            response=response, instructions=data["output_instructions"]
        ).strip()

    async def _arun(self, text: str) -> str:
        raise NotImplementedError()


class RequestsDeleteToolWithParsing(BaseRequestsTool, BaseTool):
    name = "requests_delete"
    description = REQUESTS_DELETE_TOOL_DESCRIPTION

    response_length: Optional[int] = MAX_RESPONSE_LENGTH
    llm_chain = LLMChain(
        llm=OpenAI(),
        prompt=PARSING_DELETE_PROMPT,
    )

    def _run(self, text: str) -> str:
        try:
            data = json.loads(text)
        except json.JSONDecodeError as e:
            raise e
        response = self.requests_wrapper.delete(data["url"])
        response = response[: self.response_length]
        return self.llm_chain.predict(
            response=response, instructions=data["output_instructions"]
        ).strip()

    async def _arun(self, text: str) -> str:
        raise NotImplementedError()


#
# Orchestrator, planner, controller.
#
@@ -157,7 +211,7 @@ def _create_api_controller_tool(
    base_url = api_spec.servers[0]["url"]  # TODO: do better.

    def _create_and_run_api_controller_agent(plan_str: str) -> str:
        pattern = r"\b(GET|POST)\s+(/\S+)*"
        pattern = r"\b(GET|POST|PATCH|DELETE)\s+(/\S+)*"
        matches = re.findall(pattern, plan_str)
        endpoint_names = [
            "{method} {route}".format(method=method, route=route.split("?")[0])

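For reference, the JSON payload shape the new PATCH tool's description asks the controller to produce looks like this; the URL and fields are hypothetical:

import json

payload = json.dumps({
    "url": "https://example.com/users/42/cart",
    "data": {"item_id": "lamp-123", "quantity": 1},
    "output_instructions": "Extract the id of the updated cart",
})
# A planner-built instance would then run it, roughly:
# RequestsPatchToolWithParsing(requests_wrapper=requests_wrapper).run(payload)
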
@@ -2,13 +2,16 @@

from langchain.prompts.prompt import PromptTemplate


API_PLANNER_PROMPT = """You are a planner that plans a sequence of API calls to assist with user queries against an API.

You should:
1) evaluate whether the user query can be solved by the API documented below. If no, say why.
2) if yes, generate a plan of API calls and say what they are doing step by step.
3) If the plan includes a DELETE call, you should always return an ask from the User for authorization first unless the User has specifically asked to delete something.

You should only use API endpoints documented below ("Endpoints you can use:").
You can only use the DELETE tool if the User has specifically asked to delete something. Otherwise, you should return a request for authorization from the User first.
Some user queries can be resolved in a single API call, but some will require several API calls.
The plan will be passed to an API controller that can format it into web requests and return the responses.

@@ -20,15 +23,31 @@ Fake endpoints for examples:
GET /user to get information about the current user
GET /products/search search across products
POST /users/{{id}}/cart to add products to a user's cart
PATCH /users/{{id}}/cart to update a user's cart
DELETE /users/{{id}}/cart to delete a user's cart

User query: tell me a joke
Plan: Sorry, this API's domain is shopping, not comedy.

User query: I want to buy a couch
Plan: 1. GET /products/search to search for couches
Plan: 1. GET /products with a query param to search for couches
2. GET /user to find the user's id
3. POST /users/{{id}}/cart to add a couch to the user's cart

User query: I want to add a lamp to my cart
Plan: 1. GET /products with a query param to search for lamps
2. GET /user to find the user's id
3. PATCH /users/{{id}}/cart to add a lamp to the user's cart

User query: I want to delete my cart
Plan: 1. GET /user to find the user's id
2. DELETE required. Did user specify DELETE or previously authorize? Yes, proceed.
3. DELETE /users/{{id}}/cart to delete the user's cart

User query: I want to start a new cart
Plan: 1. GET /user to find the user's id
2. DELETE required. Did user specify DELETE or previously authorize? No, ask for authorization.
3. Are you sure you want to delete your cart?
----

Here are endpoints you can use. Do not reference any of the endpoints above.
@@ -83,6 +102,7 @@ API_CONTROLLER_TOOL_DESCRIPTION = f"Can be used to execute a plan of API calls,
API_ORCHESTRATOR_PROMPT = """You are an agent that assists with user queries against an API, things like querying information or creating resources.
Some user queries can be resolved in a single API call, particularly if you can find appropriate params from the OpenAPI spec; though some require several API calls.
You should always plan your API calls first, and then execute the plan second.
If the plan includes a DELETE call, be sure to ask the User for authorization first unless the User has specifically asked to delete something.
You should never return information without executing the api_controller tool.


@@ -145,7 +165,7 @@ REQUESTS_POST_TOOL_DESCRIPTION = """Use this when you want to POST to a website.
Input to the tool should be a json string with 3 keys: "url", "data", and "output_instructions".
The value of "url" should be a string.
The value of "data" should be a dictionary of key-value pairs you want to POST to the url.
The value of "summary_instructions" should be instructions on what information to extract from the response, for example the id(s) for a resource(s) that the POST request creates.
The value of "output_instructions" should be instructions on what information to extract from the response, for example the id(s) for a resource(s) that the POST request creates.
Always use double quotes for strings in the json string."""

PARSING_POST_PROMPT = PromptTemplate(
@@ -157,3 +177,37 @@ If the response indicates an error, you should instead output a summary of the e
Output:""",
    input_variables=["response", "instructions"],
)

REQUESTS_PATCH_TOOL_DESCRIPTION = """Use this when you want to PATCH content on a website.
Input to the tool should be a json string with 3 keys: "url", "data", and "output_instructions".
The value of "url" should be a string.
The value of "data" should be a dictionary of key-value pairs of the body params available in the OpenAPI spec you want to PATCH the content with at the url.
The value of "output_instructions" should be instructions on what information to extract from the response, for example the id(s) for a resource(s) that the PATCH request updates.
Always use double quotes for strings in the json string."""

PARSING_PATCH_PROMPT = PromptTemplate(
    template="""Here is an API response:\n\n{response}\n\n====
Your task is to extract some information according to these instructions: {instructions}
When working with API objects, you should usually use ids over names. Do not return any ids or names that are not in the response.
If the response indicates an error, you should instead output a summary of the error.

Output:""",
    input_variables=["response", "instructions"],
)

REQUESTS_DELETE_TOOL_DESCRIPTION = """ONLY USE THIS TOOL WHEN THE USER HAS SPECIFICALLY REQUESTED TO DELETE CONTENT FROM A WEBSITE.
Input to the tool should be a json string with 2 keys: "url", and "output_instructions".
The value of "url" should be a string.
The value of "output_instructions" should be instructions on what information to extract from the response, for example the id(s) for a resource(s) that the DELETE request deletes.
Always use double quotes for strings in the json string.
ONLY USE THIS TOOL IF THE USER HAS SPECIFICALLY REQUESTED TO DELETE SOMETHING."""

PARSING_DELETE_PROMPT = PromptTemplate(
    template="""Here is an API response:\n\n{response}\n\n====
Your task is to extract some information according to these instructions: {instructions}
When working with API objects, you should usually use ids over names. Do not return any ids or names that are not in the response.
If the response indicates an error, you should instead output a summary of the error.

Output:""",
    input_variables=["response", "instructions"],
)

@@ -68,12 +68,12 @@ def reduce_openapi_spec(spec: dict, dereference: bool = True) -> ReducedOpenAPIS
    I was hoping https://openapi.tools/ would have some useful bits
    to this end, but doesn't seem so.
    """
    # 1. Consider only get, post endpoints.
    # 1. Consider only get, post, patch, delete endpoints.
    endpoints = [
        (f"{operation_name.upper()} {route}", docs.get("description"), docs)
        for route, operation in spec["paths"].items()
        for operation_name, docs in operation.items()
        if operation_name in ["get", "post"]
        if operation_name in ["get", "post", "patch", "delete"]
    ]

    # 2. Replace any refs so that complete docs are retrieved.

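A quick illustration of what the widened filter now keeps, on a toy spec dict (paths and descriptions are made up):

spec = {
    "paths": {
        "/users/{id}/cart": {
            "get": {"description": "Read a cart"},
            "patch": {"description": "Update a cart"},
            "delete": {"description": "Delete a cart"},
            "head": {"description": "Still filtered out"},
        }
    }
}
endpoints = [
    (f"{operation_name.upper()} {route}", docs.get("description"))
    for route, operation in spec["paths"].items()
    for operation_name, docs in operation.items()
    if operation_name in ["get", "post", "patch", "delete"]
]
print(endpoints)  # GET, PATCH and DELETE survive; HEAD does not
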
@@ -20,6 +20,7 @@ def create_pandas_dataframe_agent(
    verbose: bool = False,
    return_intermediate_steps: bool = False,
    max_iterations: Optional[int] = 15,
    max_execution_time: Optional[float] = None,
    early_stopping_method: str = "force",
    **kwargs: Any,
) -> AgentExecutor:
@@ -48,5 +49,6 @@ def create_pandas_dataframe_agent(
        verbose=verbose,
        return_intermediate_steps=return_intermediate_steps,
        max_iterations=max_iterations,
        max_execution_time=max_execution_time,
        early_stopping_method=early_stopping_method,
    )

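A usage sketch of the new execution limits on the pandas agent; the DataFrame and question are illustrative, and an OpenAI API key is assumed:

import pandas as pd

from langchain.agents import create_pandas_dataframe_agent
from langchain.llms import OpenAI

df = pd.DataFrame({"country": ["Canada", "France"], "population_m": [38.6, 67.8]})
agent = create_pandas_dataframe_agent(
    OpenAI(temperature=0),
    df,
    max_iterations=5,           # cap on agent steps
    max_execution_time=30.0,    # new wall-clock budget, in seconds
    early_stopping_method="force",
    verbose=True,
)
agent.run("Which country has the larger population?")
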
@@ -21,6 +21,7 @@ def create_sql_agent(
    input_variables: Optional[List[str]] = None,
    top_k: int = 10,
    max_iterations: Optional[int] = 15,
    max_execution_time: Optional[float] = None,
    early_stopping_method: str = "force",
    verbose: bool = False,
    **kwargs: Any,
@@ -47,5 +48,6 @@ def create_sql_agent(
        tools=tools,
        verbose=verbose,
        max_iterations=max_iterations,
        max_execution_time=max_execution_time,
        early_stopping_method=early_stopping_method,
    )

0 langchain/agents/autogpt/__init__.py Normal file
110 langchain/agents/autogpt/agent.py Normal file
@@ -0,0 +1,110 @@
from __future__ import annotations

from typing import List, Optional

from langchain.agents.autogpt.output_parser import (
    AutoGPTOutputParser,
    BaseAutoGPTOutputParser,
)
from langchain.agents.autogpt.prompt import AutoGPTPrompt
from langchain.agents.autogpt.prompt_generator import FINISH_NAME
from langchain.chains.llm import LLMChain
from langchain.chat_models.base import BaseChatModel
from langchain.schema import (
    AIMessage,
    BaseMessage,
    Document,
    HumanMessage,
    SystemMessage,
)
from langchain.tools.base import BaseTool
from langchain.vectorstores.base import VectorStoreRetriever


class AutoGPT:
    """Agent class for interacting with Auto-GPT."""

    def __init__(
        self,
        ai_name: str,
        memory: VectorStoreRetriever,
        chain: LLMChain,
        output_parser: BaseAutoGPTOutputParser,
        tools: List[BaseTool],
    ):
        self.ai_name = ai_name
        self.memory = memory
        self.full_message_history: List[BaseMessage] = []
        self.next_action_count = 0
        self.chain = chain
        self.output_parser = output_parser
        self.tools = tools

    @classmethod
    def from_llm_and_tools(
        cls,
        ai_name: str,
        ai_role: str,
        memory: VectorStoreRetriever,
        tools: List[BaseTool],
        llm: BaseChatModel,
        output_parser: Optional[BaseAutoGPTOutputParser] = None,
    ) -> AutoGPT:
        prompt = AutoGPTPrompt(
            ai_name=ai_name,
            ai_role=ai_role,
            tools=tools,
            input_variables=["memory", "messages", "goals", "user_input"],
            token_counter=llm.get_num_tokens,
        )
        chain = LLMChain(llm=llm, prompt=prompt)
        return cls(
            ai_name, memory, chain, output_parser or AutoGPTOutputParser(), tools
        )

    def run(self, goals: List[str]) -> str:
        user_input = (
            "Determine which next command to use, "
            "and respond using the format specified above:"
        )
        # Interaction Loop
        loop_count = 0
        while True:
            # Discontinue if continuous limit is reached
            loop_count += 1

            # Send message to AI, get response
            assistant_reply = self.chain.run(
                goals=goals,
                messages=self.full_message_history,
                memory=self.memory,
                user_input=user_input,
            )

            # Print Assistant thoughts
            print(assistant_reply)
            self.full_message_history.append(HumanMessage(content=user_input))
            self.full_message_history.append(AIMessage(content=assistant_reply))

            # Get command name and arguments
            action = self.output_parser.parse(assistant_reply)
            tools = {t.name: t for t in self.tools}
            if action.name == FINISH_NAME:
                return action.args["response"]
            if action.name in tools:
                tool = tools[action.name]
                observation = tool.run(action.args)
                result = f"Command {tool.name} returned: {observation}"
            else:
                result = (
                    f"Unknown command '{action.name}'. "
                    f"Please refer to the 'COMMANDS' list for available "
                    f"commands and only respond in the specified JSON format."
                )

            memory_to_add = (
                f"Assistant Reply: {assistant_reply} " f"\nResult: {result} "
            )

            self.memory.add_documents([Document(page_content=memory_to_add)])
            self.full_message_history.append(SystemMessage(content=result))

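A usage sketch for the new class, assuming a FAISS-backed retriever for memory and a single search tool; the tool choice, names, and vector width (1536, matching OpenAI embeddings) are assumptions rather than part of this file, and SerpAPI plus OpenAI keys are required:

import faiss

from langchain.agents import AutoGPT, Tool
from langchain.chat_models import ChatOpenAI
from langchain.docstore import InMemoryDocstore
from langchain.embeddings import OpenAIEmbeddings
from langchain.utilities import SerpAPIWrapper
from langchain.vectorstores import FAISS

# Vector-store-backed memory for the agent's past replies and results.
embeddings = OpenAIEmbeddings()
index = faiss.IndexFlatL2(1536)
vectorstore = FAISS(embeddings.embed_query, index, InMemoryDocstore({}), {})

search = SerpAPIWrapper()
tools = [Tool(name="search", func=search.run, description="useful for current events")]

agent = AutoGPT.from_llm_and_tools(
    ai_name="Tom",
    ai_role="Assistant",
    tools=tools,
    llm=ChatOpenAI(temperature=0),
    memory=vectorstore.as_retriever(),
)
agent.run(["write a weather report for SF today"])
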
30 langchain/agents/autogpt/memory.py Normal file
@@ -0,0 +1,30 @@
from typing import Any, Dict, List

from pydantic import Field

from langchain.memory.chat_memory import BaseChatMemory, get_prompt_input_key
from langchain.vectorstores.base import VectorStoreRetriever


class AutoGPTMemory(BaseChatMemory):
    retriever: VectorStoreRetriever = Field(exclude=True)
    """VectorStoreRetriever object to connect to."""

    @property
    def memory_variables(self) -> List[str]:
        return ["chat_history", "relevant_context"]

    def _get_prompt_input_key(self, inputs: Dict[str, Any]) -> str:
        """Get the input key for the prompt."""
        if self.input_key is None:
            return get_prompt_input_key(inputs, self.memory_variables)
        return self.input_key

    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        input_key = self._get_prompt_input_key(inputs)
        query = inputs[input_key]
        docs = self.retriever.get_relevant_documents(query)
        return {
            "chat_history": self.chat_memory.messages[-10:],
            "relevant_context": docs,
        }

25 langchain/agents/autogpt/output_parser.py Normal file
@@ -0,0 +1,25 @@
import json
from abc import abstractmethod
from typing import Dict, NamedTuple

from langchain.schema import BaseOutputParser


class AutoGPTAction(NamedTuple):
    name: str
    args: Dict


class BaseAutoGPTOutputParser(BaseOutputParser):
    @abstractmethod
    def parse(self, text: str) -> AutoGPTAction:
        """Return AutoGPTAction"""


class AutoGPTOutputParser(BaseAutoGPTOutputParser):
    def parse(self, text: str) -> AutoGPTAction:
        parsed = json.loads(text)
        return AutoGPTAction(
            name=parsed["command"]["name"],
            args=parsed["command"]["args"],
        )

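A quick demonstration of the reply format this parser expects; it matches the response_format dict in prompt_generator.py below, and the command shown is illustrative:

import json

from langchain.agents.autogpt.output_parser import AutoGPTOutputParser

reply = json.dumps(
    {
        "thoughts": {"text": "look it up", "speak": "searching now"},
        "command": {"name": "search", "args": {"query": "weather in SF"}},
    }
)
action = AutoGPTOutputParser().parse(reply)
print(action.name, action.args)  # search {'query': 'weather in SF'}
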
75 langchain/agents/autogpt/prompt.py Normal file
@@ -0,0 +1,75 @@
import time
from typing import Any, Callable, List

from pydantic import BaseModel

from langchain.agents.autogpt.prompt_generator import get_prompt
from langchain.prompts.chat import (
    BaseChatPromptTemplate,
)
from langchain.schema import BaseMessage, HumanMessage, SystemMessage
from langchain.tools.base import BaseTool
from langchain.vectorstores.base import VectorStoreRetriever


class AutoGPTPrompt(BaseChatPromptTemplate, BaseModel):
    ai_name: str
    ai_role: str
    tools: List[BaseTool]
    token_counter: Callable[[str], int]
    send_token_limit: int = 4196

    def construct_full_prompt(self, goals: List[str]) -> str:
        prompt_start = """Your decisions must always be made independently
without seeking user assistance. Play to your strengths
as an LLM and pursue simple strategies with no legal complications.
If you have completed all your tasks,
make sure to use the "finish" command."""

        # Construct full prompt
        full_prompt = (
            f"You are {self.ai_name}, {self.ai_role}\n{prompt_start}\n\nGOALS:\n\n"
        )
        for i, goal in enumerate(goals):
            full_prompt += f"{i+1}. {goal}\n"

        full_prompt += f"\n\n{get_prompt(self.tools)}"
        return full_prompt

    def format_messages(self, **kwargs: Any) -> List[BaseMessage]:
        base_prompt = SystemMessage(content=self.construct_full_prompt(kwargs["goals"]))
        time_prompt = SystemMessage(
            content=f"The current time and date is {time.strftime('%c')}"
        )
        used_tokens = self.token_counter(base_prompt.content) + self.token_counter(
            time_prompt.content
        )
        memory: VectorStoreRetriever = kwargs["memory"]
        previous_messages = kwargs["messages"]
        relevant_docs = memory.get_relevant_documents(str(previous_messages[-10:]))
        relevant_memory = [d.page_content for d in relevant_docs]
        relevant_memory_tokens = sum(
            [self.token_counter(doc) for doc in relevant_memory]
        )
        while used_tokens + relevant_memory_tokens > 2500:
            relevant_memory = relevant_memory[:-1]
            relevant_memory_tokens = sum(
                [self.token_counter(doc) for doc in relevant_memory]
            )
        content_format = (
            f"This reminds you of these events "
            f"from your past:\n{relevant_memory}\n\n"
        )
        memory_message = SystemMessage(content=content_format)
        used_tokens += len(memory_message.content)
        historical_messages: List[BaseMessage] = []
        for message in previous_messages[-10:][::-1]:
            message_tokens = self.token_counter(message.content)
            if used_tokens + message_tokens > self.send_token_limit - 1000:
                break
            historical_messages = [message] + historical_messages
        input_message = HumanMessage(content=kwargs["user_input"])
        messages: List[BaseMessage] = [base_prompt, time_prompt, memory_message]
        messages += historical_messages
        messages.append(input_message)
        return messages

187 langchain/agents/autogpt/prompt_generator.py Normal file
@@ -0,0 +1,187 @@
import json
from typing import List

from langchain.tools.base import BaseTool

FINISH_NAME = "finish"


class PromptGenerator:
    """A class for generating custom prompt strings.

    Does this based on constraints, commands, resources, and performance evaluations.
    """

    def __init__(self) -> None:
        """Initialize the PromptGenerator object.

        Starts with empty lists of constraints, commands, resources,
        and performance evaluations.
        """
        self.constraints: List[str] = []
        self.commands: List[BaseTool] = []
        self.resources: List[str] = []
        self.performance_evaluation: List[str] = []
        self.response_format = {
            "thoughts": {
                "text": "thought",
                "reasoning": "reasoning",
                "plan": "- short bulleted\n- list that conveys\n- long-term plan",
                "criticism": "constructive self-criticism",
                "speak": "thoughts summary to say to user",
            },
            "command": {"name": "command name", "args": {"arg name": "value"}},
        }

    def add_constraint(self, constraint: str) -> None:
        """
        Add a constraint to the constraints list.

        Args:
            constraint (str): The constraint to be added.
        """
        self.constraints.append(constraint)

    def add_tool(self, tool: BaseTool) -> None:
        self.commands.append(tool)

    def _generate_command_string(self, tool: BaseTool) -> str:
        args_string = ", ".join(
            f'"{arg.name}": "{arg.description}"' for arg in tool.args
        )
        return f"{tool.name}: {tool.description}, args: {args_string}"

    def add_resource(self, resource: str) -> None:
        """
        Add a resource to the resources list.

        Args:
            resource (str): The resource to be added.
        """
        self.resources.append(resource)

    def add_performance_evaluation(self, evaluation: str) -> None:
        """
        Add a performance evaluation item to the performance_evaluation list.

        Args:
            evaluation (str): The evaluation item to be added.
        """
        self.performance_evaluation.append(evaluation)

    def _generate_numbered_list(self, items: list, item_type: str = "list") -> str:
        """
        Generate a numbered list from given items based on the item_type.

        Args:
            items (list): A list of items to be numbered.
            item_type (str, optional): The type of items in the list.
                Defaults to 'list'.

        Returns:
            str: The formatted numbered list.
        """
        if item_type == "command":
            command_strings = [
                f"{i + 1}. {self._generate_command_string(item)}"
                for i, item in enumerate(items)
            ]
            finish_description = (
                "use this to signal that you have finished all your objectives"
            )
            finish_args = (
                '"response": "final response to let '
                'people know you have finished your objectives"'
            )
            finish_string = (
                f"{len(items) + 1}. {FINISH_NAME}: "
                f"{finish_description}, args: {finish_args}"
            )
            return "\n".join(command_strings + [finish_string])
        else:
            return "\n".join(f"{i+1}. {item}" for i, item in enumerate(items))

    def generate_prompt_string(self) -> str:
        """Generate a prompt string.

        Returns:
            str: The generated prompt string.
        """
        formatted_response_format = json.dumps(self.response_format, indent=4)
        prompt_string = (
            f"Constraints:\n{self._generate_numbered_list(self.constraints)}\n\n"
            f"Commands:\n"
            f"{self._generate_numbered_list(self.commands, item_type='command')}\n\n"
            f"Resources:\n{self._generate_numbered_list(self.resources)}\n\n"
            f"Performance Evaluation:\n"
            f"{self._generate_numbered_list(self.performance_evaluation)}\n\n"
            f"You should only respond in JSON format as described below "
            f"\nResponse Format: \n{formatted_response_format} "
            f"\nEnsure the response can be parsed by Python json.loads"
        )

        return prompt_string


def get_prompt(tools: List[BaseTool]) -> str:
    """This function generates a prompt string.

    It includes various constraints, commands, resources, and performance evaluations.

    Returns:
        str: The generated prompt string.
    """

    # Initialize the PromptGenerator object
    prompt_generator = PromptGenerator()

    # Add constraints to the PromptGenerator object
    prompt_generator.add_constraint(
        "~4000 word limit for short term memory. "
        "Your short term memory is short, "
        "so immediately save important information to files."
    )
    prompt_generator.add_constraint(
        "If you are unsure how you previously did something "
        "or want to recall past events, "
        "thinking about similar events will help you remember."
    )
    prompt_generator.add_constraint("No user assistance")
    prompt_generator.add_constraint(
        'Exclusively use the commands listed in double quotes e.g. "command name"'
    )

    # Add commands to the PromptGenerator object
    for tool in tools:
        prompt_generator.add_tool(tool)

    # Add resources to the PromptGenerator object
    prompt_generator.add_resource(
        "Internet access for searches and information gathering."
    )
    prompt_generator.add_resource("Long Term memory management.")
    prompt_generator.add_resource(
        "GPT-3.5 powered Agents for delegation of simple tasks."
    )
    prompt_generator.add_resource("File output.")

    # Add performance evaluations to the PromptGenerator object
    prompt_generator.add_performance_evaluation(
        "Continuously review and analyze your actions "
        "to ensure you are performing to the best of your abilities."
    )
    prompt_generator.add_performance_evaluation(
        "Constructively self-criticize your big-picture behavior constantly."
    )
    prompt_generator.add_performance_evaluation(
        "Reflect on past decisions and strategies to refine your approach."
    )
    prompt_generator.add_performance_evaluation(
        "Every command has a cost, so be smart and efficient. "
        "Aim to complete tasks in the least number of steps."
    )

    # Generate the prompt string
    prompt_string = prompt_generator.generate_prompt_string()

    return prompt_string

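A small smoke test of the generator, with no tools registered so only the built-in finish command is listed; the constraint and resource strings below are illustrative:

from langchain.agents.autogpt.prompt_generator import PromptGenerator

pg = PromptGenerator()
pg.add_constraint("No user assistance")
pg.add_resource("Internet access for searches.")
pg.add_performance_evaluation("Aim to complete tasks in the fewest steps.")

# Prints Constraints/Commands/Resources/Performance Evaluation sections plus
# the JSON response format, ending with the json.loads reminder.
print(pg.generate_prompt_string())
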
@@ -1,7 +1,9 @@
import json
from typing import Any, List, Optional, Sequence, Tuple

from langchain.agents.agent import Agent
from pydantic import Field

from langchain.agents.agent import Agent, AgentOutputParser
from langchain.agents.chat.output_parser import ChatOutputParser
from langchain.agents.chat.prompt import FORMAT_INSTRUCTIONS, PREFIX, SUFFIX
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
@@ -14,10 +16,10 @@ from langchain.prompts.chat import (
from langchain.schema import AgentAction, BaseLanguageModel
from langchain.tools import BaseTool

FINAL_ANSWER_ACTION = "Final Answer:"


class ChatAgent(Agent):
    output_parser: AgentOutputParser = Field(default_factory=ChatOutputParser)

    @property
    def observation_prefix(self) -> str:
        """Prefix to append the observation with."""
@@ -43,16 +45,9 @@ class ChatAgent(Agent):
        else:
            return agent_scratchpad

    def _extract_tool_and_input(self, text: str) -> Optional[Tuple[str, str]]:
        if FINAL_ANSWER_ACTION in text:
            return "Final Answer", text.split(FINAL_ANSWER_ACTION)[-1].strip()
        try:
            _, action, _ = text.split("```")
            response = json.loads(action.strip())
            return response["action"], response["action_input"]

        except Exception:
            raise ValueError(f"Could not parse LLM output: {text}")
    @classmethod
    def _get_default_output_parser(cls, **kwargs: Any) -> AgentOutputParser:
        return ChatOutputParser()

    @property
    def _stop(self) -> List[str]:
@@ -85,6 +80,7 @@ class ChatAgent(Agent):
        llm: BaseLanguageModel,
        tools: Sequence[BaseTool],
        callback_manager: Optional[BaseCallbackManager] = None,
        output_parser: Optional[AgentOutputParser] = None,
        prefix: str = PREFIX,
        suffix: str = SUFFIX,
        format_instructions: str = FORMAT_INSTRUCTIONS,
@@ -106,7 +102,13 @@ class ChatAgent(Agent):
            callback_manager=callback_manager,
        )
        tool_names = [tool.name for tool in tools]
        return cls(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
        _output_parser = output_parser or cls._get_default_output_parser()
        return cls(
            llm_chain=llm_chain,
            allowed_tools=tool_names,
            output_parser=_output_parser,
            **kwargs,
        )

    @property
    def _agent_type(self) -> str:

22
langchain/agents/chat/output_parser.py
Normal file
22
langchain/agents/chat/output_parser.py
Normal file
@@ -0,0 +1,22 @@
import json
from typing import Union

from langchain.agents.agent import AgentOutputParser
from langchain.schema import AgentAction, AgentFinish

FINAL_ANSWER_ACTION = "Final Answer:"


class ChatOutputParser(AgentOutputParser):
    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
        if FINAL_ANSWER_ACTION in text:
            return AgentFinish(
                {"output": text.split(FINAL_ANSWER_ACTION)[-1].strip()}, text
            )
        try:
            _, action, _ = text.split("```")
            response = json.loads(action.strip())
            return AgentAction(response["action"], response["action_input"], text)

        except Exception:
            raise ValueError(f"Could not parse LLM output: {text}")
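The new parser simply relocates the logic deleted from ChatAgent._extract_tool_and_input above. A minimal sketch of its behavior, assuming a hand-written model reply in the fenced-JSON format the chat prompt requests (the reply strings are illustrative, not taken from the diff):

    from langchain.agents.chat.output_parser import ChatOutputParser

    parser = ChatOutputParser()
    # Hypothetical reply: a JSON action blob wrapped in triple backticks.
    reply = 'Thought: I should search.\n```\n{"action": "Search", "action_input": "weather"}\n```'
    parser.parse(reply)                # -> AgentAction("Search", "weather", reply)
    parser.parse("Final Answer: 72F")  # -> AgentFinish({"output": "72F"}, ...)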
@@ -1,11 +1,13 @@
"""An agent designed to hold a conversation in addition to using tools."""
from __future__ import annotations

import re
from typing import Any, List, Optional, Sequence, Tuple
from typing import Any, List, Optional, Sequence

from langchain.agents.agent import Agent
from pydantic import Field

from langchain.agents.agent import Agent, AgentOutputParser
from langchain.agents.agent_types import AgentType
from langchain.agents.conversational.output_parser import ConvoOutputParser
from langchain.agents.conversational.prompt import FORMAT_INSTRUCTIONS, PREFIX, SUFFIX
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains import LLMChain
@@ -18,6 +20,13 @@ class ConversationalAgent(Agent):
    """An agent designed to hold a conversation in addition to using tools."""

    ai_prefix: str = "AI"
    output_parser: AgentOutputParser = Field(default_factory=ConvoOutputParser)

    @classmethod
    def _get_default_output_parser(
        cls, ai_prefix: str = "AI", **kwargs: Any
    ) -> AgentOutputParser:
        return ConvoOutputParser(ai_prefix=ai_prefix)

    @property
    def _agent_type(self) -> str:
@@ -71,28 +80,13 @@ class ConversationalAgent(Agent):
        input_variables = ["input", "chat_history", "agent_scratchpad"]
        return PromptTemplate(template=template, input_variables=input_variables)

    @property
    def finish_tool_name(self) -> str:
        """Name of the tool to use to finish the chain."""
        return self.ai_prefix

    def _extract_tool_and_input(self, llm_output: str) -> Optional[Tuple[str, str]]:
        if f"{self.ai_prefix}:" in llm_output:
            return self.ai_prefix, llm_output.split(f"{self.ai_prefix}:")[-1].strip()
        regex = r"Action: (.*?)[\n]*Action Input: (.*)"
        match = re.search(regex, llm_output)
        if not match:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        action = match.group(1)
        action_input = match.group(2)
        return action.strip(), action_input.strip(" ").strip('"')

    @classmethod
    def from_llm_and_tools(
        cls,
        llm: BaseLanguageModel,
        tools: Sequence[BaseTool],
        callback_manager: Optional[BaseCallbackManager] = None,
        output_parser: Optional[AgentOutputParser] = None,
        prefix: str = PREFIX,
        suffix: str = SUFFIX,
        format_instructions: str = FORMAT_INSTRUCTIONS,
@@ -118,6 +112,13 @@ class ConversationalAgent(Agent):
            callback_manager=callback_manager,
        )
        tool_names = [tool.name for tool in tools]
        return cls(
            llm_chain=llm_chain, allowed_tools=tool_names, ai_prefix=ai_prefix, **kwargs
        _output_parser = output_parser or cls._get_default_output_parser(
            ai_prefix=ai_prefix
        )
        return cls(
            llm_chain=llm_chain,
            allowed_tools=tool_names,
            ai_prefix=ai_prefix,
            output_parser=_output_parser,
            **kwargs,
        )
22  langchain/agents/conversational/output_parser.py  Normal file
@@ -0,0 +1,22 @@
import re
from typing import Union

from langchain.agents.agent import AgentOutputParser
from langchain.schema import AgentAction, AgentFinish


class ConvoOutputParser(AgentOutputParser):
    ai_prefix: str = "AI"

    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
        if f"{self.ai_prefix}:" in text:
            return AgentFinish(
                {"output": text.split(f"{self.ai_prefix}:")[-1].strip()}, text
            )
        regex = r"Action: (.*?)[\n]*Action Input: (.*)"
        match = re.search(regex, text)
        if not match:
            raise ValueError(f"Could not parse LLM output: `{text}`")
        action = match.group(1)
        action_input = match.group(2)
        return AgentAction(action.strip(), action_input.strip(" ").strip('"'), text)
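For reference, a short sketch of the two paths through this parser, with made-up replies (the ai_prefix default matches the agent field above):

    from langchain.agents.conversational.output_parser import ConvoOutputParser

    parser = ConvoOutputParser(ai_prefix="AI")
    parser.parse("Thought: no tool needed\nAI: Hello there!")
    # -> AgentFinish({"output": "Hello there!"}, ...)
    parser.parse("Action: Search\nAction Input: capital of France")
    # -> AgentAction("Search", "capital of France", ...)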
@@ -1,12 +1,13 @@
"""An agent designed to hold a conversation in addition to using tools."""
from __future__ import annotations

import json
from typing import Any, List, Optional, Sequence, Tuple

from langchain.agents.agent import Agent
from pydantic import Field

from langchain.agents.agent import Agent, AgentOutputParser
from langchain.agents.conversational_chat.output_parser import ConvoOutputParser
from langchain.agents.conversational_chat.prompt import (
    FORMAT_INSTRUCTIONS,
    PREFIX,
    SUFFIX,
    TEMPLATE_TOOL_RESPONSE,
@@ -31,31 +32,14 @@ from langchain.schema import (
from langchain.tools.base import BaseTool


class AgentOutputParser(BaseOutputParser):
    def get_format_instructions(self) -> str:
        return FORMAT_INSTRUCTIONS

    def parse(self, text: str) -> Any:
        cleaned_output = text.strip()
        if "```json" in cleaned_output:
            _, cleaned_output = cleaned_output.split("```json")
        if "```" in cleaned_output:
            cleaned_output, _ = cleaned_output.split("```")
        if cleaned_output.startswith("```json"):
            cleaned_output = cleaned_output[len("```json") :]
        if cleaned_output.startswith("```"):
            cleaned_output = cleaned_output[len("```") :]
        if cleaned_output.endswith("```"):
            cleaned_output = cleaned_output[: -len("```")]
        cleaned_output = cleaned_output.strip()
        response = json.loads(cleaned_output)
        return {"action": response["action"], "action_input": response["action_input"]}


class ConversationalChatAgent(Agent):
    """An agent designed to hold a conversation in addition to using tools."""

    output_parser: BaseOutputParser
    output_parser: AgentOutputParser = Field(default_factory=ConvoOutputParser)

    @classmethod
    def _get_default_output_parser(cls, **kwargs: Any) -> AgentOutputParser:
        return ConvoOutputParser()

    @property
    def _agent_type(self) -> str:
@@ -84,7 +68,7 @@ class ConversationalChatAgent(Agent):
            [f"> {tool.name}: {tool.description}" for tool in tools]
        )
        tool_names = ", ".join([tool.name for tool in tools])
        _output_parser = output_parser or AgentOutputParser()
        _output_parser = output_parser or cls._get_default_output_parser()
        format_instructions = human_message.format(
            format_instructions=_output_parser.get_format_instructions()
        )
@@ -101,13 +85,6 @@ class ConversationalChatAgent(Agent):
        ]
        return ChatPromptTemplate(input_variables=input_variables, messages=messages)

    def _extract_tool_and_input(self, llm_output: str) -> Optional[Tuple[str, str]]:
        try:
            response = self.output_parser.parse(llm_output)
            return response["action"], response["action_input"]
        except Exception:
            raise ValueError(f"Could not parse LLM output: {llm_output}")

    def _construct_scratchpad(
        self, intermediate_steps: List[Tuple[AgentAction, str]]
    ) -> List[BaseMessage]:
@@ -127,15 +104,15 @@ class ConversationalChatAgent(Agent):
        llm: BaseLanguageModel,
        tools: Sequence[BaseTool],
        callback_manager: Optional[BaseCallbackManager] = None,
        output_parser: Optional[AgentOutputParser] = None,
        system_message: str = PREFIX,
        human_message: str = SUFFIX,
        input_variables: Optional[List[str]] = None,
        output_parser: Optional[BaseOutputParser] = None,
        **kwargs: Any,
    ) -> Agent:
        """Construct an agent from an LLM and tools."""
        cls._validate_tools(tools)
        _output_parser = output_parser or AgentOutputParser()
        _output_parser = output_parser or cls._get_default_output_parser()
        prompt = cls.create_prompt(
            tools,
            system_message=system_message,
33  langchain/agents/conversational_chat/output_parser.py  Normal file
@@ -0,0 +1,33 @@
from __future__ import annotations

import json
from typing import Union

from langchain.agents import AgentOutputParser
from langchain.agents.conversational_chat.prompt import FORMAT_INSTRUCTIONS
from langchain.schema import AgentAction, AgentFinish


class ConvoOutputParser(AgentOutputParser):
    def get_format_instructions(self) -> str:
        return FORMAT_INSTRUCTIONS

    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
        cleaned_output = text.strip()
        if "```json" in cleaned_output:
            _, cleaned_output = cleaned_output.split("```json")
        if "```" in cleaned_output:
            cleaned_output, _ = cleaned_output.split("```")
        if cleaned_output.startswith("```json"):
            cleaned_output = cleaned_output[len("```json") :]
        if cleaned_output.startswith("```"):
            cleaned_output = cleaned_output[len("```") :]
        if cleaned_output.endswith("```"):
            cleaned_output = cleaned_output[: -len("```")]
        cleaned_output = cleaned_output.strip()
        response = json.loads(cleaned_output)
        action, action_input = response["action"], response["action_input"]
        if action == "Final Answer":
            return AgentFinish({"output": action_input}, text)
        else:
            return AgentAction(action, action_input, text)
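A quick sketch of the markdown stripping this parser performs, using an invented ```json-fenced reply:

    from langchain.agents.conversational_chat.output_parser import ConvoOutputParser

    parser = ConvoOutputParser()
    reply = '```json\n{"action": "Final Answer", "action_input": "Paris"}\n```'
    # Fences are peeled off, the JSON is decoded, and "Final Answer" ends the run:
    parser.parse(reply)  # -> AgentFinish({"output": "Paris"}, reply)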
@@ -1,11 +1,11 @@
"""Functionality for loading agents."""
import json
from pathlib import Path
from typing import Any, List, Optional, Union
from typing import Any, Dict, List, Optional, Type, Union

import yaml

from langchain.agents.agent import Agent
from langchain.agents.agent import BaseSingleActionAgent
from langchain.agents.agent_types import AgentType
from langchain.agents.chat.base import ChatAgent
from langchain.agents.conversational.base import ConversationalAgent
@@ -18,7 +18,7 @@ from langchain.chains.loading import load_chain, load_chain_from_config
from langchain.llms.base import BaseLLM
from langchain.utilities.loading import try_load_from_hub

AGENT_TO_CLASS = {
AGENT_TO_CLASS: Dict[AgentType, Type[BaseSingleActionAgent]] = {
    AgentType.ZERO_SHOT_REACT_DESCRIPTION: ZeroShotAgent,
    AgentType.REACT_DOCSTORE: ReActDocstoreAgent,
    AgentType.SELF_ASK_WITH_SEARCH: SelfAskWithSearchAgent,
@@ -32,7 +32,7 @@ URL_BASE = "https://raw.githubusercontent.com/hwchase17/langchain-hub/master/age

def _load_agent_from_tools(
    config: dict, llm: BaseLLM, tools: List[Tool], **kwargs: Any
) -> Agent:
) -> BaseSingleActionAgent:
    config_type = config.pop("_type")
    if config_type not in AGENT_TO_CLASS:
        raise ValueError(f"Loading {config_type} agent not supported")
@@ -49,7 +49,7 @@ def load_agent_from_config(
    llm: Optional[BaseLLM] = None,
    tools: Optional[List[Tool]] = None,
    **kwargs: Any,
) -> Agent:
) -> BaseSingleActionAgent:
    """Load agent from Config Dict."""
    if "_type" not in config:
        raise ValueError("Must specify an agent Type in config")
@@ -82,7 +82,7 @@ def load_agent_from_config(
    return agent_cls(**combined_config)  # type: ignore


def load_agent(path: Union[str, Path], **kwargs: Any) -> Agent:
def load_agent(path: Union[str, Path], **kwargs: Any) -> BaseSingleActionAgent:
    """Unified method for loading a agent from LangChainHub or local fs."""
    if hub_result := try_load_from_hub(
        path, _load_agent_from_file, "agents", {"json", "yaml"}
@@ -92,7 +92,9 @@ def load_agent(path: Union[str, Path], **kwargs: Any) -> Agent:
    return _load_agent_from_file(path, **kwargs)


def _load_agent_from_file(file: Union[str, Path], **kwargs: Any) -> Agent:
def _load_agent_from_file(
    file: Union[str, Path], **kwargs: Any
) -> BaseSingleActionAgent:
    """Load agent from file."""
    # Convert file to Path object.
    if isinstance(file, str):
@@ -1,11 +1,13 @@
"""Attempt to implement MRKL systems as described in arxiv.org/pdf/2205.00445.pdf."""
from __future__ import annotations

import re
from typing import Any, Callable, List, NamedTuple, Optional, Sequence, Tuple
from typing import Any, Callable, List, NamedTuple, Optional, Sequence

from langchain.agents.agent import Agent, AgentExecutor
from pydantic import Field

from langchain.agents.agent import Agent, AgentExecutor, AgentOutputParser
from langchain.agents.agent_types import AgentType
from langchain.agents.mrkl.output_parser import MRKLOutputParser
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS, PREFIX, SUFFIX
from langchain.agents.tools import Tool
from langchain.callbacks.base import BaseCallbackManager
@@ -14,8 +16,6 @@ from langchain.prompts import PromptTemplate
from langchain.schema import BaseLanguageModel
from langchain.tools.base import BaseTool

FINAL_ANSWER_ACTION = "Final Answer:"


class ChainConfig(NamedTuple):
    """Configuration for chain to use in MRKL system.
@@ -31,29 +31,15 @@ class ChainConfig(NamedTuple):
    action_description: str


def get_action_and_input(llm_output: str) -> Tuple[str, str]:
    """Parse out the action and input from the LLM output.

    Note: if you're specifying a custom prompt for the ZeroShotAgent,
    you will need to ensure that it meets the following Regex requirements.
    The string starting with "Action:" and the following string starting
    with "Action Input:" should be separated by a newline.
    """
    if FINAL_ANSWER_ACTION in llm_output:
        return "Final Answer", llm_output.split(FINAL_ANSWER_ACTION)[-1].strip()
    # \s matches against tab/newline/whitespace
    regex = r"Action: (.*?)[\n]*Action Input:[\s]*(.*)"
    match = re.search(regex, llm_output, re.DOTALL)
    if not match:
        raise ValueError(f"Could not parse LLM output: `{llm_output}`")
    action = match.group(1).strip()
    action_input = match.group(2)
    return action, action_input.strip(" ").strip('"')


class ZeroShotAgent(Agent):
    """Agent for the MRKL chain."""

    output_parser: AgentOutputParser = Field(default_factory=MRKLOutputParser)

    @classmethod
    def _get_default_output_parser(cls, **kwargs: Any) -> AgentOutputParser:
        return MRKLOutputParser()

    @property
    def _agent_type(self) -> str:
        """Return Identifier of agent type."""
@@ -104,6 +90,7 @@ class ZeroShotAgent(Agent):
        llm: BaseLanguageModel,
        tools: Sequence[BaseTool],
        callback_manager: Optional[BaseCallbackManager] = None,
        output_parser: Optional[AgentOutputParser] = None,
        prefix: str = PREFIX,
        suffix: str = SUFFIX,
        format_instructions: str = FORMAT_INSTRUCTIONS,
@@ -125,7 +112,13 @@ class ZeroShotAgent(Agent):
            callback_manager=callback_manager,
        )
        tool_names = [tool.name for tool in tools]
        return cls(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
        _output_parser = output_parser or cls._get_default_output_parser()
        return cls(
            llm_chain=llm_chain,
            allowed_tools=tool_names,
            output_parser=_output_parser,
            **kwargs,
        )

    @classmethod
    def _validate_tools(cls, tools: Sequence[BaseTool]) -> None:
@@ -136,9 +129,6 @@ class ZeroShotAgent(Agent):
            f"a description must always be provided."
        )

    def _extract_tool_and_input(self, text: str) -> Optional[Tuple[str, str]]:
        return get_action_and_input(text)


class MRKLChain(AgentExecutor):
    """Chain that implements the MRKL system.
23  langchain/agents/mrkl/output_parser.py  Normal file
@@ -0,0 +1,23 @@
import re
from typing import Union

from langchain.agents.agent import AgentOutputParser
from langchain.schema import AgentAction, AgentFinish

FINAL_ANSWER_ACTION = "Final Answer:"


class MRKLOutputParser(AgentOutputParser):
    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
        if FINAL_ANSWER_ACTION in text:
            return AgentFinish(
                {"output": text.split(FINAL_ANSWER_ACTION)[-1].strip()}, text
            )
        # \s matches against tab/newline/whitespace
        regex = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
        match = re.search(regex, text, re.DOTALL)
        if not match:
            raise ValueError(f"Could not parse LLM output: `{text}`")
        action = match.group(1).strip()
        action_input = match.group(2)
        return AgentAction(action, action_input.strip(" ").strip('"'), text)
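Note the loosened regex: unlike the deleted get_action_and_input, it also tolerates numbered variants like "Action 2:" / "Action 2 Input:". A sketch with invented model outputs:

    from langchain.agents.mrkl.output_parser import MRKLOutputParser

    parser = MRKLOutputParser()
    parser.parse("Thought: look it up\nAction: Search\nAction Input: LangChain")
    # -> AgentAction("Search", "LangChain", ...)
    parser.parse("Thought: done\nFinal Answer: 42")
    # -> AgentFinish({"output": "42"}, ...)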
@@ -1,9 +1,11 @@
"""Chain that implements the ReAct paper from https://arxiv.org/pdf/2210.03629.pdf."""
import re
from typing import Any, List, Optional, Sequence, Tuple
from typing import Any, List, Optional, Sequence

from langchain.agents.agent import Agent, AgentExecutor
from pydantic import Field

from langchain.agents.agent import Agent, AgentExecutor, AgentOutputParser
from langchain.agents.agent_types import AgentType
from langchain.agents.react.output_parser import ReActOutputParser
from langchain.agents.react.textworld_prompt import TEXTWORLD_PROMPT
from langchain.agents.react.wiki_prompt import WIKI_PROMPT
from langchain.agents.tools import Tool
@@ -17,6 +19,12 @@ from langchain.tools.base import BaseTool
class ReActDocstoreAgent(Agent):
    """Agent for the ReAct chain."""

    output_parser: AgentOutputParser = Field(default_factory=ReActOutputParser)

    @classmethod
    def _get_default_output_parser(cls, **kwargs: Any) -> AgentOutputParser:
        return ReActOutputParser()

    @property
    def _agent_type(self) -> str:
        """Return Identifier of agent type."""
@@ -37,27 +45,6 @@ class ReActDocstoreAgent(Agent):
            f"Tool names should be Lookup and Search, got {tool_names}"
        )

    def _fix_text(self, text: str) -> str:
        return text + "\nAction:"

    def _extract_tool_and_input(self, text: str) -> Optional[Tuple[str, str]]:
        action_prefix = "Action: "
        if not text.strip().split("\n")[-1].startswith(action_prefix):
            return None
        action_block = text.strip().split("\n")[-1]

        action_str = action_block[len(action_prefix) :]
        # Parse out the action and the directive.
        re_matches = re.search(r"(.*?)\[(.*?)\]", action_str)
        if re_matches is None:
            raise ValueError(f"Could not parse action directive: {action_str}")
        return re_matches.group(1), re_matches.group(2)

    @property
    def finish_tool_name(self) -> str:
        """Name of the tool of when to finish the chain."""
        return "Finish"

    @property
    def observation_prefix(self) -> str:
        """Prefix to append the observation with."""
24  langchain/agents/react/output_parser.py  Normal file
@@ -0,0 +1,24 @@
import re
from typing import Union

from langchain.agents.agent import AgentOutputParser
from langchain.schema import AgentAction, AgentFinish


class ReActOutputParser(AgentOutputParser):
    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
        action_prefix = "Action: "
        if not text.strip().split("\n")[-1].startswith(action_prefix):
            raise ValueError(f"Could not parse LLM Output: {text}")
        action_block = text.strip().split("\n")[-1]

        action_str = action_block[len(action_prefix) :]
        # Parse out the action and the directive.
        re_matches = re.search(r"(.*?)\[(.*?)\]", action_str)
        if re_matches is None:
            raise ValueError(f"Could not parse action directive: {action_str}")
        action, action_input = re_matches.group(1), re_matches.group(2)
        if action == "Finish":
            return AgentFinish({"output": action_input}, text)
        else:
            return AgentAction(action, action_input, text)
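This parser expects the ReAct-style Tool[input] directive on the model's last line; a sketch with invented replies:

    from langchain.agents.react.output_parser import ReActOutputParser

    parser = ReActOutputParser()
    parser.parse("Thought: need the article\nAction: Search[Python]")
    # -> AgentAction("Search", "Python", ...)
    parser.parse("Thought: that settles it\nAction: Finish[42]")
    # -> AgentFinish({"output": "42"}, ...)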
28  langchain/agents/schema.py  Normal file
@@ -0,0 +1,28 @@
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
from langchain.prompts.chat import ChatPromptTemplate
|
||||
from langchain.schema import AgentAction
|
||||
|
||||
|
||||
class AgentScratchPadChatPromptTemplate(ChatPromptTemplate):
|
||||
def _construct_agent_scratchpad(
|
||||
self, intermediate_steps: List[Tuple[AgentAction, str]]
|
||||
) -> str:
|
||||
if len(intermediate_steps) == 0:
|
||||
return ""
|
||||
thoughts = ""
|
||||
for action, observation in intermediate_steps:
|
||||
thoughts += action.log
|
||||
thoughts += f"\nObservation: {observation}\nThought: "
|
||||
return (
|
||||
f"This was your previous work "
|
||||
f"(but I haven't seen any of it! I only see what "
|
||||
f"you return as final answer):\n{thoughts}"
|
||||
)
|
||||
|
||||
def _merge_partial_and_user_variables(self, **kwargs: Any) -> Dict[str, Any]:
|
||||
intermediate_steps = kwargs.pop("intermediate_steps")
|
||||
kwargs["agent_scratchpad"] = self._construct_agent_scratchpad(
|
||||
intermediate_steps
|
||||
)
|
||||
return kwargs
|
||||
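Roughly, this template folds each (action, observation) pair into a single scratchpad string before the prompt is formatted; a sketch of the transformation (values invented):

    from langchain.schema import AgentAction

    steps = [(AgentAction("Search", "weather", "Action: Search[weather]"), "Sunny")]
    # _construct_agent_scratchpad(steps) yields:
    # "This was your previous work (but I haven't seen any of it! I only see what
    #  you return as final answer):\nAction: Search[weather]\nObservation: Sunny\nThought: "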
@@ -1,8 +1,11 @@
"""Chain that does self ask with search."""
from typing import Any, Optional, Sequence, Tuple, Union
from typing import Any, Sequence, Union

from langchain.agents.agent import Agent, AgentExecutor
from pydantic import Field

from langchain.agents.agent import Agent, AgentExecutor, AgentOutputParser
from langchain.agents.agent_types import AgentType
from langchain.agents.self_ask_with_search.output_parser import SelfAskOutputParser
from langchain.agents.self_ask_with_search.prompt import PROMPT
from langchain.agents.tools import Tool
from langchain.llms.base import BaseLLM
@@ -15,6 +18,12 @@ from langchain.utilities.serpapi import SerpAPIWrapper
class SelfAskWithSearchAgent(Agent):
    """Agent for the self-ask-with-search paper."""

    output_parser: AgentOutputParser = Field(default_factory=SelfAskOutputParser)

    @classmethod
    def _get_default_output_parser(cls, **kwargs: Any) -> AgentOutputParser:
        return SelfAskOutputParser()

    @property
    def _agent_type(self) -> str:
        """Return Identifier of agent type."""
@@ -35,26 +44,6 @@ class SelfAskWithSearchAgent(Agent):
            f"Tool name should be Intermediate Answer, got {tool_names}"
        )

    def _extract_tool_and_input(self, text: str) -> Optional[Tuple[str, str]]:
        followup = "Follow up:"
        last_line = text.split("\n")[-1]

        if followup not in last_line:
            finish_string = "So the final answer is: "
            if finish_string not in last_line:
                return None
            return "Final Answer", last_line[len(finish_string) :]

        after_colon = text.split(":")[-1]

        if " " == after_colon[0]:
            after_colon = after_colon[1:]

        return "Intermediate Answer", after_colon

    def _fix_text(self, text: str) -> str:
        return f"{text}\nSo the final answer is:"

    @property
    def observation_prefix(self) -> str:
        """Prefix to append the observation with."""
@@ -65,11 +54,6 @@ class SelfAskWithSearchAgent(Agent):
        """Prefix to append the LLM call with."""
        return ""

    @property
    def starter_string(self) -> str:
        """Put this string after user input but before first LLM call."""
        return "Are follow up questions needed here:"


class SelfAskWithSearchChain(AgentExecutor):
    """Chain that does self ask with search.
22  langchain/agents/self_ask_with_search/output_parser.py  Normal file
@@ -0,0 +1,22 @@
from typing import Union

from langchain.agents.agent import AgentOutputParser
from langchain.schema import AgentAction, AgentFinish


class SelfAskOutputParser(AgentOutputParser):
    def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
        followup = "Follow up:"
        last_line = text.split("\n")[-1]

        if followup not in last_line:
            finish_string = "So the final answer is: "
            if finish_string not in last_line:
                raise ValueError(f"Could not parse output: {text}")
            return AgentFinish({"output": last_line[len(finish_string) :]}, text)

        after_colon = text.split(":")[-1]

        if " " == after_colon[0]:
            after_colon = after_colon[1:]
        return AgentAction("Intermediate Answer", after_colon, text)
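A sketch of the two branches, with invented model text:

    from langchain.agents.self_ask_with_search.output_parser import SelfAskOutputParser

    parser = SelfAskOutputParser()
    parser.parse("Follow up: who won in 2018?")
    # -> AgentAction("Intermediate Answer", "who won in 2018?", ...)
    parser.parse("So the final answer is: France")
    # -> AgentFinish({"output": "France"}, ...)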
@@ -1,6 +1,7 @@
"""Beta Feature: base interface for cache."""
import json
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Callable, Dict, List, Optional, Tuple

from sqlalchemy import Column, Integer, String, create_engine, select
from sqlalchemy.engine.base import Engine
@@ -137,3 +138,125 @@ class RedisCache(BaseCache):
        """Update cache based on prompt and llm_string."""
        for i, generation in enumerate(return_val):
            self.redis.set(self._key(prompt, llm_string, i), generation.text)


class GPTCache(BaseCache):
    """Cache that uses GPTCache as a backend."""

    def __init__(self, init_func: Callable[[Any], None]):
        """Initialize by passing in the `init` GPTCache func

        Args:
            init_func (Callable[[Any], None]): init `GPTCache` function

        Example:
        .. code-block:: python

            import gptcache
            from gptcache.processor.pre import get_prompt
            from gptcache.manager.factory import get_data_manager

            # Avoid multiple caches using the same file, causing different llm
            # model caches to affect each other
            i = 0
            file_prefix = "data_map"

            def init_gptcache_map(cache_obj: gptcache.Cache):
                nonlocal i
                cache_path = f'{file_prefix}_{i}.txt'
                cache_obj.init(
                    pre_embedding_func=get_prompt,
                    data_manager=get_data_manager(data_path=cache_path),
                )
                i += 1

            langchain.llm_cache = GPTCache(init_gptcache_map)

        """
        try:
            import gptcache  # noqa: F401
        except ImportError:
            raise ValueError(
                "Could not import gptcache python package. "
                "Please install it with `pip install gptcache`."
            )
        self.init_gptcache_func: Callable[[Any], None] = init_func
        self.gptcache_dict: Dict[str, Any] = {}

    @staticmethod
    def _update_cache_callback_none(*_: Any, **__: Any) -> None:
        """When updating cached data, do nothing.

        Because currently only cached queries are processed."""
        return None

    @staticmethod
    def _llm_handle_none(*_: Any, **__: Any) -> None:
        """Do nothing on a cache miss"""
        return None

    @staticmethod
    def _cache_data_converter(data: str) -> RETURN_VAL_TYPE:
        """Convert the `data` in the cache to the `RETURN_VAL_TYPE` data format."""
        return [Generation(**generation_dict) for generation_dict in json.loads(data)]

    def _get_gptcache(self, llm_string: str) -> Any:
        """Get a cache object.

        When the corresponding llm model cache does not exist, it will be created."""
        from gptcache import Cache

        _gptcache = self.gptcache_dict.get(llm_string, None)
        if _gptcache is None:
            _gptcache = Cache()
            self.init_gptcache_func(_gptcache)
            self.gptcache_dict[llm_string] = _gptcache
        return _gptcache

    def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
        """Look up the cache data.
        First, retrieve the corresponding cache object using the `llm_string` parameter,
        and then retrieve the data from the cache based on the `prompt`.
        """
        from gptcache.adapter.adapter import adapt

        _gptcache = self.gptcache_dict.get(llm_string)
        if _gptcache is None:
            return None
        res = adapt(
            GPTCache._llm_handle_none,
            GPTCache._cache_data_converter,
            GPTCache._update_cache_callback_none,
            cache_obj=_gptcache,
            prompt=prompt,
        )
        return res

    @staticmethod
    def _update_cache_callback(
        llm_data: RETURN_VAL_TYPE, update_cache_func: Callable[[Any], None]
    ) -> None:
        """Save the `llm_data` to cache storage"""
        handled_data = json.dumps([generation.dict() for generation in llm_data])
        update_cache_func(handled_data)

    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
        """Update cache.
        First, retrieve the corresponding cache object using the `llm_string` parameter,
        and then store the `prompt` and `return_val` in the cache object.
        """
        from gptcache.adapter.adapter import adapt

        _gptcache = self._get_gptcache(llm_string)

        def llm_handle(*_: Any, **__: Any) -> RETURN_VAL_TYPE:
            return return_val

        return adapt(
            llm_handle,
            GPTCache._cache_data_converter,
            GPTCache._update_cache_callback,
            cache_obj=_gptcache,
            cache_skip=True,
            prompt=prompt,
        )
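A minimal end-to-end sketch of wiring this cache in (assumptions: an OpenAI key is configured, and `get_prompt` as the pre-embedding function is enough for exact-match caching; adjust the init function for anything fancier):

    import gptcache
    import langchain
    from gptcache.processor.pre import get_prompt
    from langchain.cache import GPTCache
    from langchain.llms import OpenAI

    def init_gptcache(cache_obj: gptcache.Cache) -> None:
        # One Cache object is created per llm_string by _get_gptcache above.
        cache_obj.init(pre_embedding_func=get_prompt)

    langchain.llm_cache = GPTCache(init_gptcache)
    llm = OpenAI()
    llm("Tell me a joke")  # populates the cache via update()
    llm("Tell me a joke")  # served from the cache via lookup()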
@@ -11,6 +11,7 @@ from langchain.callbacks.base import (
    CallbackManager,
)
from langchain.callbacks.clearml_callback import ClearMLCallbackHandler
from langchain.callbacks.comet_ml_callback import CometCallbackHandler
from langchain.callbacks.openai_info import OpenAICallbackHandler
from langchain.callbacks.shared import SharedCallbackManager
from langchain.callbacks.stdout import StdOutCallbackHandler
@@ -78,6 +79,7 @@ __all__ = [
    "AimCallbackHandler",
    "WandbCallbackHandler",
    "ClearMLCallbackHandler",
    "CometCallbackHandler",
    "AsyncIteratorCallbackHandler",
    "get_openai_callback",
    "set_tracing_callback_manager",
625  langchain/callbacks/comet_ml_callback.py  Normal file
@@ -0,0 +1,625 @@
import tempfile
from copy import deepcopy
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Sequence, Union

import langchain
from langchain.callbacks.base import BaseCallbackHandler
from langchain.callbacks.utils import (
    BaseMetadataCallbackHandler,
    flatten_dict,
    import_pandas,
    import_spacy,
    import_textstat,
)
from langchain.schema import AgentAction, AgentFinish, Generation, LLMResult

LANGCHAIN_MODEL_NAME = "langchain-model"


def import_comet_ml() -> Any:
    try:
        import comet_ml  # noqa: F401
    except ImportError:
        raise ImportError(
            "To use the comet_ml callback manager you need to have the "
            "`comet_ml` python package installed. Please install it with"
            " `pip install comet_ml`"
        )
    return comet_ml


def _get_experiment(
    workspace: Optional[str] = None, project_name: Optional[str] = None
) -> Any:
    comet_ml = import_comet_ml()

    experiment = comet_ml.Experiment(  # type: ignore
        workspace=workspace,
        project_name=project_name,
    )

    return experiment


def _fetch_text_complexity_metrics(text: str) -> dict:
    textstat = import_textstat()
    text_complexity_metrics = {
        "flesch_reading_ease": textstat.flesch_reading_ease(text),
        "flesch_kincaid_grade": textstat.flesch_kincaid_grade(text),
        "smog_index": textstat.smog_index(text),
        "coleman_liau_index": textstat.coleman_liau_index(text),
        "automated_readability_index": textstat.automated_readability_index(text),
        "dale_chall_readability_score": textstat.dale_chall_readability_score(text),
        "difficult_words": textstat.difficult_words(text),
        "linsear_write_formula": textstat.linsear_write_formula(text),
        "gunning_fog": textstat.gunning_fog(text),
        "text_standard": textstat.text_standard(text),
        "fernandez_huerta": textstat.fernandez_huerta(text),
        "szigriszt_pazos": textstat.szigriszt_pazos(text),
        "gutierrez_polini": textstat.gutierrez_polini(text),
        "crawford": textstat.crawford(text),
        "gulpease_index": textstat.gulpease_index(text),
        "osman": textstat.osman(text),
    }
    return text_complexity_metrics


def _summarize_metrics_for_generated_outputs(metrics: Sequence) -> dict:
    pd = import_pandas()
    metrics_df = pd.DataFrame(metrics)
    metrics_summary = metrics_df.describe()

    return metrics_summary.to_dict()


class CometCallbackHandler(BaseMetadataCallbackHandler, BaseCallbackHandler):
    """Callback Handler that logs to Comet.

    Parameters:
        job_type (str): The type of comet_ml task such as "inference",
            "testing" or "qc"
        project_name (str): The comet_ml project name
        tags (list): Tags to add to the task
        task_name (str): Name of the comet_ml task
        visualize (bool): Whether to visualize the run.
        complexity_metrics (bool): Whether to log complexity metrics
        stream_logs (bool): Whether to stream callback actions to Comet

    This handler will utilize the associated callback method and formats
    the input of each callback function with metadata regarding the state of LLM run,
    and adds the response to the list of records for both the {method}_records and
    action. It then logs the response to Comet.
    """

    def __init__(
        self,
        task_type: Optional[str] = "inference",
        workspace: Optional[str] = None,
        project_name: Optional[str] = "comet-langchain-demo",
        tags: Optional[Sequence] = None,
        name: Optional[str] = None,
        visualizations: Optional[List[str]] = None,
        complexity_metrics: bool = False,
        custom_metrics: Optional[Callable] = None,
        stream_logs: bool = True,
    ) -> None:
        """Initialize callback handler."""

        comet_ml = import_comet_ml()
        super().__init__()

        self.task_type = task_type
        self.workspace = workspace
        self.project_name = project_name
        self.tags = tags
        self.visualizations = visualizations
        self.complexity_metrics = complexity_metrics
        self.custom_metrics = custom_metrics
        self.stream_logs = stream_logs
        self.temp_dir = tempfile.TemporaryDirectory()

        self.experiment = _get_experiment(workspace, project_name)
        self.experiment.log_other("Created from", "langchain")
        if tags:
            self.experiment.add_tags(tags)
        self.name = name
        if self.name:
            self.experiment.set_name(self.name)

        warning = (
            "The comet_ml callback is currently in beta and is subject to change "
            "based on updates to `langchain`. Please report any issues to "
            "https://github.com/comet-ml/issue_tracking/issues with the tag "
            "`langchain`."
        )
        comet_ml.LOGGER.warning(warning)

        self.callback_columns: list = []
        self.action_records: list = []
        self.complexity_metrics = complexity_metrics
        if self.visualizations:
            spacy = import_spacy()
            self.nlp = spacy.load("en_core_web_sm")
        else:
            self.nlp = None

    def _init_resp(self) -> Dict:
        return {k: None for k in self.callback_columns}

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Run when LLM starts."""
        self.step += 1
        self.llm_starts += 1
        self.starts += 1

        metadata = self._init_resp()
        metadata.update({"action": "on_llm_start"})
        metadata.update(flatten_dict(serialized))
        metadata.update(self.get_custom_callback_meta())

        for prompt in prompts:
            prompt_resp = deepcopy(metadata)
            prompt_resp["prompts"] = prompt
            self.on_llm_start_records.append(prompt_resp)
            self.action_records.append(prompt_resp)

            if self.stream_logs:
                self._log_stream(prompt, metadata, self.step)

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Run when LLM generates a new token."""
        self.step += 1
        self.llm_streams += 1

        resp = self._init_resp()
        resp.update({"action": "on_llm_new_token", "token": token})
        resp.update(self.get_custom_callback_meta())

        self.action_records.append(resp)

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Run when LLM ends running."""
        self.step += 1
        self.llm_ends += 1
        self.ends += 1

        metadata = self._init_resp()
        metadata.update({"action": "on_llm_end"})
        metadata.update(flatten_dict(response.llm_output or {}))
        metadata.update(self.get_custom_callback_meta())

        output_complexity_metrics = []
        output_custom_metrics = []

        for prompt_idx, generations in enumerate(response.generations):
            for gen_idx, generation in enumerate(generations):
                text = generation.text

                generation_resp = deepcopy(metadata)
                generation_resp.update(flatten_dict(generation.dict()))

                complexity_metrics = self._get_complexity_metrics(text)
                if complexity_metrics:
                    output_complexity_metrics.append(complexity_metrics)
                    generation_resp.update(complexity_metrics)

                custom_metrics = self._get_custom_metrics(
                    generation, prompt_idx, gen_idx
                )
                if custom_metrics:
                    output_custom_metrics.append(custom_metrics)
                    generation_resp.update(custom_metrics)

                if self.stream_logs:
                    self._log_stream(text, metadata, self.step)

                self.action_records.append(generation_resp)
                self.on_llm_end_records.append(generation_resp)

        self._log_text_metrics(output_complexity_metrics, step=self.step)
        self._log_text_metrics(output_custom_metrics, step=self.step)

    def on_llm_error(
        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Run when LLM errors."""
        self.step += 1
        self.errors += 1

    def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> None:
        """Run when chain starts running."""
        self.step += 1
        self.chain_starts += 1
        self.starts += 1

        resp = self._init_resp()
        resp.update({"action": "on_chain_start"})
        resp.update(flatten_dict(serialized))
        resp.update(self.get_custom_callback_meta())

        comet_ml = import_comet_ml()

        for chain_input_key, chain_input_val in inputs.items():
            if isinstance(chain_input_val, str):
                input_resp = deepcopy(resp)
                if self.stream_logs:
                    self._log_stream(chain_input_val, resp, self.step)
                input_resp.update({chain_input_key: chain_input_val})
                self.action_records.append(input_resp)

            else:
                comet_ml.LOGGER.warning(
                    f"Unexpected data format provided! "
                    f"Input Value for {chain_input_key} will not be logged"
                )

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
        """Run when chain ends running."""
        self.step += 1
        self.chain_ends += 1
        self.ends += 1

        resp = self._init_resp()
        resp.update({"action": "on_chain_end"})
        resp.update(self.get_custom_callback_meta())

        comet_ml = import_comet_ml()

        for chain_output_key, chain_output_val in outputs.items():
            if isinstance(chain_output_val, str):
                output_resp = deepcopy(resp)
                if self.stream_logs:
                    self._log_stream(chain_output_val, resp, self.step)
                output_resp.update({chain_output_key: chain_output_val})
                self.action_records.append(output_resp)
            else:
                comet_ml.LOGGER.warning(
                    f"Unexpected data format provided! "
                    f"Output Value for {chain_output_key} will not be logged"
                )

    def on_chain_error(
        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Run when chain errors."""
        self.step += 1
        self.errors += 1

    def on_tool_start(
        self, serialized: Dict[str, Any], input_str: str, **kwargs: Any
    ) -> None:
        """Run when tool starts running."""
        self.step += 1
        self.tool_starts += 1
        self.starts += 1

        resp = self._init_resp()
        resp.update({"action": "on_tool_start"})
        resp.update(flatten_dict(serialized))
        resp.update(self.get_custom_callback_meta())
        if self.stream_logs:
            self._log_stream(input_str, resp, self.step)

        resp.update({"input_str": input_str})
        self.action_records.append(resp)

    def on_tool_end(self, output: str, **kwargs: Any) -> None:
        """Run when tool ends running."""
        self.step += 1
        self.tool_ends += 1
        self.ends += 1

        resp = self._init_resp()
        resp.update({"action": "on_tool_end"})
        resp.update(self.get_custom_callback_meta())
        if self.stream_logs:
            self._log_stream(output, resp, self.step)

        resp.update({"output": output})
        self.action_records.append(resp)

    def on_tool_error(
        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Run when tool errors."""
        self.step += 1
        self.errors += 1

    def on_text(self, text: str, **kwargs: Any) -> None:
        """
        Run when agent is ending.
        """
        self.step += 1
        self.text_ctr += 1

        resp = self._init_resp()
        resp.update({"action": "on_text"})
        resp.update(self.get_custom_callback_meta())
        if self.stream_logs:
            self._log_stream(text, resp, self.step)

        resp.update({"text": text})
        self.action_records.append(resp)

    def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None:
        """Run when agent ends running."""
        self.step += 1
        self.agent_ends += 1
        self.ends += 1

        resp = self._init_resp()
        output = finish.return_values["output"]
        log = finish.log

        resp.update({"action": "on_agent_finish", "log": log})
        resp.update(self.get_custom_callback_meta())
        if self.stream_logs:
            self._log_stream(output, resp, self.step)

        resp.update({"output": output})
        self.action_records.append(resp)

    def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
        """Run on agent action."""
        self.step += 1
        self.tool_starts += 1
        self.starts += 1

        tool = action.tool
        tool_input = action.tool_input
        log = action.log

        resp = self._init_resp()
        resp.update({"action": "on_agent_action", "log": log, "tool": tool})
        resp.update(self.get_custom_callback_meta())
        if self.stream_logs:
            self._log_stream(tool_input, resp, self.step)

        resp.update({"tool_input": tool_input})
        self.action_records.append(resp)

    def _get_complexity_metrics(self, text: str) -> dict:
        """Compute text complexity metrics using textstat.

        Parameters:
            text (str): The text to analyze.

        Returns:
            (dict): A dictionary containing the complexity metrics.
        """
        resp = {}
        if self.complexity_metrics:
            text_complexity_metrics = _fetch_text_complexity_metrics(text)
            resp.update(text_complexity_metrics)

        return resp

    def _get_custom_metrics(
        self, generation: Generation, prompt_idx: int, gen_idx: int
    ) -> dict:
        """Compute Custom Metrics for an LLM Generated Output

        Args:
            generation (LLMResult): Output generation from an LLM
            prompt_idx (int): List index of the input prompt
            gen_idx (int): List index of the generated output

        Returns:
            dict: A dictionary containing the custom metrics.
        """

        resp = {}
        if self.custom_metrics:
            custom_metrics = self.custom_metrics(generation, prompt_idx, gen_idx)
            resp.update(custom_metrics)

        return resp

    def flush_tracker(
        self,
        langchain_asset: Any = None,
        task_type: Optional[str] = "inference",
        workspace: Optional[str] = None,
        project_name: Optional[str] = "comet-langchain-demo",
        tags: Optional[Sequence] = None,
        name: Optional[str] = None,
        visualizations: Optional[List[str]] = None,
        complexity_metrics: bool = False,
        custom_metrics: Optional[Callable] = None,
        finish: bool = False,
        reset: bool = False,
    ) -> None:
        """Flush the tracker and setup the session.

        Everything after this will be a new table.

        Args:
            name: Name of the preformed session so far so it is identifyable
            langchain_asset: The langchain asset to save.
            finish: Whether to finish the run.

        Returns:
            None
        """
        self._log_session(langchain_asset)

        if langchain_asset:
            self._log_model(langchain_asset)

        if finish:
            self.experiment.end()

        if reset:
            self._reset(
                task_type,
                workspace,
                project_name,
                tags,
                name,
                visualizations,
                complexity_metrics,
                custom_metrics,
            )

    def _log_stream(self, prompt: str, metadata: dict, step: int) -> None:
        self.experiment.log_text(prompt, metadata=metadata, step=step)

    def _log_model(self, langchain_asset: Any) -> None:
        comet_ml = import_comet_ml()

        model_parameters = self._get_llm_parameters(langchain_asset)
        self.experiment.log_parameters(model_parameters, prefix="model")

        langchain_asset_path = Path(self.temp_dir.name, "model.json")
        model_name = self.name if self.name else LANGCHAIN_MODEL_NAME

        try:
            if hasattr(langchain_asset, "save"):
                langchain_asset.save(langchain_asset_path)
                self.experiment.log_model(model_name, str(langchain_asset_path))
        except (ValueError, AttributeError, NotImplementedError) as e:
            if hasattr(langchain_asset, "save_agent"):
                langchain_asset.save_agent(langchain_asset_path)
                self.experiment.log_model(model_name, str(langchain_asset_path))
            else:
                comet_ml.LOGGER.warning(
                    f"{e}"
                    " Could not save Langchain Asset "
                    f"for {langchain_asset.__class__.__name__}"
                )

    def _log_session(self, langchain_asset: Optional[Any] = None) -> None:
        llm_session_df = self._create_session_analysis_dataframe(langchain_asset)
        # Log the cleaned dataframe as a table
        self.experiment.log_table("langchain-llm-session.csv", llm_session_df)

        metadata = {"langchain_version": str(langchain.__version__)}
        # Log the langchain low-level records as a JSON file directly
        self.experiment.log_asset_data(
            self.action_records, "langchain-action_records.json", metadata=metadata
        )

        self._log_visualizations(llm_session_df)

    def _log_text_metrics(self, metrics: Sequence[dict], step: int) -> None:
        if not metrics:
            return

        metrics_summary = _summarize_metrics_for_generated_outputs(metrics)
        for key, value in metrics_summary.items():
            self.experiment.log_metrics(value, prefix=key, step=step)

    def _log_visualizations(self, session_df: Any) -> None:
        if not (self.visualizations and self.nlp):
            return

        spacy = import_spacy()
        comet_ml = import_comet_ml()

        prompts = session_df["prompts"].tolist()
        outputs = session_df["text"].tolist()

        for idx, (prompt, output) in enumerate(zip(prompts, outputs)):
            doc = self.nlp(output)
            sentence_spans = list(doc.sents)

            for visualization in self.visualizations:
                try:
                    html = spacy.displacy.render(
                        sentence_spans,
                        style=visualization,
                        options={"compact": True},
                        jupyter=False,
                        page=True,
                    )
                    self.experiment.log_asset_data(
                        html,
                        name=f"langchain-viz-{visualization}-{idx}.html",
                        metadata={"prompt": prompt},
                        step=idx,
                    )
                except Exception as e:
                    comet_ml.LOGGER.warning(e)

        return

    def _reset(
        self,
        task_type: Optional[str] = None,
        workspace: Optional[str] = None,
        project_name: Optional[str] = None,
        tags: Optional[Sequence] = None,
        name: Optional[str] = None,
        visualizations: Optional[List[str]] = None,
        complexity_metrics: bool = False,
        custom_metrics: Optional[Callable] = None,
    ) -> None:
        _task_type = task_type if task_type else self.task_type
        _workspace = workspace if workspace else self.workspace
        _project_name = project_name if project_name else self.project_name
        _tags = tags if tags else self.tags
        _name = name if name else self.name
        _visualizations = visualizations if visualizations else self.visualizations
        _complexity_metrics = (
            complexity_metrics if complexity_metrics else self.complexity_metrics
        )
        _custom_metrics = custom_metrics if custom_metrics else self.custom_metrics

        self.__init__(  # type: ignore
            task_type=_task_type,
            workspace=_workspace,
            project_name=_project_name,
            tags=_tags,
            name=_name,
            visualizations=_visualizations,
            complexity_metrics=_complexity_metrics,
            custom_metrics=_custom_metrics,
        )

        self.reset_callback_meta()
        self.temp_dir = tempfile.TemporaryDirectory()

    def _create_session_analysis_dataframe(self, langchain_asset: Any = None) -> dict:
        pd = import_pandas()

        llm_parameters = self._get_llm_parameters(langchain_asset)
        num_generations_per_prompt = llm_parameters.get("n", 1)

        llm_start_records_df = pd.DataFrame(self.on_llm_start_records)
        # Repeat each input row based on the number of outputs generated per prompt
        llm_start_records_df = llm_start_records_df.loc[
            llm_start_records_df.index.repeat(num_generations_per_prompt)
        ].reset_index(drop=True)
        llm_end_records_df = pd.DataFrame(self.on_llm_end_records)

        llm_session_df = pd.merge(
            llm_start_records_df,
            llm_end_records_df,
            left_index=True,
            right_index=True,
            suffixes=["_llm_start", "_llm_end"],
        )

        return llm_session_df

    def _get_llm_parameters(self, langchain_asset: Any = None) -> dict:
        if not langchain_asset:
            return {}
        try:
            if hasattr(langchain_asset, "agent"):
                llm_parameters = langchain_asset.agent.llm_chain.llm.dict()
            elif hasattr(langchain_asset, "llm_chain"):
                llm_parameters = langchain_asset.llm_chain.llm.dict()
            elif hasattr(langchain_asset, "llm"):
                llm_parameters = langchain_asset.llm.dict()
            else:
                llm_parameters = langchain_asset.dict()
        except Exception:
            return {}

        return llm_parameters
@@ -53,6 +53,15 @@ class OpenAICallbackHandler(BaseCallbackHandler):
    successful_requests: int = 0
    total_cost: float = 0.0

    def __repr__(self) -> str:
        return (
            f"Tokens Used: {self.total_tokens}\n"
            f"\tPrompt Tokens: {self.prompt_tokens}\n"
            f"\tCompletion Tokens: {self.completion_tokens}\n"
            f"Successful Requests: {self.successful_requests}\n"
            f"Total Cost (USD): ${self.total_cost}"
        )

    @property
    def always_verbose(self) -> bool:
        """Whether to call verbose callbacks even if verbose is False."""
@@ -74,10 +74,10 @@ class StdOutCallbackHandler(BaseCallbackHandler):
        **kwargs: Any,
    ) -> None:
        """If not the final action, print out observation."""
        if observation_prefix:
        if observation_prefix is not None:
            print_text(f"\n{observation_prefix}")
        print_text(output, color=color if color else self.color)
        if llm_prefix:
        if llm_prefix is not None:
            print_text(f"\n{llm_prefix}")

    def on_tool_error(
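The two-character change matters because a prefix can legitimately be the empty string (the self-ask agent's llm_prefix above returns ""): a truthiness check skips it, while `is not None` still emits the newline. A plain-Python illustration:

    llm_prefix = ""                     # intentional "empty prefix", not "no prefix"
    if llm_prefix:                      # old check: False, newline never printed
        print(f"\n{llm_prefix}")
    if llm_prefix is not None:          # new check: True, newline still printed
        print(f"\n{llm_prefix}")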
@@ -47,14 +47,19 @@ class AsyncIteratorCallbackHandler(AsyncCallbackHandler):
        while not self.queue.empty() or not self.done.is_set():
            # Wait for the next token in the queue,
            # but stop waiting if the done event is set
            done, _ = await asyncio.wait(
            done, other = await asyncio.wait(
                [
                    # NOTE: If you add other tasks here, update the code below,
                    # which assumes each set has exactly one task each
                    asyncio.ensure_future(self.queue.get()),
                    asyncio.ensure_future(self.done.wait()),
                ],
                return_when=asyncio.FIRST_COMPLETED,
            )

            # Cancel the other task
            other.pop().cancel()

            # Extract the value of the first completed task
            token_or_done = cast(Union[str, Literal[True]], done.pop().result())
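The fix is to capture asyncio.wait's pending set and cancel the losing task. A standalone sketch of the pattern (plain asyncio; the function name is invented):

    import asyncio

    async def first_of(queue: asyncio.Queue, done: asyncio.Event):
        finished, pending = await asyncio.wait(
            [asyncio.ensure_future(queue.get()), asyncio.ensure_future(done.wait())],
            return_when=asyncio.FIRST_COMPLETED,
        )
        # Without this cancel, the losing queue.get() stays subscribed and can
        # swallow the next token pushed onto the queue.
        pending.pop().cancel()
        return finished.pop().result()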
@@ -28,7 +28,7 @@ class OpenAPIEndpointChain(Chain, BaseModel):
|
||||
"""Chain interacts with an OpenAPI endpoint using natural language."""
|
||||
|
||||
api_request_chain: LLMChain
|
||||
api_response_chain: LLMChain
|
||||
api_response_chain: Optional[LLMChain]
|
||||
api_operation: APIOperation
|
||||
requests: Requests = Field(exclude=True, default_factory=Requests)
|
||||
param_mapping: _ParamMapping = Field(alias="param_mapping")
|
||||
@@ -144,15 +144,18 @@ class OpenAPIEndpointChain(Chain, BaseModel):
         self.callback_manager.on_text(
             response_text, color="blue", end="\n", verbose=self.verbose
         )
-        _answer = self.api_response_chain.predict_and_parse(
-            response=response_text,
-            instructions=instructions,
-        )
-        answer = cast(str, _answer)
-        self.callback_manager.on_text(
-            answer, color="yellow", end="\n", verbose=self.verbose
-        )
-        return self._get_output(answer, intermediate_steps)
+        if self.api_response_chain is not None:
+            _answer = self.api_response_chain.predict_and_parse(
+                response=response_text,
+                instructions=instructions,
+            )
+            answer = cast(str, _answer)
+            self.callback_manager.on_text(
+                answer, color="yellow", end="\n", verbose=self.verbose
+            )
+            return self._get_output(answer, intermediate_steps)
+        else:
+            return self._get_output(response_text, intermediate_steps)
 
     @classmethod
     def from_url_and_method(
@@ -184,6 +187,7 @@ class OpenAPIEndpointChain(Chain, BaseModel):
         requests: Optional[Requests] = None,
         verbose: bool = False,
         return_intermediate_steps: bool = False,
+        raw_response: bool = False,
         **kwargs: Any
         # TODO: Handle async
     ) -> "OpenAPIEndpointChain":
@@ -196,7 +200,10 @@ class OpenAPIEndpointChain(Chain, BaseModel):
         requests_chain = APIRequesterChain.from_llm_and_typescript(
             llm, typescript_definition=operation.to_typescript(), verbose=verbose
         )
-        response_chain = APIResponderChain.from_llm(llm, verbose=verbose)
+        if raw_response:
+            response_chain = None
+        else:
+            response_chain = APIResponderChain.from_llm(llm, verbose=verbose)
         _requests = requests or Requests()
         return cls(
             api_request_chain=requests_chain,
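A hedged sketch of using the new `raw_response` flag, which builds the chain without an `APIResponderChain` so the raw endpoint output is returned directly. The spec URL, path, and method are placeholders, and the exact `from_url_and_method` signature should be checked against the source:

```python
from langchain.chains import OpenAPIEndpointChain
from langchain.llms import OpenAI

chain = OpenAPIEndpointChain.from_url_and_method(
    spec_url="https://example.com/openapi.yaml",  # placeholder spec
    path="/search",                               # placeholder path
    method="get",                                 # placeholder method
    llm=OpenAI(),
    raw_response=True,  # skip the APIResponderChain; return raw response text
)
```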
@@ -1,13 +1,15 @@
 """Chain that interprets a prompt and executes python code to do math."""
+import math
+import re
 from typing import Dict, List
 
+import numexpr
 from pydantic import Extra
 
 from langchain.chains.base import Chain
 from langchain.chains.llm import LLMChain
 from langchain.chains.llm_math.prompt import PROMPT
 from langchain.prompts.base import BasePromptTemplate
-from langchain.python import PythonREPL
 from langchain.schema import BaseLanguageModel
 
 
@@ -50,34 +52,54 @@ class LLMMathChain(Chain):
         """
         return [self.output_key]
 
-    def _process_llm_result(self, t: str) -> Dict[str, str]:
-        python_executor = PythonREPL()
-        self.callback_manager.on_text(t, color="green", verbose=self.verbose)
-        t = t.strip()
-        if t.startswith("```python"):
-            code = t[9:-4]
-            output = python_executor.run(code)
+    def _evaluate_expression(self, expression: str) -> str:
+        try:
+            local_dict = {"pi": math.pi, "e": math.e}
+            output = str(
+                numexpr.evaluate(
+                    expression.strip(),
+                    global_dict={},  # restrict access to globals
+                    local_dict=local_dict,  # add common mathematical functions
+                )
+            )
+        except Exception as e:
+            raise ValueError(f"{e}. Please try again with a valid numerical expression")
+
+        # Remove any leading and trailing brackets from the output
+        return re.sub(r"^\[|\]$", "", output)
+
+    def _process_llm_result(self, llm_output: str) -> Dict[str, str]:
+        self.callback_manager.on_text(llm_output, color="green", verbose=self.verbose)
+        llm_output = llm_output.strip()
+        text_match = re.search(r"^```text(.*?)```", llm_output, re.DOTALL)
+        if text_match:
+            expression = text_match.group(1)
+            output = self._evaluate_expression(expression)
             self.callback_manager.on_text("\nAnswer: ", verbose=self.verbose)
             self.callback_manager.on_text(output, color="yellow", verbose=self.verbose)
             answer = "Answer: " + output
-        elif t.startswith("Answer:"):
-            answer = t
-        elif "Answer:" in t:
-            answer = "Answer: " + t.split("Answer:")[-1]
+        elif llm_output.startswith("Answer:"):
+            answer = llm_output
+        elif "Answer:" in llm_output:
+            answer = "Answer: " + llm_output.split("Answer:")[-1]
         else:
-            raise ValueError(f"unknown format from LLM: {t}")
+            raise ValueError(f"unknown format from LLM: {llm_output}")
         return {self.output_key: answer}
 
-    async def _aprocess_llm_result(self, t: str) -> Dict[str, str]:
-        python_executor = PythonREPL()
+    async def _aprocess_llm_result(self, llm_output: str) -> Dict[str, str]:
         if self.callback_manager.is_async:
-            await self.callback_manager.on_text(t, color="green", verbose=self.verbose)
+            await self.callback_manager.on_text(
+                llm_output, color="green", verbose=self.verbose
+            )
         else:
-            self.callback_manager.on_text(t, color="green", verbose=self.verbose)
-        t = t.strip()
-        if t.startswith("```python"):
-            code = t[9:-4]
-            output = python_executor.run(code)
+            self.callback_manager.on_text(
+                llm_output, color="green", verbose=self.verbose
+            )
+        llm_output = llm_output.strip()
+        text_match = re.search(r"^```text(.*?)```", llm_output, re.DOTALL)
+        if text_match:
+            expression = text_match.group(1)
+            output = self._evaluate_expression(expression)
             if self.callback_manager.is_async:
                 await self.callback_manager.on_text("\nAnswer: ", verbose=self.verbose)
                 await self.callback_manager.on_text(
@@ -89,12 +111,12 @@ class LLMMathChain(Chain):
                     output, color="yellow", verbose=self.verbose
                 )
             answer = "Answer: " + output
-        elif t.startswith("Answer:"):
-            answer = t
-        elif "Answer:" in t:
-            answer = "Answer: " + t.split("Answer:")[-1]
+        elif llm_output.startswith("Answer:"):
+            answer = llm_output
+        elif "Answer:" in llm_output:
+            answer = "Answer: " + llm_output.split("Answer:")[-1]
         else:
-            raise ValueError(f"unknown format from LLM: {t}")
+            raise ValueError(f"unknown format from LLM: {llm_output}")
         return {self.output_key: answer}
 
     def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
@@ -102,8 +124,10 @@ class LLMMathChain(Chain):
             prompt=self.prompt, llm=self.llm, callback_manager=self.callback_manager
         )
         self.callback_manager.on_text(inputs[self.input_key], verbose=self.verbose)
-        t = llm_executor.predict(question=inputs[self.input_key], stop=["```output"])
-        return self._process_llm_result(t)
+        llm_output = llm_executor.predict(
+            question=inputs[self.input_key], stop=["```output"]
+        )
+        return self._process_llm_result(llm_output)
 
     async def _acall(self, inputs: Dict[str, str]) -> Dict[str, str]:
         llm_executor = LLMChain(
@@ -115,10 +139,10 @@ class LLMMathChain(Chain):
             )
         else:
             self.callback_manager.on_text(inputs[self.input_key], verbose=self.verbose)
-        t = await llm_executor.apredict(
+        llm_output = await llm_executor.apredict(
             question=inputs[self.input_key], stop=["```output"]
         )
-        return await self._aprocess_llm_result(t)
+        return await self._aprocess_llm_result(llm_output)
 
     @property
     def _chain_type(self) -> str:
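To make the new evaluation path concrete, a standalone sketch of what `_evaluate_expression` does with the `37593 * 67` example from the prompt below:

```python
import math
import re

import numexpr  # pip install numexpr

expression = "37593 * 67"
output = str(
    numexpr.evaluate(
        expression.strip(),
        global_dict={},  # no access to module globals
        local_dict={"pi": math.pi, "e": math.e},  # common constants only
    )
)
# Strip any brackets numexpr puts around array results
print(re.sub(r"^\[|\]$", "", output))  # -> 2518731
```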
@@ -1,12 +1,13 @@
 # flake8: noqa
 from langchain.prompts.prompt import PromptTemplate
 
-_PROMPT_TEMPLATE = """Translate a math problem into Python code that can be executed in Python 3 REPL. Use the output of running this code to answer the question.
+_PROMPT_TEMPLATE = """Translate a math problem into an expression that can be executed using Python's numexpr library. Use the output of running this code to answer the question.
 
 Question: ${{Question with math problem.}}
-```python
-${{Code that solves the problem and prints the solution}}
+```text
+${{single line mathematical expression that solves the problem}}
 ```
+...numexpr.evaluate(text)...
 ```output
 ${{Output of running the code}}
 ```
@@ -16,9 +17,10 @@ Begin.
 
 Question: What is 37593 * 67?
 
-```python
-print(37593 * 67)
+```text
+37593 * 67
 ```
+...numexpr.evaluate("37593 * 67")...
 ```output
 2518731
 ```
@@ -27,4 +29,7 @@ Answer: 2518731
 Question: {question}
 """
 
-PROMPT = PromptTemplate(input_variables=["question"], template=_PROMPT_TEMPLATE)
+PROMPT = PromptTemplate(
+    input_variables=["question"],
+    template=_PROMPT_TEMPLATE,
+)
@@ -57,7 +57,7 @@ class LLMRequestsChain(Chain):
         except ImportError:
             raise ValueError(
                 "Could not import bs4 python package. "
-                "Please it install it with `pip install bs4`."
+                "Please install it with `pip install bs4`."
             )
         return values
 
@@ -55,7 +55,7 @@ class OpenAIModerationChain(Chain):
         except ImportError:
             raise ValueError(
                 "Could not import openai python package. "
-                "Please it install it with `pip install openai`."
+                "Please install it with `pip install openai`."
             )
         return values
 
@@ -53,7 +53,7 @@ class Crawler:
         except ImportError:
             raise ValueError(
                 "Could not import playwright python package. "
-                "Please it install it with `pip install playwright`."
+                "Please install it with `pip install playwright`."
             )
         self.browser: Browser = (
             sync_playwright().start().chromium.launch(headless=False)
@@ -13,8 +13,8 @@ from langchain.chains.llm import LLMChain
 from langchain.chains.pal.colored_object_prompt import COLORED_OBJECT_PROMPT
 from langchain.chains.pal.math_prompt import MATH_PROMPT
 from langchain.prompts.base import BasePromptTemplate
-from langchain.python import PythonREPL
 from langchain.schema import BaseLanguageModel
+from langchain.utilities import PythonREPL
 
 
 class PALChain(Chain):
@@ -196,7 +196,7 @@ def load_qa_chain(
     Args:
         llm: Language Model to use in the chain.
        chain_type: Type of document combining chain to use. Should be one of "stuff",
-            "map_reduce", and "refine".
+            "map_reduce", "map_rerank", and "refine".
        verbose: Whether chains should be run in verbose mode or not. Note that this
            applies to all chains that make up the final chain.
        callback_manager: Callback manager to use for the chain.
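A small sketch of selecting the newly documented option; the model choice is illustrative:

```python
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

# "map_rerank" answers per document, scores each answer, and keeps the best
chain = load_qa_chain(OpenAI(temperature=0), chain_type="map_rerank")
```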
@@ -1,13 +1,13 @@
 """Chain for interacting with SQL Database."""
 from __future__ import annotations
 
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
 
 from pydantic import Extra, Field
 
 from langchain.chains.base import Chain
 from langchain.chains.llm import LLMChain
-from langchain.chains.sql_database.prompt import DECIDER_PROMPT, PROMPT
+from langchain.chains.sql_database.prompt import DECIDER_PROMPT, PROMPT, SQL_PROMPTS
 from langchain.prompts.base import BasePromptTemplate
 from langchain.schema import BaseLanguageModel
 from langchain.sql_database import SQLDatabase
@@ -28,7 +28,7 @@ class SQLDatabaseChain(Chain):
     """LLM wrapper to use."""
     database: SQLDatabase = Field(exclude=True)
     """SQL Database to connect to."""
-    prompt: BasePromptTemplate = PROMPT
+    prompt: Optional[BasePromptTemplate] = None
     """Prompt to use to translate natural language to SQL."""
     top_k: int = 5
     """Number of results to return from the query"""
@@ -65,8 +65,9 @@ class SQLDatabaseChain(Chain):
         return [self.output_key, "intermediate_steps"]
 
     def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
-        llm_chain = LLMChain(llm=self.llm, prompt=self.prompt)
-        input_text = f"{inputs[self.input_key]} \nSQLQuery:"
+        prompt = self.prompt or SQL_PROMPTS.get(self.database.dialect, PROMPT)
+        llm_chain = LLMChain(llm=self.llm, prompt=prompt)
+        input_text = f"{inputs[self.input_key]}\nSQLQuery:"
         self.callback_manager.on_text(input_text, verbose=self.verbose)
         # If not present, then defaults to None which is all tables.
         table_names_to_use = inputs.get("table_names_to_use")
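The dialect dispatch added here, sketched standalone (the sqlite URI is a placeholder):

```python
from langchain.chains.sql_database.prompt import PROMPT, SQL_PROMPTS
from langchain.sql_database import SQLDatabase

db = SQLDatabase.from_uri("sqlite:///example.db")  # placeholder database
# Prefer a dialect-specific prompt; fall back to the generic PROMPT
prompt = SQL_PROMPTS.get(db.dialect, PROMPT)  # "sqlite" -> SQLITE_PROMPT
```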
@@ -2,6 +2,7 @@
 from langchain.output_parsers.list import CommaSeparatedListOutputParser
 from langchain.prompts.prompt import PromptTemplate
 
+
 _DEFAULT_TEMPLATE = """Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer. Unless the user specifies in his question a specific number of examples he wishes to obtain, always limit your query to at most {top_k} results. You can order the results by a relevant column to return the most interesting examples in the database.
 
 Never query for all the columns from a specific table, only ask for the few relevant columns given the question.
@@ -38,3 +39,149 @@ DECIDER_PROMPT = PromptTemplate(
     template=_DECIDER_TEMPLATE,
     output_parser=CommaSeparatedListOutputParser(),
 )
+
+
+_mssql_prompt = """You are an MS SQL expert. Given an input question, first create a syntactically correct MS SQL query to run, then look at the results of the query and return the answer to the input question.
+Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the TOP clause as per MS SQL. You can order the results to return the most informative data in the database.
+Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in square brackets ([]) to denote them as delimited identifiers.
+Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
+
+Use the following format:
+
+Question: "Question here"
+SQLQuery: "SQL Query to run"
+SQLResult: "Result of the SQLQuery"
+Answer: "Final answer here"
+
+Only use the following tables:
+{table_info}
+
+Question: {input}"""
+
+MSSQL_PROMPT = PromptTemplate(
+    input_variables=["input", "table_info", "top_k"], template=_mssql_prompt
+)
+
+
+_mysql_prompt = """You are a MySQL expert. Given an input question, first create a syntactically correct MySQL query to run, then look at the results of the query and return the answer to the input question.
+Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per MySQL. You can order the results to return the most informative data in the database.
+Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in backticks (`) to denote them as delimited identifiers.
+Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
+
+Use the following format:
+
+Question: "Question here"
+SQLQuery: "SQL Query to run"
+SQLResult: "Result of the SQLQuery"
+Answer: "Final answer here"
+
+Only use the following tables:
+{table_info}
+
+Question: {input}"""
+
+MYSQL_PROMPT = PromptTemplate(
+    input_variables=["input", "table_info", "top_k"],
+    template=_mysql_prompt,
+)
+
+
+_mariadb_prompt = """You are a MariaDB expert. Given an input question, first create a syntactically correct MariaDB query to run, then look at the results of the query and return the answer to the input question.
+Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per MariaDB. You can order the results to return the most informative data in the database.
+Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in backticks (`) to denote them as delimited identifiers.
+Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
+
+Use the following format:
+
+Question: "Question here"
+SQLQuery: "SQL Query to run"
+SQLResult: "Result of the SQLQuery"
+Answer: "Final answer here"
+
+Only use the following tables:
+{table_info}
+
+Question: {input}"""
+
+MARIADB_PROMPT = PromptTemplate(
+    input_variables=["input", "table_info", "top_k"],
+    template=_mariadb_prompt,
+)
+
+
+_oracle_prompt = """You are an Oracle SQL expert. Given an input question, first create a syntactically correct Oracle SQL query to run, then look at the results of the query and return the answer to the input question.
+Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the FETCH FIRST n ROWS ONLY clause as per Oracle SQL. You can order the results to return the most informative data in the database.
+Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
+Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
+
+Use the following format:
+
+Question: "Question here"
+SQLQuery: "SQL Query to run"
+SQLResult: "Result of the SQLQuery"
+Answer: "Final answer here"
+
+Only use the following tables:
+{table_info}
+
+Question: {input}"""
+
+ORACLE_PROMPT = PromptTemplate(
+    input_variables=["input", "table_info", "top_k"],
+    template=_oracle_prompt,
+)
+
+
+_postgres_prompt = """You are a PostgreSQL expert. Given an input question, first create a syntactically correct PostgreSQL query to run, then look at the results of the query and return the answer to the input question.
+Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per PostgreSQL. You can order the results to return the most informative data in the database.
+Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
+Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
+
+Use the following format:
+
+Question: "Question here"
+SQLQuery: "SQL Query to run"
+SQLResult: "Result of the SQLQuery"
+Answer: "Final answer here"
+
+Only use the following tables:
+{table_info}
+
+Question: {input}"""
+
+POSTGRES_PROMPT = PromptTemplate(
+    input_variables=["input", "table_info", "top_k"], template=_postgres_prompt
+)
+
+
+_sqlite_prompt = """You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.
+Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
+Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
+Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
+
+Use the following format:
+
+Question: "Question here"
+SQLQuery: "SQL Query to run"
+SQLResult: "Result of the SQLQuery"
+Answer: "Final answer here"
+
+Only use the following tables:
+{table_info}
+
+Question: {input}"""
+
+SQLITE_PROMPT = PromptTemplate(
+    input_variables=["input", "table_info", "top_k"],
+    template=_sqlite_prompt,
+)
+
+
+SQL_PROMPTS = {
+    "mssql": MSSQL_PROMPT,
+    "mysql": MYSQL_PROMPT,
+    "mariadb": MARIADB_PROMPT,
+    "oracle": ORACLE_PROMPT,
+    "postgresql": POSTGRES_PROMPT,
+    "sqlite": SQLITE_PROMPT,
+}
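All six dialect prompts share the same input variables, so they can be formatted interchangeably; a hedged example with made-up table info:

```python
from langchain.chains.sql_database.prompt import SQL_PROMPTS

text = SQL_PROMPTS["postgresql"].format(
    input="How many employees are there?",
    table_info="CREATE TABLE employees (id INTEGER, name TEXT)",  # placeholder
    top_k=5,
)
print(text.splitlines()[0])  # -> "You are a PostgreSQL expert. ..."
```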
@@ -1,5 +1,6 @@
+from langchain.chat_models.anthropic import ChatAnthropic
 from langchain.chat_models.azure_openai import AzureChatOpenAI
 from langchain.chat_models.openai import ChatOpenAI
 from langchain.chat_models.promptlayer_openai import PromptLayerChatOpenAI
 
-__all__ = ["ChatOpenAI", "AzureChatOpenAI", "PromptLayerChatOpenAI"]
+__all__ = ["ChatOpenAI", "AzureChatOpenAI", "PromptLayerChatOpenAI", "ChatAnthropic"]
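A minimal sketch of the newly exported chat model, assuming an `ANTHROPIC_API_KEY` is set in the environment:

```python
from langchain.chat_models import ChatAnthropic
from langchain.schema import HumanMessage

chat = ChatAnthropic()  # reads ANTHROPIC_API_KEY from the environment
print(chat([HumanMessage(content="Say hello in one word.")]))
```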
Some files were not shown because too many files have changed in this diff.