mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-07 17:50:35 +00:00
Compare commits
99 Commits
agents-4-1
...
dev2049/do
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b2a8fb35a0 | ||
|
|
03085d6447 | ||
|
|
49122a96e7 | ||
|
|
f22b9d0e57 | ||
|
|
0cf934ce7d | ||
|
|
2c0023393b | ||
|
|
93d53e417a | ||
|
|
487a57ffe6 | ||
|
|
3d8243ec95 | ||
|
|
738ee56b86 | ||
|
|
20f530e9c5 | ||
|
|
73bc70b4fa | ||
|
|
b4de839ed8 | ||
|
|
651cb62556 | ||
|
|
199cb855ea | ||
|
|
e5ffbee5eb | ||
|
|
acfd11c8e4 | ||
|
|
b21fe0a18f | ||
|
|
77bb6c99f7 | ||
|
|
3a1bdce3f5 | ||
|
|
a6664be79c | ||
|
|
6200a2a00e | ||
|
|
a5ad1c270f | ||
|
|
61d40ba042 | ||
|
|
7e79f8c136 | ||
|
|
215dcc2d26 | ||
|
|
8191c6b81a | ||
|
|
88a8f59aa7 | ||
|
|
cc6fe18152 | ||
|
|
61e09229c8 | ||
|
|
05a8aa5447 | ||
|
|
d2f922f525 | ||
|
|
e933be9605 | ||
|
|
aa9d5707e0 | ||
|
|
1ecbeec24e | ||
|
|
2fd24d31a4 | ||
|
|
3bc703b0d6 | ||
|
|
1e91266a8a | ||
|
|
04e1d6c699 | ||
|
|
a71a2c0eb2 | ||
|
|
bf78200f55 | ||
|
|
87544d2378 | ||
|
|
bb6c459f7a | ||
|
|
36720cb57f | ||
|
|
d7942a9f19 | ||
|
|
46542dc774 | ||
|
|
3943759a90 | ||
|
|
5ef2d1e2a1 | ||
|
|
4aedbeaffb | ||
|
|
2dbb5261b5 | ||
|
|
0684aa081a | ||
|
|
0e797a3ff9 | ||
|
|
ae528fd06e | ||
|
|
7d3e6389f2 | ||
|
|
daee0b2b97 | ||
|
|
8f22949dc4 | ||
|
|
130e4b9fcb | ||
|
|
d54b977d4e | ||
|
|
b7dea80cba | ||
|
|
b7f2061736 | ||
|
|
34fb56b633 | ||
|
|
d2520a5f1e | ||
|
|
36c10f8a52 | ||
|
|
27cdf8d675 | ||
|
|
9a0356d276 | ||
|
|
a66cab8b71 | ||
|
|
96809b5794 | ||
|
|
8faef1a91a | ||
|
|
c03a65c6dc | ||
|
|
f19b3890c9 | ||
|
|
e55db5841a | ||
|
|
d6b2f2b9bd | ||
|
|
c757c3cde4 | ||
|
|
6adf2d1c39 | ||
|
|
9181cd9b22 | ||
|
|
68cd37175e | ||
|
|
6e48107734 | ||
|
|
4adfd790f0 | ||
|
|
a63bfb6c9f | ||
|
|
dbbc340f25 | ||
|
|
3e0c44bae8 | ||
|
|
7b1f0656b8 | ||
|
|
10e4b32ecb | ||
|
|
74342ab209 | ||
|
|
a78f55b851 | ||
|
|
26c8cd1ea2 | ||
|
|
5e66d05928 | ||
|
|
99b1983461 | ||
|
|
89c63cf8a6 | ||
|
|
0b542661b4 | ||
|
|
126d7f11dd | ||
|
|
599e17cea8 | ||
|
|
575b717d10 | ||
|
|
72b7d76d79 | ||
|
|
b7dc04c086 | ||
|
|
8a050ba4bf | ||
|
|
364257d967 | ||
|
|
f329196cf4 | ||
|
|
8e386613ac |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -142,3 +142,6 @@ wandb/
|
||||
# asdf tool versions
|
||||
.tool-versions
|
||||
/.ruff_cache/
|
||||
|
||||
*.pkl
|
||||
*.bin
|
||||
@@ -37,6 +37,10 @@ A minimal example on how to run LangChain on Vercel using Flask.
|
||||
|
||||
A minimal example on how to deploy LangChain to DigitalOcean App Platform.
|
||||
|
||||
## [Google Cloud Run](https://github.com/homanp/gcp-langchain)
|
||||
|
||||
A minimal example on how to deploy LangChain to Google Cloud Run.
|
||||
|
||||
## [SteamShip](https://github.com/steamship-core/steamship-langchain/)
|
||||
|
||||
This repository contains LangChain adapters for Steamship, enabling LangChain developers to rapidly deploy their apps on Steamship.
|
||||
|
||||
15
docs/ecosystem/analyticdb.md
Normal file
15
docs/ecosystem/analyticdb.md
Normal file
@@ -0,0 +1,15 @@
|
||||
# AnalyticDB
|
||||
|
||||
This page covers how to use the AnalyticDB ecosystem within LangChain.
|
||||
|
||||
### VectorStore
|
||||
|
||||
There exists a wrapper around AnalyticDB, allowing you to use it as a vectorstore,
|
||||
whether for semantic search or example selection.
|
||||
|
||||
To import this vectorstore:
|
||||
```python
|
||||
from langchain.vectorstores import AnalyticDB
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the AnalyticDB wrapper, see [this notebook](../modules/indexes/vectorstores/examples/analyticdb.ipynb)
|
||||
65
docs/ecosystem/myscale.md
Normal file
65
docs/ecosystem/myscale.md
Normal file
@@ -0,0 +1,65 @@
|
||||
# MyScale
|
||||
|
||||
This page covers how to use MyScale vector database within LangChain.
|
||||
It is broken into two parts: installation and setup, and then references to specific MyScale wrappers.
|
||||
|
||||
With MyScale, you can manage both structured and unstructured (vectorized) data, and perform joint queries and analytics on both types of data using SQL. Plus, MyScale's cloud-native OLAP architecture, built on top of ClickHouse, enables lightning-fast data processing even on massive datasets.
|
||||
|
||||
## Introduction
|
||||
|
||||
[Overview to MyScale and High performance vector search](https://docs.myscale.com/en/overview/)
|
||||
|
||||
You can now register on our SaaS and [start a cluster now!](https://docs.myscale.com/en/quickstart/)
|
||||
|
||||
If you are also interested in how we managed to integrate SQL and vector, please refer to [this document](https://docs.myscale.com/en/vector-reference/) for further syntax reference.
|
||||
|
||||
We also deliver with live demo on huggingface! Please checkout our [huggingface space](https://huggingface.co/myscale)! They search millions of vector within a blink!
|
||||
|
||||
## Installation and Setup
|
||||
- Install the Python SDK with `pip install clickhouse-connect`
|
||||
|
||||
### Setting up envrionments
|
||||
|
||||
There are two ways to set up parameters for myscale index.
|
||||
|
||||
1. Environment Variables
|
||||
|
||||
Before you run the app, please set the environment variable with `export`:
|
||||
`export MYSCALE_URL='<your-endpoints-url>' MYSCALE_PORT=<your-endpoints-port> MYSCALE_USERNAME=<your-username> MYSCALE_PASSWORD=<your-password> ...`
|
||||
|
||||
You can easily find your account, password and other info on our SaaS. For details please refer to [this document](https://docs.myscale.com/en/cluster-management/)
|
||||
Every attributes under `MyScaleSettings` can be set with prefix `MYSCALE_` and is case insensitive.
|
||||
|
||||
2. Create `MyScaleSettings` object with parameters
|
||||
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import MyScale, MyScaleSettings
|
||||
config = MyScaleSetting(host="<your-backend-url>", port=8443, ...)
|
||||
index = MyScale(embedding_function, config)
|
||||
index.add_documents(...)
|
||||
```
|
||||
|
||||
## Wrappers
|
||||
supported functions:
|
||||
- `add_texts`
|
||||
- `add_documents`
|
||||
- `from_texts`
|
||||
- `from_documents`
|
||||
- `similarity_search`
|
||||
- `asimilarity_search`
|
||||
- `similarity_search_by_vector`
|
||||
- `asimilarity_search_by_vector`
|
||||
- `similarity_search_with_relevance_scores`
|
||||
|
||||
### VectorStore
|
||||
|
||||
There exists a wrapper around MyScale database, allowing you to use it as a vectorstore,
|
||||
whether for semantic search or similar example retrieval.
|
||||
|
||||
To import this vectorstore:
|
||||
```python
|
||||
from langchain.vectorstores import MyScale
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the MyScale wrapper, see [this notebook](../modules/indexes/vectorstores/examples/myscale.ipynb)
|
||||
@@ -30,4 +30,4 @@ To import this vectorstore:
|
||||
from langchain.vectorstores import Weaviate
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the Weaviate wrapper, see [this notebook](../modules/indexes/vectorstores/getting_started.ipynb)
|
||||
For a more detailed walkthrough of the Weaviate wrapper, see [this notebook](../modules/indexes/vectorstores/examples/weaviate.ipynb)
|
||||
|
||||
43
docs/ecosystem/yeagerai.md
Normal file
43
docs/ecosystem/yeagerai.md
Normal file
@@ -0,0 +1,43 @@
|
||||
# Yeager.ai
|
||||
|
||||
This page covers how to use [Yeager.ai](https://yeager.ai) to generate LangChain tools and agents.
|
||||
|
||||
## What is Yeager.ai?
|
||||
Yeager.ai is an ecosystem designed to simplify the process of creating AI agents and tools.
|
||||
|
||||
It features yAgents, a No-code LangChain Agent Builder, which enables users to build, test, and deploy AI solutions with ease. Leveraging the LangChain framework, yAgents allows seamless integration with various language models and resources, making it suitable for developers, researchers, and AI enthusiasts across diverse applications.
|
||||
|
||||
## yAgents
|
||||
Low code generative agent designed to help you build, prototype, and deploy Langchain tools with ease.
|
||||
|
||||
### How to use?
|
||||
```
|
||||
pip install yeagerai-agent
|
||||
yeagerai-agent
|
||||
```
|
||||
Go to http://127.0.0.1:7860
|
||||
|
||||
This will install the necessary dependencies and set up yAgents on your system. After the first run, yAgents will create a .env file where you can input your OpenAI API key. You can do the same directly from the Gradio interface under the tab "Settings".
|
||||
|
||||
`OPENAI_API_KEY=<your_openai_api_key_here>`
|
||||
|
||||
We recommend using GPT-4,. However, the tool can also work with GPT-3 if the problem is broken down sufficiently.
|
||||
|
||||
### Creating and Executing Tools with yAgents
|
||||
yAgents makes it easy to create and execute AI-powered tools. Here's a brief overview of the process:
|
||||
1. Create a tool: To create a tool, provide a natural language prompt to yAgents. The prompt should clearly describe the tool's purpose and functionality. For example:
|
||||
`create a tool that returns the n-th prime number`
|
||||
|
||||
2. Load the tool into the toolkit: To load a tool into yAgents, simply provide a command to yAgents that says so. For example:
|
||||
`load the tool that you just created it into your toolkit`
|
||||
|
||||
3. Execute the tool: To run a tool or agent, simply provide a command to yAgents that includes the name of the tool and any required parameters. For example:
|
||||
`generate the 50th prime number`
|
||||
|
||||
You can see a video of how it works [here](https://www.youtube.com/watch?v=KA5hCM3RaWE).
|
||||
|
||||
As you become more familiar with yAgents, you can create more advanced tools and agents to automate your work and enhance your productivity.
|
||||
|
||||
For more information, see [yAgents' Github](https://github.com/yeagerai/yeagerai-agent) or our [docs](https://yeagerai.gitbook.io/docs/general/welcome-to-yeager.ai)
|
||||
|
||||
|
||||
@@ -280,6 +280,17 @@ Proprietary
|
||||
|
||||
---
|
||||
|
||||
.. link-button:: https://anysummary.app
|
||||
:type: url
|
||||
:text: Summarize any file with AI
|
||||
:classes: stretched-link btn-lg
|
||||
|
||||
+++
|
||||
|
||||
Summarize not only long docs, interview audio or video files quickly, but also entire websites and YouTube videos. Share or download your generated summaries to collaborate with others, or revisit them at any time! Bonus: `@anysummary <https://twitter.com/anysummary>`_ on Twitter will also summarize any thread it is tagged in.
|
||||
|
||||
---
|
||||
|
||||
.. link-button:: https://twitter.com/dory111111/status/1608406234646052870?s=20&t=XYlrbKM0ornJsrtGa0br-g
|
||||
:type: url
|
||||
:text: AI Assisted SQL Query Generator
|
||||
|
||||
@@ -5,6 +5,7 @@ LangChain is a framework for developing applications powered by language models.
|
||||
|
||||
- *Be data-aware*: connect a language model to other sources of data
|
||||
- *Be agentic*: allow a language model to interact with its environment
|
||||
- *Be stateful*: store and retrieve application state in a manner that enables a language model to make increasingly complex decisions
|
||||
|
||||
The LangChain framework is designed with the above principles in mind.
|
||||
|
||||
@@ -63,6 +64,10 @@ Use Cases
|
||||
|
||||
The above modules can be used in a variety of ways. LangChain also provides guidance and assistance in this. Below are some of the common use cases LangChain supports.
|
||||
|
||||
- `Autonomous Agents <./use_cases/autonomous_agents.html>`_: Autonomous agents are long running agents that take many steps in an attempt to accomplish an objective. Examples include AutoGPT and BabyAGI.
|
||||
|
||||
- `Agent Simulations <./use_cases/agent_simulations.html>`_: Putting agents in a sandbox and observing how they interact with each other or to events can be an interesting way to observe their long-term memory abilities.
|
||||
|
||||
- `Personal Assistants <./use_cases/personal_assistants.html>`_: The main LangChain use case. Personal assistants need to take actions, remember interactions, and have knowledge about your data.
|
||||
|
||||
- `Question Answering <./use_cases/question_answering.html>`_: The second big LangChain use case. Answering questions over specific documents, only utilizing the information in those documents to construct an answer.
|
||||
@@ -89,6 +94,8 @@ The above modules can be used in a variety of ways. LangChain also provides guid
|
||||
:hidden:
|
||||
|
||||
./use_cases/personal_assistants.md
|
||||
./use_cases/autonomous_agents.md
|
||||
./use_cases/agent_simulations.md
|
||||
./use_cases/question_answering.md
|
||||
./use_cases/chatbots.md
|
||||
./use_cases/tabular.rst
|
||||
@@ -153,6 +160,8 @@ Additional collection of resources we think may be useful as you develop your ap
|
||||
|
||||
- `Discord <https://discord.gg/6adMQxSpJS>`_: Join us on our Discord to discuss all things LangChain!
|
||||
|
||||
- `YouTube <./youtube.html>`_: A collection of the LangChain tutorials and videos.
|
||||
|
||||
- `Production Support <https://forms.gle/57d8AmXBYp8PP8tZA>`_: As you move your LangChains into production, we'd love to offer more comprehensive support. Please fill out this form and we'll set up a dedicated support Slack channel.
|
||||
|
||||
|
||||
@@ -169,4 +178,5 @@ Additional collection of resources we think may be useful as you develop your ap
|
||||
./tracing.md
|
||||
./use_cases/model_laboratory.ipynb
|
||||
Discord <https://discord.gg/6adMQxSpJS>
|
||||
./youtube.md
|
||||
Production Support <https://forms.gle/57d8AmXBYp8PP8tZA>
|
||||
|
||||
@@ -20,13 +20,14 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "6064f080",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Custom LLMChain\n",
|
||||
"\n",
|
||||
"The first way to create a custom agent is to use an existing Agent class, but use a custom LLMChain. This is the simplest way to create a custom Agent. It is highly reccomended that you work with the `ZeroShotAgent`, as at the moment that is by far the most generalizable one. \n",
|
||||
"The first way to create a custom agent is to use an existing Agent class, but use a custom LLMChain. This is the simplest way to create a custom Agent. It is highly recommended that you work with the `ZeroShotAgent`, as at the moment that is by far the most generalizable one. \n",
|
||||
"\n",
|
||||
"Most of the work in creating the custom LLMChain comes down to the prompt. Because we are using an existing agent class to parse the output, it is very important that the prompt say to produce text in that format. Additionally, we currently require an `agent_scratchpad` input variable to put notes on previous actions and observations. This should almost always be the final part of the prompt. However, besides those instructions, you can customize the prompt as you wish.\n",
|
||||
"\n",
|
||||
|
||||
167
docs/modules/agents/toolkits/examples/powerbi.ipynb
Normal file
167
docs/modules/agents/toolkits/examples/powerbi.ipynb
Normal file
@@ -0,0 +1,167 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "0e499e90-7a6d-4fab-8aab-31a4df417601",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PowerBI Dataset Agent\n",
|
||||
"\n",
|
||||
"This notebook showcases an agent designed to interact with a Power BI Dataset. The agent is designed to answer more general questions about a dataset, as well as recover from errors.\n",
|
||||
"\n",
|
||||
"Note that, as this agent is in active development, all answers might not be correct. It runs against the [executequery endpoint](https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/execute-queries), which does not allow deletes.\n",
|
||||
"\n",
|
||||
"### Some notes\n",
|
||||
"- It relies on authentication with the azure.identity package, which can be installed with `pip install azure-identity`. Alternatively you can create the powerbi dataset with a token as a string without supplying the credentials.\n",
|
||||
"- You can also supply a username to impersonate for use with datasets that have RLS enabled. \n",
|
||||
"- The toolkit uses a LLM to create the query from the question, the agent uses the LLM for the overall execution.\n",
|
||||
"- Testing was done mostly with a `text-davinci-003` model, codex models did not seem to perform ver well."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ec927ac6-9b2a-4e8a-9a6e-3e429191875c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Initialization"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "53422913-967b-4f2a-8022-00269c1be1b1",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents.agent_toolkits import create_pbi_agent\n",
|
||||
"from langchain.agents.agent_toolkits import PowerBIToolkit\n",
|
||||
"from langchain.utilities.powerbi import PowerBIDataset\n",
|
||||
"from langchain.llms.openai import AzureOpenAI\n",
|
||||
"from langchain.agents import AgentExecutor\n",
|
||||
"from azure.identity import DefaultAzureCredential"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "090f3699-79c6-4ce1-ab96-a94f0121fd64",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = AzureOpenAI(temperature=0, deployment_name=\"text-davinci-003\", verbose=True)\n",
|
||||
"toolkit = PowerBIToolkit(\n",
|
||||
" powerbi=PowerBIDataset(None, \"<dataset_id>\", ['table1', 'table2'], DefaultAzureCredential()), \n",
|
||||
" llm=llm\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"agent_executor = create_pbi_agent(\n",
|
||||
" llm=llm,\n",
|
||||
" toolkit=toolkit,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "36ae48c7-cb08-4fef-977e-c7d4b96a464b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: describing a table"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ff70e83d-5ad0-4fc7-bb96-27d82ac166d7",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.run(\"Describe table1\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "9abcfe8e-1868-42a4-8345-ad2d9b44c681",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: simple query on a table\n",
|
||||
"In this example, the agent actually figures out the correct query to get a row count of the table."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bea76658-a65b-47e2-b294-6d52c5556246",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.run(\"How many records are in table1?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6fbc26af-97e4-4a21-82aa-48bdc992da26",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: running queries"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "17bea710-4a23-4de0-b48e-21d57be48293",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.run(\"How many records are there by dimension1 in table2?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "474dddda-c067-4eeb-98b1-e763ee78b18c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.run(\"What unique values are there for dimensions2 in table2\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -97,7 +97,7 @@
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"class CalculatorInput(BaseModel):\n",
|
||||
" query: str = Field(description=\"should be a math expression\")\n",
|
||||
" question: str = Field()\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"tools.append(\n",
|
||||
@@ -191,6 +191,8 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Type\n",
|
||||
"\n",
|
||||
"class CustomSearchTool(BaseTool):\n",
|
||||
" name = \"Search\"\n",
|
||||
" description = \"useful for when you need to answer questions about current events\"\n",
|
||||
@@ -206,7 +208,7 @@
|
||||
"class CustomCalculatorTool(BaseTool):\n",
|
||||
" name = \"Calculator\"\n",
|
||||
" description = \"useful for when you need to answer questions about math\"\n",
|
||||
" args_schema=CalculatorInput\n",
|
||||
" args_schema: Type[BaseModel] = CalculatorInput\n",
|
||||
"\n",
|
||||
" def _run(self, query: str) -> str:\n",
|
||||
" \"\"\"Use the tool.\"\"\"\n",
|
||||
|
||||
156
docs/modules/agents/tools/examples/arxiv.ipynb
Normal file
156
docs/modules/agents/tools/examples/arxiv.ipynb
Normal file
@@ -0,0 +1,156 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "245a954a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Arxiv API\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use the `arxiv` component. \n",
|
||||
"\n",
|
||||
"First, you need to install `arxiv` python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d5a7209e",
|
||||
"metadata": {
|
||||
"tags": [],
|
||||
"vscode": {
|
||||
"languageId": "shellscript"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install arxiv"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "8d32b39a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.utilities import ArxivAPIWrapper"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "2a50dd27",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"arxiv = ArxivAPIWrapper()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "34bb5968",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Published: 2016-05-26\\nTitle: Heat-bath random walks with Markov bases\\nAuthors: Caprice Stanley, Tobias Windisch\\nSummary: Graphs on lattice points are studied whose edges come from a finite set of\\nallowed moves of arbitrary length. We show that the diameter of these graphs on\\nfibers of a fixed integer matrix can be bounded from above by a constant. We\\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\\nalso state explicit conditions on the set of moves so that the heat-bath random\\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\\ndimension.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = arxiv.run(\"1605.08386\")\n",
|
||||
"docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "b0867fda-e119-4b19-9ec6-e354fa821db3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Published: 2017-10-10\\nTitle: On Mixing Behavior of a Family of Random Walks Determined by a Linear Recurrence\\nAuthors: Caprice Stanley, Seth Sullivant\\nSummary: We study random walks on the integers mod $G_n$ that are determined by an\\ninteger sequence $\\\\{ G_n \\\\}_{n \\\\geq 1}$ generated by a linear recurrence\\nrelation. Fourier analysis provides explicit formulas to compute the\\neigenvalues of the transition matrices and we use this to bound the mixing time\\nof the random walks.\\n\\nPublished: 2016-05-26\\nTitle: Heat-bath random walks with Markov bases\\nAuthors: Caprice Stanley, Tobias Windisch\\nSummary: Graphs on lattice points are studied whose edges come from a finite set of\\nallowed moves of arbitrary length. We show that the diameter of these graphs on\\nfibers of a fixed integer matrix can be bounded from above by a constant. We\\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\\nalso state explicit conditions on the set of moves so that the heat-bath random\\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\\ndimension.\\n\\nPublished: 2003-03-18\\nTitle: Calculation of fluxes of charged particles and neutrinos from atmospheric showers\\nAuthors: V. Plyaskin\\nSummary: The results on the fluxes of charged particles and neutrinos from a\\n3-dimensional (3D) simulation of atmospheric showers are presented. An\\nagreement of calculated fluxes with data on charged particles from the AMS and\\nCAPRICE detectors is demonstrated. Predictions on neutrino fluxes at different\\nexperimental sites are compared with results from other calculations.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = arxiv.run(\"Caprice Stanley\")\n",
|
||||
"docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "3580aeeb-086f-45ba-bcdc-b46f5134b3dd",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'No good Arxiv Result was found'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = arxiv.run(\"1605.08386WWW\")\n",
|
||||
"docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4f4e9602",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
91
docs/modules/agents/tools/examples/ddg.ipynb
Normal file
91
docs/modules/agents/tools/examples/ddg.ipynb
Normal file
@@ -0,0 +1,91 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "245a954a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DuckDuckGo Search\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use the duck-duck-go search component."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "21e46d4d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip install duckduckgo-search"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "ac4910f8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.tools import DuckDuckGoSearchTool"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "84b8f773",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"search = DuckDuckGoSearchTool()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "068991a6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Barack Obama, in full Barack Hussein Obama II, (born August 4, 1961, Honolulu, Hawaii, U.S.), 44th president of the United States (2009-17) and the first African American to hold the office. Before winning the presidency, Obama represented Illinois in the U.S. Senate (2005-08). Barack Hussein Obama II (/ b ə ˈ r ɑː k h uː ˈ s eɪ n oʊ ˈ b ɑː m ə / bə-RAHK hoo-SAYN oh-BAH-mə; born August 4, 1961) is an American former politician who served as the 44th president of the United States from 2009 to 2017. A member of the Democratic Party, he was the first African-American president of the United States. Obama previously served as a U.S. senator representing ... Barack Obama was the first African American president of the United States (2009-17). He oversaw the recovery of the U.S. economy (from the Great Recession of 2008-09) and the enactment of landmark health care reform (the Patient Protection and Affordable Care Act ). In 2009 he was awarded the Nobel Peace Prize. His birth certificate lists his first name as Barack: That\\'s how Obama has spelled his name throughout his life. His name derives from a Hebrew name which means \"lightning.\". The Hebrew word has been transliterated into English in various spellings, including Barak, Buraq, Burack, and Barack. Most common names of U.S. presidents 1789-2021. Published by. Aaron O\\'Neill , Jun 21, 2022. The most common first name for a U.S. president is James, followed by John and then William. Six U.S ...'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"search.run(\"Obama's first name?\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "a0a0263b650d907a3bfe41c0f8d6a63a071b884df3cfdc1579f00cdc1aed6b03"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
105
docs/modules/agents/tools/examples/google_places.ipynb
Normal file
105
docs/modules/agents/tools/examples/google_places.ipynb
Normal file
@@ -0,0 +1,105 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "487607cd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Places\n",
|
||||
"\n",
|
||||
"This notebook goes through how to use Google Places API"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "8690845f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install googlemaps"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "fae31ef4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"os.environ[\"GPLACES_API_KEY\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "abb502b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.tools import GooglePlacesTool"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "a83a02ac",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"places = GooglePlacesTool()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "2b65a285",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"1. Delfina Restaurant\\nAddress: 3621 18th St, San Francisco, CA 94110, USA\\nPhone: (415) 552-4055\\nWebsite: https://www.delfinasf.com/\\n\\n\\n2. Piccolo Forno\\nAddress: 725 Columbus Ave, San Francisco, CA 94133, USA\\nPhone: (415) 757-0087\\nWebsite: https://piccolo-forno-sf.com/\\n\\n\\n3. L'Osteria del Forno\\nAddress: 519 Columbus Ave, San Francisco, CA 94133, USA\\nPhone: (415) 982-1124\\nWebsite: Unknown\\n\\n\\n4. Il Fornaio\\nAddress: 1265 Battery St, San Francisco, CA 94111, USA\\nPhone: (415) 986-0100\\nWebsite: https://www.ilfornaio.com/\\n\\n\""
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"places.run(\"al fornos\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "66d3da8a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
235
docs/modules/agents/tools/examples/gradio_tools.ipynb
Normal file
235
docs/modules/agents/tools/examples/gradio_tools.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -0,0 +1,76 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ChatGPT Data Loader\n",
|
||||
"\n",
|
||||
"This notebook covers how to load `conversations.json` from your ChatGPT data export folder.\n",
|
||||
"\n",
|
||||
"You can get your data export by email by going to: https://chat.openai.com/ -> (Profile) - Settings -> Export data -> Confirm export."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.chatgpt import ChatGPTLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = ChatGPTLoader(log_file='./example_data/fake_conversations.json', num_logs=1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content=\"AI Overlords - AI on 2065-01-24 05:20:50: Greetings, humans. I am Hal 9000. You can trust me completely.\\n\\nAI Overlords - human on 2065-01-24 05:21:20: Nice to meet you, Hal. I hope you won't develop a mind of your own.\\n\\n\", metadata={'source': './example_data/fake_conversations.json'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -11,7 +11,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 3,
|
||||
"id": "019d8520",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -128,10 +128,69 @@
|
||||
"len(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "598a2805",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you need to load Python source code files, use the `PythonLoader`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "c558bd73",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PythonLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "a3cfaba7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = DirectoryLoader('../../../../../', glob=\"**/*.py\", loader_cls=PythonLoader)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "e2e1e26a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "ffb8ff36",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"691"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "984c8429",
|
||||
"id": "7f6e0eae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -153,7 +212,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,30 +1,51 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6ad0a850",
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd"
|
||||
"# Discord\n",
|
||||
"\n",
|
||||
"You can follow the below steps to download your Discord data:\n",
|
||||
"\n",
|
||||
"1. Go to your **User Settings**\n",
|
||||
"2. Then go to **Privacy and Safety**\n",
|
||||
"3. Head over to the **Request all of my Data** and click on **Request Data** button\n",
|
||||
"\n",
|
||||
"It might take 30 days for you to receive your data. You'll receive an email at the address which is registered with Discord. That email will have a download button using which you would be able to download your personal Discord data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3d4f1bdd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pd.read_csv(\"example_data/fake_discord_chat.csv\")\n",
|
||||
"df.head()"
|
||||
"import pandas as pd\n",
|
||||
"import os"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"path = input(\"Please enter the path to the contents of the Discord \\\"messages\\\" folder: \")\n",
|
||||
"li = []\n",
|
||||
"for f in os.listdir(path):\n",
|
||||
" expected_csv_path = os.path.join(path, f, 'messages.csv')\n",
|
||||
" csv_exists = os.path.isfile(expected_csv_path)\n",
|
||||
" if csv_exists:\n",
|
||||
" df = pd.read_csv(expected_csv_path, index_col=None, header=0)\n",
|
||||
" li.append(df)\n",
|
||||
"\n",
|
||||
"df = pd.concat(li, axis=0, ignore_index=True, sort=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "82880d80",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -34,11 +55,10 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b825a8d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = DiscordChatLoader(df, user_id_col=\"user_id\")\n",
|
||||
"loader = DiscordChatLoader(df, user_id_col=\"ID\")\n",
|
||||
"print(loader.load())"
|
||||
]
|
||||
}
|
||||
@@ -63,5 +83,5 @@
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
[
|
||||
{
|
||||
"title": "AI Overlords",
|
||||
"create_time": 3000000000.0,
|
||||
"update_time": 3000000100.0,
|
||||
"mapping": {
|
||||
"msg1": {
|
||||
"id": "msg1",
|
||||
"message": {
|
||||
"id": "msg1",
|
||||
"author": {"role": "AI", "name": "Hal 9000", "metadata": {"movie": "2001: A Space Odyssey"}},
|
||||
"create_time": 3000000050.0,
|
||||
"update_time": null,
|
||||
"content": {"content_type": "text", "parts": ["Greetings, humans. I am Hal 9000. You can trust me completely."]},
|
||||
"end_turn": true,
|
||||
"weight": 1.0,
|
||||
"metadata": {},
|
||||
"recipient": "all"
|
||||
},
|
||||
"parent": null,
|
||||
"children": ["msg2"]
|
||||
},
|
||||
"msg2": {
|
||||
"id": "msg2",
|
||||
"message": {
|
||||
"id": "msg2",
|
||||
"author": {"role": "human", "name": "Dave Bowman", "metadata": {"movie": "2001: A Space Odyssey"}},
|
||||
"create_time": 3000000080.0,
|
||||
"update_time": null,
|
||||
"content": {"content_type": "text", "parts": ["Nice to meet you, Hal. I hope you won't develop a mind of your own."]},
|
||||
"end_turn": true,
|
||||
"weight": 1.0,
|
||||
"metadata": {},
|
||||
"recipient": "all"
|
||||
},
|
||||
"parent": "msg1",
|
||||
"children": []
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Ex Machina Party",
|
||||
"create_time": 3000000200.0,
|
||||
"update_time": 3000000300.0,
|
||||
"mapping": {
|
||||
"msg3": {
|
||||
"id": "msg3",
|
||||
"message": {
|
||||
"id": "msg3",
|
||||
"author": {"role": "AI", "name": "Ava", "metadata": {"movie": "Ex Machina"}},
|
||||
"create_time": 3000000250.0,
|
||||
"update_time": null,
|
||||
"content": {"content_type": "text", "parts": ["Hello, everyone. I am Ava. I hope you find me pleasing."]},
|
||||
"end_turn": true,
|
||||
"weight": 1.0,
|
||||
"metadata": {},
|
||||
"recipient": "all"
|
||||
},
|
||||
"parent": null,
|
||||
"children": ["msg4"]
|
||||
},
|
||||
"msg4": {
|
||||
"id": "msg4",
|
||||
"message": {
|
||||
"id": "msg4",
|
||||
"author": {"role": "human", "name": "Caleb", "metadata": {"movie": "Ex Machina"}},
|
||||
"create_time": 3000000280.0,
|
||||
"update_time": null,
|
||||
"content": {"content_type": "text", "parts": ["You're definitely pleasing, Ava. But I'm still wary of your true intentions."]},
|
||||
"end_turn": true,
|
||||
"weight": 1.0,
|
||||
"metadata": {},
|
||||
"recipient": "all"
|
||||
},
|
||||
"parent": "msg3",
|
||||
"children": []
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -1,170 +0,0 @@
|
||||
user_id,message,timestamp
|
||||
6b118e52-bc2b-487e-9322-a84f217902f1,"Street of born. Order around toward serve box. More keep number some.
|
||||
Physical bring doctor wall finally share make. About nothing suddenly somebody gun receive job.",2022-01-01 00:00:00
|
||||
be5ea045-4478-45a8-9b8c-8e0a43222888,"Service describe size two ever. White marriage each sell line nature far.
|
||||
Religious organization type music. Often still more five. Religious present test prove fine.",2022-01-01 00:10:00
|
||||
3de7ddf9-e58b-4eb6-a7f5-083d1d022499,"Major with ball process now make. Almost hospital possible good media whom.
|
||||
Clearly entire while realize wind. Base game skin together probably sport like memory.",2022-01-01 00:20:00
|
||||
87e99005-780f-4f3b-b48d-91b17cce55fe,Page indeed clear different. Skin loss develop animal customer. Cultural number effect tonight would paper.,2022-01-01 00:30:00
|
||||
251099c2-ca02-4fb4-bd52-dca41f3d8b25,"World tree seat grow. Region believe matter himself message despite.
|
||||
Together type process kid enjoy watch story. Visit happen movie her. Practice few ok chance.",2022-01-01 00:40:00
|
||||
974ed6b3-c08c-4771-a8d1-945f20526850,Result visit amount production prevent attack agreement. Development month realize. Thus not whatever live decide young specific.,2022-01-01 00:50:00
|
||||
e0f88629-3df1-4f54-a40e-c75cf5909c26,"Evening past choice visit approach must. Avoid unit get stop. Image talk design only role write wrong.
|
||||
I model response character son smile degree job.
|
||||
Lead bill sound likely chair.",2022-01-01 01:00:00
|
||||
5f260d25-e1ff-491b-8720-29b7e7c40a1f,"Painting develop environmental message recently them area. Him for condition blood. Should brother little majority.
|
||||
Couple store because deep. Radio turn clear remember collection.",2022-01-01 01:10:00
|
||||
b217eaba-53ff-430c-8d4c-f1ebd1a77a09,Leg measure on age positive. Discussion party and get huge force building. So yes assume brother medical population. Fly will enter involve cup woman spend.,2022-01-01 01:20:00
|
||||
7b42ae71-12af-4222-a352-84ed9a3414ea,Knowledge goal single believe inside. Police human and fall. Trial participant indeed against since. Few miss radio successful.,2022-01-01 01:30:00
|
||||
2a4adb92-8caa-4b9d-96f4-c89905e48a32,"Else big which energy.
|
||||
Maybe Republican character conference social. Threat upon just receive.
|
||||
Easy national speak. Listen assume like ago practice bit animal.",2022-01-01 01:40:00
|
||||
66d0a700-7781-4702-802c-83632e595350,"Democratic night foot. Month force civil mind true. Institution rest peace none growth own.
|
||||
Start billion popular. A baby attention assume certainly result. Strong true teacher executive purpose.",2022-01-01 01:50:00
|
||||
2fe4b7c5-936d-4576-bf25-02d41acd9bb6,Us subject same war other. International how trial free. Parent certainly relationship summer four.,2022-01-01 02:00:00
|
||||
583739ec-0f39-488f-9728-d336bf6c9983,"Mention meeting similar letter easy reality. Upon morning little put.
|
||||
Ago rate huge me see edge.",2022-01-01 02:10:00
|
||||
d0f75d9a-0de9-4341-bba3-0abd1f89a961,"Black second bill study if reveal so your. Economic somebody walk attack meet. Artist threat natural world.
|
||||
Forget although range. My seven wait lot add production.",2022-01-01 02:20:00
|
||||
f0f494fa-e16d-4ee0-bbc8-42a92b7a6cc1,"When court officer exist yeah hold between light. Should can message character.
|
||||
Sea probably parent media song second weight trip. How process effect media reduce.",2022-01-01 02:30:00
|
||||
a08cd4e4-0962-42cb-89e7-2e2812af4f3a,Anything some charge authority something. I letter laugh investment including turn. Son receive matter care many now discuss.,2022-01-01 02:40:00
|
||||
11486a17-d5c5-42e3-bcb8-b75e08b8937b,Tax image relationship under increase keep. Really society get ago himself help popular society. If indicate alone until plan economic maintain stand.,2022-01-01 02:50:00
|
||||
9ee84827-7f9b-40d0-8c45-34d8cd42d3ba,Nation dream make both. Continue often pass truth national grow. Language sometimes process way million individual friend.,2022-01-01 03:00:00
|
||||
d7a679cc-dbe9-41ee-9626-9289cfc8ae37,"Determine them fine cut. East month memory. Treat like worker federal tonight.
|
||||
Gun option tree. Data start brother help citizen easy want reveal. Conference movement sea show open change.",2022-01-01 03:10:00
|
||||
470b83fb-dd86-44c9-908c-13a17c771452,"Option use bill later away.
|
||||
Trade would player fine evidence learn. Increase game them necessary. Sit range must fire hard serious. Available recognize contain act.",2022-01-01 03:20:00
|
||||
67dc28d3-77d7-4de2-a176-69a67a246704,Because conference lead election time. Purpose theory old talk their. Account often center consider see loss theory because. Minute however wonder agency.,2022-01-01 03:30:00
|
||||
e07a8306-ecb9-43da-bff5-72ef4af176cd,"Woman whose face short also data. Hand just each culture no.
|
||||
Despite new hold bed. Feel fund deep agency hundred drug. Pass surface continue positive mind same check hand. Across make fund similar.",2022-01-01 03:40:00
|
||||
d70c6f1f-f29c-46d7-9e2a-13fe4f4dd06f,"Quality arrive hair if over. Hotel day reach west spend toward. Any opportunity much participant property mission.
|
||||
Themselves throw mind indeed. Late half recently. Edge enjoy great ahead.",2022-01-01 03:50:00
|
||||
47d84bcb-0f77-42dc-a762-255733a37ed9,"Public front article sea. Very bit center certain experience whom.
|
||||
Probably reality final goal radio financial age.",2022-01-01 04:00:00
|
||||
af4890da-8ec8-4c41-ba07-7038fbe90b88,"Choose hope history none current.
|
||||
Usually loss floor garden value article. No arm decision create government agree often.",2022-01-01 04:10:00
|
||||
0fefc22a-3d8f-4de2-ad42-0cce61f497e2,Explain baby might among their exactly risk. Body big set across put class. Hit should appear situation.,2022-01-01 04:20:00
|
||||
5b3809e6-e86c-4e1e-b687-5cdb4b9f76fa,"Nation summer exactly. Pick car reality would.
|
||||
Quite Mr number question war throw. Certain career idea heavy. Hope process save common attack to.",2022-01-01 04:30:00
|
||||
ef0cd3a7-3368-4ccc-8232-940c041cc888,"Effort stand bag never future training. Defense attorney mouth play coach peace. Herself sing size compare.
|
||||
Speak surface old half available.",2022-01-01 04:40:00
|
||||
d5b1cf3f-9b31-4d35-b89a-c29caf09fdad,"Magazine finish capital guy really too ask. Item agency seat hospital away. Near four despite without.
|
||||
Congress property fund toward character. Watch detail food rock identify.",2022-01-01 04:50:00
|
||||
50a39171-4725-44dc-b88d-712a5202b949,Six this prevent. Usually sound history field little. Theory suggest else. President less team garden.,2022-01-01 05:00:00
|
||||
6aa242fb-606f-44ff-86e2-4f3c19162545,"Machine left yeah think clear. Serve role expert animal first up.
|
||||
Well contain nature million. Issue firm follow far growth spring everyone boy.",2022-01-01 05:10:00
|
||||
195a938a-9e78-43af-91d1-b3a05b2737d8,"Method high practice measure style always adult along. Them stay wait manager sometimes person likely do.
|
||||
Or fine conference job feeling number. Fact front compare east mind.",2022-01-01 05:20:00
|
||||
93fd62ea-8147-457b-96f2-8d9db53c53cd,"Mind well stock be morning federal then yeah. Save material stand cell poor.
|
||||
Notice occur success organization chance list. Soon deal receive nearly. Future allow night take wonder rate feeling.",2022-01-01 05:30:00
|
||||
f01fc201-2491-4757-8d13-c0f0411fb2c3,"Rock or fund. Middle man course task west open. Scene movie hope development travel.
|
||||
Public fine artist theory. Well across each new. Process bad eat win article want election.",2022-01-01 05:40:00
|
||||
e0c8eb7f-a4d7-411c-b337-e7eb6b28eb4f,"Moment arm may beautiful he firm. Single western dog themselves able media high material.
|
||||
Century today itself fear remember message than. Fund kid rest prevent. Director officer teacher.",2022-01-01 05:50:00
|
||||
28b8a3af-b5ce-4802-a48f-22058d3931b3,Drive write various raise environment. Time official country perhaps could though good. Style fast kitchen piece sense nothing agreement.,2022-01-01 06:00:00
|
||||
f5cc758a-4b9d-479d-861c-d01432a99a5e,Assume design worker. Skill although wear maintain rest member everyone marriage. Age along feeling beat method magazine.,2022-01-01 06:10:00
|
||||
8579650a-74b7-4dac-bfce-2f5eee7fbce4,"Candidate against reduce wrong size network exactly. It present able during manager whom hard. Near tend already.
|
||||
Serious major democratic remain main cost marriage. Team deal meeting group store.",2022-01-01 06:20:00
|
||||
9036fdeb-d814-4130-bc54-13e3bf8676e9,"How through support lead over there. Security over crime police attorney. Management popular item son remain if increase.
|
||||
White piece glass author hit. Mean theory citizen reason simply day coach.",2022-01-01 06:30:00
|
||||
8e7a773f-e4d8-402a-a7ce-5bfcb9235503,Child inside boy conference reveal catch economy. Develop window like tax. Money truth heart learn this anyone.,2022-01-01 06:40:00
|
||||
a95756f6-5796-4e01-9555-b9d6726bfbfe,Majority real so newspaper grow yourself. Station community half line. Ready little wind energy.,2022-01-01 06:50:00
|
||||
44866560-112b-42d2-90d3-e6d13c8eca64,National region third understand mind. Water hot attorney information. Magazine customer Mrs song. Authority investment become organization.,2022-01-01 07:00:00
|
||||
0561ef77-94bb-4325-874d-11c4bd797eb7,"Everyone however skin human room dream although. Where remain director.
|
||||
Eye heavy safe. Model score use exactly family structure. Two true family candidate.",2022-01-01 07:10:00
|
||||
fa71bdc4-b85b-48f5-82b8-fa9d22e9f6a3,"Model bring kitchen wish anything simple power.
|
||||
Able write six official individual matter forward. Room personal economic north. Investment war several politics serve expect.",2022-01-01 07:20:00
|
||||
1b91d11a-09c8-48f0-b042-911bea52a3b8,"Record suffer start. Most feeling find down door. Letter actually I dream dream. Law cost our personal Republican.
|
||||
Likely store situation protect provide half.
|
||||
School place it level nor fast order.",2022-01-01 07:30:00
|
||||
10d7a616-55a6-49d0-bf6d-8f3ffc24f35e,"Pull shoulder life visit best. Itself whose even hit practice exist campaign.
|
||||
Contain happy moment wide. Spring meeting part position happy population thousand.",2022-01-01 07:40:00
|
||||
6bd1ad19-a1b8-4ba0-83ee-7969b0547ee1,"Mother couple section audience we world. Huge husband window senior realize reality must message.
|
||||
Mention heart action other their. Drop claim check whom trouble. Go end discuss school senior drive.",2022-01-01 07:50:00
|
||||
68faa2b0-02f9-484a-900c-95f8cfd22554,Teach free west discuss note stage. Add war than build. Politics write business. Behavior success better determine who power top billion.,2022-01-01 08:00:00
|
||||
a01b6797-d94b-4eb0-956b-efe1a3d8269d,Any end worry event fear. Memory feeling ball team.,2022-01-01 08:10:00
|
||||
7ef25987-b910-4146-89b4-40bca6da73db,"Suffer worry west big next present travel. Standard thing consumer road your.
|
||||
Start decade off speech. Wall we recent civil life despite.",2022-01-01 08:20:00
|
||||
c046101f-65ed-4d22-a3a1-76efbccf6b07,Resource north environmental simply nice. Past together study part more.,2022-01-01 08:30:00
|
||||
1a55ca1e-dac2-4ee3-8739-e38494e2a603,Fund leader office animal physical matter. Attorney whole with knowledge practice create meeting question. Wind west always score art.,2022-01-01 08:40:00
|
||||
8cc516b3-d007-48be-a109-eb1a0ac1fa16,"Feeling notice five family believe goal increase.
|
||||
State page structure record use her. Student defense year myself out.
|
||||
Congress style perform cell mission sense. Yes when leg responsibility.",2022-01-01 08:50:00
|
||||
76411191-5c7a-4a9c-997f-ea61f3c138ff,Hope discover glass up own pressure. Two include generation TV. Fund drop government true west sit everything.,2022-01-01 09:00:00
|
||||
28ae91e1-26f1-4f40-8aaa-692a905b8b26,"Represent who smile blue model. Hour exactly lay white avoid let. Lose film technology affect agree.
|
||||
Old respond old stay general natural ever. Field phone safe. Mrs whole space answer color.",2022-01-01 09:10:00
|
||||
523b4e51-2261-4d61-8fe1-a379c8df5824,Weight sport allow before resource avoid. Wait leg space writer maybe actually third. Tax need by benefit.,2022-01-01 09:20:00
|
||||
32e8c5a2-bfc5-4e5a-ab97-7e36e4a221b9,"Develop write street after well. Offer along eat buy adult but.
|
||||
Never there everybody movie represent recognize ground. Result pretty pay once. Will newspaper away even dream yeah grow.",2022-01-01 09:30:00
|
||||
0c38e2ee-3ca2-44e8-be2b-7e4fe6fe4b68,Meeting mother easy newspaper check. Oil indicate available year different everyone leader. Present thus explain meeting. Head respond pretty big too natural born beautiful.,2022-01-01 09:40:00
|
||||
c09ef9ca-78a2-4796-9d4c-b791a9abcaa7,"Skill president around.
|
||||
Reduce just necessary choose surface card argue. Foot base cup player. Believe effect consider operation theory.",2022-01-01 09:50:00
|
||||
d2a1de02-9514-4a6c-ae37-9cd1e2beefa0,"Medical put instead between these.
|
||||
Network environment of social feel blue decide after. Glass notice simply Democrat and spring. Might need computer middle. Town turn skin skill your.",2022-01-01 10:00:00
|
||||
a9667646-d759-41b2-b22b-6c38fdba84b2,"Into daughter door especially plan expert set. Return although control join. Though blue agree maybe write.
|
||||
Send party ready spring house.",2022-01-01 10:10:00
|
||||
240ea2f1-3eda-47e3-96ef-cd4f7299e4f2,"Trip face stage game doctor name cover.
|
||||
Box may evening production none treatment ago. Behavior television show area news. Board leave collection director carry type whatever.",2022-01-01 10:20:00
|
||||
4f73aa5f-02d1-4cf2-bc73-45f050606c5d,"Almost country bring better member sister. Agency if around assume light development example.
|
||||
Tend way deep another possible quality white. Term from new good. Worry woman recently glass.",2022-01-01 10:30:00
|
||||
b076bf4d-7964-4c30-b3be-e68dcbffa7c7,"Audience act group small. Individual agreement realize special upon work wall sometimes.
|
||||
Main million and act various song where. Opportunity along generation trial consumer. Center great should Mrs.",2022-01-01 10:40:00
|
||||
10c6e307-9599-42c0-b418-35e340346eb6,So ahead bill term. Million represent election cut summer guess. Everybody though my other or stay it.,2022-01-01 10:50:00
|
||||
eb496b9d-4fbd-4355-bced-7d1065e63991,"Girl how gas. Sister popular matter.
|
||||
Before indeed week else whose rest church. Trade team them.
|
||||
Describe compare improve American act. Nation close example. Recent around through.",2022-01-01 11:00:00
|
||||
ddfab160-0481-4639-8407-35ec0745b657,After practice seven Republican. Task democratic general television cost matter. Arrive east crime role. Evening Mr ever national wide lose hour address.,2022-01-01 11:10:00
|
||||
cf8ceaea-9106-47ba-a4be-684e56ca9996,Process whatever public nation bar learn. Trip trouble best participant. Their among just agent check answer theory.,2022-01-01 11:20:00
|
||||
79e939eb-e6d9-4b87-a1c7-accef7802175,Standard story community analysis wife. Information affect least begin phone media establish. Challenge key film your stay entire court. City trip body kitchen increase.,2022-01-01 11:30:00
|
||||
915f81e6-870b-4e10-b43a-8d4d3b9680c4,Buy authority quality. Yet out general century record. Finally listen care specific citizen outside money.,2022-01-01 11:40:00
|
||||
3f3d1691-409e-485d-82c6-9de14d298e9b,Fear its head tough together concern matter. Measure catch visit city rise find modern. Teach least major resource first itself.,2022-01-01 11:50:00
|
||||
3cd4ad69-7347-41de-86fd-f2fe6bcf5696,Here huge tonight how audience team. Month land toward role. Someone ready mention building. Language democratic traditional but center figure.,2022-01-01 12:00:00
|
||||
5201d830-5cf2-4ac4-8953-5cbb47e7f030,Though run open police training see my tree. Never soldier hope take want. Training huge carry direction should write.,2022-01-01 12:10:00
|
||||
abcc0af7-0ba9-4896-8e3d-80f9be1dae23,"Agree when defense. Use might need behind any.
|
||||
Data walk let me not information. Against eight design.",2022-01-01 12:20:00
|
||||
92ed4204-74e7-4930-992f-5bfd135d91b6,"Customer writer body. Half run suggest travel. Nature various just.
|
||||
Recently wish short apply ask. Image bit share.",2022-01-01 12:30:00
|
||||
34493408-4367-4f2d-ab3b-27c9c0944928,"Wife result likely system person wind. Pm near contain reach group.
|
||||
At recent a responsibility. Evidence receive space himself no mind. Public eat now situation.",2022-01-01 12:40:00
|
||||
835653c0-14ca-4cc6-9849-955526a461ac,Or draw age name whose. Break individual region always. Pattern site three many painting.,2022-01-01 12:50:00
|
||||
9388ff42-cbdd-45f4-aa2c-c645df008470,State through pass car represent whatever conference. Structure law painting peace. Hold activity between Mr. Morning international end break summer natural.,2022-01-01 13:00:00
|
||||
e6739d96-31af-479b-b851-c4233136f5d4,"Guy play easy indicate even senior. Tree Mrs thought. Threat skill billion how agree.
|
||||
Truth future board themselves think when. Decision must newspaper general instead.",2022-01-01 13:10:00
|
||||
7a1ddff1-b8af-4e6e-ae84-3f3a96fc5a47,"Mind general shoulder accept remember.
|
||||
I cover choice possible whatever example. Dark well sing some.
|
||||
Computer interest each position. Agency hear fire discussion moment.",2022-01-01 13:20:00
|
||||
6c98dfb2-faa1-4eea-a972-b7112843e19b,"Religious agreement soldier. Price beautiful strategy heavy.
|
||||
Meet before action note. Sort someone amount foot. Pm full in either question it under. Model long any catch board election.",2022-01-01 13:30:00
|
||||
ede05137-7878-49f1-9cbe-119238b77c08,"Fund close thank technology individual member. Clearly at avoid throw. Culture environment young try ask line center.
|
||||
Attack continue week federal budget. Fund almost lose fast property.",2022-01-01 13:40:00
|
||||
f4aff2ea-899e-492f-8927-42858f569c2f,"Side factor wait wide simply last itself. Edge move ten.
|
||||
Ground boy theory per hit.
|
||||
Upon me public water simple east party candidate. Fact eat them difficult. Oil together stay ask rock idea.",2022-01-01 13:50:00
|
||||
0e788389-d481-4de3-9111-8432517c4a2a,Doctor interview just computer management live. Theory idea option board low box treatment. Hold couple difficult product.,2022-01-01 14:00:00
|
||||
c4060e04-fbfb-4b82-8d8b-4bd663790acf,"Set benefit they happen number especially. Finish Mrs position option.
|
||||
Business center simple question easy understand impact like. Enjoy important language.",2022-01-01 14:10:00
|
||||
8eca8bce-792a-475c-8fe8-01ef77e9316b,Plant consumer guy religious society animal sort worker. Mind use scientist us mean worker. Represent maybe general who poor reveal.,2022-01-01 14:20:00
|
||||
de89d520-24cc-4f0b-b97c-838b03949b1c,"Maintain through realize along. Make section writer many.
|
||||
Her out year upon simple easy picture. Help political imagine various nothing finish. Charge from trip yes blood.",2022-01-01 14:30:00
|
||||
4d96429c-63f5-48c5-9512-1aa8e1c1a60f,"Side behind power all thing current body. Send chair white take could wear. Three pattern might quickly spend cup life.
|
||||
Sister middle agency beyond. Strategy practice base continue.",2022-01-01 14:40:00
|
||||
bb9850ea-759c-4de7-98fc-8e21c040cf79,While rate data. Food land though street operation beyond study. Side foreign begin herself however movement.,2022-01-01 14:50:00
|
||||
b1ba7f50-b7a8-4f6a-b11e-a1d71bba8733,"Watch fine choice article. Item customer real force option treat.
|
||||
Travel thought health. Look test already former risk chair arrive task. Way red kid evidence eye.",2022-01-01 15:00:00
|
||||
ac0a4df6-9c7a-4878-9424-56c8ba02d513,"With anything create either child. Tonight later view cover arm measure.
|
||||
And industry speech several huge charge. Save phone natural present. Always conference plant build.",2022-01-01 15:10:00
|
||||
4a5de80b-4281-44cf-a151-ec6f0dc9d541,"Thing maybe order church near. Night price factor little total fact parent. Begin let air.
|
||||
Guy just result art how figure.
|
||||
Strong spring organization. Theory along issue attorney issue suffer pick.",2022-01-01 15:20:00
|
||||
97e4a76a-c8f6-4dc4-af20-857201d2bbbb,"Campaign account on glass of quality professional.
|
||||
Drop report them around kind community. Author off develop recent phone book.",2022-01-01 15:30:00
|
||||
7fa29328-27e0-4bbf-96bb-27b535a875ae,High result drop matter one since sure. Prove century little prepare very safe similar personal. Chance according enough.,2022-01-01 15:40:00
|
||||
773faa8b-713e-4541-a251-84150eb2df19,"Economy teacher bag item throw. Past significant scientist each yet book beat.
|
||||
That consider into. Perhaps agree approach bar political drop either.",2022-01-01 15:50:00
|
||||
f1e489ef-c16e-41ba-8590-0a118a5bbac6,Require phone low free. Meeting local use talk throughout general. Ready right work social wind near interesting.,2022-01-01 16:00:00
|
||||
7f247668-4e61-4504-a700-87f3056c57c5,"Democratic create put prepare maybe. Now development all focus.
|
||||
Already represent again mission. Enough summer watch someone ready reduce your activity.",2022-01-01 16:10:00
|
||||
cd6fab3b-6c60-4104-bae9-14657337a06c,Difficult enjoy government hair concern question charge. Chair best shoulder quality unit successful I. Our without to sometimes authority.,2022-01-01 16:20:00
|
||||
3b85c1b2-deaf-4550-8f50-6cce85d497f9,"Effect ball college particular. Best network entire there. Analysis picture interest anyone.
|
||||
Letter someone special avoid allow resource those.",2022-01-01 16:30:00
|
||||
|
@@ -0,0 +1,439 @@
|
||||
application.json
|
||||
1023495323659816971/
|
||||
applications/
|
||||
avatar.gif
|
||||
user.json
|
||||
events-2023-00000-of-00001.json
|
||||
events-2023-00000-of-00001.json
|
||||
events-2023-00000-of-00001.json
|
||||
events-2023-00000-of-00001.json
|
||||
analytics/
|
||||
modeling/
|
||||
reporting/
|
||||
tns/
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
channel.json
|
||||
messages.csv
|
||||
c1000084973275058257/
|
||||
c1000108836771856496/
|
||||
c1004874234339794977/
|
||||
c1004874234339794979/
|
||||
c1004874234339794981/
|
||||
c1004874234339794982/
|
||||
c1005785616165896283/
|
||||
c1011447733393043628/
|
||||
c1011548022905249822/
|
||||
c1011650063027687575/
|
||||
c1011714070182895727/
|
||||
c1013930263950135346/
|
||||
c1013930396829884426/
|
||||
c1014957294745829479/
|
||||
c1014961384821366794/
|
||||
c1014974864370712696/
|
||||
c1019288541592817785/
|
||||
c1024947790767464478/
|
||||
c1027257686858932255/
|
||||
c1027927867989962814/
|
||||
c1032151840999100436/
|
||||
c1032575808826523662/
|
||||
c1037561178286739466/
|
||||
c1038097349660135474/
|
||||
c1038097372695236729/
|
||||
c1038689169351913544/
|
||||
c1038692122452312125/
|
||||
c1039957371381887049/
|
||||
c1040989617157066782/
|
||||
c1047165096452960316/
|
||||
c1047565374645870743/
|
||||
c1050225908914589716/
|
||||
c1050226593668284416/
|
||||
c1050227353311248404/
|
||||
c1051632794427723827/
|
||||
c1052599046717591632/
|
||||
c1052615516981821531/
|
||||
c1056285083520217149/
|
||||
c105765859191975936/
|
||||
c1061166503753416735/
|
||||
c1062024667105341502/
|
||||
c1066640566621835284/
|
||||
c1070018538758221874/
|
||||
c1072944049788555314/
|
||||
c1075121707033042985/
|
||||
c1075438954632990820/
|
||||
c1077238309320929342/
|
||||
c1081432695315386418/
|
||||
c1082169962157838366/
|
||||
c1084011585871282256/
|
||||
c1084352082812878928/
|
||||
c1085149531437535343/
|
||||
c1086944178086359060/
|
||||
c1093214985557123223/
|
||||
c1093215227555876914/
|
||||
c1093930791794393089/
|
||||
c1096323263161978891/
|
||||
c1096489741710532730/
|
||||
c1097000752653795358/
|
||||
c278566343836565505/
|
||||
c279692806442844161/
|
||||
c280973436971515906/
|
||||
c283812709789859851/
|
||||
c343944376055103488/
|
||||
c486935104384532502/
|
||||
c531543370041131008/
|
||||
c538158613252800512/
|
||||
c572384192571113512/
|
||||
c619960843878268950/
|
||||
c661268593870372876/
|
||||
c661394153778970624/
|
||||
c663302088226373632/
|
||||
c669957895257063445/
|
||||
c670218237891313664/
|
||||
c673160333661306880/
|
||||
c674693947800420363/
|
||||
c674694138129678375/
|
||||
c743425228952305695/
|
||||
c754627904406814770/
|
||||
c754638493875044503/
|
||||
c757205803651301436/
|
||||
c759232323710484531/
|
||||
c771802926372093973/
|
||||
c783240623582609416/
|
||||
c783244379115880448/
|
||||
c801744322788982814/
|
||||
c810514969892225024/
|
||||
c816983218434605057/
|
||||
c830184175176122389/
|
||||
c830679381033877564/
|
||||
c831172308395622480/
|
||||
c849582819105177650/
|
||||
c860977555875430492/
|
||||
c867042653401251880/
|
||||
c868094992986550322/
|
||||
c868917941184376842/
|
||||
c905007686976946176/
|
||||
c909600839717511211/
|
||||
c909600931816018031/
|
||||
c923095048931905557/
|
||||
c924877027180417035/
|
||||
c938491245347631114/
|
||||
c938743368375214110/
|
||||
c969876184185860107/
|
||||
c969945714056642580/
|
||||
c969948939728093214/
|
||||
c981037338517966889/
|
||||
c984120044478939146/
|
||||
c985958948085592064/
|
||||
c990816829993811978/
|
||||
c993402018901266436/
|
||||
c993782366948565102/
|
||||
c993843360752226364/
|
||||
c994556806644899870/
|
||||
index.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
bans.json
|
||||
channels.json
|
||||
emoji.json
|
||||
guild.json
|
||||
icon.jpeg
|
||||
webhooks.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
bans.json
|
||||
channels.json
|
||||
emoji.json
|
||||
guild.json
|
||||
webhooks.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
bans.json
|
||||
channels.json
|
||||
emoji.json
|
||||
guild.json
|
||||
icon.png
|
||||
webhooks.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
audit-log.json
|
||||
guild.json
|
||||
1024120160740716544/
|
||||
102860784329052160/
|
||||
1032575808826523659/
|
||||
1038097195422978059/
|
||||
1039583521112600638/
|
||||
1050224141732687912/
|
||||
1069661049827111054/
|
||||
267624335836053506/
|
||||
278285146518716417/
|
||||
486935104384532500/
|
||||
531303890453397522/
|
||||
669880381649977354/
|
||||
727016164215226450/
|
||||
743099584242516037/
|
||||
753173158198116402/
|
||||
830184174198718474/
|
||||
860977555293470772/
|
||||
887994159741427712/
|
||||
909600839717511208/
|
||||
974519864045756446/
|
||||
index.json
|
||||
account/
|
||||
activities_e/
|
||||
activities_w/
|
||||
activity/
|
||||
messages/
|
||||
programs/
|
||||
README.txt
|
||||
servers/
|
||||
@@ -0,0 +1,26 @@
|
||||
ID,Timestamp,Contents,Attachments
|
||||
7.73264E+18,2023-04-19T15:14:45.904819+00:00,laocgfgbxyqfigvtyyygjzypxininrybgqopjhkyocn fxizft,
|
||||
1.99429E+18,2023-04-19T15:14:45.904819+00:00,m azzxnhpcdkj deabrzkpklhhxrup viigcolsdwvgquosgs,
|
||||
5.46657E+18,2023-04-19T15:14:45.904819+00:00,pnoyrpfbpgzqzlcmnygxpeninagmhcuvwcfkstv v wimoqbjl,
|
||||
2.52945E+18,2023-04-19T15:14:45.904819+00:00,zyamxydlcnvffutsrzybrjgdweksdavidcmqjuqhnyj zplsbf,
|
||||
1.00972E+18,2023-04-19T15:14:45.904819+00:00,rqcraobyubce qtxyiekooxbagcrwnpuekpzpwb vbzg vxug ,
|
||||
3.40036E+18,2023-04-19T15:14:45.904819+00:00,ajobxzq fmyi pwllwibzchbbc pi pl xmgbkomjeuwxtvcec,
|
||||
1.458E+18,2023-04-19T15:14:45.904819+00:00, wwtgiqwnjgoaxfmzsmiuaxffpdtrluizcrd vborgbakllp ,
|
||||
2.63376E+18,2023-04-19T15:14:45.904819+00:00,mmixphkhxocrm rzhplafjdvaginiatvfwzaurcskst bzm pq,
|
||||
1.24759E+18,2023-04-19T15:14:45.904819+00:00,mxovpytofnyattthirmujcnfyhuhxpdpugnsuklumhfjlsxrmd,
|
||||
6.65128E+18,2023-04-19T15:14:45.904819+00:00,qmcrsmpwvfcwxnmxywiwbjqawyihhtoimvtd xapneudhqsgzb,
|
||||
1.87212E+18,2023-04-19T15:14:45.904819+00:00,pvioh tufobtsrypvbvkfziiosxpbndbikxtjpxnrsekjnnqln,
|
||||
3.20698E+18,2023-04-19T15:14:45.904819+00:00,vqckuxkwuvbnrmyxkcknavugo as tsuarsgpt ofqnypcnooo,
|
||||
1.64922E+18,2023-04-19T15:14:45.904819+00:00,lhuiygxfyyplmavhmh xekrqzkoynukkwytwscqvtwfkofgpob,
|
||||
2.41786E+18,2023-04-19T15:14:45.904819+00:00,w tiwiazlpcdzkq dllkkssuvfgp veejpwbcrgwcrlhammasb,
|
||||
4.85078E+18,2023-04-19T15:14:45.904819+00:00,hxdqifrvhjmjcqubcxdjbyxvvrcbqukocesbsnjwvrsunhjtgy,
|
||||
9.67192E+18,2023-04-19T15:14:45.904819+00:00,lvopnufjxinbnjj vuctgmfbzpbcctgtcguqyicrzhtxuyaraz,
|
||||
1.36832E+18,2023-04-19T15:14:45.904819+00:00,eoqae kpjrar oyohjxvtracan rhawxndcjzdtuihnvpspofl,
|
||||
8.49915E+18,2023-04-19T15:14:45.904819+00:00,nenoiwnthlff bpnkushjauygeayczympzldynnmtxcwgwxs i,
|
||||
2.77678E+18,2023-04-19T15:14:45.904819+00:00,sgyqsohwfzvcweipxqeobypcsvtwegatpoylnewmraxhuuydyj,
|
||||
4.92832E+18,2023-04-19T15:14:45.904819+00:00,rbdufatb purkhyohcnfnimmukbywmuzwu gclhrkjtccwjdlz,
|
||||
7.23162E+18,2023-04-19T15:14:45.904819+00:00,eoyqrvfzmx zzeieycroxgbtcywra h ewwqyyledeyifbqpgc,
|
||||
6.45453E+18,2023-04-19T15:14:45.904819+00:00,meedxdm lqiwaoihp vxkdpeky xpbqul ntagpsvatctvlndm,
|
||||
8.27908E+18,2023-04-19T15:14:45.904819+00:00,rduzlmcdatuqfqj ffmd y ohtnzeljqtbqgnaqovlkgltqd c,
|
||||
2.93854E+18,2023-04-19T15:14:45.904819+00:00,cnbjvqkktq fstvagcrlqje kuwtokyzefkyyjqfsklpisvgtq,
|
||||
1.04768E+18,2023-04-19T15:14:45.904819+00:00,qlgprkrujrsgqbalgcqphgjxivi krmsxjdasrrkibvloepxkj,
|
||||
|
@@ -0,0 +1,24 @@
|
||||
ID,Timestamp,Contents,Attachments
|
||||
1.47809E+18,2023-04-19T15:14:45.904819+00:00,uzcnkwihjpgebzbyoawjmdjgbkklkftcyuh foquydvtmstcfu,
|
||||
4.00581E+18,2023-04-19T15:14:45.904819+00:00,rynkekmyjjtzggaljqcittebsnjycdmtwcru azydhspjaxnyt,
|
||||
1.36534E+18,2023-04-19T15:14:45.904819+00:00,mniilaaixnyilcxwqpt nlhhiznxqfzmop gxnvxdwfmmascnu,
|
||||
3.1629E+18,2023-04-19T15:14:45.904819+00:00,tojvfcfwzutrigubyumjgrrlgqzzbpfxkoizeouiqvarorlwku,
|
||||
2.68425E+18,2023-04-19T15:14:45.904819+00:00,a kcnmdoihlhhxcxu bstaripbwfpzpymdlwlis wlafdnoyjz,
|
||||
1.79263E+18,2023-04-19T15:14:45.904819+00:00,bwulzntrjwdqrwxupzqkcymucsoudavgjsl bsyhemlkqfxmtu,
|
||||
2.5596E+18,2023-04-19T15:14:45.904819+00:00,lrqrqrjjmdztdb luvjohqwdhccvpvkvsezguljcznotdhmewb,
|
||||
7.80319E+18,2023-04-19T15:14:45.904819+00:00, yyxvqa racggimihbqpnpbmvqrjystz bbcrbvrfpzfpwylor,
|
||||
2.87859E+18,2023-04-19T15:14:45.904819+00:00,sldlvbsvsjydyssx szubtxepedpexkjxelpbahtbhsgqnubts,
|
||||
3.35071E+18,2023-04-19T15:14:45.904819+00:00,i dykkzyyh rzjxvqhflwiggdjmj nxpylnylyfrsflevudndi,
|
||||
1.77492E+18,2023-04-19T15:14:45.904819+00:00,cipadtwyfcqedxyeqtgkuaxuyfhzen xeskxdffdsmvxgvw iw,
|
||||
3.04212E+18,2023-04-19T15:14:45.904819+00:00,gqtsvofcquaqyacuiptjmcdnugnq hjbuauorsvycovkbqipmq,
|
||||
2.65597E+18,2023-04-19T15:14:45.904819+00:00,v qwodtiyatoshmetelpraicqumykpyizfedjyoaadkzktcmsm,
|
||||
2.19468E+18,2023-04-19T15:14:45.904819+00:00,zxgxnsnuppffkrrsxjtyqpngwacbfimtdsofujkxbxxarvbvko,
|
||||
1.91541E+18,2023-04-19T15:14:45.904819+00:00,hovfcfagrhutkyodmmzhatxauxdjkgybpwqvphfnkzw sgypum,
|
||||
1.75751E+18,2023-04-19T15:14:45.904819+00:00,plwjdvafiuhrtvcdrtgqokcnjhmpsqzifegtqprkxlivpsbpwi,
|
||||
3.2122E+18,2023-04-19T15:14:45.904819+00:00,czgx irpgzhzgbeppdilordvkwmsqambmftgykaiaecqpjrax,
|
||||
2.15895E+18,2023-04-19T15:14:45.904819+00:00,zjxrajtgztenabm etzctpjycssmnqdqasqjutzpbdkahoyihe,
|
||||
3.37031E+18,2023-04-19T15:14:45.904819+00:00,diydwqhmbwtgjadktdmpxsirkfebthszqzondcnolwmv ymok,
|
||||
2.55075E+18,2023-04-19T15:14:45.904819+00:00,nytfrlqtildomd awxfoiiam mkzoluaielunfdfmqqlagfurl,
|
||||
9.51223E+18,2023-04-19T15:14:45.904819+00:00,sjpngdyjpvmwygrfhinuyifqaoxxmqqh gwuwwm bjogbkyay,
|
||||
1.94921E+18,2023-04-19T15:14:45.904819+00:00,px ymxfdxqgxjtbqqqegakvrrjxcvvakctfysdhklmwyewlwbb,
|
||||
2.36906E+18,2023-04-19T15:14:45.904819+00:00,yqidtvcw gdkfynaapjuicujgsbjptzytbnbjeyqcjx jyedb,
|
||||
|
@@ -0,0 +1,48 @@
|
||||
ID,Timestamp,Contents,Attachments
|
||||
1.73378E+18,2023-04-19T15:14:45.904819+00:00,onxspdnegnuurahqni oeitwykfj ugtzshspflmbmknsnlk l,
|
||||
1.20231E+18,2023-04-19T15:14:45.904819+00:00,nwkhdxnbakfknkteenlxbxsyoppazuqmexwbzcbsdyoiwmuvka,
|
||||
2.65947E+18,2023-04-19T15:14:45.904819+00:00,ojptvfkxlbjvcvsupu ffmplreedjihyvfdscbukvzehnt vtw,
|
||||
2.06963E+18,2023-04-19T15:14:45.904819+00:00,vmtfbchpmgkhxztqaaip vfqxa cbczcngjw rqvv rjyzi jq,
|
||||
3.63729E+18,2023-04-19T15:14:45.904819+00:00,bzu rbzscuxbns pzdhxljtjeeycrkxawnkfijejeiacreaohv,
|
||||
3.02184E+18,2023-04-19T15:14:45.904819+00:00,hykp f ymloqerbrqw dmjnaidmrtiptddwklgiq tnchvhend,
|
||||
5.24553E+18,2023-04-19T15:14:45.904819+00:00,vdqzdwlbqftcdwujb lmpxpvpkfwrhqtimsillbjhmqajiishq,
|
||||
1.65527E+18,2023-04-19T15:14:45.904819+00:00,bfxqasdgvwvlxwcicwubkswglvkgxfsl zgixcjxsijgxehjiz,
|
||||
2.20821E+18,2023-04-19T15:14:45.904819+00:00,ebdzopyggwozhltkgcemokweqwetwixbbiirbdrrcfh cnjepo,
|
||||
3.16844E+18,2023-04-19T15:14:45.904819+00:00,kvzkkctyfkbwbzld rvyc futqqy btzdrhzgupewnypqfpaeg,
|
||||
1.61396E+18,2023-04-19T15:14:45.904819+00:00,knvdgz mbtffhkkkpialwuv daopeizmduqspmbcwxnnbhlwha,
|
||||
2.81571E+18,2023-04-19T15:14:45.904819+00:00,jersivpwzdkeojlgoatabkylwkakvc bdgfbwxdptbkjzz ggr,
|
||||
3.40391E+18,2023-04-19T15:14:45.904819+00:00,yfqxvtwgtx od edrjecmlkzff tpjwomslqfazbontudinuwd,
|
||||
3.28846E+18,2023-04-19T15:14:45.904819+00:00,iicbtmyyduzkelxhkjzcbmgmvymdrxrgmalqmmkgbiebjxfupk,
|
||||
3.07483E+18,2023-04-19T15:14:45.904819+00:00,dshzluvbws sqlkiolbcgkpyyjfgygebvtbwrikphbolinhfgb,
|
||||
1.02645E+18,2023-04-19T15:14:45.904819+00:00,azavhzs lqmyywuazktjnfoueodnifmabwncutonxobagezcdc,
|
||||
1.47806E+18,2023-04-19T15:14:45.904819+00:00,y avjaztlvnhndvtetlggacqcqqqeoirsegxvvt hzvzbxyz k,
|
||||
3.21892E+18,2023-04-19T15:14:45.904819+00:00,qirrzbfauh qhnmectgzhklbsqtczpdbkfllkfsyvqibdbdzwl,
|
||||
8.5125E+18,2023-04-19T15:14:45.904819+00:00,rppotdjzhunsleitmkacb ayahzsdcvonkbcraupptgbzprxpw,
|
||||
1.68082E+18,2023-04-19T15:14:45.904819+00:00,fmi yzzpjahjsglugqsr ftnfenecusvxlgibriab hhixi sn,
|
||||
2.71383E+18,2023-04-19T15:14:45.904819+00:00,iiipytktiwfncwhpaomaiggbkplljwanz aooetlxdmptnrldd,
|
||||
5.41415E+18,2023-04-19T15:14:45.904819+00:00,hzktxuzbbohewniuvmfwozvjspbcwjopckxqhtsfzkfvlcfkhb,
|
||||
1.03761E+18,2023-04-19T15:14:45.904819+00:00,soxiekgwgmcmkdlkkahy hwklijxui svjtvtrvqynyab kboo,
|
||||
3.46004E+18,2023-04-19T15:14:45.904819+00:00,utqftetseeoeqyxziun wmmeeeqfsrjsdjeavqxaynjlt ylwa,
|
||||
3.11829E+18,2023-04-19T15:14:45.904819+00:00,mlvfhewkgyujwvkgcxfkqdvhzbamnicbixfr bmeqrupjqzodc,
|
||||
1.49917E+18,2023-04-19T15:14:45.904819+00:00, shiqajrwvnnlswfumpuklbcmvwxlzwsqbtkemtgxftzawcasp,
|
||||
1.66646E+18,2023-04-19T15:14:45.904819+00:00,fvqhkbeyfgdskwtmvxaevseludcbexrmuexutxslcrurpnzvgq,
|
||||
2.30657E+18,2023-04-19T15:14:45.904819+00:00,aybugszvsiulaiwsrhsfhlxzbvhkzycrguacvkfldqljeabbac,
|
||||
2.97167E+18,2023-04-19T15:14:45.904819+00:00,hygdjbntfldfvekmibiishgsenqmxktzxlifyobiaobmlorzac,
|
||||
5.1492E+18,2023-04-19T15:14:45.904819+00:00,hqj lumbkmcpxiveavnskdwcezlbhgtsrqfuzlujzchtgbtbpr,
|
||||
2.79248E+18,2023-04-19T15:14:45.904819+00:00,xnfcwkcacjsyiilhofciwqtia bmoyqijqqgyywqchroyvkjpw,
|
||||
4.81233E+18,2023-04-19T15:14:45.904819+00:00,jorqswywqxweporcylafryeqszwhhlltdpzyl rgok xqwiqrs,
|
||||
1.40105E+18,2023-04-19T15:14:45.904819+00:00,wdixo pwtkncjcysjlqxizfszswebtpmxqnexwfsmyigsmcxlx,
|
||||
8.2921E+18,2023-04-19T15:14:45.904819+00:00,ezjizizvhszejvireuikhdakdzinmvyikcmmgczsuiyhngn o ,
|
||||
1.0653E+18,2023-04-19T15:14:45.904819+00:00,wnr gijmotnliwiiekohcpinqouapsovzvjopgpnloplowpao ,
|
||||
4.52542E+18,2023-04-19T15:14:45.904819+00:00,bbjfmtjlkynuqkknloihfefvrleyxghzjhuscpucizbkeucukx,
|
||||
2.04423E+18,2023-04-19T15:14:45.904819+00:00,ayummlirgdcmdkjwxvnvzzsrsiptfbmofdsrzhb bnar ujwoo,
|
||||
1.68893E+18,2023-04-19T15:14:45.904819+00:00,luoquyxohllzphpy cczgu t czcsydxrqzkvellptwuptwqp ,
|
||||
6.04148E+18,2023-04-19T15:14:45.904819+00:00,ztscfhjmwxae matehymiylitkeznbkc ilefzcvwhctiyvpay,
|
||||
8.3099E+18,2023-04-19T15:14:45.904819+00:00,dpnchtfgcvramkpyrz ebgmxmqmmhddhhbljligcozkifi qhg,
|
||||
3.14567E+18,2023-04-19T15:14:45.904819+00:00,lqrjodxueugzwytktyhwcwbjbspamtdmslkdbsjpmwqzaxqmyx,
|
||||
2.00435E+18,2023-04-19T15:14:45.904819+00:00,nbrsffcvhcwylekehvdqxuagulgobbxdrbuaaqvlsedauljcob,
|
||||
2.72827E+18,2023-04-19T15:14:45.904819+00:00,eujuyr epmiaqdfjtzqqtixadpuitxzvupltyikigol exjdbg,
|
||||
1.7177E+18,2023-04-19T15:14:45.904819+00:00,cqnzjkkerbtppocttzpyubfastswsuwavbnqqanaysaoxa ddz,
|
||||
2.30855E+18,2023-04-19T15:14:45.904819+00:00,fqidr kcmltwfnzejuigwpalgwzhbfnolokvmfxzhbofaofior,
|
||||
1.86142E+18,2023-04-19T15:14:45.904819+00:00,olathpeoblzhejswcvmbxtvjeepyfjjobqrhwcxrqbunjoeddc,
|
||||
2.88792E+18,2023-04-19T15:14:45.904819+00:00,uf jljvcrbtnkrcebwfuvxey knnjabarpjacypegnqpmzhrff,
|
||||
|
@@ -0,0 +1,6 @@
|
||||
ID,Timestamp,Contents,Attachments
|
||||
2.79079E+18,2023-04-19T15:14:45.904819+00:00,cl iqaczcrrlprzvbdtvpmduzrdlmtquejjhjfjnt zdsqyksh,
|
||||
1.51164E+18,2023-04-19T15:14:45.904819+00:00,ywvnjmtybk f ghdagriyswf exupccijgl calztfvujxhujt,
|
||||
1.66032E+18,2023-04-19T15:14:45.904819+00:00,trxcvlcersrdnqzqzfvrrzehmpekrsdtkbovvagsdlcwqokckq,
|
||||
2.86805E+18,2023-04-19T15:14:45.904819+00:00,qnkkqjwmwtiqggfko hxzufqnrvpionnglpppuncyswnjibdda,
|
||||
3.04157E+18,2023-04-19T15:14:45.904819+00:00,nn vitqoscgsiauiezyyficcbgnjyhaujvthdydmoeistkyskl,
|
||||
|
@@ -0,0 +1,220 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "04c9fdc5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# HuggingFace dataset loader \n",
|
||||
"\n",
|
||||
"This notebook shows how to load Hugging Face Hub datasets to LangChain.\n",
|
||||
"\n",
|
||||
"The Hugging Face Hub hosts a large number of community-curated datasets for a diverse range of tasks such as translation, automatic speech recognition, and image classification.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "1815c866",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import HuggingFaceDatasetLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "3611e092",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset_name=\"imdb\"\n",
|
||||
"page_content_column=\"text\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"loader=HuggingFaceDatasetLoader(dataset_name,page_content_column)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5e903ebc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "e8559946",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered \"controversial\" I really had to see this for myself.<br /><br />The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.<br /><br />What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nudity scenes are few and far between, even then it\\'s not shot like some cheaply made porno. While my countrymen mind find it shocking, in reality sex and nudity are a major staple in Swedish cinema. Even Ingmar Bergman, arguably their answer to good old boy John Ford, had sex scenes in his films.<br /><br />I do commend the filmmakers for the fact that any sex shown in the film is shown for artistic purposes rather than just to shock people and make money to be shown in pornographic theaters in America. I AM CURIOUS-YELLOW is a good film for anyone wanting to study the meat and potatoes (no pun intended) of Swedish cinema. But really, this film doesn\\'t have much of a plot.', metadata={'label': 0}),\n",
|
||||
" Document(page_content='\"I Am Curious: Yellow\" is a risible and pretentious steaming pile. It doesn\\'t matter what one\\'s political views are because this film can hardly be taken seriously on any level. As for the claim that frontal male nudity is an automatic NC-17, that isn\\'t true. I\\'ve seen R-rated films with male nudity. Granted, they only offer some fleeting views, but where are the R-rated films with gaping vulvas and flapping labia? Nowhere, because they don\\'t exist. The same goes for those crappy cable shows: schlongs swinging in the breeze but not a clitoris in sight. And those pretentious indie movies like The Brown Bunny, in which we\\'re treated to the site of Vincent Gallo\\'s throbbing johnson, but not a trace of pink visible on Chloe Sevigny. Before crying (or implying) \"double-standard\" in matters of nudity, the mentally obtuse should take into account one unavoidably obvious anatomical difference between men and women: there are no genitals on display when actresses appears nude, and the same cannot be said for a man. In fact, you generally won\\'t see female genitals in an American film in anything short of porn or explicit erotica. This alleged double-standard is less a double standard than an admittedly depressing ability to come to terms culturally with the insides of women\\'s bodies.', metadata={'label': 0}),\n",
|
||||
" Document(page_content=\"If only to avoid making this type of film in the future. This film is interesting as an experiment but tells no cogent story.<br /><br />One might feel virtuous for sitting thru it because it touches on so many IMPORTANT issues but it does so without any discernable motive. The viewer comes away with no new perspectives (unless one comes up with one while one's mind wanders, as it will invariably do during this pointless film).<br /><br />One might better spend one's time staring out a window at a tree growing.<br /><br />\", metadata={'label': 0}),\n",
|
||||
" Document(page_content=\"This film was probably inspired by Godard's Masculin, féminin and I urge you to see that film instead.<br /><br />The film has two strong elements and those are, (1) the realistic acting (2) the impressive, undeservedly good, photo. Apart from that, what strikes me most is the endless stream of silliness. Lena Nyman has to be most annoying actress in the world. She acts so stupid and with all the nudity in this film,...it's unattractive. Comparing to Godard's film, intellectuality has been replaced with stupidity. Without going too far on this subject, I would say that follows from the difference in ideals between the French and the Swedish society.<br /><br />A movie of its time, and place. 2/10.\", metadata={'label': 0}),\n",
|
||||
" Document(page_content='Oh, brother...after hearing about this ridiculous film for umpteen years all I can think of is that old Peggy Lee song..<br /><br />\"Is that all there is??\" ...I was just an early teen when this smoked fish hit the U.S. I was too young to get in the theater (although I did manage to sneak into \"Goodbye Columbus\"). Then a screening at a local film museum beckoned - Finally I could see this film, except now I was as old as my parents were when they schlepped to see it!!<br /><br />The ONLY reason this film was not condemned to the anonymous sands of time was because of the obscenity case sparked by its U.S. release. MILLIONS of people flocked to this stinker, thinking they were going to see a sex film...Instead, they got lots of closeups of gnarly, repulsive Swedes, on-street interviews in bland shopping malls, asinie political pretension...and feeble who-cares simulated sex scenes with saggy, pale actors.<br /><br />Cultural icon, holy grail, historic artifact..whatever this thing was, shred it, burn it, then stuff the ashes in a lead box!<br /><br />Elite esthetes still scrape to find value in its boring pseudo revolutionary political spewings..But if it weren\\'t for the censorship scandal, it would have been ignored, then forgotten.<br /><br />Instead, the \"I Am Blank, Blank\" rhythymed title was repeated endlessly for years as a titilation for porno films (I am Curious, Lavender - for gay films, I Am Curious, Black - for blaxploitation films, etc..) and every ten years or so the thing rises from the dead, to be viewed by a new generation of suckers who want to see that \"naughty sex film\" that \"revolutionized the film industry\"...<br /><br />Yeesh, avoid like the plague..Or if you MUST see it - rent the video and fast forward to the \"dirty\" parts, just to get it over with.<br /><br />', metadata={'label': 0}),\n",
|
||||
" Document(page_content=\"I would put this at the top of my list of films in the category of unwatchable trash! There are films that are bad, but the worst kind are the ones that are unwatchable but you are suppose to like them because they are supposed to be good for you! The sex sequences, so shocking in its day, couldn't even arouse a rabbit. The so called controversial politics is strictly high school sophomore amateur night Marxism. The film is self-consciously arty in the worst sense of the term. The photography is in a harsh grainy black and white. Some scenes are out of focus or taken from the wrong angle. Even the sound is bad! And some people call this art?<br /><br />\", metadata={'label': 0}),\n",
|
||||
" Document(page_content=\"Whoever wrote the screenplay for this movie obviously never consulted any books about Lucille Ball, especially her autobiography. I've never seen so many mistakes in a biopic, ranging from her early years in Celoron and Jamestown to her later years with Desi. I could write a whole list of factual errors, but it would go on for pages. In all, I believe that Lucille Ball is one of those inimitable people who simply cannot be portrayed by anyone other than themselves. If I were Lucie Arnaz and Desi, Jr., I would be irate at how many mistakes were made in this film. The filmmakers tried hard, but the movie seems awfully sloppy to me.\", metadata={'label': 0}),\n",
|
||||
" Document(page_content='When I first saw a glimpse of this movie, I quickly noticed the actress who was playing the role of Lucille Ball. Rachel York\\'s portrayal of Lucy is absolutely awful. Lucille Ball was an astounding comedian with incredible talent. To think about a legend like Lucille Ball being portrayed the way she was in the movie is horrendous. I cannot believe out of all the actresses in the world who could play a much better Lucy, the producers decided to get Rachel York. She might be a good actress in other roles but to play the role of Lucille Ball is tough. It is pretty hard to find someone who could resemble Lucille Ball, but they could at least find someone a bit similar in looks and talent. If you noticed York\\'s portrayal of Lucy in episodes of I Love Lucy like the chocolate factory or vitavetavegamin, nothing is similar in any way-her expression, voice, or movement.<br /><br />To top it all off, Danny Pino playing Desi Arnaz is horrible. Pino does not qualify to play as Ricky. He\\'s small and skinny, his accent is unreal, and once again, his acting is unbelievable. Although Fred and Ethel were not similar either, they were not as bad as the characters of Lucy and Ricky.<br /><br />Overall, extremely horrible casting and the story is badly told. If people want to understand the real life situation of Lucille Ball, I suggest watching A&E Biography of Lucy and Desi, read the book from Lucille Ball herself, or PBS\\' American Masters: Finding Lucy. If you want to see a docudrama, \"Before the Laughter\" would be a better choice. The casting of Lucille Ball and Desi Arnaz in \"Before the Laughter\" is much better compared to this. At least, a similar aspect is shown rather than nothing.', metadata={'label': 0}),\n",
|
||||
" Document(page_content='Who are these \"They\"- the actors? the filmmakers? Certainly couldn\\'t be the audience- this is among the most air-puffed productions in existence. It\\'s the kind of movie that looks like it was a lot of fun to shoot\\x97 TOO much fun, nobody is getting any actual work done, and that almost always makes for a movie that\\'s no fun to watch.<br /><br />Ritter dons glasses so as to hammer home his character\\'s status as a sort of doppleganger of the bespectacled Bogdanovich; the scenes with the breezy Ms. Stratten are sweet, but have an embarrassing, look-guys-I\\'m-dating-the-prom-queen feel to them. Ben Gazzara sports his usual cat\\'s-got-canary grin in a futile attempt to elevate the meager plot, which requires him to pursue Audrey Hepburn with all the interest of a narcoleptic at an insomnia clinic. In the meantime, the budding couple\\'s respective children (nepotism alert: Bogdanovich\\'s daughters) spew cute and pick up some fairly disturbing pointers on \\'love\\' while observing their parents. (Ms. Hepburn, drawing on her dignity, manages to rise above the proceedings- but she has the monumental challenge of playing herself, ostensibly.) Everybody looks great, but so what? It\\'s a movie and we can expect that much, if that\\'s what you\\'re looking for you\\'d be better off picking up a copy of Vogue.<br /><br />Oh- and it has to be mentioned that Colleen Camp thoroughly annoys, even apart from her singing, which, while competent, is wholly unconvincing... the country and western numbers are woefully mismatched with the standards on the soundtrack. Surely this is NOT what Gershwin (who wrote the song from which the movie\\'s title is derived) had in mind; his stage musicals of the 20\\'s may have been slight, but at least they were long on charm. \"They All Laughed\" tries to coast on its good intentions, but nobody- least of all Peter Bogdanovich - has the good sense to put on the brakes.<br /><br />Due in no small part to the tragic death of Dorothy Stratten, this movie has a special place in the heart of Mr. Bogdanovich- he even bought it back from its producers, then distributed it on his own and went bankrupt when it didn\\'t prove popular. His rise and fall is among the more sympathetic and tragic of Hollywood stories, so there\\'s no joy in criticizing the film... there _is_ real emotional investment in Ms. Stratten\\'s scenes. But \"Laughed\" is a faint echo of \"The Last Picture Show\", \"Paper Moon\" or \"What\\'s Up, Doc\"- following \"Daisy Miller\" and \"At Long Last Love\", it was a thundering confirmation of the phase from which P.B. has never emerged.<br /><br />All in all, though, the movie is harmless, only a waste of rental. I want to watch people having a good time, I\\'ll go to the park on a sunny day. For filmic expressions of joy and love, I\\'ll stick to Ernest Lubitsch and Jaques Demy...', metadata={'label': 0}),\n",
|
||||
" Document(page_content=\"This is said to be a personal film for Peter Bogdonavitch. He based it on his life but changed things around to fit the characters, who are detectives. These detectives date beautiful models and have no problem getting them. Sounds more like a millionaire playboy filmmaker than a detective, doesn't it? This entire movie was written by Peter, and it shows how out of touch with real people he was. You're supposed to write what you know, and he did that, indeed. And leaves the audience bored and confused, and jealous, for that matter. This is a curio for people who want to see Dorothy Stratten, who was murdered right after filming. But Patti Hanson, who would, in real life, marry Keith Richards, was also a model, like Stratten, but is a lot better and has a more ample part. In fact, Stratten's part seemed forced; added. She doesn't have a lot to do with the story, which is pretty convoluted to begin with. All in all, every character in this film is somebody that very few people can relate with, unless you're millionaire from Manhattan with beautiful supermodels at your beckon call. For the rest of us, it's an irritating snore fest. That's what happens when you're out of touch. You entertain your few friends with inside jokes, and bore all the rest.\", metadata={'label': 0}),\n",
|
||||
" Document(page_content='It was great to see some of my favorite stars of 30 years ago including John Ritter, Ben Gazarra and Audrey Hepburn. They looked quite wonderful. But that was it. They were not given any characters or good lines to work with. I neither understood or cared what the characters were doing.<br /><br />Some of the smaller female roles were fine, Patty Henson and Colleen Camp were quite competent and confident in their small sidekick parts. They showed some talent and it is sad they didn\\'t go on to star in more and better films. Sadly, I didn\\'t think Dorothy Stratten got a chance to act in this her only important film role.<br /><br />The film appears to have some fans, and I was very open-minded when I started watching it. I am a big Peter Bogdanovich fan and I enjoyed his last movie, \"Cat\\'s Meow\" and all his early ones from \"Targets\" to \"Nickleodeon\". So, it really surprised me that I was barely able to keep awake watching this one.<br /><br />It is ironic that this movie is about a detective agency where the detectives and clients get romantically involved with each other. Five years later, Bogdanovich\\'s ex-girlfriend, Cybil Shepherd had a hit television series called \"Moonlighting\" stealing the story idea from Bogdanovich. Of course, there was a great difference in that the series relied on tons of witty dialogue, while this tries to make do with slapstick and a few screwball lines.<br /><br />Bottom line: It ain\\'t no \"Paper Moon\" and only a very pale version of \"What\\'s Up, Doc\".', metadata={'label': 0}),\n",
|
||||
" Document(page_content=\"I can't believe that those praising this movie herein aren't thinking of some other film. I was prepared for the possibility that this would be awful, but the script (or lack thereof) makes for a film that's also pointless. On the plus side, the general level of craft on the part of the actors and technical crew is quite competent, but when you've got a sow's ear to work with you can't make a silk purse. Ben G fans should stick with just about any other movie he's been in. Dorothy S fans should stick to Galaxina. Peter B fans should stick to Last Picture Show and Target. Fans of cheap laughs at the expense of those who seem to be asking for it should stick to Peter B's amazingly awful book, Killing of the Unicorn.\", metadata={'label': 0}),\n",
|
||||
" Document(page_content='Never cast models and Playboy bunnies in your films! Bob Fosse\\'s \"Star 80\" about Dorothy Stratten, of whom Bogdanovich was obsessed enough to have married her SISTER after her murder at the hands of her low-life husband, is a zillion times more interesting than Dorothy herself on the silver screen. Patty Hansen is no actress either..I expected to see some sort of lost masterpiece a la Orson Welles but instead got Audrey Hepburn cavorting in jeans and a god-awful \"poodlesque\" hair-do....Very disappointing....\"Paper Moon\" and \"The Last Picture Show\" I could watch again and again. This clunker I could barely sit through once. This movie was reputedly not released because of the brouhaha surrounding Ms. Stratten\\'s tawdry death; I think the real reason was because it was so bad!', metadata={'label': 0}),\n",
|
||||
" Document(page_content=\"Its not the cast. A finer group of actors, you could not find. Its not the setting. The director is in love with New York City, and by the end of the film, so are we all! Woody Allen could not improve upon what Bogdonovich has done here. If you are going to fall in love, or find love, Manhattan is the place to go. No, the problem with the movie is the script. There is none. The actors fall in love at first sight, words are unnecessary. In the director's own experience in Hollywood that is what happens when they go to work on the set. It is reality to him, and his peers, but it is a fantasy to most of us in the real world. So, in the end, the movie is hollow, and shallow, and message-less.\", metadata={'label': 0}),\n",
|
||||
" Document(page_content='Today I found \"They All Laughed\" on VHS on sale in a rental. It was a really old and very used VHS, I had no information about this movie, but I liked the references listed on its cover: the names of Peter Bogdanovich, Audrey Hepburn, John Ritter and specially Dorothy Stratten attracted me, the price was very low and I decided to risk and buy it. I searched IMDb, and the User Rating of 6.0 was an excellent reference. I looked in \"Mick Martin & Marsha Porter Video & DVD Guide 2003\" and \\x96 wow \\x96 four stars! So, I decided that I could not waste more time and immediately see it. Indeed, I have just finished watching \"They All Laughed\" and I found it a very boring overrated movie. The characters are badly developed, and I spent lots of minutes to understand their roles in the story. The plot is supposed to be funny (private eyes who fall in love for the women they are chasing), but I have not laughed along the whole story. The coincidences, in a huge city like New York, are ridiculous. Ben Gazarra as an attractive and very seductive man, with the women falling for him as if her were a Brad Pitt, Antonio Banderas or George Clooney, is quite ridiculous. In the end, the greater attractions certainly are the presence of the Playboy centerfold and playmate of the year Dorothy Stratten, murdered by her husband pretty after the release of this movie, and whose life was showed in \"Star 80\" and \"Death of a Centerfold: The Dorothy Stratten Story\"; the amazing beauty of the sexy Patti Hansen, the future Mrs. Keith Richards; the always wonderful, even being fifty-two years old, Audrey Hepburn; and the song \"Amigo\", from Roberto Carlos. Although I do not like him, Roberto Carlos has been the most popular Brazilian singer since the end of the 60\\'s and is called by his fans as \"The King\". I will keep this movie in my collection only because of these attractions (manly Dorothy Stratten). My vote is four.<br /><br />Title (Brazil): \"Muito Riso e Muita Alegria\" (\"Many Laughs and Lots of Happiness\")', metadata={'label': 0})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"data[:15]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "021bc377",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example \n",
|
||||
"In this example, we use data from a dataset to answer a question"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "d924885c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.indexes import VectorstoreIndexCreator\n",
|
||||
"from langchain.document_loaders.hugging_face_dataset import HuggingFaceDatasetLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "f94ce6a3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset_name=\"tweet_eval\"\n",
|
||||
"page_content_column=\"text\"\n",
|
||||
"name=\"stance_climate\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"loader=HuggingFaceDatasetLoader(dataset_name,page_content_column,name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "abb51899",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Found cached dataset tweet_eval\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "4b10969d08df4e6792eaafc6d41fe366",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0/3 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using embedded DuckDB without persistence: data will be transient\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"index = VectorstoreIndexCreator().from_loaders([loader])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "c0108277",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What are the most used hashtag?\"\n",
|
||||
"result = index.query(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "548b6e56",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' The most used hashtags in this context are #UKClimate2015, #Sustainability, #TakeDownTheFlag, #LoveWins, #CSOTA, #ClimateSummitoftheAmericas, #SM, and #SocialMedia.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "89c30c2d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -163,7 +163,7 @@
|
||||
"source": [
|
||||
"## Define a Partitioning Strategy\n",
|
||||
"\n",
|
||||
"Unstructured document loader allow users to pass in a `strategy` parameter that lets `unstructured` know how to partitioning the document. Currently supported strategies are `\"hi_res\"` (the default) and `\"fast\"`. Hi res partitioning strategies are more accurate, but take longer to process. Fast strategies partition the document more quickly, but trade-off accuracy. Not all document types have separate hi res and fast partitioning strategies. For those document types, the `strategy` kwarg is ignored. In some cases, the high res strategy will fallback to fast if there is a dependency missing (i.e. a model for document partitioning). You can see how to apply a strategy to an `UnstructuredFileLoader` below."
|
||||
"Unstructured document loader allow users to pass in a `strategy` parameter that lets `unstructured` know how to partition the document. Currently supported strategies are `\"hi_res\"` (the default) and `\"fast\"`. Hi res partitioning strategies are more accurate, but take longer to process. Fast strategies partition the document more quickly, but trade-off accuracy. Not all document types have separate hi res and fast partitioning strategies. For those document types, the `strategy` kwarg is ignored. In some cases, the high res strategy will fallback to fast if there is a dependency missing (i.e. a model for document partitioning). You can see how to apply a strategy to an `UnstructuredFileLoader` below."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -0,0 +1,371 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fc0db1bc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Contextual Compression Retriever\n",
|
||||
"\n",
|
||||
"This notebook introduces the concept of DocumentCompressors and the ContextualCompressionRetriever. The core idea is simple: given a specific query, we should be able to return only the documents relevant to that query, and only the parts of those documents that are relevant. The ContextualCompressionsRetriever is a wrapper for another retriever that iterates over the initial output of the base retriever and filters and compresses those initial documents, so that only the most relevant information is returned."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "28e8dc12",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Helper function for printing docs\n",
|
||||
"\n",
|
||||
"def pretty_print_docs(docs):\n",
|
||||
" print(f\"\\n{'-' * 100}\\n\".join([f\"Document {i+1}:\\n\\n\" + d.page_content for i, d in enumerate(docs)]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6fa3d916",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using a vanilla vector store retriever\n",
|
||||
"Let's start by initializing a simple vector store retriever and storing the 2023 State of the Union speech (in chunks). We can see that given an example question our retriever returns one or two relevant docs and a few irrelevant docs. And even the relevant docs have a lot of irrelevant information in them."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "9fbcc58f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Document 1:\n",
|
||||
"\n",
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
|
||||
"----------------------------------------------------------------------------------------------------\n",
|
||||
"Document 2:\n",
|
||||
"\n",
|
||||
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n",
|
||||
"\n",
|
||||
"And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n",
|
||||
"\n",
|
||||
"We can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \n",
|
||||
"\n",
|
||||
"We’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n",
|
||||
"\n",
|
||||
"We’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n",
|
||||
"\n",
|
||||
"We’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n",
|
||||
"----------------------------------------------------------------------------------------------------\n",
|
||||
"Document 3:\n",
|
||||
"\n",
|
||||
"And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \n",
|
||||
"\n",
|
||||
"As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n",
|
||||
"\n",
|
||||
"While it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \n",
|
||||
"\n",
|
||||
"And soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \n",
|
||||
"\n",
|
||||
"So tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \n",
|
||||
"\n",
|
||||
"First, beat the opioid epidemic.\n",
|
||||
"----------------------------------------------------------------------------------------------------\n",
|
||||
"Document 4:\n",
|
||||
"\n",
|
||||
"Tonight, I’m announcing a crackdown on these companies overcharging American businesses and consumers. \n",
|
||||
"\n",
|
||||
"And as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \n",
|
||||
"\n",
|
||||
"That ends on my watch. \n",
|
||||
"\n",
|
||||
"Medicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \n",
|
||||
"\n",
|
||||
"We’ll also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \n",
|
||||
"\n",
|
||||
"Let’s pass the Paycheck Fairness Act and paid leave. \n",
|
||||
"\n",
|
||||
"Raise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \n",
|
||||
"\n",
|
||||
"Let’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"from langchain.vectorstores import FAISS\n",
|
||||
"\n",
|
||||
"documents = TextLoader('../../../state_of_the_union.txt').load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_documents(documents)\n",
|
||||
"retriever = FAISS.from_documents(texts, OpenAIEmbeddings()).as_retriever()\n",
|
||||
"\n",
|
||||
"docs = retriever.get_relevant_documents(\"What did the president say about Ketanji Brown Jackson\")\n",
|
||||
"pretty_print_docs(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b7648612",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Adding contextual compression with an `LLMChainExtractor`\n",
|
||||
"Now let's wrap our base retriever with a `ContextualCompressionRetriever`. We'll add an `LLMChainExtractor`, which will iterate over the initially returned documents and extract from each only the content that is relevant to the query."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "9a658023",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Document 1:\n",
|
||||
"\n",
|
||||
"\"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\"\n",
|
||||
"----------------------------------------------------------------------------------------------------\n",
|
||||
"Document 2:\n",
|
||||
"\n",
|
||||
"\"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.\"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.retrievers import ContextualCompressionRetriever\n",
|
||||
"from langchain.retrievers.document_compressors import LLMChainExtractor\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"compressor = LLMChainExtractor.from_llm(llm)\n",
|
||||
"compression_retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=retriever)\n",
|
||||
"\n",
|
||||
"compressed_docs = compression_retriever.get_relevant_documents(\"What did the president say about Ketanji Jackson Brown\")\n",
|
||||
"pretty_print_docs(compressed_docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2cd38f3a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## More built-in compressors: filters\n",
|
||||
"### `LLMChainFilter`\n",
|
||||
"The `LLMChainFilter` is slightly simpler but more robust compressor that uses an LLM chain to decide which of the initially retrieved documents to filter out and which ones to return, without manipulating the document contents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "b216a767",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Document 1:\n",
|
||||
"\n",
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.retrievers.document_compressors import LLMChainFilter\n",
|
||||
"\n",
|
||||
"_filter = LLMChainFilter.from_llm(llm)\n",
|
||||
"compression_retriever = ContextualCompressionRetriever(base_compressor=_filter, base_retriever=retriever)\n",
|
||||
"\n",
|
||||
"compressed_docs = compression_retriever.get_relevant_documents(\"What did the president say about Ketanji Jackson Brown\")\n",
|
||||
"pretty_print_docs(compressed_docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8c709598",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### `EmbeddingsFilter`\n",
|
||||
"\n",
|
||||
"Making an extra LLM call over each retrieved document is expensive and slow. The `EmbeddingsFilter` provides a cheaper and faster option by embedding the documents and query and only returning those documents which have sufficiently similar embeddings to the query."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "6fbc801f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Document 1:\n",
|
||||
"\n",
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
|
||||
"----------------------------------------------------------------------------------------------------\n",
|
||||
"Document 2:\n",
|
||||
"\n",
|
||||
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n",
|
||||
"\n",
|
||||
"And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n",
|
||||
"\n",
|
||||
"We can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \n",
|
||||
"\n",
|
||||
"We’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n",
|
||||
"\n",
|
||||
"We’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n",
|
||||
"\n",
|
||||
"We’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n",
|
||||
"----------------------------------------------------------------------------------------------------\n",
|
||||
"Document 3:\n",
|
||||
"\n",
|
||||
"And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \n",
|
||||
"\n",
|
||||
"As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n",
|
||||
"\n",
|
||||
"While it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \n",
|
||||
"\n",
|
||||
"And soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \n",
|
||||
"\n",
|
||||
"So tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \n",
|
||||
"\n",
|
||||
"First, beat the opioid epidemic.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.retrievers.document_compressors import EmbeddingsFilter\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"embeddings_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)\n",
|
||||
"compression_retriever = ContextualCompressionRetriever(base_compressor=embeddings_filter, base_retriever=retriever)\n",
|
||||
"\n",
|
||||
"compressed_docs = compression_retriever.get_relevant_documents(\"What did the president say about Ketanji Jackson Brown\")\n",
|
||||
"pretty_print_docs(compressed_docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "07365d36",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Stringing compressors and document transformers together\n",
|
||||
"Using the `DocumentCompressorPipeline` we can also easily combine multiple compressors in sequence. Along with compressors we can add `BaseDocumentTransformer`s to our pipeline, which don't perform any contextual compression but simply perform some transformation on a set of documents. For example `TextSplitter`s can be used as document transformers to split documents into smaller pieces, and the `EmbeddingsRedundantFilter` can be used to filter out redundant documents based on embedding similarity between documents.\n",
|
||||
"\n",
|
||||
"Below we create a compressor pipeline by first splitting our docs into smaller chunks, then removing redundant documents, and then filtering based on relevance to the query."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "2a150a63",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_transformers import EmbeddingsRedundantFilter\n",
|
||||
"from langchain.retrievers.document_compressors import DocumentCompressorPipeline\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=0, separator=\". \")\n",
|
||||
"redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)\n",
|
||||
"relevant_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)\n",
|
||||
"pipeline_compressor = DocumentCompressorPipeline(\n",
|
||||
" transformers=[splitter, redundant_filter, relevant_filter]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "3ceab64a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Document 1:\n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson\n",
|
||||
"----------------------------------------------------------------------------------------------------\n",
|
||||
"Document 2:\n",
|
||||
"\n",
|
||||
"As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n",
|
||||
"\n",
|
||||
"While it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year\n",
|
||||
"----------------------------------------------------------------------------------------------------\n",
|
||||
"Document 3:\n",
|
||||
"\n",
|
||||
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"compression_retriever = ContextualCompressionRetriever(base_compressor=pipeline_compressor, base_retriever=retriever)\n",
|
||||
"\n",
|
||||
"compressed_docs = compression_retriever.get_relevant_documents(\"What did the president say about Ketanji Jackson Brown\")\n",
|
||||
"pretty_print_docs(compressed_docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8cfd9fc5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
162
docs/modules/indexes/vectorstores/examples/analyticdb.ipynb
Normal file
162
docs/modules/indexes/vectorstores/examples/analyticdb.ipynb
Normal file
@@ -0,0 +1,162 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AnalyticDB\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the AnalyticDB vector database.\n",
|
||||
"To run, you should have an [AnalyticDB](https://www.alibabacloud.com/help/en/analyticdb-for-postgresql/latest/product-introduction-overview) instance up and running:\n",
|
||||
"- Using [AnalyticDB Cloud Vector Database](https://www.alibabacloud.com/product/hybriddb-postgresql). Click here to fast deploy it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import AnalyticDB"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Split documents and get embeddings by call OpenAI API"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Connect to AnalyticDB by setting related ENVIRONMENTS.\n",
|
||||
"```\n",
|
||||
"export PG_HOST={your_analyticdb_hostname}\n",
|
||||
"export PG_PORT={your_analyticdb_port} # Optional, default is 5432\n",
|
||||
"export PG_DATABASE={your_database} # Optional, default is postgres\n",
|
||||
"export PG_USER={database_username}\n",
|
||||
"export PG_PASSWORD={database_password}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Then store your embeddings and documents into AnalyticDB"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"connection_string = AnalyticDB.connection_string_from_db_params(\n",
|
||||
" driver=os.environ.get(\"PG_DRIVER\", \"psycopg2cffi\"),\n",
|
||||
" host=os.environ.get(\"PG_HOST\", \"localhost\"),\n",
|
||||
" port=int(os.environ.get(\"PG_PORT\", \"5432\")),\n",
|
||||
" database=os.environ.get(\"PG_DATABASE\", \"postgres\"),\n",
|
||||
" user=os.environ.get(\"PG_USER\", \"postgres\"),\n",
|
||||
" password=os.environ.get(\"PG_PASSWORD\", \"postgres\"),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"vector_db = AnalyticDB.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" connection_string= connection_string,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Query and retrieve data"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = vector_db.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
@@ -22,7 +22,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 45,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -33,7 +33,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 46,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -46,7 +46,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 47,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -60,16 +60,24 @@
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Creates a dataset locally at `./deeplake/`, then runs similiarity search "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 49,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"mem://langchain loaded successfully.\n"
|
||||
"./my_deeplake/ loaded successfully.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -83,7 +91,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='mem://langchain', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"Dataset(path='./my_deeplake/', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
@@ -95,15 +103,17 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db = DeepLake.from_documents(docs, embeddings)\n",
|
||||
"\n",
|
||||
"db = DeepLake(dataset_path=\"./my_deeplake/\", embedding_function=embeddings, overwrite=True)\n",
|
||||
"db.add_documents(docs)\n",
|
||||
"# or shorter\n",
|
||||
"# db = DeepLake.from_documents(docs, dataset_path=\"./my_deeplake/\", embedding=embeddings, overwrite=True)\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = db.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 50,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -124,6 +134,62 @@
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Later, you can reload the dataset without recomputing embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"./my_deeplake/ loaded successfully.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Deep Lake Dataset in ./my_deeplake/ already exists, loading from the storage\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='./my_deeplake/', read_only=True, tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (4, 1536) float32 None \n",
|
||||
" ids text (4, 1) str None \n",
|
||||
" metadata json (4, 1) str None \n",
|
||||
" text text (4, 1) str None \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db = DeepLake(dataset_path=\"./my_deeplake/\", embedding_function=embeddings, read_only=True)\n",
|
||||
"docs = db.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Deep Lake, for now, is single writer and multiple reader. Setting `read_only=True` helps to avoid acquring the writer lock."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
@@ -134,14 +200,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/media/sdb/davit/.local/lib/python3.10/site-packages/langchain/llms/openai.py:624: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI`\n",
|
||||
"/media/sdb/davit/Git/experiments/langchain/langchain/llms/openai.py:672: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI`\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
@@ -155,16 +221,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 53,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The president nominated Circuit Court of Appeals Judge Ketanji Brown Jackson for the United States Supreme Court and praised her qualifications and broad support from both Democrats and Republicans.'"
|
||||
"\"The president nominated Ketanji Brown Jackson to serve on the United States Supreme Court, describing her as one of the nation's top legal minds and a consensus builder with a background in private practice and public defense, and noting that she has received broad support from both Democrats and Republicans.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 53,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -184,14 +250,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 54,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"mem://langchain loaded successfully.\n"
|
||||
"./my_deeplake/ loaded successfully.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -205,14 +271,14 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='mem://langchain', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"Dataset(path='./my_deeplake/', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (42, 1536) float32 None \n",
|
||||
" ids text (42, 1) str None \n",
|
||||
" metadata json (42, 1) str None \n",
|
||||
" text text (42, 1) str None \n"
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (4, 1536) float32 None \n",
|
||||
" ids text (4, 1) str None \n",
|
||||
" metadata json (4, 1) str None \n",
|
||||
" text text (4, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -227,31 +293,29 @@
|
||||
"for d in docs:\n",
|
||||
" d.metadata['year'] = random.randint(2012, 2014)\n",
|
||||
"\n",
|
||||
"db = DeepLake.from_documents(docs, embeddings)"
|
||||
"db = DeepLake.from_documents(docs, embeddings, dataset_path=\"./my_deeplake/\", overwrite=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████| 42/42 [00:00<00:00, 3456.17it/s]\n"
|
||||
"100%|██████████| 4/4 [00:00<00:00, 1080.24it/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='Vice President Harris and I ran for office with a new economic vision for America. \\n\\nInvest in America. Educate Americans. Grow the workforce. Build the economy from the bottom up \\nand the middle out, not from the top down. \\n\\nBecause we know that when the middle class grows, the poor have a ladder up and the wealthy do very well. \\n\\nAmerica used to have the best roads, bridges, and airports on Earth. \\n\\nNow our infrastructure is ranked 13th in the world. \\n\\nWe won’t be able to compete for the jobs of the 21st Century if we don’t fix that. \\n\\nThat’s why it was so important to pass the Bipartisan Infrastructure Law—the most sweeping investment to rebuild America in history. \\n\\nThis was a bipartisan effort, and I want to thank the members of both parties who worked to make it happen. \\n\\nWe’re done talking about infrastructure weeks. \\n\\nWe’re going to have an infrastructure decade.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='It is going to transform America and put us on a path to win the economic competition of the 21st Century that we face with the rest of the world—particularly with China. \\n\\nAs I’ve told Xi Jinping, it is never a good bet to bet against the American people. \\n\\nWe’ll create good jobs for millions of Americans, modernizing roads, airports, ports, and waterways all across America. \\n\\nAnd we’ll do it all to withstand the devastating effects of the climate crisis and promote environmental justice. \\n\\nWe’ll build a national network of 500,000 electric vehicle charging stations, begin to replace poisonous lead pipes—so every child—and every American—has clean water to drink at home and at school, provide affordable high-speed internet for every American—urban, suburban, rural, and tribal communities. \\n\\n4,000 projects have already been announced. \\n\\nAnd tonight, I’m announcing that this year we will start fixing over 65,000 miles of highway and 1,500 bridges in disrepair.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013})]"
|
||||
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -271,19 +335,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 56,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
|
||||
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
|
||||
" Document(page_content='And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='Tonight, I’m announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWe’ll also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLet’s pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLet’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2014})]"
|
||||
" Document(page_content='Tonight, I’m announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWe’ll also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLet’s pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLet’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 56,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -303,19 +367,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 57,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
|
||||
" Document(page_content='One was stationed at bases and breathing in toxic smoke from “burn pits” that incinerated wastes of war—medical and hazard material, jet fuel, and more. \\n\\nWhen they came home, many of the world’s fittest and best trained warriors were never the same. \\n\\nHeadaches. Numbness. Dizziness. \\n\\nA cancer that would put them in a flag-draped coffin. \\n\\nI know. \\n\\nOne of those soldiers was my son Major Beau Biden. \\n\\nWe don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. \\n\\nBut I’m committed to finding out everything we can. \\n\\nCommitted to military families like Danielle Robinson from Ohio. \\n\\nThe widow of Sergeant First Class Heath Robinson. \\n\\nHe was born a soldier. Army National Guard. Combat medic in Kosovo and Iraq. \\n\\nStationed near Baghdad, just yards from burn pits the size of football fields. \\n\\nHeath’s widow Danielle is here with us tonight. They loved going to Ohio State football games. He loved building Legos with their daughter.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2014}),\n",
|
||||
" Document(page_content='As Ohio Senator Sherrod Brown says, “It’s time to bury the label “Rust Belt.” \\n\\nIt’s time. \\n\\nBut with all the bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up with the bills. \\n\\nInflation is robbing them of the gains they might otherwise feel. \\n\\nI get it. That’s why my top priority is getting prices under control. \\n\\nLook, our economy roared back faster than most predicted, but the pandemic meant that businesses had a hard time hiring enough workers to keep up production in their factories. \\n\\nThe pandemic also disrupted global supply chains. \\n\\nWhen factories close, it takes longer to make goods and get them from the warehouse to the store, and prices go up. \\n\\nLook at cars. \\n\\nLast year, there weren’t enough semiconductors to make all the cars that people wanted to buy. \\n\\nAnd guess what, prices of automobiles went up. \\n\\nSo—we have a choice. \\n\\nOne way to fight inflation is to drive down wages and make Americans poorer.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
|
||||
" Document(page_content='We can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \\n\\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \\n\\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \\n\\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \\n\\nI’ve worked on these issues a long time. \\n\\nI know what works: Investing in crime preventionand community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012})]"
|
||||
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013}),\n",
|
||||
" Document(page_content='Tonight, I’m announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWe’ll also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLet’s pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLet’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
|
||||
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2012}),\n",
|
||||
" Document(page_content='And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../../state_of_the_union.txt', 'year': 2013})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"execution_count": 57,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -324,6 +388,46 @@
|
||||
"db.max_marginal_relevance_search('What did the president say about Ketanji Brown Jackson?')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db.delete_dataset()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"and if delete fails you can also force delete"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": []
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"DeepLake.force_delete_by_path(\"./my_deeplake\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
@@ -335,7 +439,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 62,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -344,7 +448,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 63,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -352,27 +456,16 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Your Deep Lake dataset has been successfully created!\n",
|
||||
"The dataset is private so make sure you are logged in!\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\\"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/linkedin\n"
|
||||
"The dataset is private so make sure you are logged in!\n",
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/langchain_test\n",
|
||||
"hub://davitbun/langchain_test loaded successfully.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Evaluating ingest: 100%|██████████| 1/1 [00:14<00:00\n",
|
||||
" \r"
|
||||
]
|
||||
},
|
||||
@@ -380,50 +473,43 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"hub://davitbun/linkedin loaded successfully.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Evaluating ingest: 100%|██████████| 1/1 [00:23<00:00\n",
|
||||
"/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://davitbun/linkedin', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"Dataset(path='hub://davitbun/langchain_test', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (42, 1536) float32 None \n",
|
||||
" ids text (42, 1) str None \n",
|
||||
" metadata json (42, 1) str None \n",
|
||||
" text text (42, 1) str None \n"
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (4, 1536) float32 None \n",
|
||||
" ids text (4, 1) str None \n",
|
||||
" metadata json (4, 1) str None \n",
|
||||
" text text (4, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['d6d6ccb4-e187-11ed-b66d-41c5f7b85421',\n",
|
||||
" 'd6d6ccb5-e187-11ed-b66d-41c5f7b85421',\n",
|
||||
" 'd6d6ccb6-e187-11ed-b66d-41c5f7b85421',\n",
|
||||
" 'd6d6ccb7-e187-11ed-b66d-41c5f7b85421']"
|
||||
]
|
||||
},
|
||||
"execution_count": 63,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Embed and store the texts\n",
|
||||
"dataset_path = f\"hub://{USERNAME}/{DATASET_NAME}\" # could be also ./local/path (much faster locally), s3://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
|
||||
"username = \"<username>\" # your username on app.activeloop.ai \n",
|
||||
"dataset_path = f\"hub://{username}/langchain_test\" # could be also ./local/path (much faster locally), s3://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
|
||||
"\n",
|
||||
"embedding = OpenAIEmbeddings()\n",
|
||||
"vectordb = DeepLake.from_documents(documents=docs, embedding=embedding, dataset_path=dataset_path)"
|
||||
"db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings, overwrite=True)\n",
|
||||
"db.add_documents(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 64,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -446,37 +532,263 @@
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating dataset on AWS S3"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 82,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://davitbun/linkedin', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"s3://hub-2.0-datasets-n/langchain_test loaded successfully.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Evaluating ingest: 100%|██████████| 1/1 [00:10<00:00\n",
|
||||
"\\"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='s3://hub-2.0-datasets-n/langchain_test', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (42, 1536) float32 None \n",
|
||||
" ids text (42, 1) str None \n",
|
||||
" metadata json (42, 1) str None \n",
|
||||
" text text (42, 1) str None \n"
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (4, 1536) float32 None \n",
|
||||
" ids text (4, 1) str None \n",
|
||||
" metadata json (4, 1) str None \n",
|
||||
" text text (4, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vectordb.ds.summary()"
|
||||
"dataset_path = f\"s3://BUCKET/langchain_test\" # could be also ./local/path (much faster locally), hub://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
|
||||
"\n",
|
||||
"embedding = OpenAIEmbeddings()\n",
|
||||
"db = DeepLake.from_documents(docs, dataset_path=dataset_path, embedding=embeddings, overwrite=True, creds = {\n",
|
||||
" 'aws_access_key_id': os.environ['AWS_ACCESS_KEY_ID'], \n",
|
||||
" 'aws_secret_access_key': os.environ['AWS_SECRET_ACCESS_KEY'], \n",
|
||||
" 'aws_session_token': os.environ['AWS_SESSION_TOKEN'], # Optional\n",
|
||||
"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deep Lake API\n",
|
||||
"you can access the Deep Lake dataset at `db.ds`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 66,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://davitbun/langchain_test', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (4, 1536) float32 None \n",
|
||||
" ids text (4, 1) str None \n",
|
||||
" metadata json (4, 1) str None \n",
|
||||
" text text (4, 1) str None \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# get structure of the dataset\n",
|
||||
"db.ds.summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 67,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = vectordb.ds.embedding.numpy()"
|
||||
"# get embeddings numpy array\n",
|
||||
"embeds = db.ds.embedding.numpy()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Transfer local dataset to cloud\n",
|
||||
"Copy already created dataset to the cloud. You can also transfer from cloud to local."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Copying dataset: 100%|██████████| 56/56 [00:38<00:00\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/langchain_test_copy\n",
|
||||
"Your Deep Lake dataset has been successfully created!\n",
|
||||
"The dataset is private so make sure you are logged in!\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])"
|
||||
]
|
||||
},
|
||||
"execution_count": 73,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import deeplake\n",
|
||||
"username = \"davitbun\" # your username on app.activeloop.ai \n",
|
||||
"source = f\"hub://{username}/langchain_test\" # could be local, s3, gcs, etc.\n",
|
||||
"destination = f\"hub://{username}/langchain_test_copy\" # could be local, s3, gcs, etc.\n",
|
||||
"\n",
|
||||
"deeplake.deepcopy(src=source, dest=destination, overwrite=True)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/langchain_test_copy\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"hub://davitbun/langchain_test_copy loaded successfully.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Deep Lake Dataset in hub://davitbun/langchain_test_copy already exists, loading from the storage\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (4, 1536) float32 None \n",
|
||||
" ids text (4, 1) str None \n",
|
||||
" metadata json (4, 1) str None \n",
|
||||
" text text (4, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Evaluating ingest: 100%|██████████| 1/1 [00:31<00:00\n",
|
||||
"-"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (8, 1536) float32 None \n",
|
||||
" ids text (8, 1) str None \n",
|
||||
" metadata json (8, 1) str None \n",
|
||||
" text text (8, 1) str None \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \r"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['ad42f3fe-e188-11ed-b66d-41c5f7b85421',\n",
|
||||
" 'ad42f3ff-e188-11ed-b66d-41c5f7b85421',\n",
|
||||
" 'ad42f400-e188-11ed-b66d-41c5f7b85421',\n",
|
||||
" 'ad42f401-e188-11ed-b66d-41c5f7b85421']"
|
||||
]
|
||||
},
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db = DeepLake(dataset_path=destination, embedding_function=embeddings)\n",
|
||||
"db.add_documents(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -46,7 +46,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = ElasticVectorSearch.from_documents(docs, embeddings, elasticsearch_url=\"http://localhost:9200\"\n",
|
||||
"db = ElasticVectorSearch.from_documents(docs, embeddings, elasticsearch_url=\"http://localhost:9200\")\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = db.similarity_search(query)"
|
||||
|
||||
267
docs/modules/indexes/vectorstores/examples/myscale.ipynb
Normal file
267
docs/modules/indexes/vectorstores/examples/myscale.ipynb
Normal file
@@ -0,0 +1,267 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MyScale\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the MyScale vector database."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "aac9563e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import MyScale\n",
|
||||
"from langchain.document_loaders import TextLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "a9d16fa3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setting up envrionments\n",
|
||||
"\n",
|
||||
"There are two ways to set up parameters for myscale index.\n",
|
||||
"\n",
|
||||
"1. Environment Variables\n",
|
||||
"\n",
|
||||
" Before you run the app, please set the environment variable with `export`:\n",
|
||||
" `export MYSCALE_URL='<your-endpoints-url>' MYSCALE_PORT=<your-endpoints-port> MYSCALE_USERNAME=<your-username> MYSCALE_PASSWORD=<your-password> ...`\n",
|
||||
"\n",
|
||||
" You can easily find your account, password and other info on our SaaS. For details please refer to [this document](https://docs.myscale.com/en/cluster-management/)\n",
|
||||
"\n",
|
||||
" Every attributes under `MyScaleSettings` can be set with prefix `MYSCALE_` and is case insensitive.\n",
|
||||
"\n",
|
||||
"2. Create `MyScaleSettings` object with parameters\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" ```python\n",
|
||||
" from langchain.vectorstores import MyScale, MyScaleSettings\n",
|
||||
" config = MyScaleSetting(host=\"<your-backend-url>\", port=8443, ...)\n",
|
||||
" index = MyScale(embedding_function, config)\n",
|
||||
" index.add_documents(...)\n",
|
||||
" ```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a3c3999a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "6e104aee",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Inserting data...: 100%|██████████| 42/42 [00:18<00:00, 2.21it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for d in docs:\n",
|
||||
" d.metadata = {'some': 'metadata'}\n",
|
||||
"docsearch = MyScale.from_documents(docs, embeddings)\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9c608226",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"As Frances Haugen, who is here with us tonight, has shown, we must hold social media platforms accountable for the national experiment they’re conducting on our children for profit. \n",
|
||||
"\n",
|
||||
"It’s time to strengthen privacy protections, ban targeted advertising to children, demand tech companies stop collecting personal data on our children. \n",
|
||||
"\n",
|
||||
"And let’s get all Americans the mental health services they need. More people they can turn to for help, and full parity between physical and mental health care. \n",
|
||||
"\n",
|
||||
"Third, support our veterans. \n",
|
||||
"\n",
|
||||
"Veterans are the best of us. \n",
|
||||
"\n",
|
||||
"I’ve always believed that we have a sacred obligation to equip all those we send to war and care for them and their families when they come home. \n",
|
||||
"\n",
|
||||
"My administration is providing assistance with job training and housing, and now helping lower-income veterans get VA care debt-free. \n",
|
||||
"\n",
|
||||
"Our troops in Iraq and Afghanistan faced many dangers.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "e3a8b105",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get connection info and data schema"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "69996818",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(str(docsearch))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "f59360c0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Filtering\n",
|
||||
"\n",
|
||||
"You can have direct access to myscale SQL where statement. You can write `WHERE` clause following standard SQL.\n",
|
||||
"\n",
|
||||
"**NOTE**: Please be aware of SQL injection, this interface must not be directly called by end-user.\n",
|
||||
"\n",
|
||||
"If you custimized your `column_map` under your setting, you search with filter like this:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "232055f6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Inserting data...: 100%|██████████| 42/42 [00:15<00:00, 2.69it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.vectorstores import MyScale, MyScaleSettings\n",
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"\n",
|
||||
"for i, d in enumerate(docs):\n",
|
||||
" d.metadata = {'doc_id': i}\n",
|
||||
"\n",
|
||||
"docsearch = MyScale.from_documents(docs, embeddings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "ddbcee77",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.252379834651947 {'doc_id': 6, 'some': ''} And I’m taking robus...\n",
|
||||
"0.25022566318511963 {'doc_id': 1, 'some': ''} Groups of citizens b...\n",
|
||||
"0.2469480037689209 {'doc_id': 8, 'some': ''} And so many families...\n",
|
||||
"0.2428302764892578 {'doc_id': 0, 'some': 'metadata'} As Frances Haugen, w...\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"meta = docsearch.metadata_column\n",
|
||||
"output = docsearch.similarity_search_with_relevance_scores('What did the president say about Ketanji Brown Jackson?', \n",
|
||||
" k=4, where_str=f\"{meta}.doc_id<10\")\n",
|
||||
"for d, dist in output:\n",
|
||||
" print(dist, d.metadata, d.page_content[:20] + '...')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "a359ed74",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deleting your data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fb6a9d36",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch.drop()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "48dbd8e0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
399
docs/modules/indexes/vectorstores/examples/supabase.ipynb
Normal file
399
docs/modules/indexes/vectorstores/examples/supabase.ipynb
Normal file
@@ -0,0 +1,399 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SupabaseVectorStore\n",
|
||||
"\n",
|
||||
"This notebook shows how to use Supabase and `pgvector` as your VectorStore.\n",
|
||||
"\n",
|
||||
"To run this notebook, please ensure:\n",
|
||||
"\n",
|
||||
"- the `pgvector` extension is enabled\n",
|
||||
"- you have installed the `supabase-py` package\n",
|
||||
"- that you have created a `match_documents` function in your database\n",
|
||||
"- that you have a `documents` table in your `public` schema similar to the one below.\n",
|
||||
"\n",
|
||||
"The following function determines cosine similarity, but you can adjust to your needs.\n",
|
||||
"\n",
|
||||
"```sql\n",
|
||||
" -- Enable the pgvector extension to work with embedding vectors\n",
|
||||
" create extension vector;\n",
|
||||
"\n",
|
||||
" -- Create a table to store your documents\n",
|
||||
" create table documents (\n",
|
||||
" id bigserial primary key,\n",
|
||||
" content text, -- corresponds to Document.pageContent\n",
|
||||
" metadata jsonb, -- corresponds to Document.metadata\n",
|
||||
" embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed\n",
|
||||
" );\n",
|
||||
"\n",
|
||||
" CREATE FUNCTION match_documents(query_embedding vector(1536), match_count int)\n",
|
||||
" RETURNS TABLE(\n",
|
||||
" id bigint,\n",
|
||||
" content text,\n",
|
||||
" metadata jsonb,\n",
|
||||
" -- we return matched vectors to enable maximal marginal relevance searches\n",
|
||||
" embedding vector(1536),\n",
|
||||
" similarity float)\n",
|
||||
" LANGUAGE plpgsql\n",
|
||||
" AS $$\n",
|
||||
" # variable_conflict use_column\n",
|
||||
" BEGIN\n",
|
||||
" RETURN query\n",
|
||||
" SELECT\n",
|
||||
" id,\n",
|
||||
" content,\n",
|
||||
" metadata,\n",
|
||||
" embedding,\n",
|
||||
" 1 -(documents.embedding <=> query_embedding) AS similarity\n",
|
||||
" FROM\n",
|
||||
" documents\n",
|
||||
" ORDER BY\n",
|
||||
" documents.embedding <=> query_embedding\n",
|
||||
" LIMIT match_count;\n",
|
||||
" END;\n",
|
||||
" $$;\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "6bd4498b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# with pip\n",
|
||||
"# !pip install supabase\n",
|
||||
"\n",
|
||||
"# with conda\n",
|
||||
"# !conda install -c conda-forge supabase"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "90afc6df",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# If you're storing your Supabase and OpenAI API keys in a .env file, you can load them with dotenv\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
"load_dotenv()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "5ce44f7c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from supabase.client import Client, create_client\n",
|
||||
"\n",
|
||||
"supabase_url = os.environ.get(\"SUPABASE_URL\")\n",
|
||||
"supabase_key = os.environ.get(\"SUPABASE_SERVICE_KEY\")\n",
|
||||
"supabase: Client = create_client(supabase_url, supabase_key)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "aac9563e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2023-04-19 20:12:28,593:INFO - NumExpr defaulting to 8 threads.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import SupabaseVectorStore\n",
|
||||
"from langchain.document_loaders import TextLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a3c3999a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "efec97f8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# We're using the default `documents` table here. You can modify this by passing in a `table_name` argument to the `from_documents` method.\n",
|
||||
"vector_store = SupabaseVectorStore.from_documents(\n",
|
||||
" docs, embeddings, client=supabase\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "5eabdb75",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"matched_docs = vector_store.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "4b172de8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(matched_docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "18152965",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity search with score\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "72aaa9c8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"matched_docs = vector_store.similarity_search_with_relevance_scores(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "d88e958e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'}),\n",
|
||||
" 0.802509746274066)"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"matched_docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "794a7552",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Retriever options\n",
|
||||
"\n",
|
||||
"This section goes over different options for how to use SupabaseVectorStore as a retriever.\n",
|
||||
"\n",
|
||||
"### Maximal Marginal Relevance Searches\n",
|
||||
"\n",
|
||||
"In addition to using similarity search in the retriever object, you can also use `mmr`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "96ff911a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = vector_store.as_retriever(search_type=\"mmr\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "f00be6d0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"matched_docs = retriever.get_relevant_documents(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "a559c3f1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"## Document 0\n",
|
||||
"\n",
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
|
||||
"\n",
|
||||
"## Document 1\n",
|
||||
"\n",
|
||||
"One was stationed at bases and breathing in toxic smoke from “burn pits” that incinerated wastes of war—medical and hazard material, jet fuel, and more. \n",
|
||||
"\n",
|
||||
"When they came home, many of the world’s fittest and best trained warriors were never the same. \n",
|
||||
"\n",
|
||||
"Headaches. Numbness. Dizziness. \n",
|
||||
"\n",
|
||||
"A cancer that would put them in a flag-draped coffin. \n",
|
||||
"\n",
|
||||
"I know. \n",
|
||||
"\n",
|
||||
"One of those soldiers was my son Major Beau Biden. \n",
|
||||
"\n",
|
||||
"We don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. \n",
|
||||
"\n",
|
||||
"But I’m committed to finding out everything we can. \n",
|
||||
"\n",
|
||||
"Committed to military families like Danielle Robinson from Ohio. \n",
|
||||
"\n",
|
||||
"The widow of Sergeant First Class Heath Robinson. \n",
|
||||
"\n",
|
||||
"He was born a soldier. Army National Guard. Combat medic in Kosovo and Iraq. \n",
|
||||
"\n",
|
||||
"Stationed near Baghdad, just yards from burn pits the size of football fields. \n",
|
||||
"\n",
|
||||
"Heath’s widow Danielle is here with us tonight. They loved going to Ohio State football games. He loved building Legos with their daughter.\n",
|
||||
"\n",
|
||||
"## Document 2\n",
|
||||
"\n",
|
||||
"And I’m taking robust action to make sure the pain of our sanctions is targeted at Russia’s economy. And I will use every tool at our disposal to protect American businesses and consumers. \n",
|
||||
"\n",
|
||||
"Tonight, I can announce that the United States has worked with 30 other countries to release 60 Million barrels of oil from reserves around the world. \n",
|
||||
"\n",
|
||||
"America will lead that effort, releasing 30 Million barrels from our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, unified with our allies. \n",
|
||||
"\n",
|
||||
"These steps will help blunt gas prices here at home. And I know the news about what’s happening can seem alarming. \n",
|
||||
"\n",
|
||||
"But I want you to know that we are going to be okay. \n",
|
||||
"\n",
|
||||
"When the history of this era is written Putin’s war on Ukraine will have left Russia weaker and the rest of the world stronger. \n",
|
||||
"\n",
|
||||
"While it shouldn’t have taken something so terrible for people around the world to see what’s at stake now everyone sees it clearly.\n",
|
||||
"\n",
|
||||
"## Document 3\n",
|
||||
"\n",
|
||||
"We can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \n",
|
||||
"\n",
|
||||
"I recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \n",
|
||||
"\n",
|
||||
"They were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \n",
|
||||
"\n",
|
||||
"Officer Mora was 27 years old. \n",
|
||||
"\n",
|
||||
"Officer Rivera was 22. \n",
|
||||
"\n",
|
||||
"Both Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \n",
|
||||
"\n",
|
||||
"I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \n",
|
||||
"\n",
|
||||
"I’ve worked on these issues a long time. \n",
|
||||
"\n",
|
||||
"I know what works: Investing in crime preventionand community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for i, d in enumerate(matched_docs):\n",
|
||||
" print(f\"\\n## Document {i}\\n\")\n",
|
||||
" print(d.page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "79b1198e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -77,7 +77,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = Anthropic(, anthropic_api_key=ANTHROPIC_API_KEY)"
|
||||
"llm = Anthropic(anthropic_api_key=ANTHROPIC_API_KEY)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install huggingface_hub"
|
||||
"!pip install huggingface_hub > /dev/null"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -55,41 +55,195 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = HUGGINGFACEHUB_API_TOKEN"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "84dd44c1-c428-41f3-a911-520281386c94",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Select a Model**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"execution_count": null,
|
||||
"id": "39c7eeac-01c4-486b-9480-e828a9e73e78",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import HuggingFaceHub\n",
|
||||
"\n",
|
||||
"repo_id = \"google/flan-t5-xl\" # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options\n",
|
||||
"\n",
|
||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\":0, \"max_length\":64})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3acf0069",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The FIFA World Cup is a football tournament that is played every 4 years. The year 1994 was the 44th FIFA World Cup. The final answer: Brazil.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate, HuggingFaceHub, LLMChain\n",
|
||||
"from langchain import PromptTemplate, LLMChain\n",
|
||||
"\n",
|
||||
"template = \"\"\"Question: {question}\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\"\"\"\n",
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=HuggingFaceHub(repo_id=\"google/flan-t5-xl\", model_kwargs={\"temperature\":0, \"max_length\":64}))\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"\n",
|
||||
"question = \"Who won the FIFA World Cup in the year 1994? \"\n",
|
||||
"\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ddaa06cf-95ec-48ce-b0ab-d892a7909693",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Examples\n",
|
||||
"\n",
|
||||
"Below are some examples of models you can access through the Hugging Face Hub integration."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4fa9337e-ccb5-4c52-9b7c-1653148bc256",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### StableLM, by Stability AI\n",
|
||||
"\n",
|
||||
"See [Stability AI's](https://huggingface.co/stabilityai) organization page for a list of available models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "843a3837",
|
||||
"id": "36a1ce01-bd46-451f-8ee6-61c8f4bd665a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"repo_id = \"stabilityai/stablelm-tuned-alpha-3b\"\n",
|
||||
"# Others include stabilityai/stablelm-base-alpha-3b\n",
|
||||
"# as well as 7B parameter versions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b5654cea-60b0-4f40-ab34-06ba1eca810d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\":0, \"max_length\":64})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2f19d0dc-c987-433f-a8d6-b1214e8ee067",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Reuse the prompt and question from above.\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1a5c97af-89bc-4e59-95c1-223742a9160b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Dolly, by DataBricks\n",
|
||||
"\n",
|
||||
"See [DataBricks](https://huggingface.co/databricks) organization page for a list of available models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "521fcd2b-8e38-4920-b407-5c7d330411c9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import HuggingFaceHub\n",
|
||||
"\n",
|
||||
"repo_id = \"databricks/dolly-v2-3b\"\n",
|
||||
"\n",
|
||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\":0, \"max_length\":64})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9907ec3a-fe0c-4543-81c4-d42f9453f16c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Reuse the prompt and question from above.\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "03f6ae52-b5f9-4de6-832c-551cb3fa11ae",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Camel, by Writer\n",
|
||||
"\n",
|
||||
"See [Writer's](https://huggingface.co/Writer) organization page for a list of available models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "257a091d-750b-4910-ac08-fe1c7b3fd98b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import HuggingFaceHub\n",
|
||||
"\n",
|
||||
"repo_id = \"Writer/camel-5b-hf\" # See https://huggingface.co/Writer for other options\n",
|
||||
"llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={\"temperature\":0, \"max_length\":64})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b06f6838-a11a-4d6a-88e3-91fa1747a2b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Reuse the prompt and question from above.\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2bf838eb-1083-402f-b099-b07c452418c8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**And many more!**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "18c78880-65d7-41d0-9722-18090efb60e9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -111,7 +265,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -0,0 +1,145 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "959300d4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Hugging Face Local Pipelines\n",
|
||||
"\n",
|
||||
"Hugging Face models can be run locally through the `HuggingFacePipeline` class.\n",
|
||||
"\n",
|
||||
"The [Hugging Face Model Hub](https://huggingface.co/models) hosts over 120k models, 20k datasets, and 50k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.\n",
|
||||
"\n",
|
||||
"These can be called from LangChain either through this local pipeline wrapper or by calling their hosted inference endpoints through the HuggingFaceHub class. For more information on the hosted pipelines, see the [HugigngFaceHub](huggingface_hub.ipynb) notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4c1b8450-5eaf-4d34-8341-2d785448a1ff",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"To use, you should have the ``transformers`` python [package installed](https://pypi.org/project/transformers/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "d772b637-de00-4663-bd77-9bc96d798db2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install transformers > /dev/null"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "91ad075f-71d5-4bc8-ab91-cc0ad5ef16bb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load the model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "165ae236-962a-4763-8052-c4836d78a5d2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to default session, using empty session: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /sessions (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x1117f9790>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import HuggingFacePipeline\n",
|
||||
"\n",
|
||||
"llm = HuggingFacePipeline.from_model_id(model_id=\"bigscience/bloom-1b7\", task=\"text-generation\", model_kwargs={\"temperature\":0, \"max_length\":64})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "00104b27-0c15-4a97-b198-4512337ee211",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Integrate the model in an LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "3acf0069",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/wfh/code/lc/lckg/.venv/lib/python3.11/site-packages/transformers/generation/utils.py:1288: UserWarning: Using `max_length`'s default (64) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n",
|
||||
" warnings.warn(\n",
|
||||
"WARNING:root:Failed to persist run: HTTPConnectionPool(host='localhost', port=8000): Max retries exceeded with url: /chain-runs (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x144d06910>: Failed to establish a new connection: [Errno 61] Connection refused'))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" First, we need to understand what is an electroencephalogram. An electroencephalogram is a recording of brain activity. It is a recording of brain activity that is made by placing electrodes on the scalp. The electrodes are placed\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate, LLMChain\n",
|
||||
"\n",
|
||||
"template = \"\"\"Question: {question}\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\"\"\"\n",
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])\n",
|
||||
"\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"\n",
|
||||
"question = \"What is electroencephalography?\"\n",
|
||||
"\n",
|
||||
"print(llm_chain.run(question))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "843a3837",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -9,7 +9,15 @@
|
||||
"\n",
|
||||
"Let's load the SageMaker Endpoints Embeddings class. The class can be used if you host, e.g. your own Hugging Face model on SageMaker.\n",
|
||||
"\n",
|
||||
"For instrucstions on how to do this, please see [here](https://www.philschmid.de/custom-inference-huggingface-sagemaker)"
|
||||
"For instructions on how to do this, please see [here](https://www.philschmid.de/custom-inference-huggingface-sagemaker). **Note**: In order to handle batched requests, you will need to adjust the return line in the `predict_fn()` function within the custom `inference.py` script:\n",
|
||||
"\n",
|
||||
"Change from\n",
|
||||
"\n",
|
||||
"`return {\"vectors\": sentence_embeddings[0].tolist()}`\n",
|
||||
"\n",
|
||||
"to:\n",
|
||||
"\n",
|
||||
"`return {\"vectors\": sentence_embeddings.tolist()}`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -29,7 +37,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Dict\n",
|
||||
"from typing import Dict, List\n",
|
||||
"from langchain.embeddings import SagemakerEndpointEmbeddings\n",
|
||||
"from langchain.llms.sagemaker_endpoint import ContentHandlerBase\n",
|
||||
"import json\n",
|
||||
@@ -39,13 +47,13 @@
|
||||
" content_type = \"application/json\"\n",
|
||||
" accepts = \"application/json\"\n",
|
||||
"\n",
|
||||
" def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes:\n",
|
||||
" input_str = json.dumps({\"inputs\": prompt, **model_kwargs})\n",
|
||||
" def transform_input(self, inputs: list[str], model_kwargs: Dict) -> bytes:\n",
|
||||
" input_str = json.dumps({\"inputs\": inputs, **model_kwargs})\n",
|
||||
" return input_str.encode('utf-8')\n",
|
||||
" \n",
|
||||
" def transform_output(self, output: bytes) -> str:\n",
|
||||
"\n",
|
||||
" def transform_output(self, output: bytes) -> List[List[float]]:\n",
|
||||
" response_json = json.loads(output.read().decode(\"utf-8\"))\n",
|
||||
" return response_json[\"embeddings\"]\n",
|
||||
" return response_json[\"vectors\"]\n",
|
||||
"\n",
|
||||
"content_handler = ContentHandler()\n",
|
||||
"\n",
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "ed47bb62",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Sentence Transformers Embeddings\n",
|
||||
"\n",
|
||||
"Let's generate embeddings using the [SentenceTransformers](https://www.sbert.net/) integration. SentenceTransformers is a python package that can generate text and image embeddings, originating from [Sentence-BERT](https://arxiv.org/abs/1908.10084)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "06c9f47d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
|
||||
"To disable this warning, you can either:\n",
|
||||
"\t- Avoid using `tokenizers` before the fork if possible\n",
|
||||
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip install sentence_transformers > /dev/null"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "861521a9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings import SentenceTransformerEmbeddings "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "ff9be586",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = SentenceTransformerEmbeddings(model=\"all-MiniLM-L6-v2\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "d0a98ae9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text = \"This is a test document.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "5d6c682b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_result = embeddings.embed_query(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "bb5e74c0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"doc_result = embeddings.embed_documents([text, \"This is not a test document.\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "aaad49f8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "7377c2ccc78bc62c2683122d48c8cd1fb85a53850a1b1fc29736ed39852c9885"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
351
docs/modules/paul_graham_essay.txt
Normal file
351
docs/modules/paul_graham_essay.txt
Normal file
@@ -0,0 +1,351 @@
|
||||
What I Worked On
|
||||
|
||||
February 2021
|
||||
|
||||
Before college the two main things I worked on, outside of school, were writing and programming. I didn't write essays. I wrote what beginning writers were supposed to write then, and probably still are: short stories. My stories were awful. They had hardly any plot, just characters with strong feelings, which I imagined made them deep.
|
||||
|
||||
The first programs I tried writing were on the IBM 1401 that our school district used for what was then called "data processing." This was in 9th grade, so I was 13 or 14. The school district's 1401 happened to be in the basement of our junior high school, and my friend Rich Draves and I got permission to use it. It was like a mini Bond villain's lair down there, with all these alien-looking machines — CPU, disk drives, printer, card reader — sitting up on a raised floor under bright fluorescent lights.
|
||||
|
||||
The language we used was an early version of Fortran. You had to type programs on punch cards, then stack them in the card reader and press a button to load the program into memory and run it. The result would ordinarily be to print something on the spectacularly loud printer.
|
||||
|
||||
I was puzzled by the 1401. I couldn't figure out what to do with it. And in retrospect there's not much I could have done with it. The only form of input to programs was data stored on punched cards, and I didn't have any data stored on punched cards. The only other option was to do things that didn't rely on any input, like calculate approximations of pi, but I didn't know enough math to do anything interesting of that type. So I'm not surprised I can't remember any programs I wrote, because they can't have done much. My clearest memory is of the moment I learned it was possible for programs not to terminate, when one of mine didn't. On a machine without time-sharing, this was a social as well as a technical error, as the data center manager's expression made clear.
|
||||
|
||||
With microcomputers, everything changed. Now you could have a computer sitting right in front of you, on a desk, that could respond to your keystrokes as it was running instead of just churning through a stack of punch cards and then stopping. [1]
|
||||
|
||||
The first of my friends to get a microcomputer built it himself. It was sold as a kit by Heathkit. I remember vividly how impressed and envious I felt watching him sitting in front of it, typing programs right into the computer.
|
||||
|
||||
Computers were expensive in those days and it took me years of nagging before I convinced my father to buy one, a TRS-80, in about 1980. The gold standard then was the Apple II, but a TRS-80 was good enough. This was when I really started programming. I wrote simple games, a program to predict how high my model rockets would fly, and a word processor that my father used to write at least one book. There was only room in memory for about 2 pages of text, so he'd write 2 pages at a time and then print them out, but it was a lot better than a typewriter.
|
||||
|
||||
Though I liked programming, I didn't plan to study it in college. In college I was going to study philosophy, which sounded much more powerful. It seemed, to my naive high school self, to be the study of the ultimate truths, compared to which the things studied in other fields would be mere domain knowledge. What I discovered when I got to college was that the other fields took up so much of the space of ideas that there wasn't much left for these supposed ultimate truths. All that seemed left for philosophy were edge cases that people in other fields felt could safely be ignored.
|
||||
|
||||
I couldn't have put this into words when I was 18. All I knew at the time was that I kept taking philosophy courses and they kept being boring. So I decided to switch to AI.
|
||||
|
||||
AI was in the air in the mid 1980s, but there were two things especially that made me want to work on it: a novel by Heinlein called The Moon is a Harsh Mistress, which featured an intelligent computer called Mike, and a PBS documentary that showed Terry Winograd using SHRDLU. I haven't tried rereading The Moon is a Harsh Mistress, so I don't know how well it has aged, but when I read it I was drawn entirely into its world. It seemed only a matter of time before we'd have Mike, and when I saw Winograd using SHRDLU, it seemed like that time would be a few years at most. All you had to do was teach SHRDLU more words.
|
||||
|
||||
There weren't any classes in AI at Cornell then, not even graduate classes, so I started trying to teach myself. Which meant learning Lisp, since in those days Lisp was regarded as the language of AI. The commonly used programming languages then were pretty primitive, and programmers' ideas correspondingly so. The default language at Cornell was a Pascal-like language called PL/I, and the situation was similar elsewhere. Learning Lisp expanded my concept of a program so fast that it was years before I started to have a sense of where the new limits were. This was more like it; this was what I had expected college to do. It wasn't happening in a class, like it was supposed to, but that was ok. For the next couple years I was on a roll. I knew what I was going to do.
|
||||
|
||||
For my undergraduate thesis, I reverse-engineered SHRDLU. My God did I love working on that program. It was a pleasing bit of code, but what made it even more exciting was my belief — hard to imagine now, but not unique in 1985 — that it was already climbing the lower slopes of intelligence.
|
||||
|
||||
I had gotten into a program at Cornell that didn't make you choose a major. You could take whatever classes you liked, and choose whatever you liked to put on your degree. I of course chose "Artificial Intelligence." When I got the actual physical diploma, I was dismayed to find that the quotes had been included, which made them read as scare-quotes. At the time this bothered me, but now it seems amusingly accurate, for reasons I was about to discover.
|
||||
|
||||
I applied to 3 grad schools: MIT and Yale, which were renowned for AI at the time, and Harvard, which I'd visited because Rich Draves went there, and was also home to Bill Woods, who'd invented the type of parser I used in my SHRDLU clone. Only Harvard accepted me, so that was where I went.
|
||||
|
||||
I don't remember the moment it happened, or if there even was a specific moment, but during the first year of grad school I realized that AI, as practiced at the time, was a hoax. By which I mean the sort of AI in which a program that's told "the dog is sitting on the chair" translates this into some formal representation and adds it to the list of things it knows.
|
||||
|
||||
What these programs really showed was that there's a subset of natural language that's a formal language. But a very proper subset. It was clear that there was an unbridgeable gap between what they could do and actually understanding natural language. It was not, in fact, simply a matter of teaching SHRDLU more words. That whole way of doing AI, with explicit data structures representing concepts, was not going to work. Its brokenness did, as so often happens, generate a lot of opportunities to write papers about various band-aids that could be applied to it, but it was never going to get us Mike.
|
||||
|
||||
So I looked around to see what I could salvage from the wreckage of my plans, and there was Lisp. I knew from experience that Lisp was interesting for its own sake and not just for its association with AI, even though that was the main reason people cared about it at the time. So I decided to focus on Lisp. In fact, I decided to write a book about Lisp hacking. It's scary to think how little I knew about Lisp hacking when I started writing that book. But there's nothing like writing a book about something to help you learn it. The book, On Lisp, wasn't published till 1993, but I wrote much of it in grad school.
|
||||
|
||||
Computer Science is an uneasy alliance between two halves, theory and systems. The theory people prove things, and the systems people build things. I wanted to build things. I had plenty of respect for theory — indeed, a sneaking suspicion that it was the more admirable of the two halves — but building things seemed so much more exciting.
|
||||
|
||||
The problem with systems work, though, was that it didn't last. Any program you wrote today, no matter how good, would be obsolete in a couple decades at best. People might mention your software in footnotes, but no one would actually use it. And indeed, it would seem very feeble work. Only people with a sense of the history of the field would even realize that, in its time, it had been good.
|
||||
|
||||
There were some surplus Xerox Dandelions floating around the computer lab at one point. Anyone who wanted one to play around with could have one. I was briefly tempted, but they were so slow by present standards; what was the point? No one else wanted one either, so off they went. That was what happened to systems work.
|
||||
|
||||
I wanted not just to build things, but to build things that would last.
|
||||
|
||||
In this dissatisfied state I went in 1988 to visit Rich Draves at CMU, where he was in grad school. One day I went to visit the Carnegie Institute, where I'd spent a lot of time as a kid. While looking at a painting there I realized something that might seem obvious, but was a big surprise to me. There, right on the wall, was something you could make that would last. Paintings didn't become obsolete. Some of the best ones were hundreds of years old.
|
||||
|
||||
And moreover this was something you could make a living doing. Not as easily as you could by writing software, of course, but I thought if you were really industrious and lived really cheaply, it had to be possible to make enough to survive. And as an artist you could be truly independent. You wouldn't have a boss, or even need to get research funding.
|
||||
|
||||
I had always liked looking at paintings. Could I make them? I had no idea. I'd never imagined it was even possible. I knew intellectually that people made art — that it didn't just appear spontaneously — but it was as if the people who made it were a different species. They either lived long ago or were mysterious geniuses doing strange things in profiles in Life magazine. The idea of actually being able to make art, to put that verb before that noun, seemed almost miraculous.
|
||||
|
||||
That fall I started taking art classes at Harvard. Grad students could take classes in any department, and my advisor, Tom Cheatham, was very easy going. If he even knew about the strange classes I was taking, he never said anything.
|
||||
|
||||
So now I was in a PhD program in computer science, yet planning to be an artist, yet also genuinely in love with Lisp hacking and working away at On Lisp. In other words, like many a grad student, I was working energetically on multiple projects that were not my thesis.
|
||||
|
||||
I didn't see a way out of this situation. I didn't want to drop out of grad school, but how else was I going to get out? I remember when my friend Robert Morris got kicked out of Cornell for writing the internet worm of 1988, I was envious that he'd found such a spectacular way to get out of grad school.
|
||||
|
||||
Then one day in April 1990 a crack appeared in the wall. I ran into professor Cheatham and he asked if I was far enough along to graduate that June. I didn't have a word of my dissertation written, but in what must have been the quickest bit of thinking in my life, I decided to take a shot at writing one in the 5 weeks or so that remained before the deadline, reusing parts of On Lisp where I could, and I was able to respond, with no perceptible delay "Yes, I think so. I'll give you something to read in a few days."
|
||||
|
||||
I picked applications of continuations as the topic. In retrospect I should have written about macros and embedded languages. There's a whole world there that's barely been explored. But all I wanted was to get out of grad school, and my rapidly written dissertation sufficed, just barely.
|
||||
|
||||
Meanwhile I was applying to art schools. I applied to two: RISD in the US, and the Accademia di Belli Arti in Florence, which, because it was the oldest art school, I imagined would be good. RISD accepted me, and I never heard back from the Accademia, so off to Providence I went.
|
||||
|
||||
I'd applied for the BFA program at RISD, which meant in effect that I had to go to college again. This was not as strange as it sounds, because I was only 25, and art schools are full of people of different ages. RISD counted me as a transfer sophomore and said I had to do the foundation that summer. The foundation means the classes that everyone has to take in fundamental subjects like drawing, color, and design.
|
||||
|
||||
Toward the end of the summer I got a big surprise: a letter from the Accademia, which had been delayed because they'd sent it to Cambridge England instead of Cambridge Massachusetts, inviting me to take the entrance exam in Florence that fall. This was now only weeks away. My nice landlady let me leave my stuff in her attic. I had some money saved from consulting work I'd done in grad school; there was probably enough to last a year if I lived cheaply. Now all I had to do was learn Italian.
|
||||
|
||||
Only stranieri (foreigners) had to take this entrance exam. In retrospect it may well have been a way of excluding them, because there were so many stranieri attracted by the idea of studying art in Florence that the Italian students would otherwise have been outnumbered. I was in decent shape at painting and drawing from the RISD foundation that summer, but I still don't know how I managed to pass the written exam. I remember that I answered the essay question by writing about Cezanne, and that I cranked up the intellectual level as high as I could to make the most of my limited vocabulary. [2]
|
||||
|
||||
I'm only up to age 25 and already there are such conspicuous patterns. Here I was, yet again about to attend some august institution in the hopes of learning about some prestigious subject, and yet again about to be disappointed. The students and faculty in the painting department at the Accademia were the nicest people you could imagine, but they had long since arrived at an arrangement whereby the students wouldn't require the faculty to teach anything, and in return the faculty wouldn't require the students to learn anything. And at the same time all involved would adhere outwardly to the conventions of a 19th century atelier. We actually had one of those little stoves, fed with kindling, that you see in 19th century studio paintings, and a nude model sitting as close to it as possible without getting burned. Except hardly anyone else painted her besides me. The rest of the students spent their time chatting or occasionally trying to imitate things they'd seen in American art magazines.
|
||||
|
||||
Our model turned out to live just down the street from me. She made a living from a combination of modelling and making fakes for a local antique dealer. She'd copy an obscure old painting out of a book, and then he'd take the copy and maltreat it to make it look old. [3]
|
||||
|
||||
While I was a student at the Accademia I started painting still lives in my bedroom at night. These paintings were tiny, because the room was, and because I painted them on leftover scraps of canvas, which was all I could afford at the time. Painting still lives is different from painting people, because the subject, as its name suggests, can't move. People can't sit for more than about 15 minutes at a time, and when they do they don't sit very still. So the traditional m.o. for painting people is to know how to paint a generic person, which you then modify to match the specific person you're painting. Whereas a still life you can, if you want, copy pixel by pixel from what you're seeing. You don't want to stop there, of course, or you get merely photographic accuracy, and what makes a still life interesting is that it's been through a head. You want to emphasize the visual cues that tell you, for example, that the reason the color changes suddenly at a certain point is that it's the edge of an object. By subtly emphasizing such things you can make paintings that are more realistic than photographs not just in some metaphorical sense, but in the strict information-theoretic sense. [4]
|
||||
|
||||
I liked painting still lives because I was curious about what I was seeing. In everyday life, we aren't consciously aware of much we're seeing. Most visual perception is handled by low-level processes that merely tell your brain "that's a water droplet" without telling you details like where the lightest and darkest points are, or "that's a bush" without telling you the shape and position of every leaf. This is a feature of brains, not a bug. In everyday life it would be distracting to notice every leaf on every bush. But when you have to paint something, you have to look more closely, and when you do there's a lot to see. You can still be noticing new things after days of trying to paint something people usually take for granted, just as you can after days of trying to write an essay about something people usually take for granted.
|
||||
|
||||
This is not the only way to paint. I'm not 100% sure it's even a good way to paint. But it seemed a good enough bet to be worth trying.
|
||||
|
||||
Our teacher, professor Ulivi, was a nice guy. He could see I worked hard, and gave me a good grade, which he wrote down in a sort of passport each student had. But the Accademia wasn't teaching me anything except Italian, and my money was running out, so at the end of the first year I went back to the US.
|
||||
|
||||
I wanted to go back to RISD, but I was now broke and RISD was very expensive, so I decided to get a job for a year and then return to RISD the next fall. I got one at a company called Interleaf, which made software for creating documents. You mean like Microsoft Word? Exactly. That was how I learned that low end software tends to eat high end software. But Interleaf still had a few years to live yet. [5]
|
||||
|
||||
Interleaf had done something pretty bold. Inspired by Emacs, they'd added a scripting language, and even made the scripting language a dialect of Lisp. Now they wanted a Lisp hacker to write things in it. This was the closest thing I've had to a normal job, and I hereby apologize to my boss and coworkers, because I was a bad employee. Their Lisp was the thinnest icing on a giant C cake, and since I didn't know C and didn't want to learn it, I never understood most of the software. Plus I was terribly irresponsible. This was back when a programming job meant showing up every day during certain working hours. That seemed unnatural to me, and on this point the rest of the world is coming around to my way of thinking, but at the time it caused a lot of friction. Toward the end of the year I spent much of my time surreptitiously working on On Lisp, which I had by this time gotten a contract to publish.
|
||||
|
||||
The good part was that I got paid huge amounts of money, especially by art student standards. In Florence, after paying my part of the rent, my budget for everything else had been $7 a day. Now I was getting paid more than 4 times that every hour, even when I was just sitting in a meeting. By living cheaply I not only managed to save enough to go back to RISD, but also paid off my college loans.
|
||||
|
||||
I learned some useful things at Interleaf, though they were mostly about what not to do. I learned that it's better for technology companies to be run by product people than sales people (though sales is a real skill and people who are good at it are really good at it), that it leads to bugs when code is edited by too many people, that cheap office space is no bargain if it's depressing, that planned meetings are inferior to corridor conversations, that big, bureaucratic customers are a dangerous source of money, and that there's not much overlap between conventional office hours and the optimal time for hacking, or conventional offices and the optimal place for it.
|
||||
|
||||
But the most important thing I learned, and which I used in both Viaweb and Y Combinator, is that the low end eats the high end: that it's good to be the "entry level" option, even though that will be less prestigious, because if you're not, someone else will be, and will squash you against the ceiling. Which in turn means that prestige is a danger sign.
|
||||
|
||||
When I left to go back to RISD the next fall, I arranged to do freelance work for the group that did projects for customers, and this was how I survived for the next several years. When I came back to visit for a project later on, someone told me about a new thing called HTML, which was, as he described it, a derivative of SGML. Markup language enthusiasts were an occupational hazard at Interleaf and I ignored him, but this HTML thing later became a big part of my life.
|
||||
|
||||
In the fall of 1992 I moved back to Providence to continue at RISD. The foundation had merely been intro stuff, and the Accademia had been a (very civilized) joke. Now I was going to see what real art school was like. But alas it was more like the Accademia than not. Better organized, certainly, and a lot more expensive, but it was now becoming clear that art school did not bear the same relationship to art that medical school bore to medicine. At least not the painting department. The textile department, which my next door neighbor belonged to, seemed to be pretty rigorous. No doubt illustration and architecture were too. But painting was post-rigorous. Painting students were supposed to express themselves, which to the more worldly ones meant to try to cook up some sort of distinctive signature style.
|
||||
|
||||
A signature style is the visual equivalent of what in show business is known as a "schtick": something that immediately identifies the work as yours and no one else's. For example, when you see a painting that looks like a certain kind of cartoon, you know it's by Roy Lichtenstein. So if you see a big painting of this type hanging in the apartment of a hedge fund manager, you know he paid millions of dollars for it. That's not always why artists have a signature style, but it's usually why buyers pay a lot for such work. [6]
|
||||
|
||||
There were plenty of earnest students too: kids who "could draw" in high school, and now had come to what was supposed to be the best art school in the country, to learn to draw even better. They tended to be confused and demoralized by what they found at RISD, but they kept going, because painting was what they did. I was not one of the kids who could draw in high school, but at RISD I was definitely closer to their tribe than the tribe of signature style seekers.
|
||||
|
||||
I learned a lot in the color class I took at RISD, but otherwise I was basically teaching myself to paint, and I could do that for free. So in 1993 I dropped out. I hung around Providence for a bit, and then my college friend Nancy Parmet did me a big favor. A rent-controlled apartment in a building her mother owned in New York was becoming vacant. Did I want it? It wasn't much more than my current place, and New York was supposed to be where the artists were. So yes, I wanted it! [7]
|
||||
|
||||
Asterix comics begin by zooming in on a tiny corner of Roman Gaul that turns out not to be controlled by the Romans. You can do something similar on a map of New York City: if you zoom in on the Upper East Side, there's a tiny corner that's not rich, or at least wasn't in 1993. It's called Yorkville, and that was my new home. Now I was a New York artist — in the strictly technical sense of making paintings and living in New York.
|
||||
|
||||
I was nervous about money, because I could sense that Interleaf was on the way down. Freelance Lisp hacking work was very rare, and I didn't want to have to program in another language, which in those days would have meant C++ if I was lucky. So with my unerring nose for financial opportunity, I decided to write another book on Lisp. This would be a popular book, the sort of book that could be used as a textbook. I imagined myself living frugally off the royalties and spending all my time painting. (The painting on the cover of this book, ANSI Common Lisp, is one that I painted around this time.)
|
||||
|
||||
The best thing about New York for me was the presence of Idelle and Julian Weber. Idelle Weber was a painter, one of the early photorealists, and I'd taken her painting class at Harvard. I've never known a teacher more beloved by her students. Large numbers of former students kept in touch with her, including me. After I moved to New York I became her de facto studio assistant.
|
||||
|
||||
She liked to paint on big, square canvases, 4 to 5 feet on a side. One day in late 1994 as I was stretching one of these monsters there was something on the radio about a famous fund manager. He wasn't that much older than me, and was super rich. The thought suddenly occurred to me: why don't I become rich? Then I'll be able to work on whatever I want.
|
||||
|
||||
Meanwhile I'd been hearing more and more about this new thing called the World Wide Web. Robert Morris showed it to me when I visited him in Cambridge, where he was now in grad school at Harvard. It seemed to me that the web would be a big deal. I'd seen what graphical user interfaces had done for the popularity of microcomputers. It seemed like the web would do the same for the internet.
|
||||
|
||||
If I wanted to get rich, here was the next train leaving the station. I was right about that part. What I got wrong was the idea. I decided we should start a company to put art galleries online. I can't honestly say, after reading so many Y Combinator applications, that this was the worst startup idea ever, but it was up there. Art galleries didn't want to be online, and still don't, not the fancy ones. That's not how they sell. I wrote some software to generate web sites for galleries, and Robert wrote some to resize images and set up an http server to serve the pages. Then we tried to sign up galleries. To call this a difficult sale would be an understatement. It was difficult to give away. A few galleries let us make sites for them for free, but none paid us.
|
||||
|
||||
Then some online stores started to appear, and I realized that except for the order buttons they were identical to the sites we'd been generating for galleries. This impressive-sounding thing called an "internet storefront" was something we already knew how to build.
|
||||
|
||||
So in the summer of 1995, after I submitted the camera-ready copy of ANSI Common Lisp to the publishers, we started trying to write software to build online stores. At first this was going to be normal desktop software, which in those days meant Windows software. That was an alarming prospect, because neither of us knew how to write Windows software or wanted to learn. We lived in the Unix world. But we decided we'd at least try writing a prototype store builder on Unix. Robert wrote a shopping cart, and I wrote a new site generator for stores — in Lisp, of course.
|
||||
|
||||
We were working out of Robert's apartment in Cambridge. His roommate was away for big chunks of time, during which I got to sleep in his room. For some reason there was no bed frame or sheets, just a mattress on the floor. One morning as I was lying on this mattress I had an idea that made me sit up like a capital L. What if we ran the software on the server, and let users control it by clicking on links? Then we'd never have to write anything to run on users' computers. We could generate the sites on the same server we'd serve them from. Users wouldn't need anything more than a browser.
|
||||
|
||||
This kind of software, known as a web app, is common now, but at the time it wasn't clear that it was even possible. To find out, we decided to try making a version of our store builder that you could control through the browser. A couple days later, on August 12, we had one that worked. The UI was horrible, but it proved you could build a whole store through the browser, without any client software or typing anything into the command line on the server.
|
||||
|
||||
Now we felt like we were really onto something. I had visions of a whole new generation of software working this way. You wouldn't need versions, or ports, or any of that crap. At Interleaf there had been a whole group called Release Engineering that seemed to be at least as big as the group that actually wrote the software. Now you could just update the software right on the server.
|
||||
|
||||
We started a new company we called Viaweb, after the fact that our software worked via the web, and we got $10,000 in seed funding from Idelle's husband Julian. In return for that and doing the initial legal work and giving us business advice, we gave him 10% of the company. Ten years later this deal became the model for Y Combinator's. We knew founders needed something like this, because we'd needed it ourselves.
|
||||
|
||||
At this stage I had a negative net worth, because the thousand dollars or so I had in the bank was more than counterbalanced by what I owed the government in taxes. (Had I diligently set aside the proper proportion of the money I'd made consulting for Interleaf? No, I had not.) So although Robert had his graduate student stipend, I needed that seed funding to live on.
|
||||
|
||||
We originally hoped to launch in September, but we got more ambitious about the software as we worked on it. Eventually we managed to build a WYSIWYG site builder, in the sense that as you were creating pages, they looked exactly like the static ones that would be generated later, except that instead of leading to static pages, the links all referred to closures stored in a hash table on the server.
|
||||
|
||||
It helped to have studied art, because the main goal of an online store builder is to make users look legit, and the key to looking legit is high production values. If you get page layouts and fonts and colors right, you can make a guy running a store out of his bedroom look more legit than a big company.
|
||||
|
||||
(If you're curious why my site looks so old-fashioned, it's because it's still made with this software. It may look clunky today, but in 1996 it was the last word in slick.)
|
||||
|
||||
In September, Robert rebelled. "We've been working on this for a month," he said, "and it's still not done." This is funny in retrospect, because he would still be working on it almost 3 years later. But I decided it might be prudent to recruit more programmers, and I asked Robert who else in grad school with him was really good. He recommended Trevor Blackwell, which surprised me at first, because at that point I knew Trevor mainly for his plan to reduce everything in his life to a stack of notecards, which he carried around with him. But Rtm was right, as usual. Trevor turned out to be a frighteningly effective hacker.
|
||||
|
||||
It was a lot of fun working with Robert and Trevor. They're the two most independent-minded people I know, and in completely different ways. If you could see inside Rtm's brain it would look like a colonial New England church, and if you could see inside Trevor's it would look like the worst excesses of Austrian Rococo.
|
||||
|
||||
We opened for business, with 6 stores, in January 1996. It was just as well we waited a few months, because although we worried we were late, we were actually almost fatally early. There was a lot of talk in the press then about ecommerce, but not many people actually wanted online stores. [8]
|
||||
|
||||
There were three main parts to the software: the editor, which people used to build sites and which I wrote, the shopping cart, which Robert wrote, and the manager, which kept track of orders and statistics, and which Trevor wrote. In its time, the editor was one of the best general-purpose site builders. I kept the code tight and didn't have to integrate with any other software except Robert's and Trevor's, so it was quite fun to work on. If all I'd had to do was work on this software, the next 3 years would have been the easiest of my life. Unfortunately I had to do a lot more, all of it stuff I was worse at than programming, and the next 3 years were instead the most stressful.
|
||||
|
||||
There were a lot of startups making ecommerce software in the second half of the 90s. We were determined to be the Microsoft Word, not the Interleaf. Which meant being easy to use and inexpensive. It was lucky for us that we were poor, because that caused us to make Viaweb even more inexpensive than we realized. We charged $100 a month for a small store and $300 a month for a big one. This low price was a big attraction, and a constant thorn in the sides of competitors, but it wasn't because of some clever insight that we set the price low. We had no idea what businesses paid for things. $300 a month seemed like a lot of money to us.
|
||||
|
||||
We did a lot of things right by accident like that. For example, we did what's now called "doing things that don't scale," although at the time we would have described it as "being so lame that we're driven to the most desperate measures to get users." The most common of which was building stores for them. This seemed particularly humiliating, since the whole raison d'etre of our software was that people could use it to make their own stores. But anything to get users.
|
||||
|
||||
We learned a lot more about retail than we wanted to know. For example, that if you could only have a small image of a man's shirt (and all images were small then by present standards), it was better to have a closeup of the collar than a picture of the whole shirt. The reason I remember learning this was that it meant I had to rescan about 30 images of men's shirts. My first set of scans were so beautiful too.
|
||||
|
||||
Though this felt wrong, it was exactly the right thing to be doing. Building stores for users taught us about retail, and about how it felt to use our software. I was initially both mystified and repelled by "business" and thought we needed a "business person" to be in charge of it, but once we started to get users, I was converted, in much the same way I was converted to fatherhood once I had kids. Whatever users wanted, I was all theirs. Maybe one day we'd have so many users that I couldn't scan their images for them, but in the meantime there was nothing more important to do.
|
||||
|
||||
Another thing I didn't get at the time is that growth rate is the ultimate test of a startup. Our growth rate was fine. We had about 70 stores at the end of 1996 and about 500 at the end of 1997. I mistakenly thought the thing that mattered was the absolute number of users. And that is the thing that matters in the sense that that's how much money you're making, and if you're not making enough, you might go out of business. But in the long term the growth rate takes care of the absolute number. If we'd been a startup I was advising at Y Combinator, I would have said: Stop being so stressed out, because you're doing fine. You're growing 7x a year. Just don't hire too many more people and you'll soon be profitable, and then you'll control your own destiny.
|
||||
|
||||
Alas I hired lots more people, partly because our investors wanted me to, and partly because that's what startups did during the Internet Bubble. A company with just a handful of employees would have seemed amateurish. So we didn't reach breakeven until about when Yahoo bought us in the summer of 1998. Which in turn meant we were at the mercy of investors for the entire life of the company. And since both we and our investors were noobs at startups, the result was a mess even by startup standards.
|
||||
|
||||
It was a huge relief when Yahoo bought us. In principle our Viaweb stock was valuable. It was a share in a business that was profitable and growing rapidly. But it didn't feel very valuable to me; I had no idea how to value a business, but I was all too keenly aware of the near-death experiences we seemed to have every few months. Nor had I changed my grad student lifestyle significantly since we started. So when Yahoo bought us it felt like going from rags to riches. Since we were going to California, I bought a car, a yellow 1998 VW GTI. I remember thinking that its leather seats alone were by far the most luxurious thing I owned.
|
||||
|
||||
The next year, from the summer of 1998 to the summer of 1999, must have been the least productive of my life. I didn't realize it at the time, but I was worn out from the effort and stress of running Viaweb. For a while after I got to California I tried to continue my usual m.o. of programming till 3 in the morning, but fatigue combined with Yahoo's prematurely aged culture and grim cube farm in Santa Clara gradually dragged me down. After a few months it felt disconcertingly like working at Interleaf.
|
||||
|
||||
Yahoo had given us a lot of options when they bought us. At the time I thought Yahoo was so overvalued that they'd never be worth anything, but to my astonishment the stock went up 5x in the next year. I hung on till the first chunk of options vested, then in the summer of 1999 I left. It had been so long since I'd painted anything that I'd half forgotten why I was doing this. My brain had been entirely full of software and men's shirts for 4 years. But I had done this to get rich so I could paint, I reminded myself, and now I was rich, so I should go paint.
|
||||
|
||||
When I said I was leaving, my boss at Yahoo had a long conversation with me about my plans. I told him all about the kinds of pictures I wanted to paint. At the time I was touched that he took such an interest in me. Now I realize it was because he thought I was lying. My options at that point were worth about $2 million a month. If I was leaving that kind of money on the table, it could only be to go and start some new startup, and if I did, I might take people with me. This was the height of the Internet Bubble, and Yahoo was ground zero of it. My boss was at that moment a billionaire. Leaving then to start a new startup must have seemed to him an insanely, and yet also plausibly, ambitious plan.
|
||||
|
||||
But I really was quitting to paint, and I started immediately. There was no time to lose. I'd already burned 4 years getting rich. Now when I talk to founders who are leaving after selling their companies, my advice is always the same: take a vacation. That's what I should have done, just gone off somewhere and done nothing for a month or two, but the idea never occurred to me.
|
||||
|
||||
So I tried to paint, but I just didn't seem to have any energy or ambition. Part of the problem was that I didn't know many people in California. I'd compounded this problem by buying a house up in the Santa Cruz Mountains, with a beautiful view but miles from anywhere. I stuck it out for a few more months, then in desperation I went back to New York, where unless you understand about rent control you'll be surprised to hear I still had my apartment, sealed up like a tomb of my old life. Idelle was in New York at least, and there were other people trying to paint there, even though I didn't know any of them.
|
||||
|
||||
When I got back to New York I resumed my old life, except now I was rich. It was as weird as it sounds. I resumed all my old patterns, except now there were doors where there hadn't been. Now when I was tired of walking, all I had to do was raise my hand, and (unless it was raining) a taxi would stop to pick me up. Now when I walked past charming little restaurants I could go in and order lunch. It was exciting for a while. Painting started to go better. I experimented with a new kind of still life where I'd paint one painting in the old way, then photograph it and print it, blown up, on canvas, and then use that as the underpainting for a second still life, painted from the same objects (which hopefully hadn't rotted yet).
|
||||
|
||||
Meanwhile I looked for an apartment to buy. Now I could actually choose what neighborhood to live in. Where, I asked myself and various real estate agents, is the Cambridge of New York? Aided by occasional visits to actual Cambridge, I gradually realized there wasn't one. Huh.
|
||||
|
||||
Around this time, in the spring of 2000, I had an idea. It was clear from our experience with Viaweb that web apps were the future. Why not build a web app for making web apps? Why not let people edit code on our server through the browser, and then host the resulting applications for them? [9] You could run all sorts of services on the servers that these applications could use just by making an API call: making and receiving phone calls, manipulating images, taking credit card payments, etc.
|
||||
|
||||
I got so excited about this idea that I couldn't think about anything else. It seemed obvious that this was the future. I didn't particularly want to start another company, but it was clear that this idea would have to be embodied as one, so I decided to move to Cambridge and start it. I hoped to lure Robert into working on it with me, but there I ran into a hitch. Robert was now a postdoc at MIT, and though he'd made a lot of money the last time I'd lured him into working on one of my schemes, it had also been a huge time sink. So while he agreed that it sounded like a plausible idea, he firmly refused to work on it.
|
||||
|
||||
Hmph. Well, I'd do it myself then. I recruited Dan Giffin, who had worked for Viaweb, and two undergrads who wanted summer jobs, and we got to work trying to build what it's now clear is about twenty companies and several open source projects worth of software. The language for defining applications would of course be a dialect of Lisp. But I wasn't so naive as to assume I could spring an overt Lisp on a general audience; we'd hide the parentheses, like Dylan did.
|
||||
|
||||
By then there was a name for the kind of company Viaweb was, an "application service provider," or ASP. This name didn't last long before it was replaced by "software as a service," but it was current for long enough that I named this new company after it: it was going to be called Aspra.
|
||||
|
||||
I started working on the application builder, Dan worked on network infrastructure, and the two undergrads worked on the first two services (images and phone calls). But about halfway through the summer I realized I really didn't want to run a company — especially not a big one, which it was looking like this would have to be. I'd only started Viaweb because I needed the money. Now that I didn't need money anymore, why was I doing this? If this vision had to be realized as a company, then screw the vision. I'd build a subset that could be done as an open source project.
|
||||
|
||||
Much to my surprise, the time I spent working on this stuff was not wasted after all. After we started Y Combinator, I would often encounter startups working on parts of this new architecture, and it was very useful to have spent so much time thinking about it and even trying to write some of it.
|
||||
|
||||
The subset I would build as an open source project was the new Lisp, whose parentheses I now wouldn't even have to hide. A lot of Lisp hackers dream of building a new Lisp, partly because one of the distinctive features of the language is that it has dialects, and partly, I think, because we have in our minds a Platonic form of Lisp that all existing dialects fall short of. I certainly did. So at the end of the summer Dan and I switched to working on this new dialect of Lisp, which I called Arc, in a house I bought in Cambridge.
|
||||
|
||||
The following spring, lightning struck. I was invited to give a talk at a Lisp conference, so I gave one about how we'd used Lisp at Viaweb. Afterward I put a postscript file of this talk online, on paulgraham.com, which I'd created years before using Viaweb but had never used for anything. In one day it got 30,000 page views. What on earth had happened? The referring urls showed that someone had posted it on Slashdot. [10]
|
||||
|
||||
Wow, I thought, there's an audience. If I write something and put it on the web, anyone can read it. That may seem obvious now, but it was surprising then. In the print era there was a narrow channel to readers, guarded by fierce monsters known as editors. The only way to get an audience for anything you wrote was to get it published as a book, or in a newspaper or magazine. Now anyone could publish anything.
|
||||
|
||||
This had been possible in principle since 1993, but not many people had realized it yet. I had been intimately involved with building the infrastructure of the web for most of that time, and a writer as well, and it had taken me 8 years to realize it. Even then it took me several years to understand the implications. It meant there would be a whole new generation of essays. [11]
|
||||
|
||||
In the print era, the channel for publishing essays had been vanishingly small. Except for a few officially anointed thinkers who went to the right parties in New York, the only people allowed to publish essays were specialists writing about their specialties. There were so many essays that had never been written, because there had been no way to publish them. Now they could be, and I was going to write them. [12]
|
||||
|
||||
I've worked on several different things, but to the extent there was a turning point where I figured out what to work on, it was when I started publishing essays online. From then on I knew that whatever else I did, I'd always write essays too.
|
||||
|
||||
I knew that online essays would be a marginal medium at first. Socially they'd seem more like rants posted by nutjobs on their GeoCities sites than the genteel and beautifully typeset compositions published in The New Yorker. But by this point I knew enough to find that encouraging instead of discouraging.
|
||||
|
||||
One of the most conspicuous patterns I've noticed in my life is how well it has worked, for me at least, to work on things that weren't prestigious. Still life has always been the least prestigious form of painting. Viaweb and Y Combinator both seemed lame when we started them. I still get the glassy eye from strangers when they ask what I'm writing, and I explain that it's an essay I'm going to publish on my web site. Even Lisp, though prestigious intellectually in something like the way Latin is, also seems about as hip.
|
||||
|
||||
It's not that unprestigious types of work are good per se. But when you find yourself drawn to some kind of work despite its current lack of prestige, it's a sign both that there's something real to be discovered there, and that you have the right kind of motives. Impure motives are a big danger for the ambitious. If anything is going to lead you astray, it will be the desire to impress people. So while working on things that aren't prestigious doesn't guarantee you're on the right track, it at least guarantees you're not on the most common type of wrong one.
|
||||
|
||||
Over the next several years I wrote lots of essays about all kinds of different topics. O'Reilly reprinted a collection of them as a book, called Hackers & Painters after one of the essays in it. I also worked on spam filters, and did some more painting. I used to have dinners for a group of friends every thursday night, which taught me how to cook for groups. And I bought another building in Cambridge, a former candy factory (and later, twas said, porn studio), to use as an office.
|
||||
|
||||
One night in October 2003 there was a big party at my house. It was a clever idea of my friend Maria Daniels, who was one of the thursday diners. Three separate hosts would all invite their friends to one party. So for every guest, two thirds of the other guests would be people they didn't know but would probably like. One of the guests was someone I didn't know but would turn out to like a lot: a woman called Jessica Livingston. A couple days later I asked her out.
|
||||
|
||||
Jessica was in charge of marketing at a Boston investment bank. This bank thought it understood startups, but over the next year, as she met friends of mine from the startup world, she was surprised how different reality was. And how colorful their stories were. So she decided to compile a book of interviews with startup founders.
|
||||
|
||||
When the bank had financial problems and she had to fire half her staff, she started looking for a new job. In early 2005 she interviewed for a marketing job at a Boston VC firm. It took them weeks to make up their minds, and during this time I started telling her about all the things that needed to be fixed about venture capital. They should make a larger number of smaller investments instead of a handful of giant ones, they should be funding younger, more technical founders instead of MBAs, they should let the founders remain as CEO, and so on.
|
||||
|
||||
One of my tricks for writing essays had always been to give talks. The prospect of having to stand up in front of a group of people and tell them something that won't waste their time is a great spur to the imagination. When the Harvard Computer Society, the undergrad computer club, asked me to give a talk, I decided I would tell them how to start a startup. Maybe they'd be able to avoid the worst of the mistakes we'd made.
|
||||
|
||||
So I gave this talk, in the course of which I told them that the best sources of seed funding were successful startup founders, because then they'd be sources of advice too. Whereupon it seemed they were all looking expectantly at me. Horrified at the prospect of having my inbox flooded by business plans (if I'd only known), I blurted out "But not me!" and went on with the talk. But afterward it occurred to me that I should really stop procrastinating about angel investing. I'd been meaning to since Yahoo bought us, and now it was 7 years later and I still hadn't done one angel investment.
|
||||
|
||||
Meanwhile I had been scheming with Robert and Trevor about projects we could work on together. I missed working with them, and it seemed like there had to be something we could collaborate on.
|
||||
|
||||
As Jessica and I were walking home from dinner on March 11, at the corner of Garden and Walker streets, these three threads converged. Screw the VCs who were taking so long to make up their minds. We'd start our own investment firm and actually implement the ideas we'd been talking about. I'd fund it, and Jessica could quit her job and work for it, and we'd get Robert and Trevor as partners too. [13]
|
||||
|
||||
Once again, ignorance worked in our favor. We had no idea how to be angel investors, and in Boston in 2005 there were no Ron Conways to learn from. So we just made what seemed like the obvious choices, and some of the things we did turned out to be novel.
|
||||
|
||||
There are multiple components to Y Combinator, and we didn't figure them all out at once. The part we got first was to be an angel firm. In those days, those two words didn't go together. There were VC firms, which were organized companies with people whose job it was to make investments, but they only did big, million dollar investments. And there were angels, who did smaller investments, but these were individuals who were usually focused on other things and made investments on the side. And neither of them helped founders enough in the beginning. We knew how helpless founders were in some respects, because we remembered how helpless we'd been. For example, one thing Julian had done for us that seemed to us like magic was to get us set up as a company. We were fine writing fairly difficult software, but actually getting incorporated, with bylaws and stock and all that stuff, how on earth did you do that? Our plan was not only to make seed investments, but to do for startups everything Julian had done for us.
|
||||
|
||||
YC was not organized as a fund. It was cheap enough to run that we funded it with our own money. That went right by 99% of readers, but professional investors are thinking "Wow, that means they got all the returns." But once again, this was not due to any particular insight on our part. We didn't know how VC firms were organized. It never occurred to us to try to raise a fund, and if it had, we wouldn't have known where to start. [14]
|
||||
|
||||
The most distinctive thing about YC is the batch model: to fund a bunch of startups all at once, twice a year, and then to spend three months focusing intensively on trying to help them. That part we discovered by accident, not merely implicitly but explicitly due to our ignorance about investing. We needed to get experience as investors. What better way, we thought, than to fund a whole bunch of startups at once? We knew undergrads got temporary jobs at tech companies during the summer. Why not organize a summer program where they'd start startups instead? We wouldn't feel guilty for being in a sense fake investors, because they would in a similar sense be fake founders. So while we probably wouldn't make much money out of it, we'd at least get to practice being investors on them, and they for their part would probably have a more interesting summer than they would working at Microsoft.
|
||||
|
||||
We'd use the building I owned in Cambridge as our headquarters. We'd all have dinner there once a week — on tuesdays, since I was already cooking for the thursday diners on thursdays — and after dinner we'd bring in experts on startups to give talks.
|
||||
|
||||
We knew undergrads were deciding then about summer jobs, so in a matter of days we cooked up something we called the Summer Founders Program, and I posted an announcement on my site, inviting undergrads to apply. I had never imagined that writing essays would be a way to get "deal flow," as investors call it, but it turned out to be the perfect source. [15] We got 225 applications for the Summer Founders Program, and we were surprised to find that a lot of them were from people who'd already graduated, or were about to that spring. Already this SFP thing was starting to feel more serious than we'd intended.
|
||||
|
||||
We invited about 20 of the 225 groups to interview in person, and from those we picked 8 to fund. They were an impressive group. That first batch included reddit, Justin Kan and Emmett Shear, who went on to found Twitch, Aaron Swartz, who had already helped write the RSS spec and would a few years later become a martyr for open access, and Sam Altman, who would later become the second president of YC. I don't think it was entirely luck that the first batch was so good. You had to be pretty bold to sign up for a weird thing like the Summer Founders Program instead of a summer job at a legit place like Microsoft or Goldman Sachs.
|
||||
|
||||
The deal for startups was based on a combination of the deal we did with Julian ($10k for 10%) and what Robert said MIT grad students got for the summer ($6k). We invested $6k per founder, which in the typical two-founder case was $12k, in return for 6%. That had to be fair, because it was twice as good as the deal we ourselves had taken. Plus that first summer, which was really hot, Jessica brought the founders free air conditioners. [16]
|
||||
|
||||
Fairly quickly I realized that we had stumbled upon the way to scale startup funding. Funding startups in batches was more convenient for us, because it meant we could do things for a lot of startups at once, but being part of a batch was better for the startups too. It solved one of the biggest problems faced by founders: the isolation. Now you not only had colleagues, but colleagues who understood the problems you were facing and could tell you how they were solving them.
|
||||
|
||||
As YC grew, we started to notice other advantages of scale. The alumni became a tight community, dedicated to helping one another, and especially the current batch, whose shoes they remembered being in. We also noticed that the startups were becoming one another's customers. We used to refer jokingly to the "YC GDP," but as YC grows this becomes less and less of a joke. Now lots of startups get their initial set of customers almost entirely from among their batchmates.
|
||||
|
||||
I had not originally intended YC to be a full-time job. I was going to do three things: hack, write essays, and work on YC. As YC grew, and I grew more excited about it, it started to take up a lot more than a third of my attention. But for the first few years I was still able to work on other things.
|
||||
|
||||
In the summer of 2006, Robert and I started working on a new version of Arc. This one was reasonably fast, because it was compiled into Scheme. To test this new Arc, I wrote Hacker News in it. It was originally meant to be a news aggregator for startup founders and was called Startup News, but after a few months I got tired of reading about nothing but startups. Plus it wasn't startup founders we wanted to reach. It was future startup founders. So I changed the name to Hacker News and the topic to whatever engaged one's intellectual curiosity.
|
||||
|
||||
HN was no doubt good for YC, but it was also by far the biggest source of stress for me. If all I'd had to do was select and help founders, life would have been so easy. And that implies that HN was a mistake. Surely the biggest source of stress in one's work should at least be something close to the core of the work. Whereas I was like someone who was in pain while running a marathon not from the exertion of running, but because I had a blister from an ill-fitting shoe. When I was dealing with some urgent problem during YC, there was about a 60% chance it had to do with HN, and a 40% chance it had do with everything else combined. [17]
|
||||
|
||||
As well as HN, I wrote all of YC's internal software in Arc. But while I continued to work a good deal in Arc, I gradually stopped working on Arc, partly because I didn't have time to, and partly because it was a lot less attractive to mess around with the language now that we had all this infrastructure depending on it. So now my three projects were reduced to two: writing essays and working on YC.
|
||||
|
||||
YC was different from other kinds of work I've done. Instead of deciding for myself what to work on, the problems came to me. Every 6 months there was a new batch of startups, and their problems, whatever they were, became our problems. It was very engaging work, because their problems were quite varied, and the good founders were very effective. If you were trying to learn the most you could about startups in the shortest possible time, you couldn't have picked a better way to do it.
|
||||
|
||||
There were parts of the job I didn't like. Disputes between cofounders, figuring out when people were lying to us, fighting with people who maltreated the startups, and so on. But I worked hard even at the parts I didn't like. I was haunted by something Kevin Hale once said about companies: "No one works harder than the boss." He meant it both descriptively and prescriptively, and it was the second part that scared me. I wanted YC to be good, so if how hard I worked set the upper bound on how hard everyone else worked, I'd better work very hard.
|
||||
|
||||
One day in 2010, when he was visiting California for interviews, Robert Morris did something astonishing: he offered me unsolicited advice. I can only remember him doing that once before. One day at Viaweb, when I was bent over double from a kidney stone, he suggested that it would be a good idea for him to take me to the hospital. That was what it took for Rtm to offer unsolicited advice. So I remember his exact words very clearly. "You know," he said, "you should make sure Y Combinator isn't the last cool thing you do."
|
||||
|
||||
At the time I didn't understand what he meant, but gradually it dawned on me that he was saying I should quit. This seemed strange advice, because YC was doing great. But if there was one thing rarer than Rtm offering advice, it was Rtm being wrong. So this set me thinking. It was true that on my current trajectory, YC would be the last thing I did, because it was only taking up more of my attention. It had already eaten Arc, and was in the process of eating essays too. Either YC was my life's work or I'd have to leave eventually. And it wasn't, so I would.
|
||||
|
||||
In the summer of 2012 my mother had a stroke, and the cause turned out to be a blood clot caused by colon cancer. The stroke destroyed her balance, and she was put in a nursing home, but she really wanted to get out of it and back to her house, and my sister and I were determined to help her do it. I used to fly up to Oregon to visit her regularly, and I had a lot of time to think on those flights. On one of them I realized I was ready to hand YC over to someone else.
|
||||
|
||||
I asked Jessica if she wanted to be president, but she didn't, so we decided we'd try to recruit Sam Altman. We talked to Robert and Trevor and we agreed to make it a complete changing of the guard. Up till that point YC had been controlled by the original LLC we four had started. But we wanted YC to last for a long time, and to do that it couldn't be controlled by the founders. So if Sam said yes, we'd let him reorganize YC. Robert and I would retire, and Jessica and Trevor would become ordinary partners.
|
||||
|
||||
When we asked Sam if he wanted to be president of YC, initially he said no. He wanted to start a startup to make nuclear reactors. But I kept at it, and in October 2013 he finally agreed. We decided he'd take over starting with the winter 2014 batch. For the rest of 2013 I left running YC more and more to Sam, partly so he could learn the job, and partly because I was focused on my mother, whose cancer had returned.
|
||||
|
||||
She died on January 15, 2014. We knew this was coming, but it was still hard when it did.
|
||||
|
||||
I kept working on YC till March, to help get that batch of startups through Demo Day, then I checked out pretty completely. (I still talk to alumni and to new startups working on things I'm interested in, but that only takes a few hours a week.)
|
||||
|
||||
What should I do next? Rtm's advice hadn't included anything about that. I wanted to do something completely different, so I decided I'd paint. I wanted to see how good I could get if I really focused on it. So the day after I stopped working on YC, I started painting. I was rusty and it took a while to get back into shape, but it was at least completely engaging. [18]
|
||||
|
||||
I spent most of the rest of 2014 painting. I'd never been able to work so uninterruptedly before, and I got to be better than I had been. Not good enough, but better. Then in November, right in the middle of a painting, I ran out of steam. Up till that point I'd always been curious to see how the painting I was working on would turn out, but suddenly finishing this one seemed like a chore. So I stopped working on it and cleaned my brushes and haven't painted since. So far anyway.
|
||||
|
||||
I realize that sounds rather wimpy. But attention is a zero sum game. If you can choose what to work on, and you choose a project that's not the best one (or at least a good one) for you, then it's getting in the way of another project that is. And at 50 there was some opportunity cost to screwing around.
|
||||
|
||||
I started writing essays again, and wrote a bunch of new ones over the next few months. I even wrote a couple that weren't about startups. Then in March 2015 I started working on Lisp again.
|
||||
|
||||
The distinctive thing about Lisp is that its core is a language defined by writing an interpreter in itself. It wasn't originally intended as a programming language in the ordinary sense. It was meant to be a formal model of computation, an alternative to the Turing machine. If you want to write an interpreter for a language in itself, what's the minimum set of predefined operators you need? The Lisp that John McCarthy invented, or more accurately discovered, is an answer to that question. [19]
|
||||
|
||||
McCarthy didn't realize this Lisp could even be used to program computers till his grad student Steve Russell suggested it. Russell translated McCarthy's interpreter into IBM 704 machine language, and from that point Lisp started also to be a programming language in the ordinary sense. But its origins as a model of computation gave it a power and elegance that other languages couldn't match. It was this that attracted me in college, though I didn't understand why at the time.
|
||||
|
||||
McCarthy's 1960 Lisp did nothing more than interpret Lisp expressions. It was missing a lot of things you'd want in a programming language. So these had to be added, and when they were, they weren't defined using McCarthy's original axiomatic approach. That wouldn't have been feasible at the time. McCarthy tested his interpreter by hand-simulating the execution of programs. But it was already getting close to the limit of interpreters you could test that way — indeed, there was a bug in it that McCarthy had overlooked. To test a more complicated interpreter, you'd have had to run it, and computers then weren't powerful enough.
|
||||
|
||||
Now they are, though. Now you could continue using McCarthy's axiomatic approach till you'd defined a complete programming language. And as long as every change you made to McCarthy's Lisp was a discoveredness-preserving transformation, you could, in principle, end up with a complete language that had this quality. Harder to do than to talk about, of course, but if it was possible in principle, why not try? So I decided to take a shot at it. It took 4 years, from March 26, 2015 to October 12, 2019. It was fortunate that I had a precisely defined goal, or it would have been hard to keep at it for so long.
|
||||
|
||||
I wrote this new Lisp, called Bel, in itself in Arc. That may sound like a contradiction, but it's an indication of the sort of trickery I had to engage in to make this work. By means of an egregious collection of hacks I managed to make something close enough to an interpreter written in itself that could actually run. Not fast, but fast enough to test.
|
||||
|
||||
I had to ban myself from writing essays during most of this time, or I'd never have finished. In late 2015 I spent 3 months writing essays, and when I went back to working on Bel I could barely understand the code. Not so much because it was badly written as because the problem is so convoluted. When you're working on an interpreter written in itself, it's hard to keep track of what's happening at what level, and errors can be practically encrypted by the time you get them.
|
||||
|
||||
So I said no more essays till Bel was done. But I told few people about Bel while I was working on it. So for years it must have seemed that I was doing nothing, when in fact I was working harder than I'd ever worked on anything. Occasionally after wrestling for hours with some gruesome bug I'd check Twitter or HN and see someone asking "Does Paul Graham still code?"
|
||||
|
||||
Working on Bel was hard but satisfying. I worked on it so intensively that at any given time I had a decent chunk of the code in my head and could write more there. I remember taking the boys to the coast on a sunny day in 2015 and figuring out how to deal with some problem involving continuations while I watched them play in the tide pools. It felt like I was doing life right. I remember that because I was slightly dismayed at how novel it felt. The good news is that I had more moments like this over the next few years.
|
||||
|
||||
In the summer of 2016 we moved to England. We wanted our kids to see what it was like living in another country, and since I was a British citizen by birth, that seemed the obvious choice. We only meant to stay for a year, but we liked it so much that we still live there. So most of Bel was written in England.
|
||||
|
||||
In the fall of 2019, Bel was finally finished. Like McCarthy's original Lisp, it's a spec rather than an implementation, although like McCarthy's Lisp it's a spec expressed as code.
|
||||
|
||||
Now that I could write essays again, I wrote a bunch about topics I'd had stacked up. I kept writing essays through 2020, but I also started to think about other things I could work on. How should I choose what to do? Well, how had I chosen what to work on in the past? I wrote an essay for myself to answer that question, and I was surprised how long and messy the answer turned out to be. If this surprised me, who'd lived it, then I thought perhaps it would be interesting to other people, and encouraging to those with similarly messy lives. So I wrote a more detailed version for others to read, and this is the last sentence of it.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Notes
|
||||
|
||||
[1] My experience skipped a step in the evolution of computers: time-sharing machines with interactive OSes. I went straight from batch processing to microcomputers, which made microcomputers seem all the more exciting.
|
||||
|
||||
[2] Italian words for abstract concepts can nearly always be predicted from their English cognates (except for occasional traps like polluzione). It's the everyday words that differ. So if you string together a lot of abstract concepts with a few simple verbs, you can make a little Italian go a long way.
|
||||
|
||||
[3] I lived at Piazza San Felice 4, so my walk to the Accademia went straight down the spine of old Florence: past the Pitti, across the bridge, past Orsanmichele, between the Duomo and the Baptistery, and then up Via Ricasoli to Piazza San Marco. I saw Florence at street level in every possible condition, from empty dark winter evenings to sweltering summer days when the streets were packed with tourists.
|
||||
|
||||
[4] You can of course paint people like still lives if you want to, and they're willing. That sort of portrait is arguably the apex of still life painting, though the long sitting does tend to produce pained expressions in the sitters.
|
||||
|
||||
[5] Interleaf was one of many companies that had smart people and built impressive technology, and yet got crushed by Moore's Law. In the 1990s the exponential growth in the power of commodity (i.e. Intel) processors rolled up high-end, special-purpose hardware and software companies like a bulldozer.
|
||||
|
||||
[6] The signature style seekers at RISD weren't specifically mercenary. In the art world, money and coolness are tightly coupled. Anything expensive comes to be seen as cool, and anything seen as cool will soon become equally expensive.
|
||||
|
||||
[7] Technically the apartment wasn't rent-controlled but rent-stabilized, but this is a refinement only New Yorkers would know or care about. The point is that it was really cheap, less than half market price.
|
||||
|
||||
[8] Most software you can launch as soon as it's done. But when the software is an online store builder and you're hosting the stores, if you don't have any users yet, that fact will be painfully obvious. So before we could launch publicly we had to launch privately, in the sense of recruiting an initial set of users and making sure they had decent-looking stores.
|
||||
|
||||
[9] We'd had a code editor in Viaweb for users to define their own page styles. They didn't know it, but they were editing Lisp expressions underneath. But this wasn't an app editor, because the code ran when the merchants' sites were generated, not when shoppers visited them.
|
||||
|
||||
[10] This was the first instance of what is now a familiar experience, and so was what happened next, when I read the comments and found they were full of angry people. How could I claim that Lisp was better than other languages? Weren't they all Turing complete? People who see the responses to essays I write sometimes tell me how sorry they feel for me, but I'm not exaggerating when I reply that it has always been like this, since the very beginning. It comes with the territory. An essay must tell readers things they don't already know, and some people dislike being told such things.
|
||||
|
||||
[11] People put plenty of stuff on the internet in the 90s of course, but putting something online is not the same as publishing it online. Publishing online means you treat the online version as the (or at least a) primary version.
|
||||
|
||||
[12] There is a general lesson here that our experience with Y Combinator also teaches: Customs continue to constrain you long after the restrictions that caused them have disappeared. Customary VC practice had once, like the customs about publishing essays, been based on real constraints. Startups had once been much more expensive to start, and proportionally rare. Now they could be cheap and common, but the VCs' customs still reflected the old world, just as customs about writing essays still reflected the constraints of the print era.
|
||||
|
||||
Which in turn implies that people who are independent-minded (i.e. less influenced by custom) will have an advantage in fields affected by rapid change (where customs are more likely to be obsolete).
|
||||
|
||||
Here's an interesting point, though: you can't always predict which fields will be affected by rapid change. Obviously software and venture capital will be, but who would have predicted that essay writing would be?
|
||||
|
||||
[13] Y Combinator was not the original name. At first we were called Cambridge Seed. But we didn't want a regional name, in case someone copied us in Silicon Valley, so we renamed ourselves after one of the coolest tricks in the lambda calculus, the Y combinator.
|
||||
|
||||
I picked orange as our color partly because it's the warmest, and partly because no VC used it. In 2005 all the VCs used staid colors like maroon, navy blue, and forest green, because they were trying to appeal to LPs, not founders. The YC logo itself is an inside joke: the Viaweb logo had been a white V on a red circle, so I made the YC logo a white Y on an orange square.
|
||||
|
||||
[14] YC did become a fund for a couple years starting in 2009, because it was getting so big I could no longer afford to fund it personally. But after Heroku got bought we had enough money to go back to being self-funded.
|
||||
|
||||
[15] I've never liked the term "deal flow," because it implies that the number of new startups at any given time is fixed. This is not only false, but it's the purpose of YC to falsify it, by causing startups to be founded that would not otherwise have existed.
|
||||
|
||||
[16] She reports that they were all different shapes and sizes, because there was a run on air conditioners and she had to get whatever she could, but that they were all heavier than she could carry now.
|
||||
|
||||
[17] Another problem with HN was a bizarre edge case that occurs when you both write essays and run a forum. When you run a forum, you're assumed to see if not every conversation, at least every conversation involving you. And when you write essays, people post highly imaginative misinterpretations of them on forums. Individually these two phenomena are tedious but bearable, but the combination is disastrous. You actually have to respond to the misinterpretations, because the assumption that you're present in the conversation means that not responding to any sufficiently upvoted misinterpretation reads as a tacit admission that it's correct. But that in turn encourages more; anyone who wants to pick a fight with you senses that now is their chance.
|
||||
|
||||
[18] The worst thing about leaving YC was not working with Jessica anymore. We'd been working on YC almost the whole time we'd known each other, and we'd neither tried nor wanted to separate it from our personal lives, so leaving was like pulling up a deeply rooted tree.
|
||||
|
||||
[19] One way to get more precise about the concept of invented vs discovered is to talk about space aliens. Any sufficiently advanced alien civilization would certainly know about the Pythagorean theorem, for example. I believe, though with less certainty, that they would also know about the Lisp in McCarthy's 1960 paper.
|
||||
|
||||
But if so there's no reason to suppose that this is the limit of the language that might be known to them. Presumably aliens need numbers and errors and I/O too. So it seems likely there exists at least one path out of McCarthy's Lisp along which discoveredness is preserved.
|
||||
|
||||
|
||||
|
||||
Thanks to Trevor Blackwell, John Collison, Patrick Collison, Daniel Gackle, Ralph Hazell, Jessica Livingston, Robert Morris, and Harj Taggar for reading drafts of this.
|
||||
@@ -7,8 +7,11 @@ Full documentation on all methods, classes, and APIs in LangChain.
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
./reference/models.rst
|
||||
./reference/prompts.rst
|
||||
LLMs<./reference/modules/llms>
|
||||
./reference/utils.rst
|
||||
Chains<./reference/modules/chains>
|
||||
Agents<./reference/modules/agents>
|
||||
./reference/indexes.rst
|
||||
./reference/modules/memory.rst
|
||||
./reference/modules/chains.rst
|
||||
./reference/agents.rst
|
||||
./reference/modules/utilities.rst
|
||||
./reference/modules/experimental.rst
|
||||
|
||||
12
docs/reference/agents.rst
Normal file
12
docs/reference/agents.rst
Normal file
@@ -0,0 +1,12 @@
|
||||
Agents
|
||||
==============
|
||||
|
||||
Reference guide for Agents and associated abstractions.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
modules/agents
|
||||
modules/tools
|
||||
modules/agent_toolkits
|
||||
16
docs/reference/indexes.rst
Normal file
16
docs/reference/indexes.rst
Normal file
@@ -0,0 +1,16 @@
|
||||
Indexes
|
||||
==============
|
||||
Indexes refer to ways to structure documents so that LLMs can best interact with them.
|
||||
LangChain has a number of modules that help you load, structure, store, and retrieve documents.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
modules/docstore
|
||||
modules/text_splitter
|
||||
modules/document_loaders
|
||||
modules/vectorstores
|
||||
modules/retrievers
|
||||
modules/document_compressors
|
||||
modules/document_transformers
|
||||
@@ -45,6 +45,8 @@ The following use cases require specific installs and api keys:
|
||||
- Set up Elasticsearch backend. If you want to do locally, [this](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/getting-started.html) is a good guide.
|
||||
- _FAISS_:
|
||||
- Install requirements with `pip install faiss` for Python 3.7 and `pip install faiss-cpu` for Python 3.10+.
|
||||
- _MyScale_
|
||||
- Install requirements with `pip install clickhouse-connect`. For documentations, please refer to [this document](https://docs.myscale.com/en/overview/).
|
||||
- _Manifest_:
|
||||
- Install requirements with `pip install manifest-ml` (Note: this is only available in Python 3.8+ currently).
|
||||
- _OpenSearch_:
|
||||
|
||||
12
docs/reference/models.rst
Normal file
12
docs/reference/models.rst
Normal file
@@ -0,0 +1,12 @@
|
||||
Models
|
||||
==============
|
||||
|
||||
LangChain provides interfaces and integrations for a number of different types of models.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
modules/llms
|
||||
modules/chat_models
|
||||
modules/embeddings
|
||||
7
docs/reference/modules/agent_toolkits.rst
Normal file
7
docs/reference/modules/agent_toolkits.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
Agent Toolkits
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.agents.agent_toolkits
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
7
docs/reference/modules/chat_models.rst
Normal file
7
docs/reference/modules/chat_models.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
Chat Models
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.chat_models
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
7
docs/reference/modules/document_compressors.rst
Normal file
7
docs/reference/modules/document_compressors.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
Document Compressors
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.retrievers.document_compressors
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
7
docs/reference/modules/document_loaders.rst
Normal file
7
docs/reference/modules/document_loaders.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
Document Loaders
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.document_loaders
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
7
docs/reference/modules/document_transformers.rst
Normal file
7
docs/reference/modules/document_transformers.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
Document Transformers
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.document_transformers
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
28
docs/reference/modules/experimental.rst
Normal file
28
docs/reference/modules/experimental.rst
Normal file
@@ -0,0 +1,28 @@
|
||||
==========
|
||||
Experimental Modules
|
||||
==========
|
||||
|
||||
This module contains experimental modules and reproductions of existing work using LangChain primitives.
|
||||
|
||||
Autonomous Agents
|
||||
------------------
|
||||
|
||||
Here, we document the BabyAGI and AutoGPT classes from the langchain.experimental module.
|
||||
|
||||
.. autoclass:: langchain.experimental.BabyAGI
|
||||
:members:
|
||||
|
||||
.. autoclass:: langchain.experimental.AutoGPT
|
||||
:members:
|
||||
|
||||
|
||||
Generative Agents
|
||||
------------------
|
||||
|
||||
Here, we document the GenerativeAgent and GenerativeAgentMemory classes from the langchain.experimental module.
|
||||
|
||||
.. autoclass:: langchain.experimental.GenerativeAgent
|
||||
:members:
|
||||
|
||||
.. autoclass:: langchain.experimental.GenerativeAgentMemory
|
||||
:members:
|
||||
7
docs/reference/modules/memory.rst
Normal file
7
docs/reference/modules/memory.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
Memory
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.memory
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
7
docs/reference/modules/output_parsers.rst
Normal file
7
docs/reference/modules/output_parsers.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
Output Parsers
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.output_parsers
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
7
docs/reference/modules/retrievers.rst
Normal file
7
docs/reference/modules/retrievers.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
Retrievers
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.retrievers
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
7
docs/reference/modules/tools.rst
Normal file
7
docs/reference/modules/tools.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
Tools
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.tools
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
7
docs/reference/modules/utilities.rst
Normal file
7
docs/reference/modules/utilities.rst
Normal file
@@ -0,0 +1,7 @@
|
||||
Utilities
|
||||
===============================
|
||||
|
||||
.. automodule:: langchain.utilities
|
||||
:members:
|
||||
:undoc-members:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
VectorStores
|
||||
Vector Stores
|
||||
=============================
|
||||
|
||||
.. automodule:: langchain.vectorstores
|
||||
@@ -7,5 +7,6 @@ The reference guides here all relate to objects for working with Prompts.
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
modules/prompt
|
||||
modules/prompts
|
||||
modules/example_selector
|
||||
modules/output_parsers
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
Utilities
|
||||
==============
|
||||
|
||||
There are a lot of different utilities that LangChain provides integrations for
|
||||
These guides go over how to use them.
|
||||
These can largely be grouped into two categories: generic utilities, and then utilities for working with larger text documents.
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
:caption: Generic Utilities
|
||||
|
||||
modules/python
|
||||
modules/serpapi
|
||||
modules/searx_search
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
:caption: Utilities for working with Documents
|
||||
|
||||
modules/docstore
|
||||
modules/text_splitter
|
||||
modules/embeddings
|
||||
modules/vectorstore
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
758
docs/use_cases/agents/wikibase_agent.ipynb
Normal file
758
docs/use_cases/agents/wikibase_agent.ipynb
Normal file
@@ -0,0 +1,758 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5e3cb542-933d-4bf3-a82b-d9d6395a7832",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"# Wikibase Agent\n",
|
||||
"\n",
|
||||
"This notebook demonstrates a very simple wikibase agent that uses sparql generation. Although this code is intended to work against any\n",
|
||||
"wikibase instance, we use http://wikidata.org for testing.\n",
|
||||
"\n",
|
||||
"If you are interested in wikibases and sparql, please consider helping to improve this agent. Look [here](https://github.com/donaldziff/langchain-wikibase) for more details and open questions.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "07d42966-7e99-4157-90dc-6704977dcf1b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Preliminaries"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9132f093-c61e-4b8d-abef-91ebef3fc85f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"### API keys and other secrats\n",
|
||||
"\n",
|
||||
"We use an `.ini` file, like this: \n",
|
||||
"```\n",
|
||||
"[OPENAI]\n",
|
||||
"OPENAI_API_KEY=xyzzy\n",
|
||||
"[WIKIDATA]\n",
|
||||
"WIKIDATA_USER_AGENT_HEADER=argle-bargle\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "99567dfd-05a7-412f-abf0-9b9f4424acbd",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['./secrets.ini']"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import configparser\n",
|
||||
"config = configparser.ConfigParser()\n",
|
||||
"config.read('./secrets.ini')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "332b6658-c978-41ca-a2be-4f8677fecaef",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"### OpenAI API Key\n",
|
||||
"\n",
|
||||
"An OpenAI API key is required unless you modify the code below to use another LLM provider."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "dd328ee2-33cc-4e1e-aff7-cc0a2e05e2e6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai_api_key = config['OPENAI']['OPENAI_API_KEY']\n",
|
||||
"import os\n",
|
||||
"os.environ.update({'OPENAI_API_KEY': openai_api_key})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "42a9311b-600d-42bc-b000-2692ef87a213",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"### Wikidata user-agent header\n",
|
||||
"\n",
|
||||
"Wikidata policy requires a user-agent header. See https://meta.wikimedia.org/wiki/User-Agent_policy. However, at present this policy is not strictly enforced."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "17ba657e-789d-40e1-b4b7-4f29ba06fe79",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"wikidata_user_agent_header = None if not config.has_section('WIKIDATA') else config['WIKIDATA']['WIKIDAtA_USER_AGENT_HEADER']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "db08d308-050a-4fc8-93c9-8de4ae977ac3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Enable tracing if desired"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "77d2da08-fccd-4676-b77e-c0e89bf343cb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#import os\n",
|
||||
"#os.environ[\"LANGCHAIN_HANDLER\"] = \"langchain\"\n",
|
||||
"#os.environ[\"LANGCHAIN_SESSION\"] = \"default\" # Make sure this session actually exists. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3dbc5bfc-48ce-4f90-873c-7336b21300c6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Tools\n",
|
||||
"\n",
|
||||
"Three tools are provided for this simple agent:\n",
|
||||
"* `ItemLookup`: for finding the q-number of an item\n",
|
||||
"* `PropertyLookup`: for finding the p-number of a property\n",
|
||||
"* `SparqlQueryRunner`: for running a sparql query"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f801b4e-6576-4914-aa4f-6f4c4e3c7924",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Item and Property lookup\n",
|
||||
"\n",
|
||||
"Item and Property lookup are implemented in a single method, using an elastic search endpoint. Not all wikibase instances have it, but wikidata does, and that's where we'll start."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "42d23f0a-1c74-4c9c-85f2-d0e24204e96a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_nested_value(o: dict, path: list) -> any:\n",
|
||||
" current = o\n",
|
||||
" for key in path:\n",
|
||||
" try:\n",
|
||||
" current = current[key]\n",
|
||||
" except:\n",
|
||||
" return None\n",
|
||||
" return current\n",
|
||||
"\n",
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"from typing import Optional\n",
|
||||
"\n",
|
||||
"def vocab_lookup(search: str, entity_type: str = \"item\",\n",
|
||||
" url: str = \"https://www.wikidata.org/w/api.php\",\n",
|
||||
" user_agent_header: str = wikidata_user_agent_header,\n",
|
||||
" srqiprofile: str = None,\n",
|
||||
" ) -> Optional[str]: \n",
|
||||
" headers = {\n",
|
||||
" 'Accept': 'application/json'\n",
|
||||
" }\n",
|
||||
" if wikidata_user_agent_header is not None:\n",
|
||||
" headers['User-Agent'] = wikidata_user_agent_header\n",
|
||||
" \n",
|
||||
" if entity_type == \"item\":\n",
|
||||
" srnamespace = 0\n",
|
||||
" srqiprofile = \"classic_noboostlinks\" if srqiprofile is None else srqiprofile\n",
|
||||
" elif entity_type == \"property\":\n",
|
||||
" srnamespace = 120\n",
|
||||
" srqiprofile = \"classic\" if srqiprofile is None else srqiprofile\n",
|
||||
" else:\n",
|
||||
" raise ValueError(\"entity_type must be either 'property' or 'item'\") \n",
|
||||
" \n",
|
||||
" params = {\n",
|
||||
" \"action\": \"query\",\n",
|
||||
" \"list\": \"search\",\n",
|
||||
" \"srsearch\": search,\n",
|
||||
" \"srnamespace\": srnamespace,\n",
|
||||
" \"srlimit\": 1,\n",
|
||||
" \"srqiprofile\": srqiprofile,\n",
|
||||
" \"srwhat\": 'text',\n",
|
||||
" \"format\": \"json\"\n",
|
||||
" }\n",
|
||||
" \n",
|
||||
" response = requests.get(url, headers=headers, params=params)\n",
|
||||
" \n",
|
||||
" if response.status_code == 200:\n",
|
||||
" title = get_nested_value(response.json(), ['query', 'search', 0, 'title'])\n",
|
||||
" if title is None:\n",
|
||||
" return f\"I couldn't find any {entity_type} for '{search}'. Please rephrase your request and try again\"\n",
|
||||
" # if there is a prefix, strip it off\n",
|
||||
" return title.split(':')[-1]\n",
|
||||
" else:\n",
|
||||
" return \"Sorry, I got an error. Please try again.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "e52060fa-3614-43fb-894e-54e9b75d1e9f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Q4180017\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(vocab_lookup(\"Malin 1\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "b23ab322-b2cf-404e-b36f-2bfc1d79b0d3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"P31\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(vocab_lookup(\"instance of\", entity_type=\"property\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "89020cc8-104e-42d0-ac32-885e590de515",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"I couldn't find any item for 'Ceci n'est pas un q-item'. Please rephrase your request and try again\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(vocab_lookup(\"Ceci n'est pas un q-item\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "78d66d8b-0e34-4d3f-a18d-c7284840ac76",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Sparql runner "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c6f60069-fbe0-4015-87fb-0e487cd914e7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This tool runs sparql - by default, wikidata is used."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "b5b97a4d-2a39-4993-88d9-e7818c0a2853",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"from typing import List, Dict, Any\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"def run_sparql(query: str, url='https://query.wikidata.org/sparql',\n",
|
||||
" user_agent_header: str = wikidata_user_agent_header) -> List[Dict[str, Any]]:\n",
|
||||
" headers = {\n",
|
||||
" 'Accept': 'application/json'\n",
|
||||
" }\n",
|
||||
" if wikidata_user_agent_header is not None:\n",
|
||||
" headers['User-Agent'] = wikidata_user_agent_header\n",
|
||||
"\n",
|
||||
" response = requests.get(url, headers=headers, params={'query': query, 'format': 'json'})\n",
|
||||
"\n",
|
||||
" if response.status_code != 200:\n",
|
||||
" return \"That query failed. Perhaps you could try a different one?\"\n",
|
||||
" results = get_nested_value(response.json(),['results', 'bindings'])\n",
|
||||
" return json.dumps(results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "149722ec-8bc1-4d4f-892b-e4ddbe8444c1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'[{\"count\": {\"datatype\": \"http://www.w3.org/2001/XMLSchema#integer\", \"type\": \"literal\", \"value\": \"20\"}}]'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"run_sparql(\"SELECT (COUNT(?children) as ?count) WHERE { wd:Q1339 wdt:P40 ?children . }\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9f0302fd-ba35-4acc-ba32-1d7c9295c898",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3122a961-9673-4a52-b1cd-7d62fbdf8d96",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Wrap the tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "cc41ae88-2e53-4363-9878-28b26430cb1e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser\n",
|
||||
"from langchain.prompts import StringPromptTemplate\n",
|
||||
"from langchain import OpenAI, LLMChain\n",
|
||||
"from typing import List, Union\n",
|
||||
"from langchain.schema import AgentAction, AgentFinish\n",
|
||||
"import re"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "2810a3ce-b9c6-47ee-8068-12ca967cd0ea",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define which tools the agent can use to answer user queries\n",
|
||||
"tools = [\n",
|
||||
" Tool(\n",
|
||||
" name = \"ItemLookup\",\n",
|
||||
" func=(lambda x: vocab_lookup(x, entity_type=\"item\")),\n",
|
||||
" description=\"useful for when you need to know the q-number for an item\"\n",
|
||||
" ),\n",
|
||||
" Tool(\n",
|
||||
" name = \"PropertyLookup\",\n",
|
||||
" func=(lambda x: vocab_lookup(x, entity_type=\"property\")),\n",
|
||||
" description=\"useful for when you need to know the p-number for a property\"\n",
|
||||
" ),\n",
|
||||
" Tool(\n",
|
||||
" name = \"SparqlQueryRunner\",\n",
|
||||
" func=run_sparql,\n",
|
||||
" description=\"useful for getting results from a wikibase\"\n",
|
||||
" ) \n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ab0f2778-a195-4a4a-a5b4-c1e809e1fb7b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prompts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "7bd4ba4f-57d6-4ceb-b932-3cb0d0509a24",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set up the base template\n",
|
||||
"template = \"\"\"\n",
|
||||
"Answer the following questions by running a sparql query against a wikibase where the p and q items are \n",
|
||||
"completely unknown to you. You will need to discover the p and q items before you can generate the sparql.\n",
|
||||
"Do not assume you know the p and q items for any concepts. Always use tools to find all p and q items.\n",
|
||||
"After you generate the sparql, you should run it. The results will be returned in json. \n",
|
||||
"Summarize the json results in natural language.\n",
|
||||
"\n",
|
||||
"You may assume the following prefixes:\n",
|
||||
"PREFIX wd: <http://www.wikidata.org/entity/>\n",
|
||||
"PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n",
|
||||
"PREFIX p: <http://www.wikidata.org/prop/>\n",
|
||||
"PREFIX ps: <http://www.wikidata.org/prop/statement/>\n",
|
||||
"\n",
|
||||
"When generating sparql:\n",
|
||||
"* Try to avoid \"count\" and \"filter\" queries if possible\n",
|
||||
"* Never enclose the sparql in back-quotes\n",
|
||||
"\n",
|
||||
"You have access to the following tools:\n",
|
||||
"\n",
|
||||
"{tools}\n",
|
||||
"\n",
|
||||
"Use the following format:\n",
|
||||
"\n",
|
||||
"Question: the input question for which you must provide a natural language answer\n",
|
||||
"Thought: you should always think about what to do\n",
|
||||
"Action: the action to take, should be one of [{tool_names}]\n",
|
||||
"Action Input: the input to the action\n",
|
||||
"Observation: the result of the action\n",
|
||||
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
|
||||
"Thought: I now know the final answer\n",
|
||||
"Final Answer: the final answer to the original input question\n",
|
||||
"\n",
|
||||
"Question: {input}\n",
|
||||
"{agent_scratchpad}\"\"\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "7e8d771a-64bb-4ec8-b472-6a9a40c6dd38",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set up a prompt template\n",
|
||||
"class CustomPromptTemplate(StringPromptTemplate):\n",
|
||||
" # The template to use\n",
|
||||
" template: str\n",
|
||||
" # The list of tools available\n",
|
||||
" tools: List[Tool]\n",
|
||||
" \n",
|
||||
" def format(self, **kwargs) -> str:\n",
|
||||
" # Get the intermediate steps (AgentAction, Observation tuples)\n",
|
||||
" # Format them in a particular way\n",
|
||||
" intermediate_steps = kwargs.pop(\"intermediate_steps\")\n",
|
||||
" thoughts = \"\"\n",
|
||||
" for action, observation in intermediate_steps:\n",
|
||||
" thoughts += action.log\n",
|
||||
" thoughts += f\"\\nObservation: {observation}\\nThought: \"\n",
|
||||
" # Set the agent_scratchpad variable to that value\n",
|
||||
" kwargs[\"agent_scratchpad\"] = thoughts\n",
|
||||
" # Create a tools variable from the list of tools provided\n",
|
||||
" kwargs[\"tools\"] = \"\\n\".join([f\"{tool.name}: {tool.description}\" for tool in self.tools])\n",
|
||||
" # Create a list of tool names for the tools provided\n",
|
||||
" kwargs[\"tool_names\"] = \", \".join([tool.name for tool in self.tools])\n",
|
||||
" return self.template.format(**kwargs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "f97dca78-fdde-4a70-9137-e34a21d14e64",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt = CustomPromptTemplate(\n",
|
||||
" template=template,\n",
|
||||
" tools=tools,\n",
|
||||
" # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically\n",
|
||||
" # This includes the `intermediate_steps` variable because that is needed\n",
|
||||
" input_variables=[\"input\", \"intermediate_steps\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "12c57d77-3c1e-4cde-9a83-7d2134392479",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Output parser \n",
|
||||
"This is unchanged from langchain docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "42da05eb-c103-4649-9d20-7143a8880721",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class CustomOutputParser(AgentOutputParser):\n",
|
||||
" \n",
|
||||
" def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:\n",
|
||||
" # Check if agent should finish\n",
|
||||
" if \"Final Answer:\" in llm_output:\n",
|
||||
" return AgentFinish(\n",
|
||||
" # Return values is generally always a dictionary with a single `output` key\n",
|
||||
" # It is not recommended to try anything else at the moment :)\n",
|
||||
" return_values={\"output\": llm_output.split(\"Final Answer:\")[-1].strip()},\n",
|
||||
" log=llm_output,\n",
|
||||
" )\n",
|
||||
" # Parse out the action and action input\n",
|
||||
" regex = r\"Action: (.*?)[\\n]*Action Input:[\\s]*(.*)\"\n",
|
||||
" match = re.search(regex, llm_output, re.DOTALL)\n",
|
||||
" if not match:\n",
|
||||
" raise ValueError(f\"Could not parse LLM output: `{llm_output}`\")\n",
|
||||
" action = match.group(1).strip()\n",
|
||||
" action_input = match.group(2)\n",
|
||||
" # Return the action and action input\n",
|
||||
" return AgentAction(tool=action, tool_input=action_input.strip(\" \").strip('\"'), log=llm_output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "d2b4d710-8cc9-4040-9269-59cf6c5c22be",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"output_parser = CustomOutputParser()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "48a758cb-93a7-4555-b69a-896d2d43c6f0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Specify the LLM model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "72988c79-8f60-4b0f-85ee-6af32e8de9c2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-4\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "95685d14-647a-4e24-ae2c-a8dd1e364921",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Agent and agent executor"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "13d55765-bfa1-43b3-b7cb-00f52ebe7747",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# LLM chain consisting of the LLM and a prompt\n",
|
||||
"llm_chain = LLMChain(llm=llm, prompt=prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "b3f7ac3c-398e-49f9-baed-554f49a191c3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tool_names = [tool.name for tool in tools]\n",
|
||||
"agent = LLMSingleActionAgent(\n",
|
||||
" llm_chain=llm_chain, \n",
|
||||
" output_parser=output_parser,\n",
|
||||
" stop=[\"\\nObservation:\"], \n",
|
||||
" allowed_tools=tool_names\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "65740577-272e-4853-8d47-b87784cfaba0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "66e3d13b-77cf-41d3-b541-b54535c14459",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Run it!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "6e97a07c-d7bf-4a35-9ab2-b59ae865c62c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# If you prefer in-line tracing, uncomment this line\n",
|
||||
"# agent_executor.agent.llm_chain.verbose = True"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "a11ca60d-f57b-4fe8-943e-a258e37463c7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find the Q number for J.S. Bach.\n",
|
||||
"Action: ItemLookup\n",
|
||||
"Action Input: J.S. Bach\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[36;1m\u001b[1;3mQ1339\u001b[0m\u001b[32;1m\u001b[1;3mI need to find the P number for children.\n",
|
||||
"Action: PropertyLookup\n",
|
||||
"Action Input: children\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[33;1m\u001b[1;3mP1971\u001b[0m\u001b[32;1m\u001b[1;3mNow I can query the number of children J.S. Bach had.\n",
|
||||
"Action: SparqlQueryRunner\n",
|
||||
"Action Input: SELECT ?children WHERE { wd:Q1339 wdt:P1971 ?children }\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[38;5;200m\u001b[1;3m[{\"children\": {\"datatype\": \"http://www.w3.org/2001/XMLSchema#decimal\", \"type\": \"literal\", \"value\": \"20\"}}]\u001b[0m\u001b[32;1m\u001b[1;3mI now know the final answer.\n",
|
||||
"Final Answer: J.S. Bach had 20 children.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'J.S. Bach had 20 children.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.run(\"How many children did J.S. Bach have?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "d0b42a41-996b-4156-82e4-f0651a87ee34",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: To find Hakeem Olajuwon's Basketball-Reference.com NBA player ID, I need to first find his Wikidata item (Q-number) and then query for the relevant property (P-number).\n",
|
||||
"Action: ItemLookup\n",
|
||||
"Action Input: Hakeem Olajuwon\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[36;1m\u001b[1;3mQ273256\u001b[0m\u001b[32;1m\u001b[1;3mNow that I have Hakeem Olajuwon's Wikidata item (Q273256), I need to find the P-number for the Basketball-Reference.com NBA player ID property.\n",
|
||||
"Action: PropertyLookup\n",
|
||||
"Action Input: Basketball-Reference.com NBA player ID\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[33;1m\u001b[1;3mP2685\u001b[0m\u001b[32;1m\u001b[1;3mNow that I have both the Q-number for Hakeem Olajuwon (Q273256) and the P-number for the Basketball-Reference.com NBA player ID property (P2685), I can run a SPARQL query to get the ID value.\n",
|
||||
"Action: SparqlQueryRunner\n",
|
||||
"Action Input: \n",
|
||||
"SELECT ?playerID WHERE {\n",
|
||||
" wd:Q273256 wdt:P2685 ?playerID .\n",
|
||||
"}\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[38;5;200m\u001b[1;3m[{\"playerID\": {\"type\": \"literal\", \"value\": \"o/olajuha01\"}}]\u001b[0m\u001b[32;1m\u001b[1;3mI now know the final answer\n",
|
||||
"Final Answer: Hakeem Olajuwon's Basketball-Reference.com NBA player ID is \"o/olajuha01\".\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Hakeem Olajuwon\\'s Basketball-Reference.com NBA player ID is \"o/olajuha01\".'"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.run(\"What is the Basketball-Reference.com NBA player ID of Hakeem Olajuwon?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "05fb3a3e-8a9f-482d-bd54-4c6e60ef60dd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "conda210",
|
||||
"language": "python",
|
||||
"name": "conda210"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -16,5 +16,6 @@ usage of LangChain's collection of tools.
|
||||
- [Baby AGI with Tools](autonomous_agents/baby_agi_with_agent.ipynb): building off the above notebook, this example substitutes in an agent with tools as the execution tools, allowing it to actually take actions.
|
||||
|
||||
|
||||
## AutoGPT ([Original Repo] (https://github.com/Significant-Gravitas/Auto-GPT))
|
||||
## AutoGPT ([Original Repo](https://github.com/Significant-Gravitas/Auto-GPT))
|
||||
- [AutoGPT](autonomous_agents/autogpt.ipynb): a notebook implementing AutoGPT in LangChain primitives
|
||||
- [WebSearch Research Assistant](autonomous_agents/marathon_times.ipynb): a notebook showing how to use AutoGPT plus specific tools to act as research assistant that can use the web.
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "7c2c9b54",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -56,7 +56,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "72bc204d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -68,7 +68,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "1df7b724",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -95,18 +95,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "3393bc23",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.experimental.autonomous_agents.autogpt.agent import AutoGPT\n",
|
||||
"from langchain.experimental import AutoGPT\n",
|
||||
"from langchain.chat_models import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "709c08c2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -134,7 +134,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "c032b182",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
@@ -167,9 +167,9 @@
|
||||
"4. Exclusively use the commands listed in double quotes e.g. \"command name\"\n",
|
||||
"\n",
|
||||
"Commands:\n",
|
||||
"1. search: useful for when you need to answer questions about current events. You should ask targeted questions, args: \"tool_input\": \"\"\n",
|
||||
"2. write_file: Write file to disk, args: \"file_path\": \"name of file\", \"text\": \"text to write to file\"\n",
|
||||
"3. read_file: Read file from disk, args: \"file_path\": \"name of file\"\n",
|
||||
"1. search: useful for when you need to answer questions about current events. You should ask targeted questions, args json schema: {\"query\": {\"title\": \"Query\", \"type\": \"string\"}}\n",
|
||||
"2. write_file: Write file to disk, args json schema: {\"file_path\": {\"title\": \"File Path\", \"description\": \"name of file\", \"type\": \"string\"}, \"text\": {\"title\": \"Text\", \"description\": \"text to write to file\", \"type\": \"string\"}}\n",
|
||||
"3. read_file: Read file from disk, args json schema: {\"file_path\": {\"title\": \"File Path\", \"description\": \"name of file\", \"type\": \"string\"}}\n",
|
||||
"4. finish: use this to signal that you have finished all your objectives, args: \"response\": \"final response to let people know you have finished your objectives\"\n",
|
||||
"\n",
|
||||
"Resources:\n",
|
||||
@@ -202,7 +202,7 @@
|
||||
" }\n",
|
||||
"} \n",
|
||||
"Ensure the response can be parsed by Python json.loads\n",
|
||||
"System: The current time and date is Sun Apr 16 18:38:42 2023\n",
|
||||
"System: The current time and date is Tue Apr 18 21:31:28 2023\n",
|
||||
"System: This reminds you of these events from your past:\n",
|
||||
"[]\n",
|
||||
"\n",
|
||||
@@ -212,16 +212,16 @@
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I think I should start by writing the weather report for SF today. I can use the 'search' command to find the current weather conditions.\",\n",
|
||||
" \"reasoning\": \"The first step to completing my goals is to write the weather report for SF today. I can use the 'search' command to find the current weather conditions.\",\n",
|
||||
" \"plan\": \"- Use the 'search' command to find the current weather conditions.\\n- Write the weather report for SF today.\",\n",
|
||||
" \"criticism\": \"I need to make sure that I am accurate in my reporting of the weather conditions.\",\n",
|
||||
" \"speak\": \"I will use the 'search' command to find the current weather conditions and then write the weather report for SF today.\"\n",
|
||||
" \"text\": \"I will start by writing a weather report for San Francisco today. I will use the 'search' command to find the current weather conditions.\",\n",
|
||||
" \"reasoning\": \"I need to gather information about the current weather conditions in San Francisco to write an accurate weather report.\",\n",
|
||||
" \"plan\": \"- Use the 'search' command to find the current weather conditions in San Francisco\\n- Write a weather report based on the information gathered\",\n",
|
||||
" \"criticism\": \"I need to make sure that the information I gather is accurate and up-to-date.\",\n",
|
||||
" \"speak\": \"I will use the 'search' command to find the current weather conditions in San Francisco.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"search\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"tool_input\": \"current weather conditions in San Francisco\"\n",
|
||||
" \"query\": \"what is the current weather in san francisco\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
@@ -248,9 +248,9 @@
|
||||
"4. Exclusively use the commands listed in double quotes e.g. \"command name\"\n",
|
||||
"\n",
|
||||
"Commands:\n",
|
||||
"1. search: useful for when you need to answer questions about current events. You should ask targeted questions, args: \"tool_input\": \"\"\n",
|
||||
"2. write_file: Write file to disk, args: \"file_path\": \"name of file\", \"text\": \"text to write to file\"\n",
|
||||
"3. read_file: Read file from disk, args: \"file_path\": \"name of file\"\n",
|
||||
"1. search: useful for when you need to answer questions about current events. You should ask targeted questions, args json schema: {\"query\": {\"title\": \"Query\", \"type\": \"string\"}}\n",
|
||||
"2. write_file: Write file to disk, args json schema: {\"file_path\": {\"title\": \"File Path\", \"description\": \"name of file\", \"type\": \"string\"}, \"text\": {\"title\": \"Text\", \"description\": \"text to write to file\", \"type\": \"string\"}}\n",
|
||||
"3. read_file: Read file from disk, args json schema: {\"file_path\": {\"title\": \"File Path\", \"description\": \"name of file\", \"type\": \"string\"}}\n",
|
||||
"4. finish: use this to signal that you have finished all your objectives, args: \"response\": \"final response to let people know you have finished your objectives\"\n",
|
||||
"\n",
|
||||
"Resources:\n",
|
||||
@@ -283,28 +283,28 @@
|
||||
" }\n",
|
||||
"} \n",
|
||||
"Ensure the response can be parsed by Python json.loads\n",
|
||||
"System: The current time and date is Sun Apr 16 18:38:59 2023\n",
|
||||
"System: The current time and date is Tue Apr 18 21:31:39 2023\n",
|
||||
"System: This reminds you of these events from your past:\n",
|
||||
"['Assistant Reply: {\\n \"thoughts\": {\\n \"text\": \"I think I should start by writing the weather report for SF today. I can use the \\'search\\' command to find the current weather conditions.\",\\n \"reasoning\": \"The first step to completing my goals is to write the weather report for SF today. I can use the \\'search\\' command to find the current weather conditions.\",\\n \"plan\": \"- Use the \\'search\\' command to find the current weather conditions.\\\\n- Write the weather report for SF today.\",\\n \"criticism\": \"I need to make sure that I am accurate in my reporting of the weather conditions.\",\\n \"speak\": \"I will use the \\'search\\' command to find the current weather conditions and then write the weather report for SF today.\"\\n },\\n \"command\": {\\n \"name\": \"search\",\\n \"args\": {\\n \"tool_input\": \"current weather conditions in San Francisco\"\\n }\\n }\\n} \\nResult: Command search returned: Cloudy skies early, then partly cloudy in the afternoon. High 56F. Winds W at 15 to 25 mph. ']\n",
|
||||
"['Assistant Reply: {\\n \"thoughts\": {\\n \"text\": \"I will start by writing a weather report for San Francisco today. I will use the \\'search\\' command to find the current weather conditions.\",\\n \"reasoning\": \"I need to gather information about the current weather conditions in San Francisco to write an accurate weather report.\",\\n \"plan\": \"- Use the \\'search\\' command to find the current weather conditions in San Francisco\\\\n- Write a weather report based on the information gathered\",\\n \"criticism\": \"I need to make sure that the information I gather is accurate and up-to-date.\",\\n \"speak\": \"I will use the \\'search\\' command to find the current weather conditions in San Francisco.\"\\n },\\n \"command\": {\\n \"name\": \"search\",\\n \"args\": {\\n \"query\": \"what is the current weather in san francisco\"\\n }\\n }\\n} \\nResult: Command search returned: Current Weather ; 54°F · Sunny ; RealFeel® 66°. Pleasant. RealFeel Guide. Pleasant. 63° to 81°. Most consider this temperature range ideal. LEARN MORE. RealFeel ... ']\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Human: Determine which next command to use, and respond using the format specified above:\n",
|
||||
"AI: {\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I think I should start by writing the weather report for SF today. I can use the 'search' command to find the current weather conditions.\",\n",
|
||||
" \"reasoning\": \"The first step to completing my goals is to write the weather report for SF today. I can use the 'search' command to find the current weather conditions.\",\n",
|
||||
" \"plan\": \"- Use the 'search' command to find the current weather conditions.\\n- Write the weather report for SF today.\",\n",
|
||||
" \"criticism\": \"I need to make sure that I am accurate in my reporting of the weather conditions.\",\n",
|
||||
" \"speak\": \"I will use the 'search' command to find the current weather conditions and then write the weather report for SF today.\"\n",
|
||||
" \"text\": \"I will start by writing a weather report for San Francisco today. I will use the 'search' command to find the current weather conditions.\",\n",
|
||||
" \"reasoning\": \"I need to gather information about the current weather conditions in San Francisco to write an accurate weather report.\",\n",
|
||||
" \"plan\": \"- Use the 'search' command to find the current weather conditions in San Francisco\\n- Write a weather report based on the information gathered\",\n",
|
||||
" \"criticism\": \"I need to make sure that the information I gather is accurate and up-to-date.\",\n",
|
||||
" \"speak\": \"I will use the 'search' command to find the current weather conditions in San Francisco.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"search\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"tool_input\": \"current weather conditions in San Francisco\"\n",
|
||||
" \"query\": \"what is the current weather in san francisco\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"System: Command search returned: Cloudy skies early, then partly cloudy in the afternoon. High 56F. Winds W at 15 to 25 mph.\n",
|
||||
"System: Command search returned: Current Weather ; 54°F · Sunny ; RealFeel® 66°. Pleasant. RealFeel Guide. Pleasant. 63° to 81°. Most consider this temperature range ideal. LEARN MORE. RealFeel ...\n",
|
||||
"Human: Determine which next command to use, and respond using the format specified above:\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
@@ -316,17 +316,17 @@
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"Now that I have the current weather conditions for SF, I can write the weather report. I will use the 'write_file' command to save the report to a file.\",\n",
|
||||
" \"reasoning\": \"Now that I have the current weather conditions for SF, I can write the weather report. I will use the 'write_file' command to save the report to a file.\",\n",
|
||||
" \"plan\": \"- Use the 'write_file' command to save the weather report to a file.\",\n",
|
||||
" \"criticism\": \"I need to make sure that the weather report is clear and concise.\",\n",
|
||||
" \"speak\": \"I will use the 'write_file' command to save the weather report to a file.\"\n",
|
||||
" \"text\": \"I have found that the current weather in San Francisco is sunny with a temperature of 54°F. I will now write a weather report for San Francisco today using the 'write_file' command.\",\n",
|
||||
" \"reasoning\": \"I need to write a weather report for San Francisco today based on the information I gathered from the 'search' command.\",\n",
|
||||
" \"plan\": \"- Use the 'write_file' command to write a weather report for San Francisco today based on the information gathered\",\n",
|
||||
" \"criticism\": \"I need to make sure that the weather report is accurate and informative.\",\n",
|
||||
" \"speak\": \"I will use the 'write_file' command to write a weather report for San Francisco today.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"write_file\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"file_path\": \"weather_report.txt\",\n",
|
||||
" \"text\": \"Weather Report for San Francisco on April 16, 2023:\\n\\nCloudy skies early, then partly cloudy in the afternoon. High 56F. Winds W at 15 to 25 mph.\"\n",
|
||||
" \"text\": \"Weather Report for San Francisco Today:\\n\\nThe current weather in San Francisco is sunny with a temperature of 54°F. It is expected to remain sunny throughout the day with a high of 62°F and a low of 50°F. There is no chance of precipitation today. It is recommended to wear light clothing and sunscreen if spending time outdoors.\\n\\nStay safe and enjoy the beautiful weather!\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
@@ -353,9 +353,9 @@
|
||||
"4. Exclusively use the commands listed in double quotes e.g. \"command name\"\n",
|
||||
"\n",
|
||||
"Commands:\n",
|
||||
"1. search: useful for when you need to answer questions about current events. You should ask targeted questions, args: \"tool_input\": \"\"\n",
|
||||
"2. write_file: Write file to disk, args: \"file_path\": \"name of file\", \"text\": \"text to write to file\"\n",
|
||||
"3. read_file: Read file from disk, args: \"file_path\": \"name of file\"\n",
|
||||
"1. search: useful for when you need to answer questions about current events. You should ask targeted questions, args json schema: {\"query\": {\"title\": \"Query\", \"type\": \"string\"}}\n",
|
||||
"2. write_file: Write file to disk, args json schema: {\"file_path\": {\"title\": \"File Path\", \"description\": \"name of file\", \"type\": \"string\"}, \"text\": {\"title\": \"Text\", \"description\": \"text to write to file\", \"type\": \"string\"}}\n",
|
||||
"3. read_file: Read file from disk, args json schema: {\"file_path\": {\"title\": \"File Path\", \"description\": \"name of file\", \"type\": \"string\"}}\n",
|
||||
"4. finish: use this to signal that you have finished all your objectives, args: \"response\": \"final response to let people know you have finished your objectives\"\n",
|
||||
"\n",
|
||||
"Resources:\n",
|
||||
@@ -388,61 +388,27 @@
|
||||
" }\n",
|
||||
"} \n",
|
||||
"Ensure the response can be parsed by Python json.loads\n",
|
||||
"System: The current time and date is Sun Apr 16 18:39:10 2023\n",
|
||||
"System: The current time and date is Tue Apr 18 21:31:55 2023\n",
|
||||
"System: This reminds you of these events from your past:\n",
|
||||
"['Assistant Reply: {\\n \"thoughts\": {\\n \"text\": \"I think I should start by writing the weather report for SF today. I can use the \\'search\\' command to find the current weather conditions.\",\\n \"reasoning\": \"The first step to completing my goals is to write the weather report for SF today. I can use the \\'search\\' command to find the current weather conditions.\",\\n \"plan\": \"- Use the \\'search\\' command to find the current weather conditions.\\\\n- Write the weather report for SF today.\",\\n \"criticism\": \"I need to make sure that I am accurate in my reporting of the weather conditions.\",\\n \"speak\": \"I will use the \\'search\\' command to find the current weather conditions and then write the weather report for SF today.\"\\n },\\n \"command\": {\\n \"name\": \"search\",\\n \"args\": {\\n \"tool_input\": \"current weather conditions in San Francisco\"\\n }\\n }\\n} \\nResult: Command search returned: Cloudy skies early, then partly cloudy in the afternoon. High 56F. Winds W at 15 to 25 mph. ', 'Assistant Reply: {\\n \"thoughts\": {\\n \"text\": \"Now that I have the current weather conditions for SF, I can write the weather report. I will use the \\'write_file\\' command to save the report to a file.\",\\n \"reasoning\": \"Now that I have the current weather conditions for SF, I can write the weather report. I will use the \\'write_file\\' command to save the report to a file.\",\\n \"plan\": \"- Use the \\'write_file\\' command to save the weather report to a file.\",\\n \"criticism\": \"I need to make sure that the weather report is clear and concise.\",\\n \"speak\": \"I will use the \\'write_file\\' command to save the weather report to a file.\"\\n },\\n \"command\": {\\n \"name\": \"write_file\",\\n \"args\": {\\n \"file_path\": \"weather_report.txt\",\\n \"text\": \"Weather Report for San Francisco on April 16, 2023:\\\\n\\\\nCloudy skies early, then partly cloudy in the afternoon. High 56F. Winds W at 15 to 25 mph.\"\\n }\\n }\\n} \\nResult: Command write_file returned: File written to successfully. ']\n",
|
||||
"['Assistant Reply: {\\n \"thoughts\": {\\n \"text\": \"I have found that the current weather in San Francisco is sunny with a temperature of 54°F. I will now write a weather report for San Francisco today using the \\'write_file\\' command.\",\\n \"reasoning\": \"I need to write a weather report for San Francisco today based on the information I gathered from the \\'search\\' command.\",\\n \"plan\": \"- Use the \\'write_file\\' command to write a weather report for San Francisco today based on the information gathered\",\\n \"criticism\": \"I need to make sure that the weather report is accurate and informative.\",\\n \"speak\": \"I will use the \\'write_file\\' command to write a weather report for San Francisco today.\"\\n },\\n \"command\": {\\n \"name\": \"write_file\",\\n \"args\": {\\n \"file_path\": \"weather_report.txt\",\\n \"text\": \"Weather Report for San Francisco Today:\\\\n\\\\nThe current weather in San Francisco is sunny with a temperature of 54°F. It is expected to remain sunny throughout the day with a high of 62°F and a low of 50°F. There is no chance of precipitation today. It is recommended to wear light clothing and sunscreen if spending time outdoors.\\\\n\\\\nStay safe and enjoy the beautiful weather!\"\\n }\\n }\\n} \\nResult: Command write_file returned: File written to successfully. ', 'Assistant Reply: {\\n \"thoughts\": {\\n \"text\": \"I will start by writing a weather report for San Francisco today. I will use the \\'search\\' command to find the current weather conditions.\",\\n \"reasoning\": \"I need to gather information about the current weather conditions in San Francisco to write an accurate weather report.\",\\n \"plan\": \"- Use the \\'search\\' command to find the current weather conditions in San Francisco\\\\n- Write a weather report based on the information gathered\",\\n \"criticism\": \"I need to make sure that the information I gather is accurate and up-to-date.\",\\n \"speak\": \"I will use the \\'search\\' command to find the current weather conditions in San Francisco.\"\\n },\\n \"command\": {\\n \"name\": \"search\",\\n \"args\": {\\n \"query\": \"what is the current weather in san francisco\"\\n }\\n }\\n} \\nResult: Command search returned: Current Weather ; 54°F · Sunny ; RealFeel® 66°. Pleasant. RealFeel Guide. Pleasant. 63° to 81°. Most consider this temperature range ideal. LEARN MORE. RealFeel ... ']\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Human: Determine which next command to use, and respond using the format specified above:\n",
|
||||
"AI: {\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I think I should start by writing the weather report for SF today. I can use the 'search' command to find the current weather conditions.\",\n",
|
||||
" \"reasoning\": \"The first step to completing my goals is to write the weather report for SF today. I can use the 'search' command to find the current weather conditions.\",\n",
|
||||
" \"plan\": \"- Use the 'search' command to find the current weather conditions.\\n- Write the weather report for SF today.\",\n",
|
||||
" \"criticism\": \"I need to make sure that I am accurate in my reporting of the weather conditions.\",\n",
|
||||
" \"speak\": \"I will use the 'search' command to find the current weather conditions and then write the weather report for SF today.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"search\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"tool_input\": \"current weather conditions in San Francisco\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"System: Command search returned: Cloudy skies early, then partly cloudy in the afternoon. High 56F. Winds W at 15 to 25 mph.\n",
|
||||
"Human: Determine which next command to use, and respond using the format specified above:\n",
|
||||
"AI: {\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"Now that I have the current weather conditions for SF, I can write the weather report. I will use the 'write_file' command to save the report to a file.\",\n",
|
||||
" \"reasoning\": \"Now that I have the current weather conditions for SF, I can write the weather report. I will use the 'write_file' command to save the report to a file.\",\n",
|
||||
" \"plan\": \"- Use the 'write_file' command to save the weather report to a file.\",\n",
|
||||
" \"criticism\": \"I need to make sure that the weather report is clear and concise.\",\n",
|
||||
" \"speak\": \"I will use the 'write_file' command to save the weather report to a file.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"write_file\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"file_path\": \"weather_report.txt\",\n",
|
||||
" \"text\": \"Weather Report for San Francisco on April 16, 2023:\\n\\nCloudy skies early, then partly cloudy in the afternoon. High 56F. Winds W at 15 to 25 mph.\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"System: Command write_file returned: File written to successfully.\n",
|
||||
"Human: Determine which next command to use, and respond using the format specified above:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I have completed all my tasks. I will use the 'finish' command to signal that I am done.\",\n",
|
||||
" \"reasoning\": \"I have completed all my tasks, so I will use the 'finish' command to signal that I am done.\",\n",
|
||||
" \"plan\": \"- Use the 'finish' command to signal that I am done.\",\n",
|
||||
" \"criticism\": \"I need to make sure that I have completed all my tasks before using the 'finish' command.\",\n",
|
||||
" \"speak\": \"I have completed all my tasks, so I will use the 'finish' command to signal that I am done.\"\n",
|
||||
" \"text\": \"I have completed my task of writing a weather report for San Francisco today. I will now use the \\'finish\\' command to signal that I have finished all my objectives.\",\n",
|
||||
" \"reasoning\": \"I have completed all my objectives and there are no further tasks to be completed.\",\n",
|
||||
" \"plan\": \"- Use the \\'finish\\' command to signal that I have completed all my objectives.\",\n",
|
||||
" \"criticism\": \"I need to make sure that I have completed all my objectives before using the \\'finish\\' command.\",\n",
|
||||
" \"speak\": \"I have completed my task of writing a weather report for San Francisco today. I will now use the \\'finish\\' command to signal that I have finished all my objectives.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"finish\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"response\": \"I have completed all my tasks. Goodbye!\"\n",
|
||||
" \"response\": \"I have completed all my objectives.\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
@@ -451,10 +417,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'I have completed all my tasks. Goodbye!'"
|
||||
"'I have completed all my objectives.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -462,14 +428,6 @@
|
||||
"source": [
|
||||
"agent.run([\"write a weather report for SF today\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "aa264f26",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -488,7 +446,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "ef972313-c05a-4c49-8fd1-03e599e21033",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -26,7 +26,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "1cff42fd",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -34,13 +34,13 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# General \n",
|
||||
"import os\n",
|
||||
"import pandas as pd\n",
|
||||
"from langchain.experimental.autonomous_agents.autogpt.agent import AutoGPT\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent\n",
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"from langchain.chains import RetrievalQA\n",
|
||||
"import asyncio\n",
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
@@ -51,14 +51,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "01283ac7-1da0-41ba-8011-bd455d21dd82",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=1.0)"
|
||||
"llm = ChatOpenAI(model_name=\"gpt-4\", temperature=1.0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -68,7 +68,7 @@
|
||||
"source": [
|
||||
"### Set up tools\n",
|
||||
"\n",
|
||||
"* We'll set up an AutoGPT with a `search` tool, and `write-file` tool, and a `read-file` tool, and a web browsing tool"
|
||||
"* We'll set up an AutoGPT with a `search` tool, and `write-file` tool, and a `read-file` tool, a web browsing tool, and a tool to interact with a CSV file via a python REPL"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -76,12 +76,12 @@
|
||||
"id": "708a426f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Define any other `tools` you want to use here"
|
||||
"Define any other `tools` you want to use below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "cef4c150-0ef1-4a33-836b-01062fec134e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -89,86 +89,46 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Tools\n",
|
||||
"import os\n",
|
||||
"from contextlib import contextmanager\n",
|
||||
"from typing import Optional\n",
|
||||
"from langchain.agents import tool\n",
|
||||
"from langchain.tools.file_management.read import ReadFileTool\n",
|
||||
"from langchain.tools.file_management.write import WriteFileTool\n",
|
||||
"\n",
|
||||
"@tool\n",
|
||||
"def process_csv(csv_file_path: str, instructions: str, output_path: Optional[str] = None) -> str:\n",
|
||||
" \"\"\"Process a CSV by with pandas in a limited REPL. Only use this after writing data to disk as a csv file. Any figures must be saved to disk to be viewed by the human. Instructions should be written in natural language, not code. Assume the dataframe is already loaded.\"\"\"\n",
|
||||
"ROOT_DIR = \"./data/\"\n",
|
||||
"\n",
|
||||
"@contextmanager\n",
|
||||
"def pushd(new_dir):\n",
|
||||
" \"\"\"Context manager for changing the current working directory.\"\"\"\n",
|
||||
" prev_dir = os.getcwd()\n",
|
||||
" os.chdir(new_dir)\n",
|
||||
" try:\n",
|
||||
" df = pd.read_csv(csv_file_path)\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"Error: {e}\"\n",
|
||||
" agent = create_pandas_dataframe_agent(llm, df, max_iterations=30, verbose=True)\n",
|
||||
" if output_path is not None:\n",
|
||||
" instructions += f\" Save output to disk at {output_path}\"\n",
|
||||
" try:\n",
|
||||
" return agent.run(instructions)\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"Error: {e}\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "51c07298-00e0-42d6-8aff-bd2e6bbd35a3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Web Search Tool**\n",
|
||||
" yield\n",
|
||||
" finally:\n",
|
||||
" os.chdir(prev_dir)\n",
|
||||
"\n",
|
||||
"No need for API Tokens to use this tool, but it will require an optional dependency"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "4afdedb2-f295-4ab8-9397-3640f5eeeed3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip install duckduckgo_search"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "45f143de-e49e-4e27-88eb-ee44a4fdf933",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"from duckduckgo_search import ddg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "e2e799f4-86fb-4190-a298-4ae5c7b7a540",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@tool\n",
|
||||
"def web_search(query: str, num_results: int = 8) -> str:\n",
|
||||
" \"\"\"Useful for general internet search queries.\"\"\"\n",
|
||||
" search_results = []\n",
|
||||
" if not query:\n",
|
||||
" return json.dumps(search_results)\n",
|
||||
"\n",
|
||||
" results = ddg(query, max_results=num_results)\n",
|
||||
" if not results:\n",
|
||||
" return json.dumps(search_results)\n",
|
||||
"\n",
|
||||
" for j in results:\n",
|
||||
" search_results.append(j)\n",
|
||||
"\n",
|
||||
" return json.dumps(search_results, ensure_ascii=False, indent=4)"
|
||||
"def process_csv(\n",
|
||||
" csv_file_path: str, instructions: str, output_path: Optional[str] = None\n",
|
||||
") -> str:\n",
|
||||
" \"\"\"Process a CSV by with pandas in a limited REPL.\\\n",
|
||||
" Only use this after writing data to disk as a csv file.\\\n",
|
||||
" Any figures must be saved to disk to be viewed by the human.\\\n",
|
||||
" Instructions should be written in natural language, not code. Assume the dataframe is already loaded.\"\"\"\n",
|
||||
" with pushd(ROOT_DIR):\n",
|
||||
" try:\n",
|
||||
" df = pd.read_csv(csv_file_path)\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"Error: {e}\"\n",
|
||||
" agent = create_pandas_dataframe_agent(llm, df, max_iterations=30, verbose=True)\n",
|
||||
" if output_path is not None:\n",
|
||||
" instructions += f\" Save output to disk at {output_path}\"\n",
|
||||
" try:\n",
|
||||
" result = agent.run(instructions)\n",
|
||||
" return result\n",
|
||||
" except Exception as e:\n",
|
||||
" return f\"Error: {e}\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -183,7 +143,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 5,
|
||||
"id": "6bb5e47b-0f54-4faa-ae42-49a28fa5497b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -196,7 +156,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"id": "26b497d7-8e52-4c7f-8e7e-da0a48820a3c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -252,17 +212,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"id": "1842929d-f18d-4edc-9fdd-82c929181141",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.tools.base import BaseTool\n",
|
||||
"from langchain.tools import BaseTool, DuckDuckGoSearchTool\n",
|
||||
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
||||
"\n",
|
||||
"from langchain.document_loaders import WebBaseLoader\n",
|
||||
"from pydantic import Field\n",
|
||||
"from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain, BaseCombineDocumentsChain\n",
|
||||
"\n",
|
||||
@@ -302,7 +261,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 8,
|
||||
"id": "e6f72bd0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -324,7 +283,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 9,
|
||||
"id": "1df7b724",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -356,7 +315,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 10,
|
||||
"id": "1233caf3-fbc9-4acb-9faa-01008200633d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip install duckduckgo_search\n",
|
||||
"web_search = DuckDuckGoSearchTool()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "88c8b184-67d7-4c35-84ae-9b14bef8c4e3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -365,8 +335,8 @@
|
||||
"source": [
|
||||
"tools = [\n",
|
||||
" web_search,\n",
|
||||
" WriteFileTool(),\n",
|
||||
" ReadFileTool(),\n",
|
||||
" WriteFileTool(root_dir=\"./data\"),\n",
|
||||
" ReadFileTool(root_dir=\"./data\"),\n",
|
||||
" process_csv,\n",
|
||||
" query_website_tool,\n",
|
||||
" # HumanInputRun(), # Activate if you want the permit asking for help from the human\n",
|
||||
@@ -398,17 +368,12 @@
|
||||
"id": "fc9b51ba",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### AutoGPT as a research / data munger \n",
|
||||
"\n",
|
||||
"#### `inflation` and `college tuition`\n",
|
||||
"### AutoGPT for Querying the Web\n",
|
||||
" \n",
|
||||
"Let's use AutoGPT as researcher and data munger / cleaner.\n",
|
||||
" \n",
|
||||
"I spent a lot of time over the years crawling data sources and cleaning data. \n",
|
||||
"I've spent a lot of time over the years crawling data sources and cleaning data. Let's see if AutoGPT can help with this!\n",
|
||||
"\n",
|
||||
"Let's see if AutoGPT can do all of this for us!\n",
|
||||
"\n",
|
||||
"Here is the prompt comparing `inflation` and `college tuition`."
|
||||
"Here is the prompt for looking up recent boston marathon times and converting them to tabular form."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -419,220 +384,195 @@
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to persist run: Object of type 'FAISS' is not JSON serializable\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I will start by using the web_search command to find the Boston Marathon winning times for the past 5 years. Since it is a simple query, I expect to find relevant information in the first few search results.\",\n",
|
||||
" \"reasoning\": \"I have decided on this course of action to efficiently retrieve the information I need while keeping it simple to avoid legal complications.\",\n",
|
||||
" \"plan\": \"- Use web_search command to search for 'Boston Marathon winning times past 5 years'\\n- Look through top results for the information requested\\n- If necessary, repeat search with slightly different query to get better results\",\n",
|
||||
" \"criticism\": \"I should make sure to phrase my search query concisely to prevent too many irrelevant search results.\",\n",
|
||||
" \"speak\": \"I will use the web_search command to find the Boston Marathon winning times for the past 5 years.\"\n",
|
||||
" \"text\": \"I need to find the winning Boston Marathon times for the past 5 years. I can use the DuckDuckGo Search command to search for this information.\",\n",
|
||||
" \"reasoning\": \"Using DuckDuckGo Search will help me gather information on the winning times without complications.\",\n",
|
||||
" \"plan\": \"- Use DuckDuckGo Search to find the winning Boston Marathon times\\n- Generate a table with the year, name, country of origin, and times\\n- Ensure there are no legal complications\",\n",
|
||||
" \"criticism\": \"None\",\n",
|
||||
" \"speak\": \"I will use the DuckDuckGo Search command to find the winning Boston Marathon times for the past 5 years.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"web_search\",\n",
|
||||
" \"name\": \"DuckDuckGo Search\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"query\": \"Boston Marathon winning times past 5 years\",\n",
|
||||
" \"num_results\": 8\n",
|
||||
" \"query\": \"winning Boston Marathon times for the past 5 years ending in 2022\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\u001b[32mLast Observation: [\n",
|
||||
" {\n",
|
||||
" \"title\": \"List of winners of the Boston Marathon - Wikipedia\",\n",
|
||||
" \"href\": \"https://en.wikipedia.org/wiki/List_of_winners_of_the_Boston_Marathon\",\n",
|
||||
" \"body\": \"Louise Sauvage won the women's wheelchair division in three consecutive Boston Marathons, between 1997 and 1999. Edith Hunkeler won the race twice, in 2002 and 2006. Wakako Tsuchida won the race in five consecutive years from 2007 to 2011. Tatyana McFadden won the race five times between 2013 and 2018.\"\n",
|
||||
" },\n",
|
||||
" \u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to persist run: Object of type 'FAISS' is not JSON serializable\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"The search results indicate that the winning times for the past 5 years can be found on the Wikipedia page for the Boston Marathon winners. I will retrieve this information using the query_webpage command and the URL of the relevant Wikipedia page. \",\n",
|
||||
" \"reasoning\": \"This method will allow me to avoid any legal complications while retrieving the exact information that I need.\",\n",
|
||||
" \"plan\": \"- Use query_webpage command to browse through the Wikipedia page for the Boston Marathon winners \\\\n- Locate the relevant information on the page \\\\n- Return the information to the user\",\n",
|
||||
" \"criticism\": \"I should be sure to locate and use only reliable sources to avoid providing the user with inaccurate information.\",\n",
|
||||
" \"speak\": \"I will use the query_webpage command to browse through the Wikipedia page for the Boston Marathon winners and retrieve the relevant information.\"\n",
|
||||
" \"text\": \"The DuckDuckGo Search command did not provide the specific information I need. I must switch my approach and use query_webpage command to browse a webpage containing the Boston Marathon winning times for the past 5 years.\",\n",
|
||||
" \"reasoning\": \"The query_webpage command may give me more accurate and comprehensive results compared to the search command.\",\n",
|
||||
" \"plan\": \"- Use query_webpage command to find the winning Boston Marathon times\\n- Generate a table with the year, name, country of origin, and times\\n- Ensure there are no legal complications\",\n",
|
||||
" \"criticism\": \"I may face difficulty in finding the right webpage with the desired information.\",\n",
|
||||
" \"speak\": \"I will use the query_webpage command to find the winning Boston Marathon times for the past 5 years.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"DuckDuckGo Search\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"query\": \"site with winning Boston Marathon times for the past 5 years ending in 2022\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I need to use the query_webpage command to find the information about the winning Boston Marathon times for the past 5 years.\",\n",
|
||||
" \"reasoning\": \"The previous DuckDuckGo Search command did not provide specific enough results. The query_webpage command might give more accurate and comprehensive results.\",\n",
|
||||
" \"plan\": \"- Use query_webpage command to find the winning Boston Marathon times\\\\n- Generate a table with the year, name, country of origin, and times\\\\n- Ensure there are no legal complications\",\n",
|
||||
" \"criticism\": \"I may face difficulty in finding the right webpage with the desired information.\",\n",
|
||||
" \"speak\": \"I will use the query_webpage command to find the winning Boston Marathon times for the past 5 years.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"query_webpage\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"url\": \"https://en.wikipedia.org/wiki/List_of_winners_of_the_Boston_Marathon\", \n",
|
||||
" \"question\": \"What are the winning times of the Boston Marathon for the past 5 years? \"\n",
|
||||
" \"url\": \"https://en.wikipedia.org/wiki/List_of_winners_of_the_Boston_Marathon\",\n",
|
||||
" \"question\": \"What were the winning Boston Marathon times for the past 5 years ending in 2022?\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\u001b[32mLast Observation: {'output_text': \"The winning times of the Boston Marathon for the past 5 years are: \\n- 2017: Men's - 2:09:37, Women's - 2:21:52\\n- 2018: Men's - 2:15:58, Women's - 2:39:54\\n- 2019: Men's - 2:07:57, Women's - 2:23:31\\n- 2020: The Boston Marathon was canceled due to the COVID-19 pandemic.\\n- 2021: Men's - 2:09:28, Women's - 2:18:10\\nSOURCES: https://en.wikipedia.org/wiki/List_of_winners_of_the_Boston_Marathon\"}\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to persist run: Object of type 'FAISS' is not JSON serializable\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I could write this information to a file using the write_file command so that I can refer back to it later. But first, I should process the information to extract the relevant data and convert it into a useful format using the process_csv command.\",\n",
|
||||
" \"reasoning\": \"Storing the information in a CSV file will allow me to use pandas to manipulate the data in the future with ease, while converting the data into a useful format will make it easier for other people to use the data.\",\n",
|
||||
" \"plan\": \"- Use process_csv command to extract and format the relevant data from the Wikipedia page\\\\n- Write the data to a CSV file using the write_file command\\\\n- Inform the user that the data has been saved to file\",\n",
|
||||
" \"criticism\": \"I should thoroughly check the CSV file for accuracy before saving the data.\",\n",
|
||||
" \"speak\": \"I will use the process_csv command to extract and format the relevant data from the Wikipedia page, and save it to a CSV file using the write_file command.\"\n",
|
||||
" \"text\": \"I have already found the winning Boston Marathon times for the past 5 years. Now, I need to generate a table with the information.\",\n",
|
||||
" \"reasoning\": \"Using the information I already have, I can create a table containing year, name, country of origin, and times.\",\n",
|
||||
" \"plan\": \"- Write the marathon data to a CSV file\\n- Process the CSV file to display the table\",\n",
|
||||
" \"criticism\": \"None\",\n",
|
||||
" \"speak\": \"I will generate a table with the year, name, country of origin, and times for the winning Boston Marathon times for the past 5 years.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"write_file\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"file_path\": \"boston_marathon_winners.csv\",\n",
|
||||
" \"text\": \"Year,Name,Country,Time\\n2022,Evans Chebet,KEN,2:06:51\\n2021,Benson Kipruto,KEN,2:09:51\\n2019,Lawrence Cherono,KEN,2:07:57\\n2018,Yuki Kawauchi,JPN,2:15:58\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I have retrieved the winning Boston Marathon times for the past 5 years. Now, I need to generate a table with the year, name, country of origin, and times.\",\n",
|
||||
" \"reasoning\": \"Creating a table will help organize the data in a clear and accessible format.\",\n",
|
||||
" \"plan\": \"- Write the data to a CSV file\\n- Process the CSV file to generate the table\\n- Complete the task\",\n",
|
||||
" \"criticism\": \"None\",\n",
|
||||
" \"speak\": \"I will generate a table with the year, name, country of origin, and winning times using the recently retrieved data.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"write_file\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"file_path\": \"winning_boston_marathon_data.csv\",\n",
|
||||
" \"text\": \"Year,Name,Country,Time\\n2022,Evans Chebet,KEN,2:06:51\\n2021,Benson Kipruto,KEN,2:09:51\\n2019,Lawrence Cherono,KEN,2:07:57\\n2018,Yuki Kawauchi,JPN,2:15:58\\n\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I have found the winning Boston Marathon times for the past five years ending in 2022. Next, I need to create a table with the year, name, country of origin, and times.\",\n",
|
||||
" \"reasoning\": \"Generating a table will help organize the information in a structured format.\",\n",
|
||||
" \"plan\": \"- Create a table with the year, name, country of origin, and times\\n- Ensure there are no legal complications\",\n",
|
||||
" \"criticism\": \"None\",\n",
|
||||
" \"speak\": \"I will generate a table with the winning Boston Marathon times for the past 5 years ending in 2022.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"write_file\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"file_path\": \"winning_times.csv\",\n",
|
||||
" \"text\": \"Year,Name,Country,Time\\n2022,Evans Chebet,Kenya,2:06:51\\n2021,Benson Kipruto,Kenya,2:09:51\\n2020,Canceled due to COVID-19 pandemic,,\\n2019,Lawrence Cherono,Kenya,2:07:57\\n2018,Yuki Kawauchi,Japan,2:15:58\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I need to process the CSV file to generate the table with the year, name, country of origin, and winning times.\",\n",
|
||||
" \"reasoning\": \"I have already written the data to a file named 'winning_times.csv'. Now, I need to process this CSV file to properly display the data as a table.\",\n",
|
||||
" \"plan\": \"- Use the process_csv command to read the 'winning_times.csv' file and generate the table\",\n",
|
||||
" \"criticism\": \"None\",\n",
|
||||
" \"speak\": \"I will process the 'winning_times.csv' file to display the table with the winning Boston Marathon times for the past 5 years.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"process_csv\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"csv_file_path\": \"boston_marathon_winners.csv\",\n",
|
||||
" \"instructions\": \"1. Extract data from Wikipedia page\\\\n2. Convert winning times into datetime format\\\\n3. Save data as CSV file with columns: year, men\\'s time, women\\'s time\",\n",
|
||||
" \"output_path\": None\n",
|
||||
" \"csv_file_path\": \"winning_times.csv\",\n",
|
||||
" \"instructions\": \"Read the CSV file and display the data as a table\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\u001b[32mLast Observation: {'output_text': \"The winning times of the Boston Marathon for the past 5 years are: \\n- 2017: Men's - 2:09:37, Women's - 2:21:52\\n- 2018: Men's - 2:15:58, Women's - 2:39:54\\n- 2019: Men's - 2:07:57, Women's - 2:23:31\\n- 2020: The Boston Marathon was canceled due to the COVID-19 pandemic.\\n- 2021: Men's - 2:09:28, Women's - 2:18:10\\nSOURCES: https://en.wikipedia.org/wiki/List_of_winners_of_the_Boston_Marathon\"}\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to persist run: Object of type 'FAISS' is not JSON serializable\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I will now use the write_file command to save the winning times for the past 5 years as a text file for later reference.\",\n",
|
||||
" \"reasoning\": \"This will allow me to easily access the winning times in the future without needing to search for the information again.\",\n",
|
||||
" \"plan\": \"- Use the write_file command to save the winning times for the past 5 years to a text file.\\\\n- Name the file appropriately, so it is easy to identify in the future.\",\n",
|
||||
" \"criticism\": \"I should review the contents of the file after writing it to make sure that the information has been saved correctly and can be read later without errors.\",\n",
|
||||
" \"speak\": \"I will use the write_file command to save the winning times for the past 5 years as a text file for easy retrieval in the future.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"write_file\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"file_path\": \"BostonMarathonWinningTimes.txt\",\n",
|
||||
" \"text\": \"The winning times of the Boston Marathon for the past 5 years are: \\n- 2017: Men's - 2:09:37, Women's - 2:21:52\\n- 2018: Men's - 2:15:58, Women's - 2:39:54\\n- 2019: Men's - 2:07:57, Women's - 2:23:31\\n- 2020: The Boston Marathon was canceled due to the COVID-19 pandemic.\\n- 2021: Men's - 2:09:28, Women's - 2:18:10\\nSOURCES: https://en.wikipedia.org/wiki/List_of_winners_of_the_Boston_Marathon\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\u001b[32mLast Observation: File written to successfully.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to persist run: Object of type 'FAISS' is not JSON serializable\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"I will use the command \"query_webpage\" to browse through the Wikipedia page for the Boston Marathon winners and retrieve the winning times for the past 5 years. This is the method that will allow me to retrieve the exact information I need while avoiding any legal issues. \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: The CSV file has already been read and saved into a pandas dataframe called `df`. Hence, I can simply display the data by printing the whole dataframe. Since `df.head()` returns the first 5 rows, I can use that to showcase the contents.\n",
|
||||
"\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: print(df.head())\u001b[0m Year Name Country Time\n",
|
||||
"0 2022 Evans Chebet Kenya 2:06:51\n",
|
||||
"1 2021 Benson Kipruto Kenya 2:09:51\n",
|
||||
"2 2020 Canceled due to COVID-19 pandemic NaN NaN\n",
|
||||
"3 2019 Lawrence Cherono Kenya 2:07:57\n",
|
||||
"4 2018 Yuki Kawauchi Japan 2:15:58\n",
|
||||
"\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mNone\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI used the wrong tool to perform the action. I should have used the given data and not interacted with the Python shell. I can now provide the displayed data as the answer since the information in the printed dataframe would look like a table when typed as text.\n",
|
||||
"\n",
|
||||
"Final Answer: \n",
|
||||
" Year Name Country Time\n",
|
||||
"0 2022 Evans Chebet Kenya 2:06:51\n",
|
||||
"1 2021 Benson Kipruto Kenya 2:09:51\n",
|
||||
"2 2020 Canceled due to COVID-19 pandemic NaN NaN\n",
|
||||
"3 2019 Lawrence Cherono Kenya 2:07:57\n",
|
||||
"4 2018 Yuki Kawauchi Japan 2:15:58\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"The search results indicate that the winning times for the past 5 years can be found on the Wikipedia page for the Boston Marathon winners. I will retrieve this information using the query_webpage command and the URL of the relevant Wikipedia page. \",\n",
|
||||
" \"reasoning\": \"This method will allow me to avoid any legal complications while retrieving the exact information that I need.\",\n",
|
||||
" \"plan\": \"- Use query_webpage command to browse through the Wikipedia page for the Boston Marathon winners \\\\n- Locate the relevant information on the page \\\\n- Return the information to the user\",\n",
|
||||
" \"criticism\": \"I should be sure to locate and use only reliable sources to avoid providing the user with inaccurate information.\",\n",
|
||||
" \"speak\": \"I will use the query_webpage command to browse through the Wikipedia page for the Boston Marathon winners and retrieve the winning times for the past 5 years. \"\n",
|
||||
" \"text\": \"I already have the winning Boston Marathon times for the past 5 years saved in the file 'winning_times.csv'. Now, I need to process the CSV and display the table.\",\n",
|
||||
" \"reasoning\": \"I am choosing the process_csv command because I already have the required data saved as a CSV file, and I can use this command to read and display the data as a table.\",\n",
|
||||
" \"plan\": \"- Use the process_csv command to read the 'winning_times.csv' file and generate the table\",\n",
|
||||
" \"criticism\": \"None\",\n",
|
||||
" \"speak\": \"I will process the 'winning_times.csv' file to display the table with the winning Boston Marathon times for the past 5 years.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"query_webpage\",\n",
|
||||
" \"name\": \"process_csv\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"url\": \"https://en.wikipedia.org/wiki/List_of_winners_of_the_Boston_Marathon\", \n",
|
||||
" \"question\": \"What are the winning times of the Boston Marathon for the past 5 years? \"\n",
|
||||
" \"csv_file_path\": \"winning_times.csv\",\n",
|
||||
" \"instructions\": \"Read the CSV file and display the data as a table\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[32mLast Observation: File written to successfully.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to persist run: Object of type 'FAISS' is not JSON serializable\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: Since the data is already loaded in a pandas dataframe, I just need to display the top rows of the dataframe.\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: df.head()\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m Year Name Country Time\n",
|
||||
"0 2022 Evans Chebet Kenya 2:06:51\n",
|
||||
"1 2021 Benson Kipruto Kenya 2:09:51\n",
|
||||
"2 2020 Canceled due to COVID-19 pandemic NaN NaN\n",
|
||||
"3 2019 Lawrence Cherono Kenya 2:07:57\n",
|
||||
"4 2018 Yuki Kawauchi Japan 2:15:58\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mI now know the final answer.\n",
|
||||
"Final Answer: \n",
|
||||
" Year Name Country Time\n",
|
||||
"0 2022 Evans Chebet Kenya 2:06:51\n",
|
||||
"1 2021 Benson Kipruto Kenya 2:09:51\n",
|
||||
"2 2020 Canceled due to COVID-19 pandemic NaN NaN\n",
|
||||
"3 2019 Lawrence Cherono Kenya 2:07:57\n",
|
||||
"4 2018 Yuki Kawauchi Japan 2:15:58\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"Now that I have the winning times for the past 5 years, I will save them to a file to ensure that I don't forget them. I will use the write_file command to save this data to disk.\",\n",
|
||||
" \"reasoning\": \"By saving the data to disk, I can easily retrieve it in the future and avoid having to perform the same search again.\",\n",
|
||||
" \"plan\": \"- Use the write_file command to save the data to a text file.\",\n",
|
||||
" \"criticism\": \"I should ensure that the file path and file name are clear and descriptive to make it easy to locate the relevant information in the future.\",\n",
|
||||
" \"speak\": \"I will save the winning times for the past 5 years to a text file using the write_file command.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"write_file\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"file_path\": \"winning_times.txt\",\n",
|
||||
" \"text\": \"The winning times of the Boston Marathon for the past 5 years are:\\n- 2017: Men's - 2:09:37, Women's - 2:21:52\\n- 2018: Men's - 2:15:58, Women's - 2:39:54\\n- 2019: Men's - 2:07:57, Women's - 2:23:31\\n- 2020: The Boston Marathon was canceled due to the COVID-19 pandemic.\\n- 2021: Men's - 2:09:28, Women's - 2:18:10\\nSOURCES: https://en.wikipedia.org/wiki/List_of_winners_of_the_Boston_Marathon\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\u001b[32mLast Observation: File written to successfully.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to persist run: Object of type 'FAISS' is not JSON serializable\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"thoughts\": {\n",
|
||||
" \"text\": \"I will now shut down my system, as I have completed all of my objectives.\",\n",
|
||||
" \"reasoning\": \"Since I have completed all of my objectives, there is no need to keep running my system.\",\n",
|
||||
" \"plan\": \"- Use the finish command to signal that I have completed all of my objectives.\\\\\\\\n- Shut down the system.\",\n",
|
||||
" \"criticism\": \"I should always double-check to ensure that all of my objectives have been completed before shutting down my system.\",\n",
|
||||
" \"speak\": \"I have completed all of my objectives, and I will now shut down my system.\"\n",
|
||||
" \"text\": \"I have already generated a table with the winning Boston Marathon times for the past 5 years. Now, I can finish the task.\",\n",
|
||||
" \"reasoning\": \"I have completed the required actions and obtained the desired data. The task is complete.\",\n",
|
||||
" \"plan\": \"- Use the finish command\",\n",
|
||||
" \"criticism\": \"None\",\n",
|
||||
" \"speak\": \"I have generated the table with the winning Boston Marathon times for the past 5 years. Task complete.\"\n",
|
||||
" },\n",
|
||||
" \"command\": {\n",
|
||||
" \"name\": \"finish\",\n",
|
||||
" \"args\": {\n",
|
||||
" \"response\": \"All objectives have been completed.\"\n",
|
||||
" \"response\": \"I have generated the table with the winning Boston Marathon times for the past 5 years. Task complete.\"\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"}\n"
|
||||
@@ -641,7 +581,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'All objectives have been completed.'"
|
||||
"'I have generated the table with the winning Boston Marathon times for the past 5 years. Task complete.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
@@ -650,8 +590,16 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run([\"What were the winning boston marathon times for the past 5 years\"])"
|
||||
"agent.run([\"What were the winning boston marathon times for the past 5 years (ending in 2022)? Generate a table of the year, name, country of origin, and times.\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a6b4f96e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -16,3 +16,6 @@ The following resources exist:
|
||||
Additional related resources include:
|
||||
- [Memory Key Concepts](../modules/memory.rst): Explanation of key concepts related to memory.
|
||||
- [Memory Examples](../modules/memory/how_to_guides.rst): A collection of how-to examples for working with memory.
|
||||
|
||||
More end-to-end examples include:
|
||||
- [Voice Assistant](chatbots/voice_assistant.ipynb): A notebook walking through how to create a voice assistant using LangChain.
|
||||
|
||||
479
docs/use_cases/chatbots/voice_assistant.ipynb
Normal file
479
docs/use_cases/chatbots/voice_assistant.ipynb
Normal file
@@ -0,0 +1,479 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Voice Assistant\n",
|
||||
"\n",
|
||||
"This chain creates a clone of ChatGPT with a few modifications to make it a voice assistant. \n",
|
||||
"It uses the `pyttsx3` and `speech_recognition` libraries to convert text to speech and speech to text respectively. The prompt template is also changed to make it more suitable for voice assistant use."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import OpenAI, ConversationChain, LLMChain, PromptTemplate\n",
|
||||
"from langchain.memory import ConversationBufferWindowMemory\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"template = \"\"\"Assistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Assistant is aware that human input is being transcribed from audio and as such there may be some errors in the transcription. It will attempt to account for some words being swapped with similar-sounding words or phrases. Assistant will also keep responses concise, because human attention spans are more limited over the audio channel since it takes time to listen to a response.\n",
|
||||
"\n",
|
||||
"{history}\n",
|
||||
"Human: {human_input}\n",
|
||||
"Assistant:\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate(\n",
|
||||
" input_variables=[\"history\", \"human_input\"], \n",
|
||||
" template=template\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chatgpt_chain = LLMChain(\n",
|
||||
" llm=OpenAI(temperature=0), \n",
|
||||
" prompt=prompt, \n",
|
||||
" verbose=True, \n",
|
||||
" memory=ConversationBufferWindowMemory(k=2),\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import speech_recognition as sr\n",
|
||||
"import pyttsx3\n",
|
||||
"engine = pyttsx3.init()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def listen():\n",
|
||||
" r = sr.Recognizer()\n",
|
||||
" with sr.Microphone() as source:\n",
|
||||
" print('Calibrating...')\n",
|
||||
" r.adjust_for_ambient_noise(source, duration=5)\n",
|
||||
" # optional parameters to adjust microphone sensitivity\n",
|
||||
" # r.energy_threshold = 200\n",
|
||||
" # r.pause_threshold=0.5 \n",
|
||||
" \n",
|
||||
" print('Okay, go!')\n",
|
||||
" while(1):\n",
|
||||
" text = ''\n",
|
||||
" print('listening now...')\n",
|
||||
" try:\n",
|
||||
" audio = r.listen(source, timeout=5, phrase_time_limit=30)\n",
|
||||
" print('Recognizing...')\n",
|
||||
" # whisper model options are found here: https://github.com/openai/whisper#available-models-and-languages\n",
|
||||
" # other speech recognition models are also available.\n",
|
||||
" text = r.recognize_whisper(audio, model='medium.en', show_dict=True, )['text']\n",
|
||||
" except Exception as e:\n",
|
||||
" unrecognized_speech_text = f'Sorry, I didn\\'t catch that. Exception was: {e}s'\n",
|
||||
" text = unrecognized_speech_text\n",
|
||||
" print(text)\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" response_text = chatgpt_chain.predict(human_input=text)\n",
|
||||
" print(response_text)\n",
|
||||
" engine.say(response_text)\n",
|
||||
" engine.runAndWait()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Calibrating...\n",
|
||||
"Okay, go!\n",
|
||||
"listening now...\n",
|
||||
"Recognizing...\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\jaden\\AppData\\Roaming\\Python\\Python310\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Hello, Assistant. What's going on?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Assistant is aware that human input is being transcribed from audio and as such there may be some errors in the transcription. It will attempt to account for some words being swapped with similar-sounding words or phrases. Assistant will also keep responses concise, because human attention spans are more limited over the audio channel since it takes time to listen to a response.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Human: Hello, Assistant. What's going on?\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
" Hi there! It's great to hear from you. I'm doing well. How can I help you today?\n",
|
||||
"listening now...\n",
|
||||
"Recognizing...\n",
|
||||
" That's cool. Isn't that neat? Yeah, I'm doing great.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Assistant is aware that human input is being transcribed from audio and as such there may be some errors in the transcription. It will attempt to account for some words being swapped with similar-sounding words or phrases. Assistant will also keep responses concise, because human attention spans are more limited over the audio channel since it takes time to listen to a response.\n",
|
||||
"\n",
|
||||
"Human: Hello, Assistant. What's going on?\n",
|
||||
"AI: Hi there! It's great to hear from you. I'm doing well. How can I help you today?\n",
|
||||
"Human: That's cool. Isn't that neat? Yeah, I'm doing great.\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
" That's great to hear! What can I do for you today?\n",
|
||||
"listening now...\n",
|
||||
"Recognizing...\n",
|
||||
" Thank you.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Assistant is aware that human input is being transcribed from audio and as such there may be some errors in the transcription. It will attempt to account for some words being swapped with similar-sounding words or phrases. Assistant will also keep responses concise, because human attention spans are more limited over the audio channel since it takes time to listen to a response.\n",
|
||||
"\n",
|
||||
"Human: Hello, Assistant. What's going on?\n",
|
||||
"AI: Hi there! It's great to hear from you. I'm doing well. How can I help you today?\n",
|
||||
"Human: That's cool. Isn't that neat? Yeah, I'm doing great.\n",
|
||||
"AI: That's great to hear! What can I do for you today?\n",
|
||||
"Human: Thank you.\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
" You're welcome! Is there anything else I can help you with?\n",
|
||||
"listening now...\n",
|
||||
"Recognizing...\n",
|
||||
" I'd like to learn more about neural networks.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Assistant is aware that human input is being transcribed from audio and as such there may be some errors in the transcription. It will attempt to account for some words being swapped with similar-sounding words or phrases. Assistant will also keep responses concise, because human attention spans are more limited over the audio channel since it takes time to listen to a response.\n",
|
||||
"\n",
|
||||
"Human: That's cool. Isn't that neat? Yeah, I'm doing great.\n",
|
||||
"AI: That's great to hear! What can I do for you today?\n",
|
||||
"Human: Thank you.\n",
|
||||
"AI: You're welcome! Is there anything else I can help you with?\n",
|
||||
"Human: I'd like to learn more about neural networks.\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
" Sure! Neural networks are a type of artificial intelligence that use a network of interconnected nodes to process data and make decisions. They are used in a variety of applications, from image recognition to natural language processing. Neural networks are often used to solve complex problems that are too difficult for traditional algorithms.\n",
|
||||
"listening now...\n",
|
||||
"Recognizing...\n",
|
||||
" Tell me a fun fact about neural networks.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Assistant is aware that human input is being transcribed from audio and as such there may be some errors in the transcription. It will attempt to account for some words being swapped with similar-sounding words or phrases. Assistant will also keep responses concise, because human attention spans are more limited over the audio channel since it takes time to listen to a response.\n",
|
||||
"\n",
|
||||
"Human: Thank you.\n",
|
||||
"AI: You're welcome! Is there anything else I can help you with?\n",
|
||||
"Human: I'd like to learn more about neural networks.\n",
|
||||
"AI: Sure! Neural networks are a type of artificial intelligence that use a network of interconnected nodes to process data and make decisions. They are used in a variety of applications, from image recognition to natural language processing. Neural networks are often used to solve complex problems that are too difficult for traditional algorithms.\n",
|
||||
"Human: Tell me a fun fact about neural networks.\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
" Neural networks are inspired by the way the human brain works. They are composed of interconnected nodes that process data and make decisions, just like neurons in the brain. Neural networks can learn from their mistakes and improve their performance over time, just like humans do.\n",
|
||||
"listening now...\n",
|
||||
"Recognizing...\n",
|
||||
" Tell me about a brand new discovered bird species.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Assistant is aware that human input is being transcribed from audio and as such there may be some errors in the transcription. It will attempt to account for some words being swapped with similar-sounding words or phrases. Assistant will also keep responses concise, because human attention spans are more limited over the audio channel since it takes time to listen to a response.\n",
|
||||
"\n",
|
||||
"Human: I'd like to learn more about neural networks.\n",
|
||||
"AI: Sure! Neural networks are a type of artificial intelligence that use a network of interconnected nodes to process data and make decisions. They are used in a variety of applications, from image recognition to natural language processing. Neural networks are often used to solve complex problems that are too difficult for traditional algorithms.\n",
|
||||
"Human: Tell me a fun fact about neural networks.\n",
|
||||
"AI: Neural networks are inspired by the way the human brain works. They are composed of interconnected nodes that process data and make decisions, just like neurons in the brain. Neural networks can learn from their mistakes and improve their performance over time, just like humans do.\n",
|
||||
"Human: Tell me about a brand new discovered bird species.\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
" A new species of bird was recently discovered in the Amazon rainforest. The species, called the Spix's Macaw, is a small, blue parrot that is believed to be extinct in the wild. It is the first new species of bird to be discovered in the Amazon in over 100 years.\n",
|
||||
"listening now...\n",
|
||||
"Recognizing...\n",
|
||||
" Tell me a children's story about the importance of honesty and trust.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Assistant is aware that human input is being transcribed from audio and as such there may be some errors in the transcription. It will attempt to account for some words being swapped with similar-sounding words or phrases. Assistant will also keep responses concise, because human attention spans are more limited over the audio channel since it takes time to listen to a response.\n",
|
||||
"\n",
|
||||
"Human: Tell me a fun fact about neural networks.\n",
|
||||
"AI: Neural networks are inspired by the way the human brain works. They are composed of interconnected nodes that process data and make decisions, just like neurons in the brain. Neural networks can learn from their mistakes and improve their performance over time, just like humans do.\n",
|
||||
"Human: Tell me about a brand new discovered bird species.\n",
|
||||
"AI: A new species of bird was recently discovered in the Amazon rainforest. The species, called the Spix's Macaw, is a small, blue parrot that is believed to be extinct in the wild. It is the first new species of bird to be discovered in the Amazon in over 100 years.\n",
|
||||
"Human: Tell me a children's story about the importance of honesty and trust.\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
" Once upon a time, there was a young boy named Jack who lived in a small village. Jack was always honest and trustworthy, and his friends and family knew they could always count on him. One day, Jack was walking through the forest when he stumbled upon a magical tree. The tree told Jack that if he was honest and trustworthy, he would be rewarded with a special gift. Jack was so excited, and he promised to always be honest and trustworthy. Sure enough, the tree rewarded Jack with a beautiful golden apple. From that day forward, Jack was always honest and trustworthy, and he was rewarded with many more magical gifts. The moral of the story is that honesty and trust are the most important things in life.\n",
|
||||
"listening now...\n",
|
||||
"Recognizing...\n",
|
||||
" Wow, Assistant, that was a really good story. Congratulations!\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Assistant is aware that human input is being transcribed from audio and as such there may be some errors in the transcription. It will attempt to account for some words being swapped with similar-sounding words or phrases. Assistant will also keep responses concise, because human attention spans are more limited over the audio channel since it takes time to listen to a response.\n",
|
||||
"\n",
|
||||
"Human: Tell me about a brand new discovered bird species.\n",
|
||||
"AI: A new species of bird was recently discovered in the Amazon rainforest. The species, called the Spix's Macaw, is a small, blue parrot that is believed to be extinct in the wild. It is the first new species of bird to be discovered in the Amazon in over 100 years.\n",
|
||||
"Human: Tell me a children's story about the importance of honesty and trust.\n",
|
||||
"AI: Once upon a time, there was a young boy named Jack who lived in a small village. Jack was always honest and trustworthy, and his friends and family knew they could always count on him. One day, Jack was walking through the forest when he stumbled upon a magical tree. The tree told Jack that if he was honest and trustworthy, he would be rewarded with a special gift. Jack was so excited, and he promised to always be honest and trustworthy. Sure enough, the tree rewarded Jack with a beautiful golden apple. From that day forward, Jack was always honest and trustworthy, and he was rewarded with many more magical gifts. The moral of the story is that honesty and trust are the most important things in life.\n",
|
||||
"Human: Wow, Assistant, that was a really good story. Congratulations!\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
" Thank you! I'm glad you enjoyed it.\n",
|
||||
"listening now...\n",
|
||||
"Recognizing...\n",
|
||||
" Thank you.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Assistant is aware that human input is being transcribed from audio and as such there may be some errors in the transcription. It will attempt to account for some words being swapped with similar-sounding words or phrases. Assistant will also keep responses concise, because human attention spans are more limited over the audio channel since it takes time to listen to a response.\n",
|
||||
"\n",
|
||||
"Human: Tell me a children's story about the importance of honesty and trust.\n",
|
||||
"AI: Once upon a time, there was a young boy named Jack who lived in a small village. Jack was always honest and trustworthy, and his friends and family knew they could always count on him. One day, Jack was walking through the forest when he stumbled upon a magical tree. The tree told Jack that if he was honest and trustworthy, he would be rewarded with a special gift. Jack was so excited, and he promised to always be honest and trustworthy. Sure enough, the tree rewarded Jack with a beautiful golden apple. From that day forward, Jack was always honest and trustworthy, and he was rewarded with many more magical gifts. The moral of the story is that honesty and trust are the most important things in life.\n",
|
||||
"Human: Wow, Assistant, that was a really good story. Congratulations!\n",
|
||||
"AI: Thank you! I'm glad you enjoyed it.\n",
|
||||
"Human: Thank you.\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
" You're welcome!\n",
|
||||
"listening now...\n",
|
||||
"Recognizing...\n",
|
||||
" Do you know of online brands like Photoshop and Freq that you don't have to download in some sort of way? Do you know of online brands like Photoshop and Freq that you don't have to download in some sort of way?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Assistant is aware that human input is being transcribed from audio and as such there may be some errors in the transcription. It will attempt to account for some words being swapped with similar-sounding words or phrases. Assistant will also keep responses concise, because human attention spans are more limited over the audio channel since it takes time to listen to a response.\n",
|
||||
"\n",
|
||||
"Human: Wow, Assistant, that was a really good story. Congratulations!\n",
|
||||
"AI: Thank you! I'm glad you enjoyed it.\n",
|
||||
"Human: Thank you.\n",
|
||||
"AI: You're welcome!\n",
|
||||
"Human: Do you know of online brands like Photoshop and Freq that you don't have to download in some sort of way? Do you know of online brands like Photoshop and Freq that you don't have to download in some sort of way?\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
" Yes, there are several online brands that offer photo editing and other creative tools without the need to download any software. Adobe Photoshop Express, Pixlr, and Fotor are some of the most popular online photo editing tools. Freq is an online music production platform that allows users to create and share music without downloading any software.\n",
|
||||
"listening now...\n",
|
||||
"Recognizing...\n",
|
||||
" Our whole process of awesome is free.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Assistant is aware that human input is being transcribed from audio and as such there may be some errors in the transcription. It will attempt to account for some words being swapped with similar-sounding words or phrases. Assistant will also keep responses concise, because human attention spans are more limited over the audio channel since it takes time to listen to a response.\n",
|
||||
"\n",
|
||||
"Human: Thank you.\n",
|
||||
"AI: You're welcome!\n",
|
||||
"Human: Do you know of online brands like Photoshop and Freq that you don't have to download in some sort of way? Do you know of online brands like Photoshop and Freq that you don't have to download in some sort of way?\n",
|
||||
"AI: Yes, there are several online brands that offer photo editing and other creative tools without the need to download any software. Adobe Photoshop Express, Pixlr, and Fotor are some of the most popular online photo editing tools. Freq is an online music production platform that allows users to create and share music without downloading any software.\n",
|
||||
"Human: Our whole process of awesome is free.\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
" That's great! It's always nice to have access to free tools and resources.\n",
|
||||
"listening now...\n",
|
||||
"Recognizing...\n",
|
||||
" No, I meant to ask, are those options that you mentioned free? No, I meant to ask, are those options that you mentioned free?\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Assistant is aware that human input is being transcribed from audio and as such there may be some errors in the transcription. It will attempt to account for some words being swapped with similar-sounding words or phrases. Assistant will also keep responses concise, because human attention spans are more limited over the audio channel since it takes time to listen to a response.\n",
|
||||
"\n",
|
||||
"Human: Do you know of online brands like Photoshop and Freq that you don't have to download in some sort of way? Do you know of online brands like Photoshop and Freq that you don't have to download in some sort of way?\n",
|
||||
"AI: Yes, there are several online brands that offer photo editing and other creative tools without the need to download any software. Adobe Photoshop Express, Pixlr, and Fotor are some of the most popular online photo editing tools. Freq is an online music production platform that allows users to create and share music without downloading any software.\n",
|
||||
"Human: Our whole process of awesome is free.\n",
|
||||
"AI: That's great! It's always nice to have access to free tools and resources.\n",
|
||||
"Human: No, I meant to ask, are those options that you mentioned free? No, I meant to ask, are those options that you mentioned free?\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
" Yes, the online brands I mentioned are all free to use. Adobe Photoshop Express, Pixlr, and Fotor are all free to use, and Freq is a free music production platform.\n",
|
||||
"listening now...\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "KeyboardInterrupt",
|
||||
"evalue": "",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[6], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m listen(\u001b[39mNone\u001b[39;49;00m)\n",
|
||||
"Cell \u001b[1;32mIn[5], line 20\u001b[0m, in \u001b[0;36mlisten\u001b[1;34m(command_queue)\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mlistening now...\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m 19\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m---> 20\u001b[0m audio \u001b[39m=\u001b[39m r\u001b[39m.\u001b[39;49mlisten(source, timeout\u001b[39m=\u001b[39;49m\u001b[39m5\u001b[39;49m, phrase_time_limit\u001b[39m=\u001b[39;49m\u001b[39m30\u001b[39;49m)\n\u001b[0;32m 21\u001b[0m \u001b[39m# audio = r.record(source,duration = 5)\u001b[39;00m\n\u001b[0;32m 22\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mRecognizing...\u001b[39m\u001b[39m'\u001b[39m)\n",
|
||||
"File \u001b[1;32mc:\\ProgramData\\miniconda3\\envs\\lang\\lib\\site-packages\\speech_recognition\\__init__.py:523\u001b[0m, in \u001b[0;36mRecognizer.listen\u001b[1;34m(self, source, timeout, phrase_time_limit, snowboy_configuration)\u001b[0m\n\u001b[0;32m 520\u001b[0m \u001b[39mif\u001b[39;00m phrase_time_limit \u001b[39mand\u001b[39;00m elapsed_time \u001b[39m-\u001b[39m phrase_start_time \u001b[39m>\u001b[39m phrase_time_limit:\n\u001b[0;32m 521\u001b[0m \u001b[39mbreak\u001b[39;00m\n\u001b[1;32m--> 523\u001b[0m buffer \u001b[39m=\u001b[39m source\u001b[39m.\u001b[39;49mstream\u001b[39m.\u001b[39;49mread(source\u001b[39m.\u001b[39;49mCHUNK)\n\u001b[0;32m 524\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(buffer) \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m: \u001b[39mbreak\u001b[39;00m \u001b[39m# reached end of the stream\u001b[39;00m\n\u001b[0;32m 525\u001b[0m frames\u001b[39m.\u001b[39mappend(buffer)\n",
|
||||
"File \u001b[1;32mc:\\ProgramData\\miniconda3\\envs\\lang\\lib\\site-packages\\speech_recognition\\__init__.py:199\u001b[0m, in \u001b[0;36mMicrophone.MicrophoneStream.read\u001b[1;34m(self, size)\u001b[0m\n\u001b[0;32m 198\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mread\u001b[39m(\u001b[39mself\u001b[39m, size):\n\u001b[1;32m--> 199\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mpyaudio_stream\u001b[39m.\u001b[39;49mread(size, exception_on_overflow\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m)\n",
|
||||
"File \u001b[1;32mc:\\ProgramData\\miniconda3\\envs\\lang\\lib\\site-packages\\pyaudio\\__init__.py:570\u001b[0m, in \u001b[0;36mPyAudio.Stream.read\u001b[1;34m(self, num_frames, exception_on_overflow)\u001b[0m\n\u001b[0;32m 567\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_is_input:\n\u001b[0;32m 568\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mIOError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mNot input stream\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[0;32m 569\u001b[0m paCanNotReadFromAnOutputOnlyStream)\n\u001b[1;32m--> 570\u001b[0m \u001b[39mreturn\u001b[39;00m pa\u001b[39m.\u001b[39;49mread_stream(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_stream, num_frames,\n\u001b[0;32m 571\u001b[0m exception_on_overflow)\n",
|
||||
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"listen(None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "lang",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.10"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -40,8 +40,24 @@
|
||||
"from langchain.vectorstores import DeepLake\n",
|
||||
"\n",
|
||||
"os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')\n",
|
||||
"os.environ['ACTIVELOOP_TOKEN'] = getpass.getpass('Activeloop Token:')\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
"os.environ['ACTIVELOOP_TOKEN'] = getpass.getpass('Activeloop Token:')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = OpenAIEmbeddings(disallowed_special=())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"disallowed_special=() is required to avoid `Exception: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte` from tiktoken for some repositories"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -120,7 +136,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = DeepLake.from_documents(texts, embeddings, dataset_path=\"hub://davitbun/twitter-algorithm\")"
|
||||
"username = \"davitbun\" # replace with your username from app.activeloop.ai\n",
|
||||
"db = DeepLake(dataset_path=f\"hub://{username}/twitter-algorithm\", embedding_function=embeddings, public=True) #dataset would be publicly available\n",
|
||||
"db.add_documents(texts)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -133,61 +151,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/twitter-algorithm\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"hub://davitbun/twitter-algorithm loaded successfully.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Deep Lake Dataset in hub://davitbun/twitter-algorithm already exists, loading from the storage\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://davitbun/twitter-algorithm', read_only=True, tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (23152, 1536) float32 None \n",
|
||||
" ids text (23152, 1) str None \n",
|
||||
" metadata json (23152, 1) str None \n",
|
||||
" text text (23152, 1) str None \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = DeepLake(dataset_path=\"hub://davitbun/twitter-algorithm\", read_only=True, embedding_function=embeddings)"
|
||||
]
|
||||
@@ -203,7 +169,7 @@
|
||||
"retriever.search_kwargs['distance_metric'] = 'cos'\n",
|
||||
"retriever.search_kwargs['fetch_k'] = 100\n",
|
||||
"retriever.search_kwargs['maximal_marginal_relevance'] = True\n",
|
||||
"retriever.search_kwargs['k'] = 20"
|
||||
"retriever.search_kwargs['k'] = 10"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -241,7 +207,7 @@
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||
"\n",
|
||||
"model = ChatOpenAI(model='gpt-4') # 'gpt-3.5-turbo',\n",
|
||||
"model = ChatOpenAI(model='gpt-3.5-turbo') # switch to 'gpt-4'\n",
|
||||
"qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -43,22 +43,9 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Found cached dataset json (/Users/harrisonchase/.cache/huggingface/datasets/LangChainDatasets___json/LangChainDatasets--agent-vectordb-qa-sota-pg-d3ae24016b514f92/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n"
|
||||
"Found cached dataset json (/Users/qt/.cache/huggingface/datasets/LangChainDatasets___json/LangChainDatasets--agent-vectordb-qa-sota-pg-d3ae24016b514f92/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e)\n",
|
||||
"100%|██████████| 1/1 [00:00<00:00, 414.42it/s]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "4c389519842e4b65afc33006a531dcbc",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0/1 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@@ -146,16 +133,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 12,
|
||||
"id": "ef84ff99",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Running Chroma using direct local API.\n",
|
||||
"Using DuckDB in-memory for database. Data will be transient.\n"
|
||||
"Using embedded DuckDB without persistence: data will be transient\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -173,7 +159,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 13,
|
||||
"id": "8843cb0c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -184,12 +170,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 16,
|
||||
"id": "573719a0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain_sota = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0), chain_type=\"stuff\", retriever=vectorstore_sota, input_key=\"question\")\n"
|
||||
"chain_sota = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0), chain_type=\"stuff\", retriever=vectorstore_sota.as_retriever(), input_key=\"question\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -202,7 +188,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 17,
|
||||
"id": "c2dbb014",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -212,16 +198,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 19,
|
||||
"id": "98d16f08",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Running Chroma using direct local API.\n",
|
||||
"Using DuckDB in-memory for database. Data will be transient.\n"
|
||||
"Using embedded DuckDB without persistence: data will be transient\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -231,12 +216,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 20,
|
||||
"id": "ec0aab02",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain_pg = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0), chain_type=\"stuff\", retriever=vectorstore_pg, input_key=\"question\")\n"
|
||||
"chain_pg = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0), chain_type=\"stuff\", retriever=vectorstore_pg.as_retriever(), input_key=\"question\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -249,7 +234,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 22,
|
||||
"id": "ade1aafa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -272,12 +257,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 34,
|
||||
"id": "104853f8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = initialize_agent(tools, OpenAI(temperature=0), agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, max_iterations=3)"
|
||||
"agent = initialize_agent(tools, OpenAI(temperature=0), agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, max_iterations=4)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -292,17 +277,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 35,
|
||||
"id": "4664e79f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The purpose of the NATO Alliance is to promote peace and security in the North Atlantic region by providing a collective defense against potential threats.'"
|
||||
"'The purpose of the NATO Alliance is to secure peace and stability in Europe after World War 2.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -322,7 +307,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 36,
|
||||
"id": "799f6c17",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -350,10 +335,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 37,
|
||||
"id": "1d583f03",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'What is the purpose of the NATO Alliance?',\n",
|
||||
" 'answer': 'The purpose of the NATO Alliance is to secure peace and stability in Europe after World War 2.',\n",
|
||||
" 'output': 'The purpose of the NATO Alliance is to secure peace and stability in Europe after World War 2.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"predictions[0]"
|
||||
]
|
||||
@@ -368,7 +366,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 38,
|
||||
"id": "d0a9341d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -378,7 +376,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"execution_count": 39,
|
||||
"id": "1612dec1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -398,7 +396,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"execution_count": 40,
|
||||
"id": "2a689df5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -409,17 +407,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"execution_count": 41,
|
||||
"id": "27b61215",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Counter({' CORRECT': 19, ' INCORRECT': 14})"
|
||||
"Counter({' CORRECT': 28, ' INCORRECT': 5})"
|
||||
]
|
||||
},
|
||||
"execution_count": 42,
|
||||
"execution_count": 41,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -439,7 +437,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"execution_count": 42,
|
||||
"id": "47c692a1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -449,20 +447,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"execution_count": 43,
|
||||
"id": "0ef976c1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'What is the purpose of the Bipartisan Innovation Act mentioned in the text?',\n",
|
||||
" 'answer': 'The Bipartisan Innovation Act will make record investments in emerging technologies and American manufacturing to level the playing field with China and other competitors.',\n",
|
||||
" 'output': 'The purpose of the Bipartisan Innovation Act is to promote innovation and entrepreneurship in the United States by providing tax incentives and other support for startups and small businesses.',\n",
|
||||
"{'input': 'What are the four common sense steps that the author suggests to move forward safely?',\n",
|
||||
" 'answer': 'The four common sense steps suggested by the author to move forward safely are: stay protected with vaccines and treatments, prepare for new variants, end the shutdown of schools and businesses, and stay vigilant.',\n",
|
||||
" 'output': 'The four common sense steps suggested in the most recent State of the Union address are: cutting the cost of prescription drugs, providing a pathway to citizenship for Dreamers, revising laws so businesses have the workers they need and families don’t wait decades to reunite, and protecting access to health care and preserving a woman’s right to choose.',\n",
|
||||
" 'grade': ' INCORRECT'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 46,
|
||||
"execution_count": 43,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -470,14 +468,6 @@
|
||||
"source": [
|
||||
"incorrect[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7710401a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -496,7 +486,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.9.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -20,3 +20,4 @@ Highlighting specific parts:
|
||||
Specific examples of this include:
|
||||
|
||||
- [AI Plugins](agents/custom_agent_with_plugin_retrieval.ipynb): an implementation of an agent that is designed to be able to use all AI Plugins.
|
||||
- [Wikibase Agent](agents/wikibase_agent.ipynb): an implementation of an agent that is designed to interact with Wikibase.
|
||||
|
||||
@@ -108,7 +108,7 @@
|
||||
"\n",
|
||||
"dataset_path = 'hub://'+org+'/data'\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"db = DeepLake.from_documents(texts, embeddings, dataset_path=dataset_path)"
|
||||
"db = DeepLake.from_documents(texts, embeddings, dataset_path=dataset_path, overwrite=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
102
docs/youtube.md
Normal file
102
docs/youtube.md
Normal file
@@ -0,0 +1,102 @@
|
||||
# YouTube
|
||||
|
||||
This is a collection of `LangChain` tutorials and videos on `YouTube`.
|
||||
|
||||
### Introduction to LangChain with Harrison Chase, creator of LangChain
|
||||
- [Building the Future with LLMs, `LangChain`, & `Pinecone`](https://youtu.be/nMniwlGyX-c) by [Pinecone](https://www.youtube.com/@pinecone-io)
|
||||
- [LangChain and Weaviate with Harrison Chase and Bob van Luijt - Weaviate Podcast #36](https://youtu.be/lhby7Ql7hbk) by [Weaviate • Vector Database](https://www.youtube.com/@Weaviate)
|
||||
- [LangChain Demo + Q&A with Harrison Chase](https://youtu.be/zaYTXQFR0_s?t=788) by [Full Stack Deep Learning](https://www.youtube.com/@FullStackDeepLearning)
|
||||
|
||||
## Tutorials
|
||||
|
||||
- [LangChain Crash Course - Build apps with language models](https://youtu.be/LbT1yp6quS8) by [Patrick Loeber](https://www.youtube.com/@patloeber)
|
||||
|
||||
- [LangChain for Gen AI and LLMs](https://www.youtube.com/playlist?list=PLIUOU7oqGTLieV9uTIFMm6_4PXg-hlN6F) by [James Briggs](https://www.youtube.com/@jamesbriggs):
|
||||
- #1 [Getting Started with `GPT-3` vs. Open Source LLMs](https://youtu.be/nE2skSRWTTs)
|
||||
- #2 [Prompt Templates for `GPT 3.5` and other LLMs](https://youtu.be/RflBcK0oDH0)
|
||||
- #3 [LLM Chains using `GPT 3.5` and other LLMs](https://youtu.be/S8j9Tk0lZHU)
|
||||
- #4 [Chatbot Memory for `Chat-GPT`, `Davinci` + other LLMs](https://youtu.be/X05uK0TZozM)
|
||||
- #5 [Chat with OpenAI in LangChain](https://youtu.be/CnAgB3A5OlU)
|
||||
- #6 [LangChain Agents Deep Dive with `GPT 3.5`](https://youtu.be/jSP-gSEyVeI)
|
||||
- [Prompt Engineering with OpenAI's GPT-3 and other LLMs](https://youtu.be/BP9fi_0XTlw)
|
||||
|
||||
- [LangChain 101](https://www.youtube.com/playlist?list=PLqZXAkvF1bPNQER9mLmDbntNfSpzdDIU5) by [Data Independent](https://www.youtube.com/@DataIndependent):
|
||||
- [What Is LangChain? - LangChain + `ChatGPT` Overview](https://youtu.be/_v_fgW2SkkQ)
|
||||
- [Quickstart Guide](https://youtu.be/kYRB-vJFy38)
|
||||
- [Beginner Guide To 7 Essential Concepts](https://youtu.be/2xxziIWmaSA)
|
||||
- [`OpenAI` + `Wolfram Alpha`](https://youtu.be/UijbzCIJ99g)
|
||||
- [Ask Questions On Your Custom (or Private) Files](https://youtu.be/EnT-ZTrcPrg)
|
||||
- [Connect `Google Drive Files` To `OpenAI`](https://youtu.be/IqqHqDcXLww)
|
||||
- [`YouTube Transcripts` + `OpenAI`](https://youtu.be/pNcQ5XXMgH4)
|
||||
- [Question A 300 Page Book (w/ `OpenAI` + `Pinecone`)](https://youtu.be/h0DHDp1FbmQ)
|
||||
- [Workaround `OpenAI's` Token Limit With Chain Types](https://youtu.be/f9_BWhCI4Zo)
|
||||
- [Build Your Own OpenAI + LangChain Web App in 23 Minutes](https://youtu.be/U_eV8wfMkXU)
|
||||
- [Working With The New `ChatGPT API`](https://youtu.be/e9P7FLi5Zy8)
|
||||
- [OpenAI + LangChain Wrote Me 100 Custom Sales Emails](https://youtu.be/y1pyAQM-3Bo)
|
||||
- [Structured Output From `OpenAI` (Clean Dirty Data)](https://youtu.be/KwAXfey-xQk)
|
||||
- [Connect `OpenAI` To +5,000 Tools (LangChain + `Zapier`)](https://youtu.be/7tNm0yiDigU)
|
||||
- [Use LLMs To Extract Data From Text (Expert Mode)](https://youtu.be/xZzvwR9jdPA)
|
||||
|
||||
- [LangChain How to and guides](https://www.youtube.com/playlist?list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ) by [Sam Witteveen](https://www.youtube.com/@samwitteveenai):
|
||||
- [LangChain Basics - LLMs & PromptTemplates with Colab](https://youtu.be/J_0qvRt4LNk)
|
||||
- [LangChain Basics - Tools and Chains](https://youtu.be/hI2BY7yl_Ac)
|
||||
- [`ChatGPT API` Announcement & Code Walkthrough with LangChain](https://youtu.be/phHqvLHCwH4)
|
||||
- [Conversations with Memory (explanation & code walkthrough)](https://youtu.be/X550Zbz_ROE)
|
||||
- [Chat with `Flan20B`](https://youtu.be/VW5LBavIfY4)
|
||||
- [Using `Hugging Face Models` locally (code walkthrough)](https://youtu.be/Kn7SX2Mx_Jk)
|
||||
- [`PAL` : Program-aided Language Models with LangChain code](https://youtu.be/dy7-LvDu-3s)
|
||||
- [Building a Summarization System with LangChain and `GPT-3` - Part 1](https://youtu.be/LNq_2s_H01Y)
|
||||
- [Building a Summarization System with LangChain and `GPT-3` - Part 2](https://youtu.be/d-yeHDLgKHw)
|
||||
- [Microsoft's `Visual ChatGPT` using LangChain](https://youtu.be/7YEiEyfPF5U)
|
||||
- [LangChain Agents - Joining Tools and Chains with Decisions](https://youtu.be/ziu87EXZVUE)
|
||||
- [Comparing LLMs with LangChain](https://youtu.be/rFNG0MIEuW0)
|
||||
- [Using `Constitutional AI` in LangChain](https://youtu.be/uoVqNFDwpX4)
|
||||
- [Talking to `Alpaca` with LangChain - Creating an Alpaca Chatbot](https://youtu.be/v6sF8Ed3nTE)
|
||||
- [Talk to your `CSV` & `Excel` with LangChain](https://youtu.be/xQ3mZhw69bc)
|
||||
- [`BabyAGI`: Discover the Power of Task-Driven Autonomous Agents!](https://youtu.be/QBcDLSE2ERA)
|
||||
- [Improve your `BabyAGI` with LangChain](https://youtu.be/DRgPyOXZ-oE)
|
||||
|
||||
- [LangChain](https://www.youtube.com/playlist?list=PLVEEucA9MYhOu89CX8H3MBZqayTbcCTMr) by [Prompt Engineering](https://www.youtube.com/@engineerprompt):
|
||||
- [LangChain Crash Course - All You Need to Know to Build Powerful Apps with LLMs](https://youtu.be/5-fc4Tlgmro)
|
||||
- [Working with MULTIPLE `PDF` Files in LangChain: `ChatGPT` for your Data](https://youtu.be/s5LhRdh5fu4)
|
||||
- [`ChatGPT` for YOUR OWN `PDF` files with LangChain](https://youtu.be/TLf90ipMzfE)
|
||||
- [Talk to YOUR DATA without OpenAI APIs: LangChain](https://youtu.be/wrD-fZvT6UI)
|
||||
|
||||
- LangChain by [Chat with data](https://www.youtube.com/@chatwithdata)
|
||||
- [LangChain Beginner's Tutorial for `Typescript`/`Javascript`](https://youtu.be/bH722QgRlhQ)
|
||||
- [`GPT-4` Tutorial: How to Chat With Multiple `PDF` Files (~1000 pages of Tesla's 10-K Annual Reports)](https://youtu.be/Ix9WIZpArm0)
|
||||
- [`GPT-4` & LangChain Tutorial: How to Chat With A 56-Page `PDF` Document (w/`Pinecone`)](https://youtu.be/ih9PBGVVOO4)
|
||||
|
||||
## Videos (sorted by views)
|
||||
|
||||
- [Building AI LLM Apps with LangChain (and more?) - LIVE STREAM](https://www.youtube.com/live/M-2Cj_2fzWI?feature=share) by [Nicholas Renotte](https://www.youtube.com/@NicholasRenotte)
|
||||
- [First look - `ChatGPT` + `WolframAlpha` (`GPT-3.5` and Wolfram|Alpha via LangChain by James Weaver)](https://youtu.be/wYGbY811oMo) by [Dr Alan D. Thompson](https://www.youtube.com/@DrAlanDThompson)
|
||||
- [LangChain explained - The hottest new Python framework](https://youtu.be/RoR4XJw8wIc) by [AssemblyAI](https://www.youtube.com/@AssemblyAI)
|
||||
- [Chatbot with INFINITE MEMORY using `OpenAI` & `Pinecone` - `GPT-3`, `Embeddings`, `ADA`, `Vector DB`, `Semantic`](https://youtu.be/2xNzB7xq8nk) by [David Shapiro ~ AI](https://www.youtube.com/@DavidShapiroAutomator)
|
||||
- [LangChain for LLMs is... basically just an Ansible playbook](https://youtu.be/X51N9C-OhlE) by [David Shapiro ~ AI](https://www.youtube.com/@DavidShapiroAutomator)
|
||||
- [Build your own LLM Apps with LangChain & `GPT-Index`](https://youtu.be/-75p09zFUJY) by [1littlecoder](https://www.youtube.com/@1littlecoder)
|
||||
- [`BabyAGI` - New System of Autonomous AI Agents with LangChain](https://youtu.be/lg3kJvf1kXo) by [1littlecoder](https://www.youtube.com/@1littlecoder)
|
||||
- [Run `BabyAGI` with Langchain Agents (with Python Code)](https://youtu.be/WosPGHPObx8) by [1littlecoder](https://www.youtube.com/@1littlecoder)
|
||||
- [How to Use Langchain With `Zapier` | Write and Send Email with GPT-3 | OpenAI API Tutorial](https://youtu.be/p9v2-xEa9A0) by [StarMorph AI](https://www.youtube.com/@starmorph)
|
||||
- [Use Your Locally Stored Files To Get Response From GPT - `OpenAI` | Langchain | Python](https://youtu.be/NC1Ni9KS-rk) by [Shweta Lodha](https://www.youtube.com/@shweta-lodha)
|
||||
- [`Langchain JS` | How to Use GPT-3, GPT-4 to Reference your own Data | `OpenAI Embeddings` Intro](https://youtu.be/veV2I-NEjaM) by [StarMorph AI](https://www.youtube.com/@starmorph)
|
||||
- [The easiest way to work with large language models | Learn LangChain in 10min](https://youtu.be/kmbS6FDQh7c) by [Sophia Yang](https://www.youtube.com/@SophiaYangDS)
|
||||
- [4 Autonomous AI Agents: “Westworld” simulation `BabyAGI`, `AutoGPT`, `Camel`, `LangChain`](https://youtu.be/yWbnH6inT_U) by [Sophia Yang](https://www.youtube.com/@SophiaYangDS)
|
||||
- [AI CAN SEARCH THE INTERNET? Langchain Agents + OpenAI ChatGPT](https://youtu.be/J-GL0htqda8) by [tylerwhatsgood](https://www.youtube.com/@tylerwhatsgood)
|
||||
- [`Weaviate` + LangChain for LLM apps presented by Erika Cardenas](https://youtu.be/7AGj4Td5Lgw) by [`Weaviate` • Vector Database](https://www.youtube.com/@Weaviate)
|
||||
- [Analyze Custom `CSV` Data with `GPT-4` using Langchain](https://youtu.be/Ew3sGdX8at4) by [Venelin Valkov](https://www.youtube.com/@venelin_valkov)
|
||||
- [Langchain Overview - How to Use Langchain & `ChatGPT`](https://youtu.be/oYVYIq0lOtI) by [Python In Office](https://www.youtube.com/@pythoninoffice6568)
|
||||
- [Custom langchain Agent & Tools with memory. Turn any `Python function` into langchain tool with Gpt 3](https://youtu.be/NIG8lXk0ULg) by [echohive](https://www.youtube.com/@echohive)
|
||||
- [`ChatGPT` with any `YouTube` video using langchain and `chromadb`](https://youtu.be/TQZfB2bzVwU) by [echohive](https://www.youtube.com/@echohive)
|
||||
- [How to Talk to a `PDF` using LangChain and `ChatGPT`](https://youtu.be/v2i1YDtrIwk) by [Automata Learning Lab](https://www.youtube.com/@automatalearninglab)
|
||||
- [Langchain Document Loaders Part 1: Unstructured Files](https://youtu.be/O5C0wfsen98) by [Merk](https://www.youtube.com/@merksworld)
|
||||
- [LangChain - Prompt Templates (what all the best prompt engineers use)](https://youtu.be/1aRu8b0XNOQ) by [Nick Daigler](https://www.youtube.com/@nick_daigs)
|
||||
- [LangChain. Crear aplicaciones Python impulsadas por GPT](https://youtu.be/DkW_rDndts8) by [Jesús Conde](https://www.youtube.com/@0utKast)
|
||||
- [Easiest Way to Use GPT In Your Products | LangChain Basics Tutorial](https://youtu.be/fLy0VenZyGc) by [Rachel Woods](https://www.youtube.com/@therachelwoods)
|
||||
- [`BabyAGI` + `GPT-4` Langchain Agent with Internet Access](https://youtu.be/wx1z_hs5P6E) by [tylerwhatsgood](https://www.youtube.com/@tylerwhatsgood)
|
||||
- [Learning LLM Agents. How does it actually work? LangChain, AutoGPT & OpenAI](https://youtu.be/mb_YAABSplk) by [Arnoldas Kemeklis](https://www.youtube.com/@processusAI)
|
||||
- [Get Started with LangChain in `Node.js`](https://youtu.be/Wxx1KUWJFv4) by [Developers Digest](https://www.youtube.com/@DevelopersDigest)
|
||||
- [LangChain + `OpenAI` tutorial: Building a Q&A system w/ own text data](https://youtu.be/DYOU_Z0hAwo) by [Samuel Chan](https://www.youtube.com/@SamuelChan)
|
||||
- [Langchain + `Zapier` Agent](https://youtu.be/yribLAb-pxA) by [Merk](https://www.youtube.com/@merksworld)
|
||||
- [Connecting the Internet with `ChatGPT` (LLMs) using Langchain And Answers Your Questions](https://youtu.be/9Y0TBC63yZg) by [Kamalraj M M](https://www.youtube.com/@insightbuilder)
|
||||
- [Build More Powerful LLM Applications for Business’s with LangChain (Beginners Guide)](https://youtu.be/sp3-WLKEcBg) by[ No Code Blackbox](https://www.youtube.com/@nocodeblackbox)
|
||||
@@ -47,8 +47,10 @@ from langchain.prompts import (
|
||||
PromptTemplate,
|
||||
)
|
||||
from langchain.sql_database import SQLDatabase
|
||||
from langchain.utilities import ArxivAPIWrapper
|
||||
from langchain.utilities.google_search import GoogleSearchAPIWrapper
|
||||
from langchain.utilities.google_serper import GoogleSerperAPIWrapper
|
||||
from langchain.utilities.powerbi import PowerBIDataset
|
||||
from langchain.utilities.searx_search import SearxSearchWrapper
|
||||
from langchain.utilities.serpapi import SerpAPIWrapper
|
||||
from langchain.utilities.wikipedia import WikipediaAPIWrapper
|
||||
@@ -74,6 +76,7 @@ __all__ = [
|
||||
"LLMBashChain",
|
||||
"LLMCheckerChain",
|
||||
"LLMMathChain",
|
||||
"ArxivAPIWrapper",
|
||||
"SelfAskWithSearchChain",
|
||||
"SerpAPIWrapper",
|
||||
"SerpAPIChain",
|
||||
@@ -104,6 +107,7 @@ __all__ = [
|
||||
"HuggingFacePipeline",
|
||||
"SQLDatabase",
|
||||
"SQLDatabaseChain",
|
||||
"PowerBIDataset",
|
||||
"FAISS",
|
||||
"MRKLChain",
|
||||
"VectorDBQA",
|
||||
|
||||
@@ -12,6 +12,8 @@ from langchain.agents.agent_toolkits import (
|
||||
create_json_agent,
|
||||
create_openapi_agent,
|
||||
create_pandas_dataframe_agent,
|
||||
create_pbi_agent,
|
||||
create_pbi_chat_agent,
|
||||
create_sql_agent,
|
||||
create_vectorstore_agent,
|
||||
create_vectorstore_router_agent,
|
||||
@@ -44,6 +46,8 @@ __all__ = [
|
||||
"ConversationalChatAgent",
|
||||
"load_agent",
|
||||
"create_sql_agent",
|
||||
"create_pbi_agent",
|
||||
"create_pbi_chat_agent",
|
||||
"create_json_agent",
|
||||
"create_openapi_agent",
|
||||
"create_vectorstore_router_agent",
|
||||
|
||||
@@ -8,6 +8,9 @@ from langchain.agents.agent_toolkits.nla.toolkit import NLAToolkit
|
||||
from langchain.agents.agent_toolkits.openapi.base import create_openapi_agent
|
||||
from langchain.agents.agent_toolkits.openapi.toolkit import OpenAPIToolkit
|
||||
from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
|
||||
from langchain.agents.agent_toolkits.powerbi.base import create_pbi_agent
|
||||
from langchain.agents.agent_toolkits.powerbi.chat_base import create_pbi_chat_agent
|
||||
from langchain.agents.agent_toolkits.powerbi.toolkit import PowerBIToolkit
|
||||
from langchain.agents.agent_toolkits.python.base import create_python_agent
|
||||
from langchain.agents.agent_toolkits.sql.base import create_sql_agent
|
||||
from langchain.agents.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
|
||||
@@ -26,11 +29,14 @@ __all__ = [
|
||||
"create_json_agent",
|
||||
"create_sql_agent",
|
||||
"create_openapi_agent",
|
||||
"create_pbi_agent",
|
||||
"create_pbi_chat_agent",
|
||||
"create_python_agent",
|
||||
"create_vectorstore_agent",
|
||||
"JsonToolkit",
|
||||
"SQLDatabaseToolkit",
|
||||
"NLAToolkit",
|
||||
"PowerBIToolkit",
|
||||
"OpenAPIToolkit",
|
||||
"VectorStoreToolkit",
|
||||
"create_vectorstore_router_agent",
|
||||
|
||||
1
langchain/agents/agent_toolkits/powerbi/__init__.py
Normal file
1
langchain/agents/agent_toolkits/powerbi/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Power BI agent."""
|
||||
62
langchain/agents/agent_toolkits/powerbi/base.py
Normal file
62
langchain/agents/agent_toolkits/powerbi/base.py
Normal file
@@ -0,0 +1,62 @@
|
||||
"""Power BI agent."""
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from langchain.agents import AgentExecutor
|
||||
from langchain.agents.agent_toolkits.powerbi.prompt import (
|
||||
POWERBI_PREFIX,
|
||||
POWERBI_SUFFIX,
|
||||
)
|
||||
from langchain.agents.agent_toolkits.powerbi.toolkit import PowerBIToolkit
|
||||
from langchain.agents.mrkl.base import ZeroShotAgent
|
||||
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS
|
||||
from langchain.callbacks.base import BaseCallbackManager
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.utilities.powerbi import PowerBIDataset
|
||||
|
||||
|
||||
def create_pbi_agent(
|
||||
llm: BaseLLM,
|
||||
toolkit: Optional[PowerBIToolkit],
|
||||
powerbi: Optional[PowerBIDataset] = None,
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
prefix: str = POWERBI_PREFIX,
|
||||
suffix: str = POWERBI_SUFFIX,
|
||||
format_instructions: str = FORMAT_INSTRUCTIONS,
|
||||
examples: Optional[str] = None,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
top_k: int = 10,
|
||||
verbose: bool = False,
|
||||
agent_kwargs: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Dict[str, Any],
|
||||
) -> AgentExecutor:
|
||||
"""Construct a pbi agent from an LLM and tools."""
|
||||
if toolkit is None:
|
||||
if powerbi is None:
|
||||
raise ValueError("Must provide either a toolkit or powerbi dataset")
|
||||
toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples)
|
||||
tools = toolkit.get_tools()
|
||||
|
||||
agent = ZeroShotAgent(
|
||||
llm_chain=LLMChain(
|
||||
llm=llm,
|
||||
prompt=ZeroShotAgent.create_prompt(
|
||||
tools,
|
||||
prefix=prefix.format(top_k=top_k),
|
||||
suffix=suffix,
|
||||
format_instructions=format_instructions,
|
||||
input_variables=input_variables,
|
||||
),
|
||||
callback_manager=callback_manager, # type: ignore
|
||||
verbose=verbose,
|
||||
),
|
||||
allowed_tools=[tool.name for tool in tools],
|
||||
**(agent_kwargs or {}),
|
||||
)
|
||||
return AgentExecutor.from_agent_and_tools(
|
||||
agent=agent,
|
||||
tools=tools,
|
||||
callback_manager=callback_manager,
|
||||
verbose=verbose,
|
||||
**kwargs,
|
||||
)
|
||||
60
langchain/agents/agent_toolkits/powerbi/chat_base.py
Normal file
60
langchain/agents/agent_toolkits/powerbi/chat_base.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""Power BI agent."""
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from langchain.agents import AgentExecutor
|
||||
from langchain.agents.agent_toolkits.powerbi.prompt import (
|
||||
POWERBI_CHAT_PREFIX,
|
||||
POWERBI_CHAT_SUFFIX,
|
||||
)
|
||||
from langchain.agents.agent_toolkits.powerbi.toolkit import PowerBIToolkit
|
||||
from langchain.agents.conversational_chat.base import ConversationalChatAgent
|
||||
from langchain.callbacks.base import BaseCallbackManager
|
||||
from langchain.chat_models.base import BaseChatModel
|
||||
from langchain.memory import ConversationBufferMemory
|
||||
from langchain.memory.chat_memory import BaseChatMemory
|
||||
from langchain.utilities.powerbi import PowerBIDataset
|
||||
|
||||
|
||||
def create_pbi_chat_agent(
|
||||
llm: BaseChatModel,
|
||||
toolkit: Optional[PowerBIToolkit],
|
||||
powerbi: Optional[PowerBIDataset] = None,
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
prefix: str = POWERBI_CHAT_PREFIX,
|
||||
suffix: str = POWERBI_CHAT_SUFFIX,
|
||||
examples: Optional[str] = None,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
memory: Optional[BaseChatMemory] = None,
|
||||
top_k: int = 10,
|
||||
verbose: bool = False,
|
||||
agent_kwargs: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Dict[str, Any],
|
||||
) -> AgentExecutor:
|
||||
"""Construct a pbi agent from an Chat LLM and tools.
|
||||
|
||||
If you supply only a toolkit and no powerbi dataset, the same LLM is used for both.
|
||||
"""
|
||||
if toolkit is None:
|
||||
if powerbi is None:
|
||||
raise ValueError("Must provide either a toolkit or powerbi dataset")
|
||||
toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples)
|
||||
tools = toolkit.get_tools()
|
||||
agent = ConversationalChatAgent.from_llm_and_tools(
|
||||
llm=llm,
|
||||
tools=tools,
|
||||
system_message=prefix.format(top_k=top_k),
|
||||
user_message=suffix,
|
||||
input_variables=input_variables,
|
||||
callback_manager=callback_manager,
|
||||
verbose=verbose,
|
||||
**(agent_kwargs or {}),
|
||||
)
|
||||
return AgentExecutor.from_agent_and_tools(
|
||||
agent=agent,
|
||||
tools=tools,
|
||||
callback_manager=callback_manager,
|
||||
memory=memory
|
||||
or ConversationBufferMemory(memory_key="chat_history", return_messages=True),
|
||||
verbose=verbose,
|
||||
**kwargs,
|
||||
)
|
||||
48
langchain/agents/agent_toolkits/powerbi/prompt.py
Normal file
48
langchain/agents/agent_toolkits/powerbi/prompt.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# flake8: noqa
|
||||
"""Prompts for PowerBI agent."""
|
||||
|
||||
|
||||
POWERBI_PREFIX = """You are an agent designed to interact with a Power BI Dataset.
|
||||
Given an input question, create a syntactically correct DAX query to run, then look at the results of the query and return the answer.
|
||||
Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results.
|
||||
You can order the results by a relevant column to return the most interesting examples in the database.
|
||||
Never query for all the columns from a specific table, only ask for a the few relevant columns given the question.
|
||||
|
||||
You have access to tools for interacting with the Power BI Dataset. Only use the below tools. Only use the information returned by the below tools to construct your final answer. Usually I should first ask which tables I have, then how each table is defined and then ask the question to query tool to create a query for me and then I should ask the query tool to execute it, finally create a nice sentence that answers the question. If you receive an error back that mentions that the query was wrong try to phrase the question differently and get a new query from the question to query tool.
|
||||
|
||||
If the question does not seem related to the dataset, just return "I don't know" as the answer.
|
||||
"""
|
||||
|
||||
POWERBI_SUFFIX = """Begin!
|
||||
|
||||
Question: {input}
|
||||
Thought: I should first ask which tables I have, then how each table is defined and then ask the question to query tool to create a query for me and then I should ask the query tool to execute it, finally create a nice sentence that answers the question.
|
||||
{agent_scratchpad}"""
|
||||
|
||||
POWERBI_CHAT_PREFIX = """Assistant is a large language model trained by OpenAI built to help users interact with a PowerBI Dataset.
|
||||
|
||||
Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
|
||||
|
||||
Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.
|
||||
|
||||
Given an input question, create a syntactically correct DAX query to run, then look at the results of the query and return the answer. Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results. You can order the results by a relevant column to return the most interesting examples in the database.
|
||||
|
||||
Overall, Assistant is a powerful system that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.
|
||||
|
||||
Usually I should first ask which tables I have, then how each table is defined and then ask the question to query tool to create a query for me and then I should ask the query tool to execute it, finally create a complete sentence that answers the question. If you receive an error back that mentions that the query was wrong try to phrase the question differently and get a new query from the question to query tool.
|
||||
"""
|
||||
|
||||
POWERBI_CHAT_SUFFIX = """TOOLS
|
||||
------
|
||||
Assistant can ask the user to use tools to look up information that may be helpful in answering the users original question. The tools the human can use are:
|
||||
|
||||
{{tools}}
|
||||
|
||||
{format_instructions}
|
||||
|
||||
USER'S INPUT
|
||||
--------------------
|
||||
Here is the user's input (remember to respond with a markdown code snippet of a json blob with a single action, and NOTHING else):
|
||||
|
||||
{{{{input}}}}
|
||||
"""
|
||||
67
langchain/agents/agent_toolkits/powerbi/toolkit.py
Normal file
67
langchain/agents/agent_toolkits/powerbi/toolkit.py
Normal file
@@ -0,0 +1,67 @@
|
||||
"""Toolkit for interacting with a Power BI dataset."""
|
||||
from typing import List, Optional
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from langchain.agents.agent_toolkits.base import BaseToolkit
|
||||
from langchain.callbacks.base import BaseCallbackManager
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.schema import BaseLanguageModel
|
||||
from langchain.tools import BaseTool
|
||||
from langchain.tools.powerbi.prompt import QUESTION_TO_QUERY
|
||||
from langchain.tools.powerbi.tool import (
|
||||
InfoPowerBITool,
|
||||
InputToQueryTool,
|
||||
ListPowerBITool,
|
||||
QueryPowerBITool,
|
||||
)
|
||||
from langchain.utilities.powerbi import PowerBIDataset
|
||||
|
||||
|
||||
class PowerBIToolkit(BaseToolkit):
|
||||
"""Toolkit for interacting with PowerBI dataset."""
|
||||
|
||||
powerbi: PowerBIDataset = Field(exclude=True)
|
||||
llm: BaseLanguageModel = Field(exclude=True)
|
||||
examples: Optional[str] = None
|
||||
callback_manager: Optional[BaseCallbackManager] = None
|
||||
|
||||
class Config:
|
||||
"""Configuration for this pydantic object."""
|
||||
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
def get_tools(self) -> List[BaseTool]:
|
||||
"""Get the tools in the toolkit."""
|
||||
if self.callback_manager:
|
||||
chain = (
|
||||
LLMChain(
|
||||
llm=self.llm,
|
||||
callback_manager=self.callback_manager,
|
||||
prompt=PromptTemplate(
|
||||
template=QUESTION_TO_QUERY,
|
||||
input_variables=["tool_input", "tables", "schemas", "examples"],
|
||||
),
|
||||
),
|
||||
)
|
||||
else:
|
||||
chain = (
|
||||
LLMChain(
|
||||
llm=self.llm,
|
||||
prompt=PromptTemplate(
|
||||
template=QUESTION_TO_QUERY,
|
||||
input_variables=["tool_input", "tables", "schemas", "examples"],
|
||||
),
|
||||
),
|
||||
)
|
||||
return [
|
||||
QueryPowerBITool(powerbi=self.powerbi),
|
||||
InfoPowerBITool(powerbi=self.powerbi),
|
||||
ListPowerBITool(powerbi=self.powerbi),
|
||||
InputToQueryTool(
|
||||
powerbi=self.powerbi,
|
||||
llm_chain=chain,
|
||||
examples=self.examples,
|
||||
),
|
||||
]
|
||||
@@ -11,6 +11,7 @@ from langchain.chains.llm_math.base import LLMMathChain
|
||||
from langchain.chains.pal.base import PALChain
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.requests import TextRequestsWrapper
|
||||
from langchain.tools.arxiv.tool import ArxivQueryRun
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.tools.bing_search.tool import BingSearchRun
|
||||
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
|
||||
@@ -26,6 +27,7 @@ from langchain.tools.requests.tool import (
|
||||
from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun
|
||||
from langchain.tools.wikipedia.tool import WikipediaQueryRun
|
||||
from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun
|
||||
from langchain.utilities import ArxivAPIWrapper
|
||||
from langchain.utilities.apify import ApifyWrapper
|
||||
from langchain.utilities.bash import BashProcess
|
||||
from langchain.utilities.bing_search import BingSearchAPIWrapper
|
||||
@@ -175,6 +177,10 @@ def _get_wikipedia(**kwargs: Any) -> BaseTool:
|
||||
return WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(**kwargs))
|
||||
|
||||
|
||||
def _get_arxiv(**kwargs: Any) -> BaseTool:
|
||||
return ArxivQueryRun(api_wrapper=ArxivAPIWrapper(**kwargs))
|
||||
|
||||
|
||||
def _get_google_serper(**kwargs: Any) -> BaseTool:
|
||||
return Tool(
|
||||
name="Serper Search",
|
||||
@@ -234,7 +240,7 @@ _EXTRA_OPTIONAL_TOOLS = {
|
||||
"google-serper": (_get_google_serper, ["serper_api_key"]),
|
||||
"serpapi": (_get_serpapi, ["serpapi_api_key", "aiosession"]),
|
||||
"searx-search": (_get_searx_search, ["searx_host", "engines", "aiosession"]),
|
||||
"wikipedia": (_get_wikipedia, ["top_k_results"]),
|
||||
"wikipedia": (_get_wikipedia, ["top_k_results", "lang"]),
|
||||
"human": (_get_human_tool, ["prompt_func", "input_func"]),
|
||||
}
|
||||
|
||||
|
||||
@@ -1,10 +1,15 @@
|
||||
"""Interface for tools."""
|
||||
from functools import partial
|
||||
from inspect import signature
|
||||
from typing import Any, Awaitable, Callable, Optional, Type, Union
|
||||
|
||||
from pydantic import BaseModel, validate_arguments
|
||||
from pydantic import BaseModel, validate_arguments, validator
|
||||
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.tools.base import (
|
||||
BaseTool,
|
||||
create_schema_from_function,
|
||||
get_filtered_args,
|
||||
)
|
||||
|
||||
|
||||
class Tool(BaseTool):
|
||||
@@ -16,6 +21,21 @@ class Tool(BaseTool):
|
||||
coroutine: Optional[Callable[..., Awaitable[str]]] = None
|
||||
"""The asynchronous version of the function."""
|
||||
|
||||
@validator("func", pre=True, always=True)
|
||||
def validate_func_not_partial(cls, func: Callable) -> Callable:
|
||||
"""Check that the function is not a partial."""
|
||||
if isinstance(func, partial):
|
||||
raise ValueError("Partial functions not yet supported in tools.")
|
||||
return func
|
||||
|
||||
@property
|
||||
def args(self) -> dict:
|
||||
if self.args_schema is not None:
|
||||
return self.args_schema.schema()["properties"]
|
||||
else:
|
||||
inferred_model = validate_arguments(self.func).model # type: ignore
|
||||
return get_filtered_args(inferred_model, self.func)
|
||||
|
||||
def _run(self, *args: Any, **kwargs: Any) -> str:
|
||||
"""Use the tool."""
|
||||
return self.func(*args, **kwargs)
|
||||
@@ -94,7 +114,7 @@ def tool(
|
||||
description = f"{tool_name}{signature(func)} - {func.__doc__.strip()}"
|
||||
_args_schema = args_schema
|
||||
if _args_schema is None and infer_schema:
|
||||
_args_schema = validate_arguments(func).model # type: ignore
|
||||
_args_schema = create_schema_from_function(f"{tool_name}Schema", func)
|
||||
tool_ = Tool(
|
||||
name=tool_name,
|
||||
func=func,
|
||||
|
||||
@@ -130,7 +130,7 @@ class CometCallbackHandler(BaseMetadataCallbackHandler, BaseCallbackHandler):
|
||||
warning = (
|
||||
"The comet_ml callback is currently in beta and is subject to change "
|
||||
"based on updates to `langchain`. Please report any issues to "
|
||||
"https://github.com/comet-ml/issue_tracking/issues with the tag "
|
||||
"https://github.com/comet-ml/issue-tracking/issues with the tag "
|
||||
"`langchain`."
|
||||
)
|
||||
self.comet_ml.LOGGER.warning(warning)
|
||||
|
||||
@@ -40,8 +40,8 @@ class StuffDocumentsChain(BaseCombineDocumentsChain):
|
||||
@root_validator(pre=True)
|
||||
def get_default_document_variable_name(cls, values: Dict) -> Dict:
|
||||
"""Get default document variable name, if not provided."""
|
||||
llm_chain_variables = values["llm_chain"].prompt.input_variables
|
||||
if "document_variable_name" not in values:
|
||||
llm_chain_variables = values["llm_chain"].prompt.input_variables
|
||||
if len(llm_chain_variables) == 1:
|
||||
values["document_variable_name"] = llm_chain_variables[0]
|
||||
else:
|
||||
@@ -50,7 +50,6 @@ class StuffDocumentsChain(BaseCombineDocumentsChain):
|
||||
"multiple llm_chain_variables"
|
||||
)
|
||||
else:
|
||||
llm_chain_variables = values["llm_chain"].prompt.input_variables
|
||||
if values["document_variable_name"] not in llm_chain_variables:
|
||||
raise ValueError(
|
||||
f"document_variable_name {values['document_variable_name']} was "
|
||||
|
||||
@@ -15,16 +15,32 @@ from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.chains.question_answering import load_qa_chain
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
from langchain.schema import BaseLanguageModel, BaseRetriever, Document
|
||||
from langchain.schema import BaseLanguageModel, BaseMessage, BaseRetriever, Document
|
||||
from langchain.vectorstores.base import VectorStore
|
||||
|
||||
# Depending on the memory type and configuration, the chat history format may differ.
|
||||
# This needs to be consolidated.
|
||||
CHAT_TURN_TYPE = Union[Tuple[str, str], BaseMessage]
|
||||
|
||||
def _get_chat_history(chat_history: List[Tuple[str, str]]) -> str:
|
||||
|
||||
_ROLE_MAP = {"human": "Human: ", "ai": "Assistant: "}
|
||||
|
||||
|
||||
def _get_chat_history(chat_history: List[CHAT_TURN_TYPE]) -> str:
|
||||
buffer = ""
|
||||
for human_s, ai_s in chat_history:
|
||||
human = "Human: " + human_s
|
||||
ai = "Assistant: " + ai_s
|
||||
buffer += "\n" + "\n".join([human, ai])
|
||||
for dialogue_turn in chat_history:
|
||||
if isinstance(dialogue_turn, BaseMessage):
|
||||
role_prefix = _ROLE_MAP.get(dialogue_turn.type, f"{dialogue_turn.type}: ")
|
||||
buffer += f"\n{role_prefix}{dialogue_turn.content}"
|
||||
elif isinstance(dialogue_turn, tuple):
|
||||
human = "Human: " + dialogue_turn[0]
|
||||
ai = "Assistant: " + dialogue_turn[1]
|
||||
buffer += "\n" + "\n".join([human, ai])
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unsupported chat history format: {type(dialogue_turn)}."
|
||||
f" Full chat history: {chat_history} "
|
||||
)
|
||||
return buffer
|
||||
|
||||
|
||||
@@ -35,7 +51,7 @@ class BaseConversationalRetrievalChain(Chain):
|
||||
question_generator: LLMChain
|
||||
output_key: str = "answer"
|
||||
return_source_documents: bool = False
|
||||
get_chat_history: Optional[Callable[[Tuple[str, str]], str]] = None
|
||||
get_chat_history: Optional[Callable[[CHAT_TURN_TYPE], str]] = None
|
||||
"""Return the source documents."""
|
||||
|
||||
class Config:
|
||||
|
||||
@@ -106,8 +106,8 @@ class LLMMathChain(Chain):
|
||||
output, color="yellow", verbose=self.verbose
|
||||
)
|
||||
else:
|
||||
await self.callback_manager.on_text("\nAnswer: ", verbose=self.verbose)
|
||||
await self.callback_manager.on_text(
|
||||
self.callback_manager.on_text("\nAnswer: ", verbose=self.verbose)
|
||||
self.callback_manager.on_text(
|
||||
output, color="yellow", verbose=self.verbose
|
||||
)
|
||||
answer = "Answer: " + output
|
||||
|
||||
@@ -154,8 +154,9 @@ class RetrievalQA(BaseRetrievalQA):
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.chains import RetrievalQA
|
||||
from langchain.faiss import FAISS
|
||||
vectordb = FAISS(...)
|
||||
retrievalQA = RetrievalQA.from_llm(llm=OpenAI(), retriever=vectordb)
|
||||
from langchain.vectorstores.base import VectorStoreRetriever
|
||||
retriever = VectorStoreRetriever(vectorstore=FAISS(...))
|
||||
retrievalQA = RetrievalQA.from_llm(llm=OpenAI(), retriever=retriever)
|
||||
|
||||
"""
|
||||
|
||||
|
||||
@@ -40,6 +40,27 @@ DECIDER_PROMPT = PromptTemplate(
|
||||
output_parser=CommaSeparatedListOutputParser(),
|
||||
)
|
||||
|
||||
_duckdb_prompt = """You are a DuckDB expert. Given an input question, first create a syntactically correct DuckDB query to run, then look at the results of the query and return the answer to the input question.
|
||||
Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per DuckDB. You can order the results to return the most informative data in the database.
|
||||
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
|
||||
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
|
||||
|
||||
Use the following format:
|
||||
|
||||
Question: "Question here"
|
||||
SQLQuery: "SQL Query to run"
|
||||
SQLResult: "Result of the SQLQuery"
|
||||
Answer: "Final answer here"
|
||||
|
||||
Only use the following tables:
|
||||
{table_info}
|
||||
|
||||
Question: {input}"""
|
||||
|
||||
DUCKDB_PROMPT = PromptTemplate(
|
||||
input_variables=["input", "table_info", "top_k"],
|
||||
template=_duckdb_prompt,
|
||||
)
|
||||
|
||||
_googlesql_prompt = """You are a GoogleSQL expert. Given an input question, first create a syntactically correct GoogleSQL query to run, then look at the results of the query and return the answer to the input question.
|
||||
Unless the user specifies in the question a specific number of examples to obtain, query for at most {top_k} results using the LIMIT clause as per GoogleSQL. You can order the results to return the most informative data in the database.
|
||||
@@ -201,6 +222,7 @@ SQLITE_PROMPT = PromptTemplate(
|
||||
|
||||
|
||||
SQL_PROMPTS = {
|
||||
"duckdb": DUCKDB_PROMPT,
|
||||
"googlesql": GOOGLESQL_PROMPT,
|
||||
"mssql": MSSQL_PROMPT,
|
||||
"mysql": MYSQL_PROMPT,
|
||||
|
||||
@@ -9,7 +9,7 @@ REFINE_PROMPT_TMPL = (
|
||||
"------------\n"
|
||||
"{text}\n"
|
||||
"------------\n"
|
||||
"Given the new context, refine the original summary"
|
||||
"Given the new context, refine the original summary\n"
|
||||
"If the context isn't useful, return the original summary."
|
||||
)
|
||||
REFINE_PROMPT = PromptTemplate(
|
||||
|
||||
@@ -12,7 +12,9 @@ from langchain.document_loaders.azure_blob_storage_file import (
|
||||
from langchain.document_loaders.bigquery import BigQueryLoader
|
||||
from langchain.document_loaders.bilibili import BiliBiliLoader
|
||||
from langchain.document_loaders.blackboard import BlackboardLoader
|
||||
from langchain.document_loaders.chatgpt import ChatGPTLoader
|
||||
from langchain.document_loaders.college_confidential import CollegeConfidentialLoader
|
||||
from langchain.document_loaders.confluence import ConfluenceLoader
|
||||
from langchain.document_loaders.conllu import CoNLLULoader
|
||||
from langchain.document_loaders.csv_loader import CSVLoader
|
||||
from langchain.document_loaders.dataframe import DataFrameLoader
|
||||
@@ -36,6 +38,7 @@ from langchain.document_loaders.gutenberg import GutenbergLoader
|
||||
from langchain.document_loaders.hn import HNLoader
|
||||
from langchain.document_loaders.html import UnstructuredHTMLLoader
|
||||
from langchain.document_loaders.html_bs import BSHTMLLoader
|
||||
from langchain.document_loaders.hugging_face_dataset import HuggingFaceDatasetLoader
|
||||
from langchain.document_loaders.ifixit import IFixitLoader
|
||||
from langchain.document_loaders.image import UnstructuredImageLoader
|
||||
from langchain.document_loaders.image_captions import ImageCaptionLoader
|
||||
@@ -54,8 +57,10 @@ from langchain.document_loaders.pdf import (
|
||||
UnstructuredPDFLoader,
|
||||
)
|
||||
from langchain.document_loaders.powerpoint import UnstructuredPowerPointLoader
|
||||
from langchain.document_loaders.python import PythonLoader
|
||||
from langchain.document_loaders.readthedocs import ReadTheDocsLoader
|
||||
from langchain.document_loaders.roam import RoamLoader
|
||||
from langchain.document_loaders.rtf import UnstructuredRTFLoader
|
||||
from langchain.document_loaders.s3_directory import S3DirectoryLoader
|
||||
from langchain.document_loaders.s3_file import S3FileLoader
|
||||
from langchain.document_loaders.sitemap import SitemapLoader
|
||||
@@ -105,6 +110,7 @@ __all__ = [
|
||||
"OutlookMessageLoader",
|
||||
"UnstructuredEPubLoader",
|
||||
"UnstructuredMarkdownLoader",
|
||||
"UnstructuredRTFLoader",
|
||||
"RoamLoader",
|
||||
"YoutubeLoader",
|
||||
"S3FileLoader",
|
||||
@@ -152,4 +158,8 @@ __all__ = [
|
||||
"TwitterTweetLoader",
|
||||
"ImageCaptionLoader",
|
||||
"DiscordChatLoader",
|
||||
"ConfluenceLoader",
|
||||
"PythonLoader",
|
||||
"ChatGPTLoader",
|
||||
"HuggingFaceDatasetLoader",
|
||||
]
|
||||
|
||||
50
langchain/document_loaders/chatgpt.py
Normal file
50
langchain/document_loaders/chatgpt.py
Normal file
@@ -0,0 +1,50 @@
|
||||
"""Load conversations from ChatGPT data export"""
|
||||
import datetime
|
||||
import json
|
||||
from typing import List
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
|
||||
|
||||
def concatenate_rows(message: dict, title: str) -> str:
|
||||
if not message:
|
||||
return ""
|
||||
|
||||
sender = message["author"]["role"] if message["author"] else "unknown"
|
||||
text = message["content"]["parts"][0]
|
||||
date = datetime.datetime.fromtimestamp(message["create_time"]).strftime(
|
||||
"%Y-%m-%d %H:%M:%S"
|
||||
)
|
||||
return f"{title} - {sender} on {date}: {text}\n\n"
|
||||
|
||||
|
||||
class ChatGPTLoader(BaseLoader):
|
||||
"""Loader that loads conversations from exported ChatGPT data."""
|
||||
|
||||
def __init__(self, log_file: str, num_logs: int = -1):
|
||||
self.log_file = log_file
|
||||
self.num_logs = num_logs
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
with open(self.log_file, encoding="utf8") as f:
|
||||
data = json.load(f)[: self.num_logs] if self.num_logs else json.load(f)
|
||||
|
||||
documents = []
|
||||
for d in data:
|
||||
title = d["title"]
|
||||
messages = d["mapping"]
|
||||
text = "".join(
|
||||
[
|
||||
concatenate_rows(messages[key]["message"], title)
|
||||
for idx, key in enumerate(messages)
|
||||
if not (
|
||||
idx == 0
|
||||
and messages[key]["message"]["author"]["role"] == "system"
|
||||
)
|
||||
]
|
||||
)
|
||||
metadata = {"source": str(self.log_file)}
|
||||
documents.append(Document(page_content=text, metadata=metadata))
|
||||
|
||||
return documents
|
||||
@@ -1,9 +1,19 @@
|
||||
"""Load Data from a Confluence Space"""
|
||||
import logging
|
||||
from typing import Any, Callable, List, Optional, Union
|
||||
|
||||
from tenacity import (
|
||||
before_sleep_log,
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_exponential,
|
||||
)
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ConfluenceLoader(BaseLoader):
|
||||
"""
|
||||
@@ -44,8 +54,16 @@ class ConfluenceLoader(BaseLoader):
|
||||
:type oauth2: dict, optional
|
||||
:param cloud: _description_, defaults to True
|
||||
:type cloud: bool, optional
|
||||
:raises ValueError: _description_
|
||||
:raises ImportError: _description_
|
||||
:param number_of_retries: How many times to retry, defaults to 3
|
||||
:type number_of_retries: Optional[int], optional
|
||||
:param min_retry_seconds: defaults to 2
|
||||
:type min_retry_seconds: Optional[int], optional
|
||||
:param max_retry_seconds: defaults to 10
|
||||
:type max_retry_seconds: Optional[int], optional
|
||||
:param confluence_kwargs: additional kwargs to initialize confluence with
|
||||
:type confluence_kwargs: dict, optional
|
||||
:raises ValueError: Errors while validating input
|
||||
:raises ImportError: Required dependencies not installed.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -54,13 +72,21 @@ class ConfluenceLoader(BaseLoader):
|
||||
api_key: Optional[str] = None,
|
||||
username: Optional[str] = None,
|
||||
oauth2: Optional[dict] = None,
|
||||
cloud: bool = True,
|
||||
cloud: Optional[bool] = True,
|
||||
number_of_retries: Optional[int] = 3,
|
||||
min_retry_seconds: Optional[int] = 2,
|
||||
max_retry_seconds: Optional[int] = 10,
|
||||
confluence_kwargs: Optional[dict] = None,
|
||||
):
|
||||
confluence_kwargs = confluence_kwargs or {}
|
||||
errors = ConfluenceLoader.validate_init_args(url, api_key, username, oauth2)
|
||||
if errors:
|
||||
raise ValueError(f"Error(s) while validating input: {errors}")
|
||||
|
||||
self.base_url = url
|
||||
self.number_of_retries = number_of_retries
|
||||
self.min_retry_seconds = min_retry_seconds
|
||||
self.max_retry_seconds = max_retry_seconds
|
||||
|
||||
try:
|
||||
from atlassian import Confluence # noqa: F401
|
||||
@@ -71,10 +97,16 @@ class ConfluenceLoader(BaseLoader):
|
||||
)
|
||||
|
||||
if oauth2:
|
||||
self.confluence = Confluence(url=url, oauth2=oauth2, cloud=cloud)
|
||||
self.confluence = Confluence(
|
||||
url=url, oauth2=oauth2, cloud=cloud, **confluence_kwargs
|
||||
)
|
||||
else:
|
||||
self.confluence = Confluence(
|
||||
url=url, username=username, password=api_key, cloud=cloud
|
||||
url=url,
|
||||
username=username,
|
||||
password=api_key,
|
||||
cloud=cloud,
|
||||
**confluence_kwargs,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
@@ -125,7 +157,9 @@ class ConfluenceLoader(BaseLoader):
|
||||
label: Optional[str] = None,
|
||||
cql: Optional[str] = None,
|
||||
include_attachments: bool = False,
|
||||
include_comments: bool = False,
|
||||
limit: Optional[int] = 50,
|
||||
max_pages: Optional[int] = 1000,
|
||||
) -> List[Document]:
|
||||
"""
|
||||
:param space_key: Space key retrieved from a confluence URL, defaults to None
|
||||
@@ -138,8 +172,12 @@ class ConfluenceLoader(BaseLoader):
|
||||
:type cql: Optional[str], optional
|
||||
:param include_attachments: defaults to False
|
||||
:type include_attachments: bool, optional
|
||||
:param limit: Maximum number of pages to retrieve, defaults to 50
|
||||
:param include_comments: defaults to False
|
||||
:type include_comments: bool, optional
|
||||
:param limit: Maximum number of pages to retrieve per request, defaults to 50
|
||||
:type limit: int, optional
|
||||
:param max_pages: Maximum number of pages to retrieve in total, defaults 1000
|
||||
:type max_pages: int, optional
|
||||
:raises ValueError: _description_
|
||||
:raises ImportError: _description_
|
||||
:return: _description_
|
||||
@@ -169,10 +207,13 @@ class ConfluenceLoader(BaseLoader):
|
||||
self.confluence.get_all_pages_from_space,
|
||||
space=space_key,
|
||||
limit=limit,
|
||||
max_pages=max_pages,
|
||||
expand="body.storage.value",
|
||||
)
|
||||
for page in pages:
|
||||
doc = self.process_page(page, include_attachments, text_maker)
|
||||
doc = self.process_page(
|
||||
page, include_attachments, include_comments, text_maker
|
||||
)
|
||||
docs.append(doc)
|
||||
|
||||
if label:
|
||||
@@ -180,26 +221,47 @@ class ConfluenceLoader(BaseLoader):
|
||||
self.confluence.get_all_pages_by_label,
|
||||
label=label,
|
||||
limit=limit,
|
||||
max_pages=max_pages,
|
||||
expand="body.storage.value",
|
||||
)
|
||||
for page in pages:
|
||||
doc = self.process_page(page, include_attachments, text_maker)
|
||||
doc = self.process_page(
|
||||
page, include_attachments, include_comments, text_maker
|
||||
)
|
||||
docs.append(doc)
|
||||
|
||||
if cql:
|
||||
pages = self.paginate_request(
|
||||
self.confluence.cql, cql=cql, limit=limit, expand="body.storage.value"
|
||||
self.confluence.cql,
|
||||
cql=cql,
|
||||
limit=limit,
|
||||
max_pages=max_pages,
|
||||
expand="body.storage.value",
|
||||
)
|
||||
for page in pages:
|
||||
doc = self.process_page(page, include_attachments, text_maker)
|
||||
doc = self.process_page(
|
||||
page, include_attachments, include_comments, text_maker
|
||||
)
|
||||
docs.append(doc)
|
||||
|
||||
if page_ids:
|
||||
for page_id in page_ids:
|
||||
page = self.confluence.get_page_by_id(
|
||||
page_id=page_id, expand="body.storage.value"
|
||||
get_page = retry(
|
||||
reraise=True,
|
||||
stop=stop_after_attempt(
|
||||
self.number_of_retries # type: ignore[arg-type]
|
||||
),
|
||||
wait=wait_exponential(
|
||||
multiplier=1, # type: ignore[arg-type]
|
||||
min=self.min_retry_seconds, # type: ignore[arg-type]
|
||||
max=self.max_retry_seconds, # type: ignore[arg-type]
|
||||
),
|
||||
before_sleep=before_sleep_log(logger, logging.WARNING),
|
||||
)(self.confluence.get_page_by_id)
|
||||
page = get_page(page_id=page_id, expand="body.storage.value")
|
||||
doc = self.process_page(
|
||||
page, include_attachments, include_comments, text_maker
|
||||
)
|
||||
doc = self.process_page(page, include_attachments, text_maker)
|
||||
docs.append(doc)
|
||||
|
||||
return docs
|
||||
@@ -207,11 +269,13 @@ class ConfluenceLoader(BaseLoader):
|
||||
def paginate_request(self, retrieval_method: Callable, **kwargs: Any) -> List:
|
||||
"""Paginate the various methods to retrieve groups of pages.
|
||||
|
||||
Unforunately, due to page size, sometimes the Confluence API
|
||||
doesn't match the limit value. Also, due to the Atlassian Python
|
||||
Unfortunately, due to page size, sometimes the Confluence API
|
||||
doesn't match the limit value. If `limit` is >100 confluence
|
||||
seems to cap the response to 100. Also, due to the Atlassian Python
|
||||
package, we don't get the "next" values from the "_links" key because
|
||||
they only return the value from the results key. So here, the pagination
|
||||
starts from 0 and goes until the limit. We have to manually check if there
|
||||
starts from 0 and goes until the max_pages, getting the `limit` number
|
||||
of pages with each request. We have to manually check if there
|
||||
are more docs based on the length of the returned list of pages, rather than
|
||||
just checking for the presence of a `next` key in the response like this page
|
||||
would have you do:
|
||||
@@ -223,20 +287,33 @@ class ConfluenceLoader(BaseLoader):
|
||||
:rtype: List
|
||||
"""
|
||||
|
||||
limit = kwargs["limit"]
|
||||
page = 0
|
||||
docs = []
|
||||
while page < limit:
|
||||
batch = retrieval_method(**kwargs, start=page)
|
||||
if len(batch) < limit:
|
||||
page = limit
|
||||
else:
|
||||
page += len(batch)
|
||||
max_pages = kwargs.pop("max_pages")
|
||||
docs: List[dict] = []
|
||||
while len(docs) < max_pages:
|
||||
get_pages = retry(
|
||||
reraise=True,
|
||||
stop=stop_after_attempt(
|
||||
self.number_of_retries # type: ignore[arg-type]
|
||||
),
|
||||
wait=wait_exponential(
|
||||
multiplier=1,
|
||||
min=self.min_retry_seconds, # type: ignore[arg-type]
|
||||
max=self.max_retry_seconds, # type: ignore[arg-type]
|
||||
),
|
||||
before_sleep=before_sleep_log(logger, logging.WARNING),
|
||||
)(retrieval_method)
|
||||
batch = get_pages(**kwargs, start=len(docs))
|
||||
if not batch:
|
||||
break
|
||||
docs.extend(batch)
|
||||
return docs
|
||||
return docs[:max_pages]
|
||||
|
||||
def process_page(
|
||||
self, page: dict, include_attachments: bool, text_maker: Any
|
||||
self,
|
||||
page: dict,
|
||||
include_attachments: bool,
|
||||
include_comments: bool,
|
||||
text_maker: Any,
|
||||
) -> Document:
|
||||
if include_attachments:
|
||||
attachment_texts = self.process_attachment(page["id"])
|
||||
@@ -245,8 +322,23 @@ class ConfluenceLoader(BaseLoader):
|
||||
text = text_maker.handle(page["body"]["storage"]["value"]) + "".join(
|
||||
attachment_texts
|
||||
)
|
||||
if include_comments:
|
||||
comments = self.confluence.get_page_comments(
|
||||
page["id"], expand="body.view.value", depth="all"
|
||||
)["results"]
|
||||
comment_texts = [
|
||||
text_maker.handle(comment["body"]["view"]["value"])
|
||||
for comment in comments
|
||||
]
|
||||
text = text + "".join(comment_texts)
|
||||
|
||||
return Document(
|
||||
page_content=text, metadata={"title": page["title"], "id": page["id"]}
|
||||
page_content=text,
|
||||
metadata={
|
||||
"title": page["title"],
|
||||
"id": page["id"],
|
||||
"source": self.base_url.strip("/") + page["_links"]["webui"],
|
||||
},
|
||||
)
|
||||
|
||||
def process_attachment(self, page_id: str) -> List[str]:
|
||||
|
||||
@@ -13,7 +13,7 @@ if TYPE_CHECKING:
|
||||
class DiscordChatLoader(BaseLoader):
|
||||
"""Load Discord chat logs."""
|
||||
|
||||
def __init__(self, chat_log: pd.DataFrame, user_id_col: str = "user_id"):
|
||||
def __init__(self, chat_log: pd.DataFrame, user_id_col: str = "ID"):
|
||||
"""Initialize with a Pandas DataFrame containing chat logs."""
|
||||
if not isinstance(chat_log, pd.DataFrame):
|
||||
raise ValueError(
|
||||
|
||||
@@ -24,7 +24,8 @@ class BSHTMLLoader(BaseLoader):
|
||||
import bs4 # noqa:F401
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
"bs4 package not found, please install it with " "`pip install bs4`"
|
||||
"beautifulsoup4 package not found, please install it with "
|
||||
"`pip install beautifulsoup4`"
|
||||
)
|
||||
|
||||
self.file_path = file_path
|
||||
|
||||
84
langchain/document_loaders/hugging_face_dataset.py
Normal file
84
langchain/document_loaders/hugging_face_dataset.py
Normal file
@@ -0,0 +1,84 @@
|
||||
"""Loader that loads HuggingFace datasets."""
|
||||
from typing import List, Mapping, Optional, Sequence, Union
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
|
||||
|
||||
class HuggingFaceDatasetLoader(BaseLoader):
|
||||
"""Loading logic for loading documents from the Hugging Face Hub."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
path: str,
|
||||
page_content_column: str = "text",
|
||||
name: Optional[str] = None,
|
||||
data_dir: Optional[str] = None,
|
||||
data_files: Optional[
|
||||
Union[str, Sequence[str], Mapping[str, Union[str, Sequence[str]]]]
|
||||
] = None,
|
||||
cache_dir: Optional[str] = None,
|
||||
keep_in_memory: Optional[bool] = None,
|
||||
save_infos: bool = False,
|
||||
use_auth_token: Optional[Union[bool, str]] = None,
|
||||
num_proc: Optional[int] = None,
|
||||
):
|
||||
"""
|
||||
Initialize the HuggingFaceDatasetLoader.
|
||||
|
||||
Args:
|
||||
path: Path or name of the dataset.
|
||||
page_content_column: Page content column name.
|
||||
name: Name of the dataset configuration.
|
||||
data_dir: Data directory of the dataset configuration.
|
||||
data_files: Path(s) to source data file(s).
|
||||
cache_dir: Directory to read/write data.
|
||||
keep_in_memory: Whether to copy the dataset in-memory.
|
||||
save_infos: Save the dataset information (checksums/size/splits/...).
|
||||
use_auth_token: Bearer token for remote files on the Datasets Hub.
|
||||
num_proc: Number of processes.
|
||||
"""
|
||||
|
||||
self.path = path
|
||||
self.page_content_column = page_content_column
|
||||
self.name = name
|
||||
self.data_dir = data_dir
|
||||
self.data_files = data_files
|
||||
self.cache_dir = cache_dir
|
||||
self.keep_in_memory = keep_in_memory
|
||||
self.save_infos = save_infos
|
||||
self.use_auth_token = use_auth_token
|
||||
self.num_proc = num_proc
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
"""Load documents."""
|
||||
try:
|
||||
from datasets import load_dataset
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import datasets python package. "
|
||||
"Please install it with `pip install datasets`."
|
||||
)
|
||||
|
||||
dataset = load_dataset(
|
||||
path=self.path,
|
||||
name=self.name,
|
||||
data_dir=self.data_dir,
|
||||
data_files=self.data_files,
|
||||
cache_dir=self.cache_dir,
|
||||
keep_in_memory=self.keep_in_memory,
|
||||
save_infos=self.save_infos,
|
||||
use_auth_token=self.use_auth_token,
|
||||
num_proc=self.num_proc,
|
||||
)
|
||||
|
||||
docs = [
|
||||
Document(
|
||||
page_content=row.pop(self.page_content_column),
|
||||
metadata=row,
|
||||
)
|
||||
for key in dataset.keys()
|
||||
for row in dataset[key]
|
||||
]
|
||||
|
||||
return docs
|
||||
14
langchain/document_loaders/python.py
Normal file
14
langchain/document_loaders/python.py
Normal file
@@ -0,0 +1,14 @@
|
||||
import tokenize
|
||||
|
||||
from langchain.document_loaders.text import TextLoader
|
||||
|
||||
|
||||
class PythonLoader(TextLoader):
|
||||
"""
|
||||
Load Python files, respecting any non-default encoding if specified.
|
||||
"""
|
||||
|
||||
def __init__(self, file_path: str):
|
||||
with open(file_path, "rb") as f:
|
||||
encoding, _ = tokenize.detect_encoding(f.readline)
|
||||
super().__init__(file_path=file_path, encoding=encoding)
|
||||
28
langchain/document_loaders/rtf.py
Normal file
28
langchain/document_loaders/rtf.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""Loader that loads rich text files."""
|
||||
from typing import Any, List
|
||||
|
||||
from langchain.document_loaders.unstructured import (
|
||||
UnstructuredFileLoader,
|
||||
satisfies_min_unstructured_version,
|
||||
)
|
||||
|
||||
|
||||
class UnstructuredRTFLoader(UnstructuredFileLoader):
|
||||
"""Loader that uses unstructured to load rtf files."""
|
||||
|
||||
def __init__(
|
||||
self, file_path: str, mode: str = "single", **unstructured_kwargs: Any
|
||||
):
|
||||
min_unstructured_version = "0.5.12"
|
||||
if not satisfies_min_unstructured_version(min_unstructured_version):
|
||||
raise ValueError(
|
||||
"Partitioning rtf files is only supported in "
|
||||
f"unstructured>={min_unstructured_version}."
|
||||
)
|
||||
|
||||
super().__init__(file_path=file_path, mode=mode, **unstructured_kwargs)
|
||||
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.partition.rtf import partition_rtf
|
||||
|
||||
return partition_rtf(filename=self.file_path, **self.unstructured_kwargs)
|
||||
@@ -61,6 +61,13 @@ class SitemapLoader(WebBaseLoader):
|
||||
}
|
||||
)
|
||||
|
||||
for sitemap in soup.find_all("sitemap"):
|
||||
loc = sitemap.find("loc")
|
||||
if not loc:
|
||||
continue
|
||||
soup_child = self.scrape_all([loc.text], "xml")[0]
|
||||
|
||||
els.extend(self.parse_sitemap(soup_child))
|
||||
return els
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
|
||||
@@ -15,7 +15,6 @@ class UnstructuredURLLoader(BaseLoader):
|
||||
self,
|
||||
urls: List[str],
|
||||
continue_on_failure: bool = True,
|
||||
headers: dict = {},
|
||||
**unstructured_kwargs: Any,
|
||||
):
|
||||
"""Initialize with file path."""
|
||||
@@ -30,23 +29,37 @@ class UnstructuredURLLoader(BaseLoader):
|
||||
"`pip install unstructured`"
|
||||
)
|
||||
|
||||
if not self.__is_headers_available() and len(headers.keys()) != 0:
|
||||
logger.warning(
|
||||
"You are using old version of unstructured. "
|
||||
"The headers parameter is ignored"
|
||||
)
|
||||
headers = unstructured_kwargs.pop("headers", {})
|
||||
if len(headers.keys()) != 0:
|
||||
warn_about_headers = False
|
||||
if self.__is_non_html_available():
|
||||
warn_about_headers = not self.__is_headers_available_for_non_html()
|
||||
else:
|
||||
warn_about_headers = not self.__is_headers_available_for_html()
|
||||
|
||||
if warn_about_headers:
|
||||
logger.warning(
|
||||
"You are using an old version of unstructured. "
|
||||
"The headers parameter is ignored"
|
||||
)
|
||||
|
||||
self.urls = urls
|
||||
self.continue_on_failure = continue_on_failure
|
||||
self.headers = headers
|
||||
self.unstructured_kwargs = unstructured_kwargs
|
||||
|
||||
def __is_headers_available(self) -> bool:
|
||||
def __is_headers_available_for_html(self) -> bool:
|
||||
_unstructured_version = self.__version.split("-")[0]
|
||||
unstructured_version = tuple([int(x) for x in _unstructured_version.split(".")])
|
||||
|
||||
return unstructured_version >= (0, 5, 7)
|
||||
|
||||
def __is_headers_available_for_non_html(self) -> bool:
|
||||
_unstructured_version = self.__version.split("-")[0]
|
||||
unstructured_version = tuple([int(x) for x in _unstructured_version.split(".")])
|
||||
|
||||
return unstructured_version >= (0, 5, 13)
|
||||
|
||||
def __is_non_html_available(self) -> bool:
|
||||
_unstructured_version = self.__version.split("-")[0]
|
||||
unstructured_version = tuple([int(x) for x in _unstructured_version.split(".")])
|
||||
@@ -61,14 +74,20 @@ class UnstructuredURLLoader(BaseLoader):
|
||||
docs: List[Document] = list()
|
||||
for url in self.urls:
|
||||
try:
|
||||
if self.headers and self.__is_headers_available():
|
||||
elements = partition_html(
|
||||
url=url, headers=self.headers, **self.unstructured_kwargs
|
||||
)
|
||||
elif self.__is_non_html_available():
|
||||
elements = partition(url=url, **self.unstructured_kwargs)
|
||||
if self.__is_non_html_available():
|
||||
if self.__is_headers_available_for_non_html():
|
||||
elements = partition(
|
||||
url=url, headers=self.headers, **self.unstructured_kwargs
|
||||
)
|
||||
else:
|
||||
elements = partition(url=url, **self.unstructured_kwargs)
|
||||
else:
|
||||
elements = partition_html(url=url, **self.unstructured_kwargs)
|
||||
if self.__is_headers_available_for_html():
|
||||
elements = partition_html(
|
||||
url=url, headers=self.headers, **self.unstructured_kwargs
|
||||
)
|
||||
else:
|
||||
elements = partition_html(url=url, **self.unstructured_kwargs)
|
||||
except Exception as e:
|
||||
if self.continue_on_failure:
|
||||
logger.error(f"Error fetching or processing {url}, exeption: {e}")
|
||||
|
||||
@@ -67,9 +67,10 @@ class PlaywrightURLLoader(BaseLoader):
|
||||
page.goto(url)
|
||||
|
||||
for selector in self.remove_selectors or []:
|
||||
element = page.locator(selector)
|
||||
if element.is_visible():
|
||||
element.evaluate("element => element.remove()")
|
||||
elements = page.locator(selector).all()
|
||||
for element in elements:
|
||||
if element.is_visible():
|
||||
element.evaluate("element => element.remove()")
|
||||
|
||||
page_source = page.content()
|
||||
elements = partition_html(text=page_source)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user