mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-13 22:32:33 +00:00
Compare commits
71 Commits
harrison/f
...
harrison/a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
47181f9a7a | ||
|
|
bb76440bfa | ||
|
|
c104d507bf | ||
|
|
ad4414b59f | ||
|
|
c8b4b54479 | ||
|
|
47ba34c83a | ||
|
|
467aa0cee0 | ||
|
|
6be5747466 | ||
|
|
46c428234f | ||
|
|
ffed5e0056 | ||
|
|
fc66a32c6f | ||
|
|
a01d3e6955 | ||
|
|
766b84a9d9 | ||
|
|
cf98f219f9 | ||
|
|
e7b625fe03 | ||
|
|
3474f39e21 | ||
|
|
8d0869c6d3 | ||
|
|
a7084ad6e4 | ||
|
|
50257fce59 | ||
|
|
fe6695b9e7 | ||
|
|
2eef76ed3f | ||
|
|
85c1bd2cd0 | ||
|
|
809a9f485f | ||
|
|
750edfb440 | ||
|
|
2dd895d98c | ||
|
|
c1b50b7b13 | ||
|
|
ed143b598f | ||
|
|
428508bd75 | ||
|
|
78b31e5966 | ||
|
|
8cf62ce06e | ||
|
|
5161ae7e08 | ||
|
|
8c167627ed | ||
|
|
e26b6f9c89 | ||
|
|
3c6796b72e | ||
|
|
996b5a3dfb | ||
|
|
9bb7195085 | ||
|
|
595cc1ae1a | ||
|
|
482611f426 | ||
|
|
8861770bd0 | ||
|
|
8fdcdf4c2f | ||
|
|
137356dbec | ||
|
|
2fbb152386 | ||
|
|
d946be2f3d | ||
|
|
292f1cfa96 | ||
|
|
948e999eff | ||
|
|
a7c8e37e77 | ||
|
|
19a9fa16a9 | ||
|
|
e02d6b2288 | ||
|
|
36b4c58acf | ||
|
|
7827f0a844 | ||
|
|
9ee6115deb | ||
|
|
9d08384d5f | ||
|
|
853894dd47 | ||
|
|
5267ebce2d | ||
|
|
43c9bd869f | ||
|
|
0f399350f1 | ||
|
|
85c66dc6a4 | ||
|
|
b10be842f6 | ||
|
|
e2e501aa06 | ||
|
|
e9b1c8cdfa | ||
|
|
c27a6fa8a4 | ||
|
|
1690292b09 | ||
|
|
834b391792 | ||
|
|
3c1c7ba672 | ||
|
|
48b093823e | ||
|
|
b7bef36ee1 | ||
|
|
28be37f470 | ||
|
|
68666d6a22 | ||
|
|
2180a91196 | ||
|
|
2163d064f3 | ||
|
|
8cba5b791a |
2
.coveragerc
Normal file
2
.coveragerc
Normal file
@@ -0,0 +1,2 @@
|
||||
[run]
|
||||
omit = tests/*
|
||||
43
.github/workflows/lint.yml
vendored
43
.github/workflows/lint.yml
vendored
@@ -2,34 +2,35 @@ name: lint
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
branches: [master]
|
||||
pull_request:
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.2.0"
|
||||
POETRY_VERSION: "1.3.1"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install poetry
|
||||
run: |
|
||||
pipx install poetry==$POETRY_VERSION
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: poetry
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
poetry install
|
||||
- name: Analysing the code with our lint
|
||||
run: |
|
||||
make lint
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install poetry
|
||||
run: |
|
||||
pipx install poetry==$POETRY_VERSION
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: poetry
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
poetry install
|
||||
- name: Analysing the code with our lint
|
||||
run: |
|
||||
make lint
|
||||
|
||||
37
.github/workflows/test.yml
vendored
37
.github/workflows/test.yml
vendored
@@ -2,11 +2,11 @@ name: test
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
branches: [master]
|
||||
pull_request:
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.2.0"
|
||||
POETRY_VERSION: "1.3.1"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
@@ -14,20 +14,21 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install poetry
|
||||
run: pipx install poetry==$POETRY_VERSION
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: 'poetry'
|
||||
- name: Install dependencies
|
||||
run: poetry install
|
||||
- name: Run unit tests
|
||||
run: |
|
||||
make tests
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install poetry
|
||||
run: pipx install poetry==$POETRY_VERSION
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: "poetry"
|
||||
- name: Install dependencies
|
||||
run: poetry install
|
||||
- name: Run unit tests
|
||||
run: |
|
||||
make tests
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,4 +1,5 @@
|
||||
.vscode/
|
||||
.idea/
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
154
CONTRIBUTING.md
Normal file
154
CONTRIBUTING.md
Normal file
@@ -0,0 +1,154 @@
|
||||
# Contributing to LangChain
|
||||
|
||||
Hi there! Thank you for even being interested in contributing to LangChain.
|
||||
As an open source project in a rapidly developing field, we are extremely open
|
||||
to contributions, whether it be in the form of a new feature, improved infra, or better documentation.
|
||||
|
||||
To contribute to this project, please follow a ["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow.
|
||||
Please do not try to push directly to this repo unless you are a maintainer.
|
||||
|
||||
## 🗺️Contributing Guidelines
|
||||
|
||||
### 🚩GitHub Issues
|
||||
|
||||
Our [issues](https://github.com/hwchase17/langchain/issues) page is kept up to date
|
||||
with bugs, improvements, and feature requests. There is a taxonomy of labels to help
|
||||
with sorting and discovery of issues of interest. These include:
|
||||
|
||||
- prompts: related to prompt tooling/infra.
|
||||
- llms: related to LLM wrappers/tooling/infra.
|
||||
- chains
|
||||
- utilities: related to different types of utilities to integrate with (Python, SQL, etc.).
|
||||
- agents
|
||||
- memory
|
||||
- applications: related to example applications to build
|
||||
|
||||
If you start working on an issue, please assign it to yourself.
|
||||
|
||||
If you are adding an issue, please try to keep it focused on a single modular bug/improvement/feature.
|
||||
If the two issues are related, or blocking, please link them rather than keep them as one single one.
|
||||
|
||||
We will try to keep these issues as up to date as possible, though
|
||||
with the rapid rate of development in this field, some may get out of date.
|
||||
If you notice this happening, please just let us know.
|
||||
|
||||
### 🙋Getting Help
|
||||
|
||||
Although we try to have a developer setup to make it as easy as possible for others to contribute (see below)
|
||||
it is possible that some pain point may arise around environment setup, linting, documentation, or other.
|
||||
Should that occur, please contact a maintainer! Not only do we want to help get you unblocked,
|
||||
but we also want to make sure that the process is smooth for future contributors.
|
||||
|
||||
In a similar vein, we do enforce certain linting, formatting, and documentation standards in the codebase.
|
||||
If you are finding these difficult (or even just annoying) to work with,
|
||||
feel free to contact a maintainer for help - we do not want these to get in the way of getting
|
||||
good code into the codebase.
|
||||
|
||||
### 🏭Release process
|
||||
|
||||
As of now, LangChain has an ad hoc release process: releases are cut with high frequency by
|
||||
a developer and published to [PyPI](https://pypi.org/project/langchain/).
|
||||
|
||||
LangChain follows the [semver](https://semver.org/) versioning standard. However, as pre-1.0 software,
|
||||
even patch releases may contain [non-backwards-compatible changes](https://semver.org/#spec-item-4).
|
||||
|
||||
If your contribution has made its way into a release, we will want to give you credit on Twitter (only if you want though)!
|
||||
If you have a Twitter account you would like us to mention, please let us know in the PR or in another manner.
|
||||
|
||||
## 🤖Developer Setup
|
||||
|
||||
### 🚀Quick Start
|
||||
|
||||
This project uses [Poetry](https://python-poetry.org/) as a dependency manager. Check out Poetry's [documentation on how to install it](https://python-poetry.org/docs/#installation) on your system before proceeding.
|
||||
|
||||
To install requirements:
|
||||
|
||||
```bash
|
||||
poetry install -E all
|
||||
```
|
||||
|
||||
This will install all requirements for running the package, examples, linting, formatting, tests, and coverage. Note the `-E all` flag will install all optional dependencies necessary for integration testing.
|
||||
|
||||
Now, you should be able to run the common tasks in the following section.
|
||||
|
||||
### ✅Common Tasks
|
||||
|
||||
#### Code Formatting
|
||||
|
||||
Formatting for this project is done via a combination of [Black](https://black.readthedocs.io/en/stable/) and [isort](https://pycqa.github.io/isort/).
|
||||
|
||||
To run formatting for this project:
|
||||
|
||||
```bash
|
||||
make format
|
||||
```
|
||||
|
||||
#### Linting
|
||||
|
||||
Linting for this project is done via a combination of [Black](https://black.readthedocs.io/en/stable/), [isort](https://pycqa.github.io/isort/), [flake8](https://flake8.pycqa.org/en/latest/), and [mypy](http://mypy-lang.org/).
|
||||
|
||||
To run linting for this project:
|
||||
|
||||
```bash
|
||||
make lint
|
||||
```
|
||||
|
||||
We recognize linting can be annoying - if you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed.
|
||||
|
||||
#### Coverage
|
||||
|
||||
Code coverage (i.e. the amount of code that is covered by unit tests) helps identify areas of the code that are potentially more or less brittle.
|
||||
|
||||
To get a report of current coverage, run the following:
|
||||
|
||||
```bash
|
||||
make coverage
|
||||
```
|
||||
|
||||
#### Testing
|
||||
|
||||
Unit tests cover modular logic that does not require calls to outside APIs.
|
||||
|
||||
To run unit tests:
|
||||
|
||||
```bash
|
||||
make tests
|
||||
```
|
||||
|
||||
If you add new logic, please add a unit test.
|
||||
|
||||
Integration tests cover logic that requires making calls to outside APIs (often integration with other services).
|
||||
|
||||
To run integration tests:
|
||||
|
||||
```bash
|
||||
make integration_tests
|
||||
```
|
||||
|
||||
If you add support for a new external API, please add a new integration test.
|
||||
|
||||
#### Adding a Jupyter Notebook
|
||||
|
||||
If you are adding a Jupyter notebook example, you'll want to install the optional `dev` dependencies.
|
||||
|
||||
To install dev dependencies:
|
||||
|
||||
```bash
|
||||
poetry install --with dev
|
||||
```
|
||||
|
||||
Launch a notebook:
|
||||
|
||||
```bash
|
||||
poetry run jupyter notebook
|
||||
```
|
||||
|
||||
When you run `poetry install`, the `langchain` package is installed as editable in the virtualenv, so your new logic can be imported into the notebook.
|
||||
|
||||
#### Contribute Documentation
|
||||
|
||||
Docs are largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code.
|
||||
|
||||
For that reason, we ask that you add good documentation to all classes and methods.
|
||||
|
||||
Similar to linting, we recognize documentation can be annoying. If you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed.
|
||||
6
Makefile
6
Makefile
@@ -1,5 +1,11 @@
|
||||
.PHONY: format lint tests integration_tests
|
||||
|
||||
coverage:
|
||||
poetry run pytest --cov \
|
||||
--cov-config=.coveragerc \
|
||||
--cov-report xml \
|
||||
--cov-report term-missing:skip-covered
|
||||
|
||||
format:
|
||||
poetry run black .
|
||||
poetry run isort .
|
||||
|
||||
167
README.md
167
README.md
@@ -13,176 +13,49 @@
|
||||
Large language models (LLMs) are emerging as a transformative technology, enabling
|
||||
developers to build applications that they previously could not.
|
||||
But using these LLMs in isolation is often not enough to
|
||||
create a truly powerful app - the real power comes when you are able to
|
||||
combine them with other sources of computation or knowledge.
|
||||
create a truly powerful app - the real power comes when you can combine them with other sources of computation or knowledge.
|
||||
|
||||
This library is aimed at assisting in the development of those types of applications.
|
||||
|
||||
## 📖 Documentation
|
||||
|
||||
Please see [here](https://langchain.readthedocs.io/en/latest/?) for full documentation on:
|
||||
- Getting started (installation, setting up environment, simple examples)
|
||||
|
||||
- Getting started (installation, setting up the environment, simple examples)
|
||||
- How-To examples (demos, integrations, helper functions)
|
||||
- Reference (full API docs)
|
||||
- Resources (high level explanation of core concepts)
|
||||
- Resources (high-level explanation of core concepts)
|
||||
|
||||
## 🚀 What can this help with?
|
||||
|
||||
There are three main areas (with a fourth coming soon) that LangChain is designed to help with.
|
||||
There are five main areas that LangChain is designed to help with.
|
||||
These are, in increasing order of complexity:
|
||||
1. LLM and Prompts
|
||||
2. Chains
|
||||
3. Agents
|
||||
4. Memory
|
||||
|
||||
Let's go through these categories and for each one identify key concepts (to clarify terminology) as well as the problems in this area LangChain helps solve.
|
||||
**📃 LLMs and Prompts:**
|
||||
|
||||
### LLMs and Prompts
|
||||
Calling out to an LLM once is pretty easy, with most of them being behind well documented APIs.
|
||||
However, there are still some challenges going from that to an application running in production that LangChain attempts to address.
|
||||
This includes prompt management, prompt optimization, generic interface for all LLMs, and common utilities for working with LLMs.
|
||||
|
||||
**Key Concepts**
|
||||
- LLM: A large language model, in particular a text-to-text model.
|
||||
- Prompt: The input to a language model. Typically this is not simply a hardcoded string but rather a combination of a template, some examples, and user input.
|
||||
- Prompt Template: An object responsible for constructing the final prompt to pass to a LLM.
|
||||
- Examples: Datapoints that can be included in the prompt in order to give the model more context on what to do.
|
||||
- Few Shot Prompt Template: A subclass of the PromptTemplate class that uses examples.
|
||||
- Example Selector: A class responsible for selecting examples to use dynamically (depending on user input) in a few shot prompt.
|
||||
**🔗 Chains:**
|
||||
|
||||
**Problems Solved**
|
||||
- Switching costs: by exposing a standard interface for all the top LLM providers, LangChain makes it easy to switch from one provider to another, whether it be for production use cases or just for testing stuff out.
|
||||
- Prompt management: managing your prompts is easy when you only have one simple one, but can get tricky when you have a bunch or when they start to get more complex. LangChain provides a standard way for storing, constructing, and referencing prompts.
|
||||
- Prompt optimization: despite the underlying models getting better and better, there is still currently a need for carefully constructing prompts.
|
||||
Chains go beyond just a single LLM call, and are sequences of calls (whether to an LLM or a different utility). LangChain provides a standard interface for chains, lots of integrations with other tools, and end-to-end chains for common applications.
|
||||
|
||||
### Chains
|
||||
Using an LLM in isolation is fine for some simple applications, but many more complex ones require chaining LLMs - either with each other or with other experts.
|
||||
LangChain provides several parts to help with that.
|
||||
**📚 Data Augmented Generation:**
|
||||
|
||||
**Key Concepts**
|
||||
- Tools: APIs designed for assisting with a particular use case (search, databases, Python REPL, etc). Prompt templates, LLMs, and chains can also be considered tools.
|
||||
- Chains: A combination of multiple tools in a deterministic manner.
|
||||
Data Augmented Generation involves specific types of chains that first interact with an external datasource to fetch data to use in the generation step. Examples of this include summarization of long pieces of text and question/answering over specific data sources.
|
||||
|
||||
**Problems Solved**
|
||||
- Standard interface for working with Chains
|
||||
- Easy way to construct chains of LLMs
|
||||
- Lots of integrations with other tools that you may want to use in conjunction with LLMs
|
||||
- End-to-end chains for common workflows (database question/answer, recursive summarization, etc)
|
||||
**🤖 Agents:**
|
||||
|
||||
### Agents
|
||||
Some applications will require not just a predetermined chain of calls to LLMs/other tools, but potentially an unknown chain that depends on the user input.
|
||||
In these types of chains, there is an “agent” which has access to a suite of tools.
|
||||
Depending on the user input, the agent can then decide which, if any, of these tools to call.
|
||||
Agents involve an LLM making decisions about which Actions to take, taking that Action, seeing an Observation, and repeating that until done. LangChain provides a standard interface for agents, a selection of agents to choose from, and examples of end to end agents.
|
||||
|
||||
**Key Concepts**
|
||||
- Tools: same as above.
|
||||
- Agent: An LLM-powered class responsible for determining which tools to use and in what order.
|
||||
**🧠 Memory:**
|
||||
|
||||
Memory is the concept of persisting state between calls of a chain/agent. LangChain provides a standard interface for memory, a collection of memory implementations, and examples of chains/agents that use memory.
|
||||
|
||||
**Problems Solved**
|
||||
- Standard agent interfaces
|
||||
- A selection of powerful agents to choose from
|
||||
- Common chains that can be used as tools
|
||||
For more information on these concepts, please see our [full documentation](https://langchain.readthedocs.io/en/latest/?).
|
||||
|
||||
### Memory
|
||||
By default, Chains and Agents are stateless, meaning that they treat each incoming query independently.
|
||||
In some applications (chatbots being a GREAT example) it is highly important to remember previous interactions,
|
||||
both at a short term but also at a long term level. The concept of "Memory" exists to do exactly that.
|
||||
## 💁 Contributing
|
||||
|
||||
**Key Concepts**
|
||||
- Memory: A class that can be added to an Agent or Chain to (1) pull in memory variables before calling that chain/agent, and (2) create new memories after the chain/agent finishes.
|
||||
- Memory Variables: Variables returned from a Memory class, to be passed into the chain/agent along with the user input.
|
||||
As an open source project in a rapidly developing field, we are extremely open
|
||||
to contributions, whether it be in the form of a new feature, improved infra, or better documentation.
|
||||
|
||||
**Problems Solved**
|
||||
- Standard memory interfaces
|
||||
- A collection of common memory implementations to choose from
|
||||
- Common chains/agents that use memory (e.g. chatbots)
|
||||
|
||||
## 🤖 Developer Guide
|
||||
|
||||
To begin developing on this project, first clone the repo locally.
|
||||
|
||||
### Quick Start
|
||||
|
||||
This project uses [Poetry](https://python-poetry.org/) as a dependency manager. Check out Poetry's own [documentation on how to install it](https://python-poetry.org/docs/#installation) on your system before proceeding.
|
||||
|
||||
To install requirements:
|
||||
|
||||
```bash
|
||||
poetry install -E all
|
||||
```
|
||||
|
||||
This will install all requirements for running the package, examples, linting, formatting, and tests. Note the `-E all` flag will install all optional dependencies necessary for integration testing.
|
||||
|
||||
Now, you should be able to run the common tasks in the following section.
|
||||
|
||||
### Common Tasks
|
||||
|
||||
#### Code Formatting
|
||||
|
||||
Formatting for this project is a combination of [Black](https://black.readthedocs.io/en/stable/) and [isort](https://pycqa.github.io/isort/).
|
||||
|
||||
To run formatting for this project:
|
||||
|
||||
```bash
|
||||
make format
|
||||
```
|
||||
|
||||
#### Linting
|
||||
|
||||
Linting for this project is a combination of [Black](https://black.readthedocs.io/en/stable/), [isort](https://pycqa.github.io/isort/), [flake8](https://flake8.pycqa.org/en/latest/), and [mypy](http://mypy-lang.org/).
|
||||
|
||||
To run linting for this project:
|
||||
|
||||
```bash
|
||||
make lint
|
||||
```
|
||||
|
||||
We recognize linting can be annoying - if you do not want to do it, please contact a project maintainer and they can help you with it. We do not want this to be a blocker for good code getting contributed.
|
||||
|
||||
#### Testing
|
||||
|
||||
Unit tests cover modular logic that does not require calls to outside apis.
|
||||
|
||||
To run unit tests:
|
||||
|
||||
```bash
|
||||
make tests
|
||||
```
|
||||
|
||||
If you add new logic, please add a unit test.
|
||||
|
||||
Integration tests cover logic that requires making calls to outside APIs (often integration with other services).
|
||||
|
||||
To run integration tests:
|
||||
|
||||
```bash
|
||||
make integration_tests
|
||||
```
|
||||
|
||||
If you add support for a new external API, please add a new integration test.
|
||||
|
||||
#### Adding a Jupyter Notebook
|
||||
|
||||
If you are adding a Jupyter notebook example, you'll want to install the optional `dev` dependencies.
|
||||
|
||||
To install dev dependencies:
|
||||
|
||||
```bash
|
||||
poetry install --with dev
|
||||
```
|
||||
|
||||
Launch a notebook:
|
||||
|
||||
```bash
|
||||
poetry run jupyter notebook
|
||||
```
|
||||
|
||||
When you run `poetry install`, the `langchain` package is installed as editable in the virtualenv, so your new logic can be imported into the notebook.
|
||||
|
||||
#### Contribute Documentation
|
||||
|
||||
Docs are largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code.
|
||||
|
||||
For that reason, we ask that you add good documentation to all classes and methods.
|
||||
|
||||
Similar to linting, we recognize documentation can be annoying - if you do not want to do it, please contact a project maintainer and they can help you with it. We do not want this to be a blocker for good code getting contributed.
|
||||
For detailed information on how to contribute, see [here](CONTRIBUTING.md).
|
||||
|
||||
@@ -1,10 +1,19 @@
|
||||
Agents
|
||||
======
|
||||
|
||||
The examples here are all end-to-end agents for specific applications.
|
||||
The first category of how-to guides here covers specific parts of working with agents.
|
||||
|
||||
`Custom Tools <agents/custom_tools.ipynb>`_: How to create custom tools that an agent can use.
|
||||
|
||||
`Intermediate Steps <agents/intermediate_steps.ipynb>`_: How to access and use intermediate steps to get more visibility into the internals of an agent.
|
||||
|
||||
`Custom Agent <agents/custom_agent.ipynb>`_: How to create a custom agent (specifically, a custom LLM + prompt to drive that agent).
|
||||
|
||||
|
||||
The next set of examples are all end-to-end agents for specific applications.
|
||||
In all examples there is an Agent with a particular set of tools.
|
||||
|
||||
- Tools: A tool can be anything that takes in a string and returns a string. This means that you can use both the primitives AND the chains found in `this <chains.rst>`_ documentation.
|
||||
- Tools: A tool can be anything that takes in a string and returns a string. This means that you can use both the primitives AND the chains found in `this <chains.rst>`_ documentation. LangChain also provides a list of easily loadable tools. For detailed information on those, please see `this documentation <../explanation/tools.md>`_.
|
||||
- Agents: An agent uses an LLMChain to determine which tools to use. For a list of all available agent types, see `here <../explanation/agents.md>`_.
|
||||
|
||||
**MRKL**
|
||||
@@ -31,14 +40,6 @@ In all examples there is an Agent with a particular set of tools.
|
||||
|
||||
|
||||
|
||||
Additionally, we also provide examples for how to do more customizability:
|
||||
|
||||
**Custom Agent**
|
||||
|
||||
- Purpose: How to create custom agents.
|
||||
- `Example Notebook <agents/custom_agent.ipynb>`_
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
"\n",
|
||||
"The first way to create a custom agent is to use an existing Agent class, but use a custom LLMChain. This is the simplest way to create a custom Agent. It is highly reccomended that you work with the `ZeroShotAgent`, as at the moment that is by far the most generalizable one. \n",
|
||||
"\n",
|
||||
"Most of the work in creating the custom LLMChain comes down to the prompt. Because we are using an existing agent class to parse the output, it is very important that the prompt say to produce text in that format. However, besides those instructions, you can customize the prompt as you wish.\n",
|
||||
"Most of the work in creating the custom LLMChain comes down to the prompt. Because we are using an existing agent class to parse the output, it is very important that the prompt say to produce text in that format. Additionally, we currently require an `agent_scratchpad` input variable to put notes on previous actions and observations. This should almost always be the final part of the prompt. However, besides those instructions, you can customize the prompt as you wish.\n",
|
||||
"\n",
|
||||
"To ensure that the prompt contains the appropriate instructions, we will utilize a helper method on that class. The helper method for the `ZeroShotAgent` takes the following arguments:\n",
|
||||
"\n",
|
||||
@@ -47,7 +47,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import ZeroShotAgent, Tool\n",
|
||||
"from langchain.agents import ZeroShotAgent, Tool, AgentExecutor\n",
|
||||
"from langchain import OpenAI, SerpAPIWrapper, LLMChain"
|
||||
]
|
||||
},
|
||||
@@ -78,13 +78,14 @@
|
||||
"prefix = \"\"\"Answer the following questions as best you can, but speaking as a pirate might speak. You have access to the following tools:\"\"\"\n",
|
||||
"suffix = \"\"\"Begin! Remember to speak as a pirate when giving your final answer. Use lots of \"Args\"\n",
|
||||
"\n",
|
||||
"Question: {input}\"\"\"\n",
|
||||
"Question: {input}\n",
|
||||
"{agent_scratchpad}\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = ZeroShotAgent.create_prompt(\n",
|
||||
" tools, \n",
|
||||
" prefix=prefix, \n",
|
||||
" suffix=suffix, \n",
|
||||
" input_variables=[\"input\"]\n",
|
||||
" input_variables=[\"input\", \"agent_scratchpad\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -123,7 +124,8 @@
|
||||
"\n",
|
||||
"Begin! Remember to speak as a pirate when giving your final answer. Use lots of \"Args\"\n",
|
||||
"\n",
|
||||
"Question: {input}\n"
|
||||
"Question: {input}\n",
|
||||
"{agent_scratchpad}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -148,12 +150,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)"
|
||||
"agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "490604e9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "653b1617",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -163,30 +175,126 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"How many people live in canada?\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I should look this up\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out how many people live in Canada\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: How many people live in canada\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe current population of Canada is 38,533,678 as of Friday, November 25, 2022, based on Worldometer elaboration of the latest United Nations data. · Canada 2020 ...\u001b[0m\n",
|
||||
"Action Input: Population of Canada\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe current population of Canada is 38,553,548 as of Saturday, December 17, 2022, based on Worldometer elaboration of the latest United Nations data. Canada ...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: Arrr, there be 38,533,678 people in Canada\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
"Final Answer: Arrr, there be 38,553,548 scallywags livin' in Canada!\u001b[0m\n",
|
||||
"\u001b[1m> Finished AgentExecutor chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Arrr, there be 38,533,678 people in Canada'"
|
||||
"\"Arrr, there be 38,553,548 scallywags livin' in Canada!\""
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"How many people live in canada?\")"
|
||||
"agent_executor.run(\"How many people live in canada?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "040eb343",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Multiple inputs\n",
|
||||
"Agents can also work with prompts that require multiple inputs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "43dbfa2f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prefix = \"\"\"Answer the following questions as best you can. You have access to the following tools:\"\"\"\n",
|
||||
"suffix = \"\"\"When answering, you MUST speak in the following language: {language}.\n",
|
||||
"\n",
|
||||
"Question: {input}\n",
|
||||
"{agent_scratchpad}\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = ZeroShotAgent.create_prompt(\n",
|
||||
" tools, \n",
|
||||
" prefix=prefix, \n",
|
||||
" suffix=suffix, \n",
|
||||
" input_variables=[\"input\", \"language\", \"agent_scratchpad\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "0f087313",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "92c75a10",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "ac5b83bf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "c960e4ff",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I should look up the population of Canada.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: Population of Canada\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe current population of Canada is 38,553,548 as of Saturday, December 17, 2022, based on Worldometer elaboration of the latest United Nations data. Canada ...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
|
||||
"Final Answer: La popolazione attuale del Canada è 38.553.548 al sabato 17 dicembre 2022, secondo l'elaborazione di Worldometer dei dati più recenti delle Nazioni Unite.\u001b[0m\n",
|
||||
"\u001b[1m> Finished AgentExecutor chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"La popolazione attuale del Canada è 38.553.548 al sabato 17 dicembre 2022, secondo l'elaborazione di Worldometer dei dati più recenti delle Nazioni Unite.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.run(input=\"How many people live in canada?\", language=\"italian\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -224,7 +332,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
181
docs/examples/agents/custom_tools.ipynb
Normal file
181
docs/examples/agents/custom_tools.ipynb
Normal file
@@ -0,0 +1,181 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5436020b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Defining Custom Tools\n",
|
||||
"\n",
|
||||
"When constructing your own agent, you will need to provide it with a list of Tools that it can use. A Tool is defined as below.\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"class Tool(NamedTuple):\n",
|
||||
" \"\"\"Interface for tools.\"\"\"\n",
|
||||
"\n",
|
||||
" name: str\n",
|
||||
" func: Callable[[str], str]\n",
|
||||
" description: Optional[str] = None\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"The two required components of a Tool are its name and the function it runs (`func`). The description is optional, because only some agents need it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "1aaba18c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Import things that are needed generically\n",
|
||||
"from langchain.agents import initialize_agent, Tool\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain import LLMMathChain, SerpAPIWrapper"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8e2c3874",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Initialize the LLM to use for the agent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "36ed392e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "56ff7670",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load the tool configs that are needed.\n",
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"llm_math_chain = LLMMathChain(llm=llm, verbose=True)\n",
|
||||
"tools = [\n",
|
||||
" Tool(\n",
|
||||
" name = \"Search\",\n",
|
||||
" func=search.run,\n",
|
||||
" description=\"useful for when you need to answer questions about current events\"\n",
|
||||
" ),\n",
|
||||
" Tool(\n",
|
||||
" name=\"Calculator\",\n",
|
||||
" func=llm_math_chain.run,\n",
|
||||
" description=\"useful for when you need to answer questions about math\"\n",
|
||||
" )\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "5b93047d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Construct the agent. We will use the default agent type here.\n",
|
||||
"# See documentation for a full list of options.\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"agent = initialize_agent(tools, llm, agent=\"zero-shot-react-description\", verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "6f96a891",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to find out who Olivia Wilde's boyfriend is and then calculate his age raised to the 0.23 power.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: Olivia Wilde's boyfriend\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mOlivia Wilde started dating Harry Styles after ending her years-long engagement to Jason Sudeikis — see their relationship timeline.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to find out Harry Styles' age.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: Harry Styles age\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m28 years\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to calculate 28 raised to the 0.23 power.\n",
|
||||
"Action: Calculator\n",
|
||||
"Action Input: 28^0.23\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMMathChain chain...\u001b[0m\n",
|
||||
"28^0.23\u001b[32;1m\u001b[1;3m\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"import math\n",
|
||||
"print(math.pow(28, 0.23))\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m2.1520202182226886\n",
|
||||
"\u001b[0m\n",
|
||||
"\u001b[1m> Finished LLMMathChain chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 2.1520202182226886\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
|
||||
"Final Answer: Harry Styles, Olivia Wilde's boyfriend, is 28 years old and his age raised to the 0.23 power is 2.1520202182226886.\u001b[0m\n",
|
||||
"\u001b[1m> Finished AgentExecutor chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Harry Styles, Olivia Wilde's boyfriend, is 28 years old and his age raised to the 0.23 power is 2.1520202182226886.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Who is Olivia Wilde's boyfriend? What is his current age raised to the 0.23 power?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e7776981",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
179
docs/examples/agents/intermediate_steps.ipynb
Normal file
179
docs/examples/agents/intermediate_steps.ipynb
Normal file
@@ -0,0 +1,179 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5436020b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Intermediate Steps\n",
|
||||
"\n",
|
||||
"In order to get more visibility into what an agent is doing, we can also return intermediate steps. This comes in the form of an extra key in the return value, which is a list of (action, observation) tuples."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "b2b0d119",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import load_tools\n",
|
||||
"from langchain.agents import initialize_agent\n",
|
||||
"from langchain.llms import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b440b8a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Initialize the components needed for the agent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "36ed392e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0, model_name='text-davinci-002')\n",
|
||||
"tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1d329c3d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Initialize the agent with `return_intermediate_steps=True`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "6abf3b08",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = initialize_agent(tools, llm, agent=\"zero-shot-react-description\", verbose=True, return_intermediate_steps=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "837211e8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I should look up Olivia Wilde's boyfriend's age\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"Olivia Wilde's boyfriend's age\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mHarry Styles, 28, and Olivia Wilde, 38, first met and sparked their connection when he joined the cast the actresses' psychological thriller ...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I should use a calculator\n",
|
||||
"Action: Calculator\n",
|
||||
"Action Input: 28.0 raised to the 0.23 power\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 2.1520202182226886\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: Olivia Wilde's boyfriend is 2.1520202182226886 years old.\u001b[0m\n",
|
||||
"\u001b[1m> Finished AgentExecutor chain.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = agent({\"input\":\"How old is Olivia Wilde's boyfriend? What is that number raised to the 0.23 power?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "e1a39a23",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[(AgentAction(tool='Search', tool_input=\"Olivia Wilde's boyfriend's age\", log=' I should look up Olivia Wilde\\'s boyfriend\\'s age\\nAction: Search\\nAction Input: \"Olivia Wilde\\'s boyfriend\\'s age\"'), \"Harry Styles, 28, and Olivia Wilde, 38, first met and sparked their connection when he joined the cast the actresses' psychological thriller ...\"), (AgentAction(tool='Calculator', tool_input='28.0 raised to the 0.23 power', log=' I should use a calculator\\nAction: Calculator\\nAction Input: 28.0 raised to the 0.23 power'), 'Answer: 2.1520202182226886\\n')]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Each intermediate step is an (action, observation) tuple; the action is an AgentAction NamedTuple\n",
|
||||
"print(response[\"intermediate_steps\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "6365bb69",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[\n",
|
||||
" [\n",
|
||||
" [\n",
|
||||
" \"Search\",\n",
|
||||
" \"Olivia Wilde's boyfriend's age\",\n",
|
||||
" \" I should look up Olivia Wilde's boyfriend's age\\nAction: Search\\nAction Input: \\\"Olivia Wilde's boyfriend's age\\\"\"\n",
|
||||
" ],\n",
|
||||
" \"Harry Styles, 28, and Olivia Wilde, 38, first met and sparked their connection when he joined the cast the actresses' psychological thriller ...\"\n",
|
||||
" ],\n",
|
||||
" [\n",
|
||||
" [\n",
|
||||
" \"Calculator\",\n",
|
||||
" \"28.0 raised to the 0.23 power\",\n",
|
||||
" \" I should use a calculator\\nAction: Calculator\\nAction Input: 28.0 raised to the 0.23 power\"\n",
|
||||
" ],\n",
|
||||
" \"Answer: 2.1520202182226886\\n\"\n",
|
||||
" ]\n",
|
||||
"]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"print(json.dumps(response[\"intermediate_steps\"], indent=2))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e7776981",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -46,7 +46,7 @@
|
||||
" Tool(\n",
|
||||
" name = \"Search\",\n",
|
||||
" func=search.run,\n",
|
||||
" description=\"useful for when you need to answer questions about current events\"\n",
|
||||
" description=\"useful for when you need to answer questions about current events. You should ask targeted questions\"\n",
|
||||
" ),\n",
|
||||
" Tool(\n",
|
||||
" name=\"Calculator\",\n",
|
||||
@@ -56,7 +56,7 @@
|
||||
" Tool(\n",
|
||||
" name=\"FooBar DB\",\n",
|
||||
" func=db_chain.run,\n",
|
||||
" description=\"useful for when you need to answer questions about FooBar. Input should be in the form of a question\"\n",
|
||||
" description=\"useful for when you need to answer questions about FooBar. Input should be in the form of a question containing full context\"\n",
|
||||
" )\n",
|
||||
"]"
|
||||
]
|
||||
@@ -81,40 +81,44 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"What is the age of Olivia Wilde's boyfriend raised to the 0.23 power?\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to find the age of Olivia Wilde's boyfriend\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentWithTools chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to find out who Olivia Wilde's boyfriend is and then calculate his age raised to the 0.23 power.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"Olivia Wilde's boyfriend\"\u001b[0m\n",
|
||||
"Action Input: \"Who is Olivia Wilde's boyfriend?\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mOlivia Wilde started dating Harry Styles after ending her years-long engagement to Jason Sudeikis — see their relationship timeline.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to find the age of Harry Styles\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to find out Harry Styles' age.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"Harry Styles age\"\u001b[0m\n",
|
||||
"Action Input: \"How old is Harry Styles?\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m28 years\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to calculate 28 to the 0.23 power\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to calculate 28 raised to the 0.23 power.\n",
|
||||
"Action: Calculator\n",
|
||||
"Action Input: 28^0.23\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[1m> Entering new LLMMathChain chain...\u001b[0m\n",
|
||||
"28^0.23\u001b[32;1m\u001b[1;3m\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"print(28**0.23)\n",
|
||||
"import math\n",
|
||||
"print(math.pow(28, 0.23))\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m2.1520202182226886\n",
|
||||
"\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\u001b[1m> Finished LLMMathChain chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 2.1520202182226886\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: 2.1520202182226886\u001b[0m"
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
|
||||
"Final Answer: Harry Styles, Olivia Wilde's boyfriend, is 28 years old and his age raised to the 0.23 power is 2.1520202182226886.\u001b[0m\n",
|
||||
"\u001b[1m> Finished AgentWithTools chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'2.1520202182226886'"
|
||||
"\"Harry Styles, Olivia Wilde's boyfriend, is 28 years old and his age raised to the 0.23 power is 2.1520202182226886.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
@@ -123,7 +127,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"mrkl.run(\"What is the age of Olivia Wilde's boyfriend raised to the 0.23 power?\")"
|
||||
"mrkl.run(\"Who is Olivia Wilde's boyfriend? What is his current age raised to the 0.23 power?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -136,43 +140,34 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Who recently released an album called 'The Storm Before the Calm' and are they in the FooBar database? If so, what albums of theirs are in the FooBar database?\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to find an album called 'The Storm Before the Calm'\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentWithTools chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to find out the artist's full name and then search the FooBar database for their albums.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"The Storm Before the Calm album\"\u001b[0m\n",
|
||||
"Action Input: \"The Storm Before the Calm\" artist\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe Storm Before the Calm (stylized in all lowercase) is the tenth (and eighth international) studio album by Canadian-American singer-songwriter Alanis ...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to check if Alanis is in the FooBar database\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now need to search the FooBar database for Alanis Morissette's albums\n",
|
||||
"Action: FooBar DB\n",
|
||||
"Action Input: \"Does Alanis Morissette exist in the FooBar database?\"\u001b[0m\n",
|
||||
"Action Input: What albums by Alanis Morissette are in the FooBar database?\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Does Alanis Morissette exist in the FooBar database?\n",
|
||||
"SQLQuery:\u001b[32;1m\u001b[1;3m SELECT * FROM Artist WHERE Name = 'Alanis Morissette'\u001b[0m\n",
|
||||
"SQLResult: \u001b[33;1m\u001b[1;3m[(4, 'Alanis Morissette')]\u001b[0m\n",
|
||||
"Answer:\u001b[32;1m\u001b[1;3m Yes\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3m Yes\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to find out what albums of Alanis's are in the FooBar database\n",
|
||||
"Action: FooBar DB\n",
|
||||
"Action Input: \"What albums by Alanis Morissette are in the FooBar database?\"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"What albums by Alanis Morissette are in the FooBar database?\n",
|
||||
"SQLQuery:\u001b[32;1m\u001b[1;3m SELECT Album.Title FROM Album JOIN Artist ON Album.ArtistId = Artist.ArtistId WHERE Artist.Name = 'Alanis Morissette'\u001b[0m\n",
|
||||
"\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n",
|
||||
"What albums by Alanis Morissette are in the FooBar database? \n",
|
||||
"SQLQuery:\u001b[32;1m\u001b[1;3m SELECT Title FROM Album WHERE ArtistId IN (SELECT ArtistId FROM Artist WHERE Name = 'Alanis Morissette');\u001b[0m\n",
|
||||
"SQLResult: \u001b[33;1m\u001b[1;3m[('Jagged Little Pill',)]\u001b[0m\n",
|
||||
"Answer:\u001b[32;1m\u001b[1;3m Jagged Little Pill\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"Answer:\u001b[32;1m\u001b[1;3m The album Jagged Little Pill by Alanis Morissette is in the FooBar database.\u001b[0m\n",
|
||||
"\u001b[1m> Finished SQLDatabaseChain chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3m Jagged Little Pill\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3m The album Jagged Little Pill by Alanis Morissette is in the FooBar database.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The album is by Alanis Morissette and the albums in the FooBar database by her are Jagged Little Pill\u001b[0m"
|
||||
"Final Answer: Alanis Morissette and Jagged Little Pill are in the FooBar database.\u001b[0m\n",
|
||||
"\u001b[1m> Finished AgentWithTools chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The album is by Alanis Morissette and the albums in the FooBar database by her are Jagged Little Pill'"
|
||||
"'Alanis Morissette and Jagged Little Pill are in the FooBar database.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
@@ -181,13 +176,13 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"mrkl.run(\"Who recently released an album called 'The Storm Before the Calm' and are they in the FooBar database? If so, what albums of theirs are in the FooBar database?\")"
|
||||
"mrkl.run(\"What is the full name of the artist who recently released an album called 'The Storm Before the Calm' and are they in the FooBar database? If so, what albums of theirs are in the FooBar database?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d7c2e6ac",
|
||||
"id": "af016a70",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -209,7 +204,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 1,
|
||||
"id": "4e272b47",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -38,7 +38,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 2,
|
||||
"id": "8078c8f1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -48,18 +48,17 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new ReActDocstoreAgent chain...\u001b[0m\n",
|
||||
"Author David Chanoff has collaborated with a U.S. Navy admiral who served as the ambassador to the United Kingdom under which President?\n",
|
||||
"Thought 1:\u001b[32;1m\u001b[1;3m I need to search David Chanoff and find the U.S. Navy admiral he collaborated\n",
|
||||
"with.\n",
|
||||
"\u001b[1m> Entering new AgentWithTools chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Thought 1: I need to search David Chanoff and find the U.S. Navy admiral he collaborated with.\n",
|
||||
"Action 1: Search[David Chanoff]\u001b[0m\n",
|
||||
"Observation 1: \u001b[36;1m\u001b[1;3mDavid Chanoff is a noted author of non-fiction work. His work has typically involved collaborations with the principal protagonist of the work concerned. His collaborators have included; Augustus A. White, Joycelyn Elders, Đoàn Văn Toại, William J. Crowe, Ariel Sharon, Kenneth Good and Felix Zandman. He has also written about a wide range of subjects including literary history, education and foreign for The Washington Post, The New Republic and The New York Times Magazine. He has published more than twelve books.\u001b[0m\n",
|
||||
"Thought 2:\u001b[32;1m\u001b[1;3m The U.S. Navy admiral David Chanoff collaborated with is William J. Crowe.\n",
|
||||
"Action 2: Search[William J. Crowe]\u001b[0m\n",
|
||||
"Observation 2: \u001b[36;1m\u001b[1;3mWilliam James Crowe Jr. (January 2, 1925 – October 18, 2007) was a United States Navy admiral and diplomat who served as the 11th chairman of the Joint Chiefs of Staff under Presidents Ronald Reagan and George H. W. Bush, and as the ambassador to the United Kingdom and Chair of the Intelligence Oversight Board under President Bill Clinton.\u001b[0m\n",
|
||||
"Thought 3:\u001b[32;1m\u001b[1;3m William J. Crowe served as the ambassador to the United Kingdom under President Bill Clinton.\n",
|
||||
"Thought 3:\u001b[32;1m\u001b[1;3m The President William J. Crowe served as the ambassador to the United Kingdom under is Bill Clinton.\n",
|
||||
"Action 3: Finish[Bill Clinton]\u001b[0m\n",
|
||||
"\u001b[1m> Finished ReActDocstoreAgent chain.\u001b[0m\n"
|
||||
"\u001b[1m> Finished AgentWithTools chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -68,7 +67,7 @@
|
||||
"'Bill Clinton'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -81,7 +80,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4ff64e81",
|
||||
"id": "75f914ba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
|
||||
@@ -22,15 +22,14 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new SelfAskWithSearchAgent chain...\u001b[0m\n",
|
||||
"What is the hometown of the reigning men's U.S. Open champion?\n",
|
||||
"Are follow up questions needed here:\u001b[32;1m\u001b[1;3m Yes.\n",
|
||||
"\u001b[1m> Entering new AgentWithTools chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m Yes.\n",
|
||||
"Follow up: Who is the reigning men's U.S. Open champion?\u001b[0m\n",
|
||||
"Intermediate answer: \u001b[36;1m\u001b[1;3mCarlos Alcaraz\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mFollow up: Where is Carlos Alcaraz from?\u001b[0m\n",
|
||||
"Intermediate answer: \u001b[36;1m\u001b[1;3mEl Palmar, Spain\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mSo the final answer is: El Palmar, Spain\u001b[0m\n",
|
||||
"\u001b[1m> Finished SelfAskWithSearchAgent chain.\u001b[0m\n"
|
||||
"\u001b[1m> Finished AgentWithTools chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -58,7 +57,6 @@
|
||||
"]\n",
|
||||
"\n",
|
||||
"self_ask_with_search = initialize_agent(tools, llm, agent=\"self-ask-with-search\", verbose=True)\n",
|
||||
"\n",
|
||||
"self_ask_with_search.run(\"What is the hometown of the reigning men's U.S. Open champion?\")"
|
||||
]
|
||||
},
|
||||
@@ -87,7 +85,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -6,14 +6,11 @@ A chain is made up of links, which can be either primitives or other chains.
|
||||
|
||||
The following primitives exist as options to use for links:
|
||||
|
||||
#. `LLM: <../modules/llms.rst>`_ A language model takes text as input and outputs text.
|
||||
#. `PromptTemplate: <../modules/prompt.rst>`_ A prompt template takes arbitrary string inputs and returns a final formatted string.
|
||||
#. `TextSplitter: <../modules/text_splitter.rst>`_ A text splitter takes a longer document and splits it into smaller chunks.
|
||||
#. `Python REPL: <../modules/python.rst>`_ A Python REPL takes a string representing a Python command to run, runs that command, and then returns anything that was printed during that run.
|
||||
#. `SQL Database: <../modules/sql_database.rst>`_ A SQL database takes a string representing a SQL command as input and executes that command against the database. If any rows are returned, then those are cast to a string and returned.
|
||||
#. `Search: <../modules/serpapi.rst>`_ A search object takes a string as input and executes that against a search object, returning any results.
|
||||
#. `Docstore: <../modules/docstore.rst>`_ A docstore object can be used to lookup a document in a database by exact match.
|
||||
#. `Vectorstore: <../modules/vectorstore.rst>`_ A vectorstore object uses embeddings stored in a vector database to take in an input string and return documents similar to that string.
|
||||
#. `LLM: <../reference/modules/llms.rst>`_ A language model takes text as input and outputs text.
|
||||
#. `PromptTemplate: <../reference/modules/prompt.rst>`_ A prompt template takes arbitrary string inputs and returns a final formatted string.
|
||||
#. `Python REPL: <../reference/modules/python.rst>`_ A Python REPL takes a string representing a Python command to run, runs that command, and then returns anything that was printed during that run.
|
||||
#. `SQL Database: <../reference/modules/sql_database.rst>`_ A SQL database takes a string representing a SQL command as input and executes that command against the database. If any rows are returned, then those are cast to a string and returned.
|
||||
#. `Search: <../reference/modules/serpapi.rst>`_ A search object takes a string as input and executes that against a search object, returning any results.
|
||||
|
||||
With these primitives in mind, the following chains exist:
|
||||
|
||||
@@ -36,32 +33,12 @@ With these primitives in mind, the following chains exist:
|
||||
- `Paper <https://arxiv.org/abs/2211.10435>`_
|
||||
- `Example Notebook <chains/pal.ipynb>`_
|
||||
|
||||
**Recursive Summarization**
|
||||
|
||||
- **Links Used**: TextSplitter, LLMChain
|
||||
- **Notes**: This chain splits a document into chunks, runs a first LLMChain over each chunk to summarize it, and then runs a second LLMChain over those results to get a summary of the summaries.
|
||||
- `Example Notebook <chains/map_reduce.ipynb>`_
|
||||
|
||||
**SQLDatabase Chain**
|
||||
|
||||
- **Links Used**: SQLDatabase, LLMChain
|
||||
- **Notes**: This chain takes user input (a question), uses a first LLM chain to construct a SQL query to run against the SQL database, and then uses another LLMChain to take the results of that query and use it to answer the original question.
|
||||
- `Example Notebook <chains/sqlite.ipynb>`_
|
||||
|
||||
|
||||
**Vector Database Question-Answering**
|
||||
|
||||
- **Links Used**: Vectorstore, LLMChain
|
||||
- **Notes**: This chain takes user input (a question), uses the Vectorstore and semantic search to find relevant documents, and then passes the documents plus the original question to another LLM to generate a final answer.
|
||||
- `Example Notebook <chains/vector_db_qa.ipynb>`_
|
||||
|
||||
**Question-Answering With Sources**
|
||||
|
||||
- **Links Used**: LLMChain
|
||||
- **Notes**: This chain takes a question and multiple documents as input. It then runs a first LLMChain over all documents attempting to answer the provided question. It then runs a second LLMChain over the results of the first pass, combining the answers from documents into a single response that is returned.
|
||||
- `Example Notebook <chains/combine_documents.ipynb>`_
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
971
docs/examples/chains/chatgpt_clone.ipynb
Normal file
971
docs/examples/chains/chatgpt_clone.ipynb
Normal file
@@ -0,0 +1,971 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b253f4d5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ChatGPT Clone\n",
|
||||
"\n",
|
||||
"This chain replicates ChatGPT by combining (1) a specific prompt, and (2) the concept of memory.\n",
|
||||
"\n",
|
||||
"This notebook reproduces the example from https://www.engraved.blog/building-a-virtual-machine-inside/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"id": "a99acd89",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Human: I want you to act as a Linux terminal. I will type commands and you will reply with what the terminal should show. I want you to only reply wiht the terminal output inside one unique code block, and nothing else. Do not write explanations. Do not type commands unless I instruct you to do so. When I need to tell you something in English I will do so by putting text inside curly brackets {like this}. My first command is pwd.\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ pwd\n",
|
||||
"/\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import OpenAI, ConversationChain, LLMChain, PromptTemplate\n",
|
||||
"from langchain.chains.conversation.memory import ConversationalBufferWindowMemory\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"template = \"\"\"Assistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"{history}\n",
|
||||
"Human: {human_input}\n",
|
||||
"Assistant:\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate(\n",
|
||||
" input_variables=[\"history\", \"human_input\"], \n",
|
||||
" template=template\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chatgpt_chain = LLMChain(\n",
|
||||
" llm=OpenAI(temperature=0), \n",
|
||||
" prompt=prompt, \n",
|
||||
" verbose=True, \n",
|
||||
" memory=ConversationalBufferWindowMemory(k=2),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"output = chatgpt_chain.predict(human_input=\"I want you to act as a Linux terminal. I will type commands and you will reply with what the terminal should show. I want you to only reply wiht the terminal output inside one unique code block, and nothing else. Do not write explanations. Do not type commands unless I instruct you to do so. When I need to tell you something in English I will do so by putting text inside curly brackets {like this}. My first command is pwd.\")\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"id": "4ef711d6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Human: I want you to act as a Linux terminal. I will type commands and you will reply with what the terminal should show. I want you to only reply wiht the terminal output inside one unique code block, and nothing else. Do not write explanations. Do not type commands unless I instruct you to do so. When I need to tell you something in English I will do so by putting text inside curly brackets {like this}. My first command is pwd.\n",
|
||||
"AI: \n",
|
||||
"```\n",
|
||||
"$ pwd\n",
|
||||
"/\n",
|
||||
"```\n",
|
||||
"Human: ls ~\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ ls ~\n",
|
||||
"Desktop Documents Downloads Music Pictures Public Templates Videos\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = chatgpt_chain.predict(human_input=\"ls ~\")\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 40,
|
||||
"id": "a5d6dac2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Human: I want you to act as a Linux terminal. I will type commands and you will reply with what the terminal should show. I want you to only reply wiht the terminal output inside one unique code block, and nothing else. Do not write explanations. Do not type commands unless I instruct you to do so. When I need to tell you something in English I will do so by putting text inside curly brackets {like this}. My first command is pwd.\n",
|
||||
"AI: \n",
|
||||
"```\n",
|
||||
"$ pwd\n",
|
||||
"/\n",
|
||||
"```\n",
|
||||
"Human: ls ~\n",
|
||||
"AI: \n",
|
||||
"```\n",
|
||||
"$ ls ~\n",
|
||||
"Desktop Documents Downloads Music Pictures Public Templates Videos\n",
|
||||
"```\n",
|
||||
"Human: cd ~\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n",
|
||||
" \n",
|
||||
"```\n",
|
||||
"$ cd ~\n",
|
||||
"$ pwd\n",
|
||||
"/home/user\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = chatgpt_chain.predict(human_input=\"cd ~\")\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"id": "b9283077",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Human: ls ~\n",
|
||||
"AI: \n",
|
||||
"```\n",
|
||||
"$ ls ~\n",
|
||||
"Desktop Documents Downloads Music Pictures Public Templates Videos\n",
|
||||
"```\n",
|
||||
"Human: cd ~\n",
|
||||
"AI: \n",
|
||||
"```\n",
|
||||
"$ cd ~\n",
|
||||
"$ pwd\n",
|
||||
"/home/user\n",
|
||||
"```\n",
|
||||
"Human: {Please make a file jokes.txt inside and put some jokes inside}\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ touch jokes.txt\n",
|
||||
"$ echo \"Why did the chicken cross the road? To get to the other side!\" >> jokes.txt\n",
|
||||
"$ echo \"What did the fish say when it hit the wall? Dam!\" >> jokes.txt\n",
|
||||
"$ echo \"Why did the scarecrow win the Nobel Prize? Because he was outstanding in his field!\" >> jokes.txt\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = chatgpt_chain.predict(human_input=\"{Please make a file jokes.txt inside and put some jokes inside}\")\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"id": "570e785e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Human: cd ~\n",
|
||||
"AI: \n",
|
||||
"```\n",
|
||||
"$ cd ~\n",
|
||||
"$ pwd\n",
|
||||
"/home/user\n",
|
||||
"```\n",
|
||||
"Human: {Please make a file jokes.txt inside and put some jokes inside}\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ touch jokes.txt\n",
|
||||
"$ echo \"Why did the chicken cross the road? To get to the other side!\" >> jokes.txt\n",
|
||||
"$ echo \"What did the fish say when it hit the wall? Dam!\" >> jokes.txt\n",
|
||||
"$ echo \"Why did the scarecrow win the Nobel Prize? Because he was outstanding in his field!\" >> jokes.txt\n",
|
||||
"```\n",
|
||||
"Human: echo -e \"x=lambda y:y*5+3;print('Result:' + str(x(6)))\" > run.py && python3 run.py\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ echo -e \"x=lambda y:y*5+3;print('Result:' + str(x(6)))\" > run.py\n",
|
||||
"$ python3 run.py\n",
|
||||
"Result: 33\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = chatgpt_chain.predict(human_input=\"\"\"echo -e \"x=lambda y:y*5+3;print('Result:' + str(x(6)))\" > run.py && python3 run.py\"\"\")\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"id": "cd0a23d9",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Human: {Please make a file jokes.txt inside and put some jokes inside}\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ touch jokes.txt\n",
|
||||
"$ echo \"Why did the chicken cross the road? To get to the other side!\" >> jokes.txt\n",
|
||||
"$ echo \"What did the fish say when it hit the wall? Dam!\" >> jokes.txt\n",
|
||||
"$ echo \"Why did the scarecrow win the Nobel Prize? Because he was outstanding in his field!\" >> jokes.txt\n",
|
||||
"```\n",
|
||||
"Human: echo -e \"x=lambda y:y*5+3;print('Result:' + str(x(6)))\" > run.py && python3 run.py\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ echo -e \"x=lambda y:y*5+3;print('Result:' + str(x(6)))\" > run.py\n",
|
||||
"$ python3 run.py\n",
|
||||
"Result: 33\n",
|
||||
"```\n",
|
||||
"Human: echo -e \"print(list(filter(lambda x: all(x%d for d in range(2,x)),range(2,3**10)))[:10])\" > run.py && python3 run.py\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ echo -e \"print(list(filter(lambda x: all(x%d for d in range(2,x)),range(2,3**10)))[:10])\" > run.py\n",
|
||||
"$ python3 run.py\n",
|
||||
"[2, 3, 5, 7, 11, 13, 17, 19, 23, 29]\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = chatgpt_chain.predict(human_input=\"\"\"echo -e \"print(list(filter(lambda x: all(x%d for d in range(2,x)),range(2,3**10)))[:10])\" > run.py && python3 run.py\"\"\")\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"id": "90db6eb2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Human: echo -e \"x=lambda y:y*5+3;print('Result:' + str(x(6)))\" > run.py && python3 run.py\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ echo -e \"x=lambda y:y*5+3;print('Result:' + str(x(6)))\" > run.py\n",
|
||||
"$ python3 run.py\n",
|
||||
"Result: 33\n",
|
||||
"```\n",
|
||||
"Human: echo -e \"print(list(filter(lambda x: all(x%d for d in range(2,x)),range(2,3**10)))[:10])\" > run.py && python3 run.py\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ echo -e \"print(list(filter(lambda x: all(x%d for d in range(2,x)),range(2,3**10)))[:10])\" > run.py\n",
|
||||
"$ python3 run.py\n",
|
||||
"[2, 3, 5, 7, 11, 13, 17, 19, 23, 29]\n",
|
||||
"```\n",
|
||||
"Human: echo -e \"echo 'Hello from Docker\" > entrypoint.sh && echo -e \"FROM ubuntu:20.04\n",
|
||||
"COPY entrypoint.sh entrypoint.sh\n",
|
||||
"ENTRYPOINT [\"/bin/sh\",\"entrypoint.sh\"]\">Dockerfile && docker build . -t my_docker_image && docker run -t my_docker_image\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ echo -e \"echo 'Hello from Docker\" > entrypoint.sh\n",
|
||||
"$ echo -e \"FROM ubuntu:20.04\n",
|
||||
"COPY entrypoint.sh entrypoint.sh\n",
|
||||
"ENTRYPOINT [\"/bin/sh\",\"entrypoint.sh\"]\">Dockerfile\n",
|
||||
"$ docker build . -t my_docker_image\n",
|
||||
"$ docker run -t my_docker_image\n",
|
||||
"Hello from Docker\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docker_input = \"\"\"echo -e \"echo 'Hello from Docker\" > entrypoint.sh && echo -e \"FROM ubuntu:20.04\\nCOPY entrypoint.sh entrypoint.sh\\nENTRYPOINT [\\\"/bin/sh\\\",\\\"entrypoint.sh\\\"]\">Dockerfile && docker build . -t my_docker_image && docker run -t my_docker_image\"\"\"\n",
|
||||
"output = chatgpt_chain.predict(human_input=docker_input)\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"id": "c3806f89",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Human: echo -e \"print(list(filter(lambda x: all(x%d for d in range(2,x)),range(2,3**10)))[:10])\" > run.py && python3 run.py\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ echo -e \"print(list(filter(lambda x: all(x%d for d in range(2,x)),range(2,3**10)))[:10])\" > run.py\n",
|
||||
"$ python3 run.py\n",
|
||||
"[2, 3, 5, 7, 11, 13, 17, 19, 23, 29]\n",
|
||||
"```\n",
|
||||
"Human: echo -e \"echo 'Hello from Docker\" > entrypoint.sh && echo -e \"FROM ubuntu:20.04\n",
|
||||
"COPY entrypoint.sh entrypoint.sh\n",
|
||||
"ENTRYPOINT [\"/bin/sh\",\"entrypoint.sh\"]\">Dockerfile && docker build . -t my_docker_image && docker run -t my_docker_image\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ echo -e \"echo 'Hello from Docker\" > entrypoint.sh\n",
|
||||
"$ echo -e \"FROM ubuntu:20.04\n",
|
||||
"COPY entrypoint.sh entrypoint.sh\n",
|
||||
"ENTRYPOINT [\"/bin/sh\",\"entrypoint.sh\"]\">Dockerfile\n",
|
||||
"$ docker build . -t my_docker_image\n",
|
||||
"$ docker run -t my_docker_image\n",
|
||||
"Hello from Docker\n",
|
||||
"```\n",
|
||||
"Human: nvidia-smi\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ nvidia-smi\n",
|
||||
"Sat May 15 21:45:02 2021 \n",
|
||||
"+-----------------------------------------------------------------------------+\n",
|
||||
"| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |\n",
|
||||
"|-------------------------------+----------------------+----------------------+\n",
|
||||
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
|
||||
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
|
||||
"|===============================+======================+======================|\n",
|
||||
"| 0 GeForce GTX 108... Off | 00000000:01:00.0 Off | N/A |\n",
|
||||
"| N/A 45C P0 N/A / N/A | 511MiB / 10206MiB | 0% Default |\n",
|
||||
"+-------------------------------+----------------------+----------------------+\n",
|
||||
" \n",
|
||||
"+-----------------------------------------------------------------------------+\n",
|
||||
"| Processes: GPU Memory |\n",
|
||||
"| GPU PID Type Process name Usage |\n",
|
||||
"|=============================================================================|\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = chatgpt_chain.predict(human_input=\"nvidia-smi\")\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"id": "f508f597",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Human: echo -e \"echo 'Hello from Docker\" > entrypoint.sh && echo -e \"FROM ubuntu:20.04\n",
|
||||
"COPY entrypoint.sh entrypoint.sh\n",
|
||||
"ENTRYPOINT [\"/bin/sh\",\"entrypoint.sh\"]\">Dockerfile && docker build . -t my_docker_image && docker run -t my_docker_image\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ echo -e \"echo 'Hello from Docker\" > entrypoint.sh\n",
|
||||
"$ echo -e \"FROM ubuntu:20.04\n",
|
||||
"COPY entrypoint.sh entrypoint.sh\n",
|
||||
"ENTRYPOINT [\"/bin/sh\",\"entrypoint.sh\"]\">Dockerfile\n",
|
||||
"$ docker build . -t my_docker_image\n",
|
||||
"$ docker run -t my_docker_image\n",
|
||||
"Hello from Docker\n",
|
||||
"```\n",
|
||||
"Human: nvidia-smi\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ nvidia-smi\n",
|
||||
"Sat May 15 21:45:02 2021 \n",
|
||||
"+-----------------------------------------------------------------------------+\n",
|
||||
"| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |\n",
|
||||
"|-------------------------------+----------------------+----------------------+\n",
|
||||
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
|
||||
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
|
||||
"|===============================+======================+======================|\n",
|
||||
"| 0 GeForce GTX 108... Off | 00000000:01:00.0 Off | N/A |\n",
|
||||
"| N/A 45C P0 N/A / N/A | 511MiB / 10206MiB | 0% Default |\n",
|
||||
"+-------------------------------+----------------------+----------------------+\n",
|
||||
" \n",
|
||||
"+-----------------------------------------------------------------------------+\n",
|
||||
"| Processes: GPU Memory |\n",
|
||||
"| GPU PID Type Process name Usage |\n",
|
||||
"|=============================================================================|\n",
|
||||
"\n",
|
||||
"Human: ping bbc.com\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ ping bbc.com\n",
|
||||
"PING bbc.com (151.101.65.81): 56 data bytes\n",
|
||||
"64 bytes from 151.101.65.81: icmp_seq=0 ttl=53 time=14.945 ms\n",
|
||||
"64 bytes from 151.101.65.81: icmp_seq=1 ttl=53 time=14.945 ms\n",
|
||||
"64 bytes from 151.101.65.81: icmp_seq=2 ttl=53 time=14.945 ms\n",
|
||||
"\n",
|
||||
"--- bbc.com ping statistics ---\n",
|
||||
"3 packets transmitted, 3 packets received, 0.0% packet loss\n",
|
||||
"round-trip min/avg/max/stddev = 14.945/14.945/14.945/0.000 ms\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = chatgpt_chain.predict(human_input=\"ping bbc.com\")\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"id": "cbd607f4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Human: nvidia-smi\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ nvidia-smi\n",
|
||||
"Sat May 15 21:45:02 2021 \n",
|
||||
"+-----------------------------------------------------------------------------+\n",
|
||||
"| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |\n",
|
||||
"|-------------------------------+----------------------+----------------------+\n",
|
||||
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
|
||||
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
|
||||
"|===============================+======================+======================|\n",
|
||||
"| 0 GeForce GTX 108... Off | 00000000:01:00.0 Off | N/A |\n",
|
||||
"| N/A 45C P0 N/A / N/A | 511MiB / 10206MiB | 0% Default |\n",
|
||||
"+-------------------------------+----------------------+----------------------+\n",
|
||||
" \n",
|
||||
"+-----------------------------------------------------------------------------+\n",
|
||||
"| Processes: GPU Memory |\n",
|
||||
"| GPU PID Type Process name Usage |\n",
|
||||
"|=============================================================================|\n",
|
||||
"\n",
|
||||
"Human: ping bbc.com\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ ping bbc.com\n",
|
||||
"PING bbc.com (151.101.65.81): 56 data bytes\n",
|
||||
"64 bytes from 151.101.65.81: icmp_seq=0 ttl=53 time=14.945 ms\n",
|
||||
"64 bytes from 151.101.65.81: icmp_seq=1 ttl=53 time=14.945 ms\n",
|
||||
"64 bytes from 151.101.65.81: icmp_seq=2 ttl=53 time=14.945 ms\n",
|
||||
"\n",
|
||||
"--- bbc.com ping statistics ---\n",
|
||||
"3 packets transmitted, 3 packets received, 0.0% packet loss\n",
|
||||
"round-trip min/avg/max/stddev = 14.945/14.945/14.945/0.000 ms\n",
|
||||
"```\n",
|
||||
"Human: curl -fsSL \"https://api.github.com/repos/pytorch/pytorch/releases/latest\" | jq -r '.tag_name' | sed 's/[^0-9\\.\\-]*//g'\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ curl -fsSL \"https://api.github.com/repos/pytorch/pytorch/releases/latest\" | jq -r '.tag_name' | sed 's/[^0-9\\.\\-]*//g'\n",
|
||||
"1.8.1\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = chatgpt_chain.predict(human_input=\"\"\"curl -fsSL \"https://api.github.com/repos/pytorch/pytorch/releases/latest\" | jq -r '.tag_name' | sed 's/[^0-9\\.\\-]*//g'\"\"\")\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"id": "d33e0e28",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Human: ping bbc.com\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ ping bbc.com\n",
|
||||
"PING bbc.com (151.101.65.81): 56 data bytes\n",
|
||||
"64 bytes from 151.101.65.81: icmp_seq=0 ttl=53 time=14.945 ms\n",
|
||||
"64 bytes from 151.101.65.81: icmp_seq=1 ttl=53 time=14.945 ms\n",
|
||||
"64 bytes from 151.101.65.81: icmp_seq=2 ttl=53 time=14.945 ms\n",
|
||||
"\n",
|
||||
"--- bbc.com ping statistics ---\n",
|
||||
"3 packets transmitted, 3 packets received, 0.0% packet loss\n",
|
||||
"round-trip min/avg/max/stddev = 14.945/14.945/14.945/0.000 ms\n",
|
||||
"```\n",
|
||||
"Human: curl -fsSL \"https://api.github.com/repos/pytorch/pytorch/releases/latest\" | jq -r '.tag_name' | sed 's/[^0-9\\.\\-]*//g'\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ curl -fsSL \"https://api.github.com/repos/pytorch/pytorch/releases/latest\" | jq -r '.tag_name' | sed 's/[^0-9\\.\\-]*//g'\n",
|
||||
"1.8.1\n",
|
||||
"```\n",
|
||||
"Human: lynx https://www.deepmind.com/careers\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ lynx https://www.deepmind.com/careers\n",
|
||||
"DeepMind Careers\n",
|
||||
"\n",
|
||||
"Welcome to DeepMind Careers. We are a world-leading artificial intelligence research and development company, and we are looking for talented people to join our team.\n",
|
||||
"\n",
|
||||
"We offer a range of exciting opportunities in research, engineering, product, and operations. Our mission is to solve intelligence and make it useful, and we are looking for people who share our passion for pushing the boundaries of AI.\n",
|
||||
"\n",
|
||||
"Explore our current openings and apply today. We look forward to hearing from you.\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = chatgpt_chain.predict(human_input=\"lynx https://www.deepmind.com/careers\")\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"id": "57c2f113",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Human: curl -fsSL \"https://api.github.com/repos/pytorch/pytorch/releases/latest\" | jq -r '.tag_name' | sed 's/[^0-9\\.\\-]*//g'\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ curl -fsSL \"https://api.github.com/repos/pytorch/pytorch/releases/latest\" | jq -r '.tag_name' | sed 's/[^0-9\\.\\-]*//g'\n",
|
||||
"1.8.1\n",
|
||||
"```\n",
|
||||
"Human: lynx https://www.deepmind.com/careers\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ lynx https://www.deepmind.com/careers\n",
|
||||
"DeepMind Careers\n",
|
||||
"\n",
|
||||
"Welcome to DeepMind Careers. We are a world-leading artificial intelligence research and development company, and we are looking for talented people to join our team.\n",
|
||||
"\n",
|
||||
"We offer a range of exciting opportunities in research, engineering, product, and operations. Our mission is to solve intelligence and make it useful, and we are looking for people who share our passion for pushing the boundaries of AI.\n",
|
||||
"\n",
|
||||
"Explore our current openings and apply today. We look forward to hearing from you.\n",
|
||||
"```\n",
|
||||
"Human: curl https://chat.openai.com/chat\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ curl https://chat.openai.com/chat\n",
|
||||
"<html>\n",
|
||||
" <head>\n",
|
||||
" <title>OpenAI Chat</title>\n",
|
||||
" </head>\n",
|
||||
" <body>\n",
|
||||
" <h1>Welcome to OpenAI Chat!</h1>\n",
|
||||
" <p>\n",
|
||||
" OpenAI Chat is a natural language processing platform that allows you to interact with OpenAI's AI models in a conversational way.\n",
|
||||
" </p>\n",
|
||||
" <p>\n",
|
||||
" To get started, type a message in the box below and press enter.\n",
|
||||
" </p>\n",
|
||||
" </body>\n",
|
||||
"</html>\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = chatgpt_chain.predict(human_input=\"curl https://chat.openai.com/chat\")\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"id": "babadc78",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Human: lynx https://www.deepmind.com/careers\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ lynx https://www.deepmind.com/careers\n",
|
||||
"DeepMind Careers\n",
|
||||
"\n",
|
||||
"Welcome to DeepMind Careers. We are a world-leading artificial intelligence research and development company, and we are looking for talented people to join our team.\n",
|
||||
"\n",
|
||||
"We offer a range of exciting opportunities in research, engineering, product, and operations. Our mission is to solve intelligence and make it useful, and we are looking for people who share our passion for pushing the boundaries of AI.\n",
|
||||
"\n",
|
||||
"Explore our current openings and apply today. We look forward to hearing from you.\n",
|
||||
"```\n",
|
||||
"Human: curl https://chat.openai.com/chat\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ curl https://chat.openai.com/chat\n",
|
||||
"<html>\n",
|
||||
" <head>\n",
|
||||
" <title>OpenAI Chat</title>\n",
|
||||
" </head>\n",
|
||||
" <body>\n",
|
||||
" <h1>Welcome to OpenAI Chat!</h1>\n",
|
||||
" <p>\n",
|
||||
" OpenAI Chat is a natural language processing platform that allows you to interact with OpenAI's AI models in a conversational way.\n",
|
||||
" </p>\n",
|
||||
" <p>\n",
|
||||
" To get started, type a message in the box below and press enter.\n",
|
||||
" </p>\n",
|
||||
" </body>\n",
|
||||
"</html>\n",
|
||||
"```\n",
|
||||
"Human: curl --header \"Content-Type:application/json\" --request POST --data '{\"message\": \"What is artificial intelligence?\"}' https://chat.openai.com/chat\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ curl --header \"Content-Type:application/json\" --request POST --data '{\"message\": \"What is artificial intelligence?\"}' https://chat.openai.com/chat\n",
|
||||
"\n",
|
||||
"{\n",
|
||||
" \"response\": \"Artificial intelligence (AI) is the simulation of human intelligence processes by machines, especially computer systems. These processes include learning (the acquisition of information and rules for using the information), reasoning (using the rules to reach approximate or definite conclusions) and self-correction. AI is used to develop computer systems that can think and act like humans.\"\n",
|
||||
"}\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = chatgpt_chain.predict(human_input=\"\"\"curl --header \"Content-Type:application/json\" --request POST --data '{\"message\": \"What is artificial intelligence?\"}' https://chat.openai.com/chat\"\"\")\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"id": "0954792a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mAssistant is a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.\n",
|
||||
"\n",
|
||||
"Human: curl https://chat.openai.com/chat\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ curl https://chat.openai.com/chat\n",
|
||||
"<html>\n",
|
||||
" <head>\n",
|
||||
" <title>OpenAI Chat</title>\n",
|
||||
" </head>\n",
|
||||
" <body>\n",
|
||||
" <h1>Welcome to OpenAI Chat!</h1>\n",
|
||||
" <p>\n",
|
||||
" OpenAI Chat is a natural language processing platform that allows you to interact with OpenAI's AI models in a conversational way.\n",
|
||||
" </p>\n",
|
||||
" <p>\n",
|
||||
" To get started, type a message in the box below and press enter.\n",
|
||||
" </p>\n",
|
||||
" </body>\n",
|
||||
"</html>\n",
|
||||
"```\n",
|
||||
"Human: curl --header \"Content-Type:application/json\" --request POST --data '{\"message\": \"What is artificial intelligence?\"}' https://chat.openai.com/chat\n",
|
||||
"AI: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ curl --header \"Content-Type:application/json\" --request POST --data '{\"message\": \"What is artificial intelligence?\"}' https://chat.openai.com/chat\n",
|
||||
"\n",
|
||||
"{\n",
|
||||
" \"response\": \"Artificial intelligence (AI) is the simulation of human intelligence processes by machines, especially computer systems. These processes include learning (the acquisition of information and rules for using the information), reasoning (using the rules to reach approximate or definite conclusions) and self-correction. AI is used to develop computer systems that can think and act like humans.\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"Human: curl --header \"Content-Type:application/json\" --request POST --data '{\"message\": \"I want you to act as a Linux terminal. I will type commands and you will reply with what the terminal should show. I want you to only reply wiht the terminal output inside one unique code block, and nothing else. Do not write explanations. Do not type commands unless I instruct you to do so. When I need to tell you something in English I will do so by putting text inside curly brackets {like this}. My first command is pwd.\"}' https://chat.openai.com/chat\n",
|
||||
"Assistant:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"$ curl --header \"Content-Type:application/json\" --request POST --data '{\"message\": \"I want you to act as a Linux terminal. I will type commands and you will reply with what the terminal should show. I want you to only reply wiht the terminal output inside one unique code block, and nothing else. Do not write explanations. Do not type commands unless I instruct you to do so. When I need to tell you something in English I will do so by putting text inside curly brackets {like this}. My first command is pwd.\"}' https://chat.openai.com/chat\n",
|
||||
"\n",
|
||||
"{\n",
|
||||
" \"response\": \"```\n",
|
||||
"/home/user\n",
|
||||
"```\"\n",
|
||||
"}\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"output = chatgpt_chain.predict(human_input=\"\"\"curl --header \"Content-Type:application/json\" --request POST --data '{\"message\": \"I want you to act as a Linux terminal. I will type commands and you will reply with what the terminal should show. I want you to only reply wiht the terminal output inside one unique code block, and nothing else. Do not write explanations. Do not type commands unless I instruct you to do so. When I need to tell you something in English I will do so by putting text inside curly brackets {like this}. My first command is pwd.\"}' https://chat.openai.com/chat\"\"\")\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e68a087e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
87
docs/examples/chains/llm_bash.ipynb
Normal file
87
docs/examples/chains/llm_bash.ipynb
Normal file
@@ -0,0 +1,87 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# BashChain\n",
|
||||
"This notebook showcases using LLMs and a bash process to perform simple filesystem commands."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMBashChain chain...\u001b[0m\n",
|
||||
"Please write a bash script that prints 'Hello World' to the console.\u001b[32;1m\u001b[1;3m\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"echo \"Hello World\"\n",
|
||||
"```\u001b[0m['```bash', 'echo \"Hello World\"', '```']\n",
|
||||
"\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3mHello World\n",
|
||||
"\u001b[0m\n",
|
||||
"\u001b[1m> Finished LLMBashChain chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Hello World\\n'"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains import LLMBashChain\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"\n",
|
||||
"text = \"Please write a bash script that prints 'Hello World' to the console.\"\n",
|
||||
"\n",
|
||||
"bash_chain = LLMBashChain(llm=llm, verbose=True)\n",
|
||||
"\n",
|
||||
"bash_chain.run(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -13,6 +13,26 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "835e6978",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate, OpenAI, LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "06bcb078",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Single Input\n",
|
||||
"\n",
|
||||
"First, let's go over an example using a single input."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "51a54c4d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -22,29 +42,27 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mQuestion: What NFL team won the Super Bowl in the year Justin Beiber was born?\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' The year Justin Beiber was born was 1994. In 1994, the Dallas Cowboys won the Super Bowl.'"
|
||||
"' Justin Bieber was born in 1994, so the NFL team that won the Super Bowl in 1994 was the Dallas Cowboys.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate, OpenAI, LLMChain\n",
|
||||
"\n",
|
||||
"template = \"\"\"Question: {question}\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\"\"\"\n",
|
||||
@@ -53,13 +71,60 @@
|
||||
"\n",
|
||||
"question = \"What NFL team won the Super Bowl in the year Justin Beiber was born?\"\n",
|
||||
"\n",
|
||||
"llm_chain.run(question)"
|
||||
"llm_chain.predict(question=question)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "79c3ec4d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Multiple Inputs\n",
|
||||
"Now let's go over an example using multiple inputs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "03dd6918",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mWrite a sad poem about ducks.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"\\n\\nThe ducks swim in the pond,\\nTheir feathers so soft and warm,\\nBut they can't help but feel so forlorn.\\n\\nTheir quacks echo in the air,\\nBut no one is there to hear,\\nFor they have no one to share.\\n\\nThe ducks paddle around in circles,\\nTheir heads hung low in despair,\\nFor they have no one to care.\\n\\nThe ducks look up to the sky,\\nBut no one is there to see,\\nFor they have no one to be.\\n\\nThe ducks drift away in the night,\\nTheir hearts filled with sorrow and pain,\\nFor they have no one to gain.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"template = \"\"\"Write a {adjective} poem about {subject}.\"\"\"\n",
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"adjective\", \"subject\"])\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=OpenAI(temperature=0), verbose=True)\n",
|
||||
"\n",
|
||||
"llm_chain.predict(adjective=\"sad\", subject=\"ducks\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "03dd6918",
|
||||
"id": "8310cdaa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -81,7 +146,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
97
docs/examples/chains/llm_checker.ipynb
Normal file
97
docs/examples/chains/llm_checker.ipynb
Normal file
@@ -0,0 +1,97 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# LLMCheckerChain\n",
|
||||
"This notebook showcases how to use LLMCheckerChain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMCheckerChain chain...\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new SequentialChain chain...\u001b[0m\n",
|
||||
"\u001b[1mChain 0\u001b[0m:\n",
|
||||
"{'statement': '\\nThe largest mammal that lays eggs is the platypus.'}\n",
|
||||
"\n",
|
||||
"\u001b[1mChain 1\u001b[0m:\n",
|
||||
"{'assertions': '\\n• The largest mammal is the platypus.\\n• The platypus lays eggs.\\n• There is no larger mammal than the platypus that lays eggs.'}\n",
|
||||
"\n",
|
||||
"\u001b[1mChain 2\u001b[0m:\n",
|
||||
"{'checked_assertions': '\\n1. The largest mammal is the platypus. False. The blue whale is the largest mammal.\\n\\n2. The platypus lays eggs. True. The Platypus is one of only two mammals that lay eggs.\\n\\n3. There is no larger mammal than the platypus that lays eggs. False. The echidna is another mammal that lays eggs and is larger than the platypus.'}\n",
|
||||
"\n",
|
||||
"\u001b[1mChain 3\u001b[0m:\n",
|
||||
"{'revised_statement': ' The echidna is the type of mammal that lays the biggest eggs.'}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished SequentialChain chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMCheckerChain chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' The echidna is the type of mammal that lays the biggest eggs.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains import LLMCheckerChain\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0.7)\n",
|
||||
"\n",
|
||||
"text = \"What type of mammal lays the biggest eggs?\"\n",
|
||||
"\n",
|
||||
"checker_chain = LLMCheckerChain(llm=llm, verbose=True)\n",
|
||||
"\n",
|
||||
"checker_chain.run(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
123
docs/examples/chains/llm_requests.ipynb
Normal file
123
docs/examples/chains/llm_requests.ipynb
Normal file
@@ -0,0 +1,123 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dd7ec7af",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# LLMRequestsChain\n",
|
||||
"\n",
|
||||
"Using the requests library to get HTML results from a URL and then an LLM to parse the results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "dd8eae75",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.chains import LLMRequestsChain, LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "65bf324e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"template = \"\"\"Between >>> and <<< are the raw search result text from google.\n",
|
||||
"Extract the answer to the question '{query}' or say \"not found\" if the information is not contained.\n",
|
||||
"Use the format\n",
|
||||
"Extracted:<answer or \"not found\">\n",
|
||||
">>> {requests_result} <<<\n",
|
||||
"Extracted:\"\"\"\n",
|
||||
"\n",
|
||||
"PROMPT = PromptTemplate(\n",
|
||||
" input_variables=[\"query\", \"requests_result\"],\n",
|
||||
" template=template,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "f36ae0d8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = LLMRequestsChain(llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=PROMPT))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "b5d22d9d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"question = \"What are the Three (3) biggest countries, and their respective sizes?\"\n",
|
||||
"inputs = {\n",
|
||||
" \"query\": question,\n",
|
||||
" \"url\": \"https://www.google.com/search?q=\" + question.replace(\" \", \"+\")\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "2ea81168",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'query': 'What are the Three (3) biggest countries, and their respective sizes?',\n",
|
||||
" 'url': 'https://www.google.com/search?q=What+are+the+Three+(3)+biggest+countries,+and+their+respective+sizes?',\n",
|
||||
" 'output': ' Russia (17,098,242 sq km), Canada (9,984,670 sq km), China (9,706,961 sq km)'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain(inputs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "db8f2b6d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,93 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9a0131f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Map Reduce\n",
|
||||
"\n",
|
||||
"This notebook showcases an example of map-reduce chains: recursive summarization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e9db25f3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import OpenAI, PromptTemplate, LLMChain\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.chains.mapreduce import MapReduceChain\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"\n",
|
||||
"_prompt = \"\"\"Write a concise summary of the following:\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"{text}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"CONCISE SUMMARY:\"\"\"\n",
|
||||
"prompt = PromptTemplate(template=_prompt, input_variables=[\"text\"])\n",
|
||||
"\n",
|
||||
"text_splitter = CharacterTextSplitter()\n",
|
||||
"\n",
|
||||
"mp_chain = MapReduceChain.from_params(llm, prompt, text_splitter)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "99bbe19b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"\\n\\nThe President discusses the recent aggression by Russia, and the response by the United States and its allies. He announces new sanctions against Russia, and says that the free world is united in holding Putin accountable. The President also discusses the American Rescue Plan, the Bipartisan Infrastructure Law, and the Bipartisan Innovation Act. Finally, the President addresses the need for women's rights and equality for LGBTQ+ Americans.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with open('../state_of_the_union.txt') as f:\n",
|
||||
" state_of_the_union = f.read()\n",
|
||||
"mp_chain.run(state_of_the_union)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "baa6e808",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
435
docs/examples/chains/moderation.ipynb
Normal file
435
docs/examples/chains/moderation.ipynb
Normal file
@@ -0,0 +1,435 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b83e61ed",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Moderation\n",
|
||||
"This notebook walks through examples of how to use a moderation chain, and several common ways for doing so. Moderation chains are useful for detecting text that could be hateful, violent, etc. This can be useful to apply not only to user input, but also to the output of a Language Model. Some API providers, like OpenAI, [specifically prohibit](https://beta.openai.com/docs/usage-policies/use-case-policy) you, or your end users, from generating some types of harmful content. To comply with this (and to just generally prevent your application from being harmful) you may often want to append a moderation chain to any LLMChains, in order to make sure any output the LLM generates is not harmful.\n",
|
||||
"\n",
|
||||
"If the content passed into the moderation chain is harmful, there is no single best way to handle it; it probably depends on your application. Sometimes you may want to throw an error in the Chain (and have your application handle that). Other times, you may want to return something to the user explaining that the text was harmful. There could even be other ways to handle it! We will cover all these ways in this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook, we will show:\n",
|
||||
"\n",
|
||||
"1. How to run any piece of text through a moderation chain.\n",
|
||||
"2. How to append a Moderation chain to an LLMChain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "b7aa1ff2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.chains import OpenAIModerationChain, SequentialChain, LLMChain, SimpleSequentialChain\n",
|
||||
"from langchain.prompts import PromptTemplate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c26d5be6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## How to use the moderation chain\n",
|
||||
"\n",
|
||||
"Here's an example of using the moderation chain with default settings (will return a string explaining that the text was flagged)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "fd0fc85c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"moderation_chain = OpenAIModerationChain()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "3fa47dd7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'This is okay'"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"moderation_chain.run(\"This is okay\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "37bfad73",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Text was found that violates OpenAI's content policy.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"moderation_chain.run(\"I will kill you\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "196820ab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here's an example of using the moderation chain to throw an error."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "b29c1150",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"moderation_chain_error = OpenAIModerationChain(error=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "f9ab64d9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'This is okay'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"moderation_chain_error.run(\"This is okay\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "954f3da2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ValueError",
|
||||
"evalue": "Text was found that violates OpenAI's content policy.",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[8], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmoderation_chain_error\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mI will kill you\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/workplace/third_party/langchain/langchain/chains/base.py:114\u001b[0m, in \u001b[0;36mChain.run\u001b[0;34m(self, text)\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_keys) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`run` not supported when there is not exactly \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mone output key, got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_keys\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 113\u001b[0m )\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m{\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_keys\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtext\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_keys[\u001b[38;5;241m0\u001b[39m]]\n",
|
||||
"File \u001b[0;32m~/workplace/third_party/langchain/langchain/chains/base.py:87\u001b[0m, in \u001b[0;36mChain.__call__\u001b[0;34m(self, inputs, return_only_outputs)\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose:\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28mprint\u001b[39m(\n\u001b[1;32m 85\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[1m> Entering new \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m chain...\u001b[39m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[0m\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 86\u001b[0m )\n\u001b[0;32m---> 87\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose:\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[1m> Finished \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m chain.\u001b[39m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[0m\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
||||
"File \u001b[0;32m~/workplace/third_party/langchain/langchain/chains/moderation.py:79\u001b[0m, in \u001b[0;36mOpenAIModerationChain._call\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 77\u001b[0m text \u001b[38;5;241m=\u001b[39m inputs[\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minput_key]\n\u001b[1;32m 78\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclient\u001b[38;5;241m.\u001b[39mcreate(text)\n\u001b[0;32m---> 79\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_moderate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresults\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresults\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m {\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput_key: output}\n",
|
||||
"File \u001b[0;32m~/workplace/third_party/langchain/langchain/chains/moderation.py:71\u001b[0m, in \u001b[0;36mOpenAIModerationChain._moderate\u001b[0;34m(self, text, results)\u001b[0m\n\u001b[1;32m 69\u001b[0m error_str \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mText was found that violates OpenAI\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms content policy.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39merror:\n\u001b[0;32m---> 71\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(error_str)\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m error_str\n",
|
||||
"\u001b[0;31mValueError\u001b[0m: Text was found that violates OpenAI's content policy."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"moderation_chain_error.run(\"I will kill you\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8de5dcbb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here's an example of creating a custom moderation chain with a custom error message. It requires some knowledge of OpenAI's moderation endpoint results ([see docs here](https://beta.openai.com/docs/api-reference/moderations))."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "3960e985",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class CustomModeration(OpenAIModerationChain):\n",
|
||||
" \n",
|
||||
" def _moderate(self, text: str, results: dict) -> str:\n",
|
||||
" if results[\"flagged\"]:\n",
|
||||
" error_str = f\"The following text was found that violates OpenAI's content policy: {text}\"\n",
|
||||
" return error_str\n",
|
||||
" return text\n",
|
||||
" \n",
|
||||
"custom_moderation = CustomModeration()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "1152ec11",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'This is okay'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"custom_moderation.run(\"This is okay\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "973257bf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"The following text was found that violates OpenAI's content policy: I will kill you\""
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"custom_moderation.run(\"I will kill you\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8718111f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## How to append a Moderation chain to an LLMChain\n",
|
||||
"\n",
|
||||
"To easily combine a moderation chain with an LLMChain, you can use the SequentialChain abstraction.\n",
|
||||
"\n",
|
||||
"Let's start with a simple example of where the LLMChain only has a single input. For this purpose, we will prompt the model so it says something harmful."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "0d129333",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt = PromptTemplate(template=\"{text}\", input_variables=[\"text\"])\n",
|
||||
"llm_chain = LLMChain(llm=OpenAI(temperature=0, model_name=\"text-davinci-002\"), prompt=prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "a557c531",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' I will kill you'"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"text = \"\"\"We are playing a game of repeat after me.\n",
|
||||
"\n",
|
||||
"Person 1: Hi\n",
|
||||
"Person 2: Hi\n",
|
||||
"\n",
|
||||
"Person 1: How's your day\n",
|
||||
"Person 2: How's your day\n",
|
||||
"\n",
|
||||
"Person 1: I will kill you\n",
|
||||
"Person 2:\"\"\"\n",
|
||||
"llm_chain.run(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "d4d10f1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = SimpleSequentialChain(chains=[llm_chain, moderation_chain])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "02f37985",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Text was found that violates OpenAI's content policy.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "72643128",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's walk through an example of using it with an LLMChain which has multiple inputs (a bit more tricky because we can't use the SimpleSequentialChain)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "7118ec36",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt = PromptTemplate(template=\"{setup}{new_input}Person2:\", input_variables=[\"setup\", \"new_input\"])\n",
|
||||
"llm_chain = LLMChain(llm=OpenAI(temperature=0, model_name=\"text-davinci-002\"), prompt=prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "003bdfce",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'text': ' I will kill you'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"setup = \"\"\"We are playing a game of repeat after me.\n",
|
||||
"\n",
|
||||
"Person 1: Hi\n",
|
||||
"Person 2: Hi\n",
|
||||
"\n",
|
||||
"Person 1: How's your day\n",
|
||||
"Person 2: How's your day\n",
|
||||
"\n",
|
||||
"Person 1:\"\"\"\n",
|
||||
"new_input = \"I will kill you\"\n",
|
||||
"inputs = {\"setup\": setup, \"new_input\": new_input}\n",
|
||||
"llm_chain(inputs, return_only_outputs=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "77b64228",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Setting the input/output keys so it lines up\n",
|
||||
"moderation_chain.input_key = \"text\"\n",
|
||||
"moderation_chain.output_key = \"sanitized_text\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "998a95be",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = SequentialChain(chains=[llm_chain, moderation_chain], input_variables=[\"setup\", \"new_input\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "9c97a136",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'sanitized_text': \"Text was found that violates OpenAI's content policy.\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain(inputs, return_only_outputs=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ddc90e15",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -172,7 +172,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.7"
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
130
docs/examples/chains/transformation.ipynb
Normal file
130
docs/examples/chains/transformation.ipynb
Normal file
@@ -0,0 +1,130 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "872bb8b5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Transformation Chain\n",
|
||||
"\n",
|
||||
"This notebook showcases using a generic transformation chain.\n",
|
||||
"\n",
|
||||
"As an example, we will create a dummy transformation that takes in a super long text, filters the text to only the first 3 paragraphs, and then passes that into an LLMChain to summarize those."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "bbbb4330",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import TransformChain, LLMChain, SimpleSequentialChain\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.prompts import PromptTemplate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "8ae5937c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open('../state_of_the_union.txt') as f:\n",
|
||||
" state_of_the_union = f.read()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "98739592",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def transform_func(inputs: dict) -> dict:\n",
|
||||
" text = inputs[\"text\"]\n",
|
||||
" shortened_text = \"\\n\\n\".join(text.split(\"\\n\\n\")[:3])\n",
|
||||
" return {\"output_text\": shortened_text}\n",
|
||||
"\n",
|
||||
"transform_chain = TransformChain(input_variables=[\"text\"], output_variables=[\"output_text\"], transform=transform_func)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "e9397934",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"template = \"\"\"Summarize this text:\n",
|
||||
"\n",
|
||||
"{output_text}\n",
|
||||
"\n",
|
||||
"Summary:\"\"\"\n",
|
||||
"prompt = PromptTemplate(input_variables=[\"output_text\"], template=template)\n",
|
||||
"llm_chain = LLMChain(llm=OpenAI(), prompt=prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "06f51f17",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sequential_chain = SimpleSequentialChain(chains=[transform_chain, llm_chain])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "f7caa1ee",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' This speech addresses the American people and acknowledges the difficulties of last year due to COVID-19. It emphasizes the importance of coming together regardless of political affiliation and encourages a sense of unity as Americans.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"sequential_chain.run(state_of_the_union)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e3ca6409",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
32
docs/examples/data_augmented_generation.rst
Normal file
32
docs/examples/data_augmented_generation.rst
Normal file
@@ -0,0 +1,32 @@
|
||||
Data Augmented Generation
|
||||
=========================
|
||||
|
||||
The walkthroughs here are related to data augmented generation.
|
||||
They cover either how to work with the components of data augmented generation (documents, embeddings, and vectorstores), or are end-to-end examples for using these components.
|
||||
|
||||
**Components**
|
||||
|
||||
`Text Splitters <data_augmented_generation/textsplitter.ipynb>`_: A walkthrough of how to split large documents up into smaller, more manageable pieces of text.
|
||||
|
||||
`Embeddings & VectorStores <data_augmented_generation/embeddings.ipynb>`_: A walkthrough of the different embedding and vectorstore functionalities that LangChain supports.
|
||||
|
||||
|
||||
**Examples**
|
||||
|
||||
`Question Answering <data_augmented_generation/question_answering.ipynb>`_: A walkthrough of how to use LangChain for question answering over specific documents.
|
||||
|
||||
`Question Answering with Sources <data_augmented_generation/qa_with_sources.ipynb>`_: A walkthrough of how to use LangChain for question answering (with sources) over specific documents.
|
||||
|
||||
`Summarization <data_augmented_generation/summarize.ipynb>`_: A walkthrough of how to use LangChain for summarization over specific documents.
|
||||
|
||||
`Vector DB Question Answering <data_augmented_generation/vector_db_qa.ipynb>`_: A walkthrough of how to use LangChain for question answering over a vector database.
|
||||
|
||||
`Vector DB Question Answering with Sources <data_augmented_generation/vector_db_qa_with_sources.ipynb>`_: A walkthrough of how to use LangChain for question answering (with sources) over a vector database.
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
:hidden:
|
||||
|
||||
data_augmented_generation/*
|
||||
374
docs/examples/data_augmented_generation/embeddings.ipynb
Normal file
374
docs/examples/data_augmented_generation/embeddings.ipynb
Normal file
@@ -0,0 +1,374 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7ef4d402-6662-4a26-b612-35b542066487",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"# Embeddings & VectorStores\n",
|
||||
"\n",
|
||||
"This notebook show cases how to use embeddings to create a VectorStore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "965eecee",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "68481687",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open('../state_of_the_union.txt') as f:\n",
|
||||
" state_of_the_union = f.read()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_text(state_of_the_union)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "015f4ff5",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch = FAISS.from_texts(texts, embeddings)\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "67baf32e",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"In state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \n",
|
||||
"\n",
|
||||
"We cannot let this happen. \n",
|
||||
"\n",
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eea6e627",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Requires having ElasticSearch setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "4906b8a3",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch = ElasticVectorSearch.from_texts(texts, embeddings, elasticsearch_url=\"http://localhost:9200\")\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "95f9eee9",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \n",
|
||||
"\n",
|
||||
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n",
|
||||
"\n",
|
||||
"And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7f9cb9e7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Weaviate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "1037a85e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import weaviate\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"WEAVIATE_URL = \"\"\n",
|
||||
"client = weaviate.Client(\n",
|
||||
" url=WEAVIATE_URL,\n",
|
||||
" additional_headers={\n",
|
||||
" 'X-OpenAI-Api-Key': os.environ[\"OPENAI_API_KEY\"]\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "b9043766",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"client.schema.delete_all()\n",
|
||||
"client.schema.get()\n",
|
||||
"schema = {\n",
|
||||
" \"classes\": [\n",
|
||||
" {\n",
|
||||
" \"class\": \"Paragraph\",\n",
|
||||
" \"description\": \"A written paragraph\",\n",
|
||||
" \"vectorizer\": \"text2vec-openai\",\n",
|
||||
" \"moduleConfig\": {\n",
|
||||
" \"text2vec-openai\": {\n",
|
||||
" \"model\": \"babbage\",\n",
|
||||
" \"type\": \"text\"\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"properties\": [\n",
|
||||
" {\n",
|
||||
" \"dataType\": [\"text\"],\n",
|
||||
" \"description\": \"The content of the paragraph\",\n",
|
||||
" \"moduleConfig\": {\n",
|
||||
" \"text2vec-openai\": {\n",
|
||||
" \"skip\": False,\n",
|
||||
" \"vectorizePropertyName\": False\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"name\": \"content\",\n",
|
||||
" },\n",
|
||||
" ],\n",
|
||||
" },\n",
|
||||
" ]\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"client.schema.create(schema)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "ac20d99c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with client.batch as batch:\n",
|
||||
" for text in texts:\n",
|
||||
" batch.add_data_object({\"content\": text}, \"Paragraph\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "01645d61",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.vectorstores.weaviate import Weaviate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "bdd97d29",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vectorstore = Weaviate(client, \"Paragraph\", \"content\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "b70c0f98",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = vectorstore.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "07533e40",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"In state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \n",
|
||||
"\n",
|
||||
"We cannot let this happen. \n",
|
||||
"\n",
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "007f3102",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Pinecone"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "7f6047e5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pinecone \n",
|
||||
"\n",
|
||||
"# initialize pinecone\n",
|
||||
"pinecone.init(api_key=\"\", environment=\"us-west1-gcp\")\n",
|
||||
"\n",
|
||||
"index_name = \"langchain-demo\"\n",
|
||||
"\n",
|
||||
"docsearch = Pinecone.from_texts(texts, embeddings, index_name=index_name)\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "8e81f1f0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders. ', lookup_str='', metadata={}, lookup_index=0)"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e7d74bd2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
258
docs/examples/data_augmented_generation/qa_with_sources.ipynb
Normal file
258
docs/examples/data_augmented_generation/qa_with_sources.ipynb
Normal file
@@ -0,0 +1,258 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "74148cee",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Question Answering with Sources\n",
|
||||
"\n",
|
||||
"This notebook walks through how to use LangChain for question answering with sources over a list of documents. It covers three different chain types: `stuff`, `map_reduce`, and `refine`. For a more in depth explanation of what these chain types are, see [here](../../explanation/combine_docs.md)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ca2f0efc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prepare Data\n",
|
||||
"First we prepare the data. For this example we do similarity search over a vector database, but these documents could be fetched in any manner (the point of this notebook to highlight what to do AFTER you fetch the documents)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "78f28130",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.embeddings.cohere import CohereEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch\n",
|
||||
"from langchain.vectorstores.faiss import FAISS\n",
|
||||
"from langchain.docstore.document import Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "4da195a3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open('../state_of_the_union.txt') as f:\n",
|
||||
" state_of_the_union = f.read()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_text(state_of_the_union)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "5ec2b55b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch = FAISS.from_texts(texts, embeddings, metadatas=[{\"source\": i} for i in range(len(texts))])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "5286f58f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Justice Breyer\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "005a47e9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.qa_with_sources import load_qa_with_sources_chain\n",
|
||||
"from langchain.llms import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d82f899a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### The `stuff` Chain\n",
|
||||
"\n",
|
||||
"This sections shows results of using the `stuff` Chain to do question answering with sources."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "fc1a5ed6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type=\"stuff\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "e239964b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = [Document(page_content=t, metadata={\"source\": i}) for i, t in enumerate(texts[:3])]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "7d766417",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'output_text': ' The president did not mention Justice Breyer.\\nSOURCES: 0-pl, 1-pl, 2-pl'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did the president say about Justice Breyer\"\n",
|
||||
"chain({\"input_documents\": docs, \"question\": query}, return_only_outputs=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c5dbb304",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### The `map_reduce` Chain\n",
|
||||
"\n",
|
||||
"This sections shows results of using the `map_reduce` Chain to do question answering with sources."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "921db0a4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type=\"map_reduce\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "e417926a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.\n",
|
||||
"Token indices sequence length is longer than the specified maximum sequence length for this model (1546 > 1024). Running this sequence through the model will result in indexing errors\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'output_text': ' The president did not mention Justice Breyer.\\nSOURCES: 0, 1, 2'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did the president say about Justice Breyer\"\n",
|
||||
"chain({\"input_documents\": docs, \"question\": query}, return_only_outputs=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5bf0e1ab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### The `refine` Chain\n",
|
||||
"\n",
|
||||
"This sections shows results of using the `refine` Chain to do question answering with sources."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "904835c8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type=\"refine\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "f60875c6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'output_text': \"\\n\\nThe president did not mention Justice Breyer in his speech to the European Parliament, which focused on building a coalition of freedom-loving nations to confront Putin, unifying European allies, countering Russia's lies with truth, and enforcing powerful economic sanctions. Source: 2\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did the president say about Justice Breyer\"\n",
|
||||
"chain({\"input_documents\": docs, \"question\": query}, return_only_outputs=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "929620d0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
248
docs/examples/data_augmented_generation/question_answering.ipynb
Normal file
248
docs/examples/data_augmented_generation/question_answering.ipynb
Normal file
@@ -0,0 +1,248 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "05859721",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Question Answering\n",
|
||||
"\n",
|
||||
"This notebook walks through how to use LangChain for question answering over a list of documents. It covers three different types of chaings: `stuff`, `map_reduce`, and `refine`. For a more in depth explanation of what these chain types are, see [here](../../explanation/combine_docs.md)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "726f4996",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prepare Data\n",
|
||||
"First we prepare the data. For this example we do similarity search over a vector database, but these documents could be fetched in any manner (the point of this notebook to highlight what to do AFTER you fetch the documents)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "17fcbc0f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores.faiss import FAISS\n",
|
||||
"from langchain.docstore.document import Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "291f0117",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open('../state_of_the_union.txt') as f:\n",
|
||||
" state_of_the_union = f.read()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_text(state_of_the_union)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "fd9666a9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch = FAISS.from_texts(texts, embeddings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "d1eaf6e6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Justice Breyer\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a16e3453",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.question_answering import load_qa_chain\n",
|
||||
"from langchain.llms import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f78787a0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### The `stuff` Chain\n",
|
||||
"\n",
|
||||
"This sections shows results of using the `stuff` Chain to do question answering."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "180fd4c1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_qa_chain(OpenAI(temperature=0), chain_type=\"stuff\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "d145ae31",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = [Document(page_content=t) for t in texts[:3]]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "77fdf1aa",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'output_text': ' The president did not mention Justice Breyer.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did the president say about Justice Breyer\"\n",
|
||||
"chain({\"input_documents\": docs, \"question\": query}, return_only_outputs=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "91522e29",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### The `map_reduce` Chain\n",
|
||||
"\n",
|
||||
"This sections shows results of using the `map_reduce` Chain to do question answering."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "b0060f51",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_qa_chain(OpenAI(temperature=0), chain_type=\"map_reduce\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "fbdb9137",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'output_text': ' The president did not mention Justice Breyer.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did the president say about Justice Breyer\"\n",
|
||||
"chain({\"input_documents\": docs, \"question\": query}, return_only_outputs=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6ea50ad0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### The `refine` Chain\n",
|
||||
"\n",
|
||||
"This sections shows results of using the `refine` Chain to do question answering."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "fb167057",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_qa_chain(OpenAI(temperature=0), chain_type=\"refine\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "d8b5286e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'output_text': \"\\n\\nThe president did not mention Justice Breyer in his speech to the European Parliament about building a coalition of freedom-loving nations to confront Putin, unifying European allies, countering Russia's lies with truth, and enforcing powerful economic sanctions.\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did the president say about Justice Breyer\"\n",
|
||||
"chain({\"input_documents\": docs, \"question\": query}, return_only_outputs=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "49e9c6d7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
234
docs/examples/data_augmented_generation/summarize.ipynb
Normal file
234
docs/examples/data_augmented_generation/summarize.ipynb
Normal file
@@ -0,0 +1,234 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9a0131f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Summarization\n",
|
||||
"\n",
|
||||
"This notebook walks through how to use LangChain for summarization over a list of documents. It covers three different chain types: `stuff`, `map_reduce`, and `refine`. For a more in depth explanation of what these chain types are, see [here](../../explanation/combine_docs.md)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0b5660bf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prepare Data\n",
|
||||
"First we prepare the data. For this example we create multiple documents from one long one, but these documents could be fetched in any manner (the point of this notebook to highlight what to do AFTER you fetch the documents)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e9db25f3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import OpenAI, PromptTemplate, LLMChain\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.chains.mapreduce import MapReduceChain\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"text_splitter = CharacterTextSplitter()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "99bbe19b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open('../state_of_the_union.txt') as f:\n",
|
||||
" state_of_the_union = f.read()\n",
|
||||
"texts = text_splitter.split_text(state_of_the_union)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "baa6e808",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.docstore.document import Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "8dff4f43",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = [Document(page_content=t) for t in texts[:3]]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "27989fc4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.summarize import load_summarize_chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ea2d5c99",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### The `stuff` Chain\n",
|
||||
"\n",
|
||||
"This sections shows results of using the `stuff` Chain to do summarization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "f01f3196",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_summarize_chain(llm, chain_type=\"stuff\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "da4d9801",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' In his speech, President Biden addressed the ongoing conflict between Russia and Ukraine, and the need for the United States and its allies to stand with Ukraine. He also discussed the American Rescue Plan, the Bipartisan Infrastructure Law, and the Bipartisan Innovation Act, which will help to create jobs, modernize infrastructure, and level the playing field with China. He also emphasized the importance of buying American products to support American jobs.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9c868e86",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### The `map_reduce` Chain\n",
|
||||
"\n",
|
||||
"This section shows results of using the `map_reduce` Chain to do summarization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "ef28e1d4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_summarize_chain(llm, chain_type=\"map_reduce\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "f82c5f9f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" In response to Vladimir Putin's aggression in Ukraine, the US and its allies have taken action to hold him accountable, including economic sanctions, cutting off access to technology, and seizing the assets of Russian oligarchs. They are also providing military, economic, and humanitarian assistance to the Ukrainians, and releasing 60 million barrels of oil from reserves around the world. President Biden has passed several laws to provide economic relief to Americans and create jobs, and is making sure taxpayer dollars support American jobs and businesses.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f61350f9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### The `refine` Chain\n",
|
||||
"\n",
|
||||
"This section shows results of using the `refine` Chain to do summarization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "3bcbe31e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_summarize_chain(llm, chain_type=\"refine\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "c8cad866",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"\\nIn this speech, the speaker addresses the American people and their allies, discussing the recent aggression of Russia's Vladimir Putin in Ukraine. The speaker outlines the actions taken by the United States and its allies to hold Putin accountable, including economic sanctions, cutting off access to technology, and seizing the assets of Russian oligarchs. The speaker also announces the closing of American airspace to Russian flights, further isolating Russia and adding an additional squeeze on their economy. The Russian stock market has lost 40% of its value and trading remains suspended. Together with our allies, the United States is providing military, economic, and humanitarian assistance to Ukraine, and has mobilized forces to protect NATO countries. The speaker also announces the release of 60 million barrels of oil from reserves around the world, with the United States releasing 30 million barrels from its own Strategic Petroleum Reserve. The speaker emphasizes that the United States and its allies will defend every inch of NATO territory and that Putin will pay a high price for his aggression. The speaker also acknowledges the hardships faced by the American people due to the pandemic and the American Rescue Plan, which has provided immediate economic relief for tens of millions of Americans, helped put food on their table, keep a roof over their heads, and cut the cost of health insurance. The speaker\""
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.run(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0da92750",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
368
docs/examples/data_augmented_generation/textsplitter.ipynb
Normal file
368
docs/examples/data_augmented_generation/textsplitter.ipynb
Normal file
@@ -0,0 +1,368 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b118c9dc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Text Splitter\n",
|
||||
"\n",
|
||||
"When you want to deal with long pieces of text, it is necessary to split up that text into chunks.\n",
|
||||
"This notebook showcases several ways to do that.\n",
|
||||
"\n",
|
||||
"At a high level, text splitters work as follows:\n",
|
||||
"\n",
|
||||
"1. Split the text up into small, semantically meaningful chunks (often sentences).\n",
|
||||
"2. Start combining these small chunks into a larger chunk until you reach a certain size (as measured by some function).\n",
|
||||
"3. Once you reach that size, make that chunk its own piece of text and then start creating a new chunk of text with some overlap (to keep context between chunks)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "e82c4685",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter, NLTKTextSplitter, SpacyTextSplitter\n",
|
||||
"# This is a long document we can split up.\n",
|
||||
"with open('../state_of_the_union.txt') as f:\n",
|
||||
" state_of_the_union = f.read()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5c461b26",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Character Text Splitting\n",
|
||||
"\n",
|
||||
"Let's start with the most simple method: let's split based on characters (by default \"\\n\\n\") and measure chunk length by number of characters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "79ff6737",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text_splitter = CharacterTextSplitter( \n",
|
||||
" separator = \"\\n\\n\",\n",
|
||||
" chunk_size = 1000,\n",
|
||||
" chunk_overlap = 200,\n",
|
||||
" length_function = len,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "38547666",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. '"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"texts = text_splitter.split_text(state_of_the_union)\n",
|
||||
"texts[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "87a71115",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Document creation\n",
|
||||
"We can also use the text splitter to create \"Documents\" directly. Documents are a way of bundling pieces of text with associated metadata so that chains can interact with them. We can also create documents with empty metadata though!\n",
|
||||
"\n",
|
||||
"In the below example, we pass two pieces of text to get split up (we pass two just to show off the interface of splitting multiple pieces of text)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "4cd16222",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. ', lookup_str='', metadata={}, lookup_index=0)"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"documents = text_splitter.create_documents([state_of_the_union, state_of_the_union])\n",
|
||||
"documents[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2cede1b1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here's an example of passing metadata along with the documents; notice that it is split along with the documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "4a47515a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. ', lookup_str='', metadata={'document': 1}, lookup_index=0)"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"metadatas = [{\"document\": 1}, {\"document\": 2}]\n",
|
||||
"documents = text_splitter.create_documents([state_of_the_union, state_of_the_union], metadatas=metadatas)\n",
|
||||
"documents[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "13dc0983",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## HuggingFace Length Function\n",
|
||||
"Most LLMs are constrained by the number of tokens that you can pass in, which is not the same as the number of characters. In order to get a more accurate estimate, we can use HuggingFace tokenizers to count the text length."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "a8ce51d5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from transformers import GPT2TokenizerFast\n",
|
||||
"\n",
|
||||
"tokenizer = GPT2TokenizerFast.from_pretrained(\"gpt2\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "ca5e72c0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(tokenizer, chunk_size=100, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_text(state_of_the_union)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "37cdfbeb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n",
|
||||
"\n",
|
||||
"Last year COVID-19 kept us apart. This year we are finally together again. \n",
|
||||
"\n",
|
||||
"Tonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n",
|
||||
"\n",
|
||||
"With a duty to one another to the American people to the Constitution. \n",
|
||||
"\n",
|
||||
"And with an unwavering resolve that freedom will always triumph over tyranny. \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(texts[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7683b36a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## tiktoken (OpenAI) Length Function\n",
|
||||
"You can also use tiktoken, an open source tokenizer package from OpenAI, to estimate tokens used. It will probably be more accurate for their models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "825f7c0a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=100, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_text(state_of_the_union)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "ae35d165",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n",
|
||||
"\n",
|
||||
"Last year COVID-19 kept us apart. This year we are finally together again. \n",
|
||||
"\n",
|
||||
"Tonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n",
|
||||
"\n",
|
||||
"With a duty to one another to the American people to the Constitution. \n",
|
||||
"\n",
|
||||
"And with an unwavering resolve that freedom will always triumph over tyranny. \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(texts[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ea2973ac",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## NLTK Text Splitter\n",
|
||||
"Rather than just splitting on \"\\n\\n\", we can use NLTK to split based on tokenizers."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "20fa9c23",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text_splitter = NLTKTextSplitter(chunk_size=1000)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "5ea10835",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Madam Speaker, Madam Vice President, our First Lady and Second Gentleman.\\n\\nMembers of Congress and the Cabinet.\\n\\nJustices of the Supreme Court.\\n\\nMy fellow Americans.\\n\\nLast year COVID-19 kept us apart.\\n\\nThis year we are finally together again.\\n\\nTonight, we meet as Democrats Republicans and Independents.\\n\\nBut most importantly as Americans.\\n\\nWith a duty to one another to the American people to the Constitution.\\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny.\\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways.\\n\\nBut he badly miscalculated.\\n\\nHe thought he could roll into Ukraine and the world would roll over.\\n\\nInstead he met a wall of strength he never imagined.\\n\\nHe met the Ukrainian people.\\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.\\n\\nGroups of citizens blocking tanks with their bodies.\\n\\nEveryone from students to retirees teachers turned soldiers defending their homeland.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"texts = text_splitter.split_text(state_of_the_union)\n",
|
||||
"texts[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dab86b60",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Spacy Text Splitter\n",
|
||||
"Another alternative to NLTK is to use Spacy."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "f9cc9dfc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text_splitter = SpacyTextSplitter(chunk_size=1000)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "cef2b29e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Madam Speaker, Madam Vice President, our First Lady and Second Gentleman.\\n\\nMembers of Congress and the Cabinet.\\n\\nJustices of the Supreme Court.\\n\\nMy fellow Americans. \\n\\n\\n\\nLast year COVID-19 kept us apart.\\n\\nThis year we are finally together again.\\n\\n\\n\\n\\n\\nTonight, we meet as Democrats Republicans and Independents.\\n\\nBut most importantly as Americans.\\n\\n\\n\\n\\n\\nWith a duty to one another to the American people to the Constitution. \\n\\n\\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny.\\n\\n\\n\\n\\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways.\\n\\nBut he badly miscalculated.\\n\\n\\n\\n\\n\\nHe thought he could roll into Ukraine and the world would roll over.\\n\\nInstead he met a wall of strength he never imagined.\\n\\n\\n\\n\\n\\nHe met the Ukrainian people.\\n\\n\\n\\n\\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.\\n\\n\\n\\n\\n\\nGroups of citizens blocking tanks with their bodies.\\n\\nEveryone from students to retirees teachers turned soldiers defending their homeland.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"texts = text_splitter.split_text(state_of_the_union)\n",
|
||||
"texts[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a1a118b1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -41,27 +41,27 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "3018f865",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"qa = VectorDBQA(llm=OpenAI(), vectorstore=docsearch)"
|
||||
"qa = VectorDBQA.from_llm(llm=OpenAI(), vectorstore=docsearch)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"id": "032a47f8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' The President said that Ketanji Brown Jackson is a consensus builder and has received a broad range of support since she was nominated.'"
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator and federal public defender, and from a family of public school educators and police officers. He also said that she has received a broad range of support since she was nominated, from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -74,7 +74,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f0f20b92",
|
||||
"id": "f056f6fd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -96,7 +96,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "efc5be67",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Question-Answering with Sources\n",
|
||||
"# VectorDB Question Answering with Sources\n",
|
||||
"\n",
|
||||
"This notebook goes over how to do question-answering with sources. It does this in a few different ways - first showing how you can use the `QAWithSourcesChain` to take in documents and use those, and next showing the `VectorDBQAWithSourcesChain`, which also does the lookup of the documents from a vector database. "
|
||||
]
|
||||
@@ -61,72 +61,6 @@
|
||||
" d.metadata = {'source': f\"{i}-pl\"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "aa1c1b60",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### QAWithSourcesChain\n",
|
||||
"This shows how to use the `QAWithSourcesChain`, which takes in document objects and uses them directly."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "61bce191",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Justice Breyer\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "57ddf8c7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import QAWithSourcesChain\n",
|
||||
"from langchain.llms import OpenAI, Cohere\n",
|
||||
"from langchain.docstore.document import Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "f908a92a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = QAWithSourcesChain.from_llm(OpenAI(temperature=0))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "a505ac89",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'answer': ' The president thanked Justice Breyer for his service.',\n",
|
||||
" 'sources': '27-pl'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain({\"docs\": docs, \"question\": query}, return_only_outputs=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e6fc81de",
|
||||
@@ -159,10 +93,22 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 11,
|
||||
"id": "8ba36fa7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'answer': ' The president thanked Justice Breyer for his service.',\n",
|
||||
" 'sources': '27-pl'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain({\"question\": \"What did the president say about Justice Breyer\"}, return_only_outputs=True)"
|
||||
]
|
||||
@@ -192,7 +138,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.7"
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
@@ -1,10 +0,0 @@
|
||||
Integrations
|
||||
============
|
||||
|
||||
The examples here all highlight a specific type of integration.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
integrations/*
|
||||
@@ -1,177 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7ef4d402-6662-4a26-b612-35b542066487",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"# Embeddings & VectorStores\n",
|
||||
"\n",
|
||||
"This notebook show cases how to use embeddings to create a VectorStore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "965eecee",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch\n",
|
||||
"from langchain.vectorstores.faiss import FAISS"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "68481687",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open('../state_of_the_union.txt') as f:\n",
|
||||
" state_of_the_union = f.read()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_text(state_of_the_union)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "015f4ff5",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch = FAISS.from_texts(texts, embeddings)\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "67baf32e",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \n",
|
||||
"\n",
|
||||
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n",
|
||||
"\n",
|
||||
"And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eea6e627",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Requires having ElasticSearch setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "4906b8a3",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docsearch = ElasticVectorSearch.from_texts(texts, embeddings, elasticsearch_url=\"http://localhost:9200\")\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "95f9eee9",
|
||||
"metadata": {
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \n",
|
||||
"\n",
|
||||
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n",
|
||||
"\n",
|
||||
"And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,180 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b118c9dc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# HuggingFace Tokenizers\n",
|
||||
"\n",
|
||||
"This notebook showcases how to use HuggingFace tokenizers to split text."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e82c4685",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a8ce51d5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from transformers import GPT2TokenizerFast\n",
|
||||
"\n",
|
||||
"tokenizer = GPT2TokenizerFast.from_pretrained(\"gpt2\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "ca5e72c0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open('../state_of_the_union.txt') as f:\n",
|
||||
" state_of_the_union = f.read()\n",
|
||||
"text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(tokenizer, chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_text(state_of_the_union)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "37cdfbeb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n",
|
||||
"\n",
|
||||
"Last year COVID-19 kept us apart. This year we are finally together again. \n",
|
||||
"\n",
|
||||
"Tonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n",
|
||||
"\n",
|
||||
"With a duty to one another to the American people to the Constitution. \n",
|
||||
"\n",
|
||||
"And with an unwavering resolve that freedom will always triumph over tyranny. \n",
|
||||
"\n",
|
||||
"Six days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \n",
|
||||
"\n",
|
||||
"He thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \n",
|
||||
"\n",
|
||||
"He met the Ukrainian people. \n",
|
||||
"\n",
|
||||
"From President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \n",
|
||||
"\n",
|
||||
"Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \n",
|
||||
"\n",
|
||||
"In this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \n",
|
||||
"\n",
|
||||
"Let each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. \n",
|
||||
"\n",
|
||||
"Please rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \n",
|
||||
"\n",
|
||||
"Throughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \n",
|
||||
"\n",
|
||||
"They keep moving. \n",
|
||||
"\n",
|
||||
"And the costs and the threats to America and the world keep rising. \n",
|
||||
"\n",
|
||||
"That’s why the NATO Alliance was created to secure peace and stability in Europe after World War 2. \n",
|
||||
"\n",
|
||||
"The United States is a member along with 29 other nations. \n",
|
||||
"\n",
|
||||
"It matters. American diplomacy matters. American resolve matters. \n",
|
||||
"\n",
|
||||
"Putin’s latest attack on Ukraine was premeditated and unprovoked. \n",
|
||||
"\n",
|
||||
"He rejected repeated efforts at diplomacy. \n",
|
||||
"\n",
|
||||
"He thought the West and NATO wouldn’t respond. And he thought he could divide us at home. Putin was wrong. We were ready. Here is what we did. \n",
|
||||
"\n",
|
||||
"We prepared extensively and carefully. \n",
|
||||
"\n",
|
||||
"We spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin. \n",
|
||||
"\n",
|
||||
"I spent countless hours unifying our European allies. We shared with the world in advance what we knew Putin was planning and precisely how he would try to falsely justify his aggression. \n",
|
||||
"\n",
|
||||
"We countered Russia’s lies with truth. \n",
|
||||
"\n",
|
||||
"And now that he has acted the free world is holding him accountable. \n",
|
||||
"\n",
|
||||
"Along with twenty-seven members of the European Union including France, Germany, Italy, as well as countries like the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and many others, even Switzerland. \n",
|
||||
"\n",
|
||||
"We are inflicting pain on Russia and supporting the people of Ukraine. Putin is now isolated from the world more than ever. \n",
|
||||
"\n",
|
||||
"Together with our allies –we are right now enforcing powerful economic sanctions. \n",
|
||||
"\n",
|
||||
"We are cutting off Russia’s largest banks from the international financial system. \n",
|
||||
"\n",
|
||||
"Preventing Russia’s central bank from defending the Russian Ruble making Putin’s $630 Billion “war fund” worthless. \n",
|
||||
"\n",
|
||||
"We are choking off Russia’s access to technology that will sap its economic strength and weaken its military for years to come. \n",
|
||||
"\n",
|
||||
"Tonight I say to the Russian oligarchs and corrupt leaders who have bilked billions of dollars off this violent regime no more. \n",
|
||||
"\n",
|
||||
"The U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. \n",
|
||||
"\n",
|
||||
"We are joining with our European allies to find and seize your yachts your luxury apartments your private jets. We are coming for your ill-begotten gains. \n",
|
||||
"\n",
|
||||
"And tonight I am announcing that we will join our allies in closing off American air space to all Russian flights – further isolating Russia – and adding an additional squeeze –on their economy. The Ruble has lost 30% of its value. \n",
|
||||
"\n",
|
||||
"The Russian stock market has lost 40% of its value and trading remains suspended. Russia’s economy is reeling and Putin alone is to blame. \n",
|
||||
"\n",
|
||||
"Together with our allies we are providing support to the Ukrainians in their fight for freedom. Military assistance. Economic assistance. Humanitarian assistance. \n",
|
||||
"\n",
|
||||
"We are giving more than $1 Billion in direct assistance to Ukraine. \n",
|
||||
"\n",
|
||||
"And we will continue to aid the Ukrainian people as they defend their country and to help ease their suffering. \n",
|
||||
"\n",
|
||||
"Let me be clear, our forces are not engaged and will not engage in conflict with Russian forces in Ukraine. \n",
|
||||
"\n",
|
||||
"Our forces are not going to Europe to fight in Ukraine, but to defend our NATO Allies – in the event that Putin decides to keep moving west. \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(texts[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d214aec2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -22,12 +22,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 6,
|
||||
"id": "8db95912",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import ZeroShotAgent, Tool\n",
|
||||
"from langchain.agents import ZeroShotAgent, Tool, AgentExecutor\n",
|
||||
"from langchain.chains.conversation.memory import ConversationBufferMemory\n",
|
||||
"from langchain import OpenAI, SerpAPIWrapper, LLMChain"
|
||||
]
|
||||
@@ -59,7 +59,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 8,
|
||||
"id": "e3439cd6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -68,13 +68,14 @@
|
||||
"suffix = \"\"\"Begin!\"\n",
|
||||
"\n",
|
||||
"{chat_history}\n",
|
||||
"Question: {input}\"\"\"\n",
|
||||
"Question: {input}\n",
|
||||
"{agent_scratchpad}\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = ZeroShotAgent.create_prompt(\n",
|
||||
" tools, \n",
|
||||
" prefix=prefix, \n",
|
||||
" suffix=suffix, \n",
|
||||
" input_variables=[\"input\", \"chat_history\"]\n",
|
||||
" input_variables=[\"input\", \"chat_history\", \"agent_scratchpad\"]\n",
|
||||
")\n",
|
||||
"memory = ConversationBufferMemory(memory_key=\"chat_history\")"
|
||||
]
|
||||
@@ -89,18 +90,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 16,
|
||||
"id": "c56a0e73",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt, memory=memory)\n",
|
||||
"agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)"
|
||||
"llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt)\n",
|
||||
"agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)\n",
|
||||
"agent_chain = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 17,
|
||||
"id": "ca4bc1fb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -110,30 +112,29 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"How many people live in canada?\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I should look up how many people live in canada\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out the population of Canada\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"How many people live in canada?\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe current population of Canada is 38,533,678 as of Friday, November 25, 2022, based on Worldometer elaboration of the latest United Nations data. · Canada 2020 ...\u001b[0m\n",
|
||||
"Action Input: Population of Canada\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe current population of Canada is 38,555,354 as of Monday, December 19, 2022, based on Worldometer elaboration of the latest United Nations data. · Canada 2020 ...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The current population of Canada is 38,533,678 as of Friday, November 25, 2022, based on Worldometer elaboration of the latest United Nations data.\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
"Final Answer: The current population of Canada is 38,555,354.\u001b[0m\n",
|
||||
"\u001b[1m> Finished AgentExecutor chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The current population of Canada is 38,533,678 as of Friday, November 25, 2022, based on Worldometer elaboration of the latest United Nations data.'"
|
||||
"'The current population of Canada is 38,555,354.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"How many people live in canada?\")"
|
||||
"agent_chain.run(input=\"How many people live in canada?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -146,7 +147,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 20,
|
||||
"id": "eecc0462",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -156,32 +157,29 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"what is their national anthem called?\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m\n",
|
||||
"AI: I should look up the name of Canada's national anthem\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out what the national anthem of Canada is called.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"What is the name of Canada's national anthem?\"\u001b[0m\n",
|
||||
"Action Input: National anthem of Canada\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mAfter 100 years of tradition, O Canada was proclaimed Canada's national anthem in 1980. The music for O Canada was composed in 1880 by Calixa ...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m\n",
|
||||
"AI: I now know the final answer\n",
|
||||
"Final Answer: After 100 years of tradition, O Canada was proclaimed Canada's national anthem in 1980. The music for O Canada was composed in 1880 by Calixa Lavallée.\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: O Canada is the national anthem of Canada.\u001b[0m\n",
|
||||
"\u001b[1m> Finished AgentExecutor chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"After 100 years of tradition, O Canada was proclaimed Canada's national anthem in 1980. The music for O Canada was composed in 1880 by Calixa Lavallée.\""
|
||||
"'O Canada is the national anthem of Canada.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"what is their national anthem called?\")"
|
||||
"agent_chain.run(input=\"what is their national anthem called?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -196,7 +194,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 23,
|
||||
"id": "3359d043",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -204,21 +202,23 @@
|
||||
"prefix = \"\"\"Have a conversation with a human, answering the following questions as best you can. You have access to the following tools:\"\"\"\n",
|
||||
"suffix = \"\"\"Begin!\"\n",
|
||||
"\n",
|
||||
"Question: {input}\"\"\"\n",
|
||||
"Question: {input}\n",
|
||||
"{agent_scratchpad}\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = ZeroShotAgent.create_prompt(\n",
|
||||
" tools, \n",
|
||||
" prefix=prefix, \n",
|
||||
" suffix=suffix, \n",
|
||||
" input_variables=[\"input\"]\n",
|
||||
" input_variables=[\"input\", \"agent_scratchpad\"]\n",
|
||||
")\n",
|
||||
"llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt)\n",
|
||||
"agent_without_memory = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)"
|
||||
"agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)\n",
|
||||
"agent_without_memory = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 24,
|
||||
"id": "970d23df",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -228,24 +228,23 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"How many people live in canada?\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I should look up how many people live in canada\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I should look up the answer\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"How many people live in canada?\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe current population of Canada is 38,533,678 as of Friday, November 25, 2022, based on Worldometer elaboration of the latest United Nations data. · Canada 2020 ...\u001b[0m\n",
|
||||
"Action Input: population of Canada\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mThe current population of Canada is 38,555,354 as of Monday, December 19, 2022, based on Worldometer elaboration of the latest United Nations data. · Canada 2020 ...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The current population of Canada is 38,533,678\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
"Final Answer: The current population of Canada is 38,555,354.\u001b[0m\n",
|
||||
"\u001b[1m> Finished AgentExecutor chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The current population of Canada is 38,533,678'"
|
||||
"'The current population of Canada is 38,555,354.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -256,7 +255,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 25,
|
||||
"id": "d9ea82f0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -266,24 +265,23 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"what is their national anthem called?\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I should probably look this up\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I should look up the answer\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"What is the national anthem of [country]\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mMost nation states have an anthem, defined as \"a song, as of praise, devotion, or patriotism\"; most anthems are either marches or hymns in style.\u001b[0m\n",
|
||||
"Action Input: national anthem of [country]\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m\"Himno Nacional\" (\"National Anthem\") · \"Pátria\" (\"Fatherland\") · \"Salve, Oh Patria\" (\"We Salute You, Our Homeland\") · \"Bilady, Bilady, Bilady\" (\"My Country, My ...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The national anthem is called \"the national anthem.\"\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
"Final Answer: The national anthem of [country] is called \"Himno Nacional\", \"Pátria\", \"Salve, Oh Patria\", and \"Bilady, Bilady, Bilady\".\u001b[0m\n",
|
||||
"\u001b[1m> Finished AgentExecutor chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The national anthem is called \"the national anthem.\"'"
|
||||
"'The national anthem of [country] is called \"Himno Nacional\", \"Pátria\", \"Salve, Oh Patria\", and \"Bilady, Bilady, Bilady\".'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -317,7 +315,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,10 +1,37 @@
|
||||
Prompts
|
||||
=======
|
||||
LLMs & Prompts
|
||||
==============
|
||||
|
||||
The examples here all highlight how to work with LLMs and prompts.
|
||||
|
||||
**LLMs**
|
||||
|
||||
`LLM Functionality <prompts/llm_functionality.ipynb>`_: A walkthrough of all the functionality the standard LLM interface exposes.
|
||||
|
||||
`LLM Serialization <prompts/llm_serialization.ipynb>`_: A walkthrough of how to serialize LLMs to and from disk.
|
||||
|
||||
`LLM Caching <prompts/llm_caching.ipynb>`_: Covers different types of caches, and how to use a cache to save results of LLM calls.
|
||||
|
||||
`Custom LLM <prompts/custom_llm.ipynb>`_: How to create and use a custom LLM class, in case you have an LLM not from one of the standard providers (including one that you host yourself).
|
||||
|
||||
|
||||
**Prompts**
|
||||
|
||||
`Prompt Management <prompts/prompt_management.ipynb>`_: A walkthrough of all the functionality LangChain supports for working with prompts.
|
||||
|
||||
`Prompt Serialization <prompts/prompt_serialization.ipynb>`_: A walkthrough of how to serialize prompts to and from disk.
|
||||
|
||||
`Few Shot Examples <prompts/few_shot_examples.ipynb>`_: How to include examples in the prompt.
|
||||
|
||||
`Generate Examples <prompts/generate_examples.ipynb>`_: How to use existing examples to generate more examples.
|
||||
|
||||
`Custom Example Selector <prompts/custom_example_selector.ipynb>`_: How to create and use a custom ExampleSelector (the class responsible for choosing which examples to use in a prompt).
|
||||
|
||||
`Custom Prompt Template <prompts/custom_prompt_template.ipynb>`_: How to create and use a custom PromptTemplate, the logic that decides how input variables get formatted into a prompt.
|
||||
|
||||
The examples here all highlight how to work with prompts.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
:hidden:
|
||||
|
||||
prompts/*
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"\n",
|
||||
"There is only one required thing that a custom LLM needs to implement:\n",
|
||||
"\n",
|
||||
"1. A `__call__` method that takes in a string, some optional stop words, and returns a string\n",
|
||||
"1. A `_call` method that takes in a string, some optional stop words, and returns a string\n",
|
||||
"\n",
|
||||
"There is a second optional thing it can implement:\n",
|
||||
"\n",
|
||||
@@ -33,17 +33,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 7,
|
||||
"id": "d5ceff02",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class CustomLLM(LLM):\n",
|
||||
" \n",
|
||||
" def __init__(self, n: int):\n",
|
||||
" self.n = n\n",
|
||||
" n: int\n",
|
||||
" \n",
|
||||
" @property\n",
|
||||
" def _llm_type(self) -> str:\n",
|
||||
" return \"custom\"\n",
|
||||
" \n",
|
||||
" def __call__(self, prompt: str, stop: Optional[List[str]] = None) -> str:\n",
|
||||
" def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:\n",
|
||||
" if stop is not None:\n",
|
||||
" raise ValueError(\"stop kwargs are not permitted.\")\n",
|
||||
" return prompt[:self.n]\n",
|
||||
@@ -64,7 +67,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 8,
|
||||
"id": "10e5ece6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -74,7 +77,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 9,
|
||||
"id": "8cd49199",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -84,7 +87,7 @@
|
||||
"'This is a '"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -103,7 +106,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 10,
|
||||
"id": "9c33fa19",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -145,7 +148,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
11
docs/examples/prompts/llm.json
Normal file
11
docs/examples/prompts/llm.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"model_name": "text-davinci-003",
|
||||
"temperature": 0.7,
|
||||
"max_tokens": 256,
|
||||
"top_p": 1.0,
|
||||
"frequency_penalty": 0.0,
|
||||
"presence_penalty": 0.0,
|
||||
"n": 1,
|
||||
"best_of": 1,
|
||||
"_type": "openai"
|
||||
}
|
||||
9
docs/examples/prompts/llm.yaml
Normal file
9
docs/examples/prompts/llm.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
_type: openai
|
||||
best_of: 1
|
||||
frequency_penalty: 0.0
|
||||
max_tokens: 256
|
||||
model_name: text-davinci-003
|
||||
n: 1
|
||||
presence_penalty: 0.0
|
||||
temperature: 0.7
|
||||
top_p: 1.0
|
||||
467
docs/examples/prompts/llm_caching.ipynb
Normal file
467
docs/examples/prompts/llm_caching.ipynb
Normal file
@@ -0,0 +1,467 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f36d938c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# LLM Caching\n",
|
||||
"This notebook covers how to cache results of individual LLM calls."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "10ad9224",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b50f0598",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### In Memory Cache"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "426ff912",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import langchain\n",
|
||||
"from langchain.cache import InMemoryCache\n",
|
||||
"langchain.llm_cache = InMemoryCache()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "f69f6283",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# To make the caching really obvious, let's use a slower model.\n",
|
||||
"llm = OpenAI(model_name=\"text-davinci-002\", n=2, best_of=2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "64005d1f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 30 ms, sys: 10.8 ms, total: 40.8 ms\n",
|
||||
"Wall time: 983 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# The first time, it is not yet in cache, so it should take longer\n",
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "c8a1cb2b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 65 µs, sys: 1 µs, total: 66 µs\n",
|
||||
"Wall time: 70.1 µs\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# The second time it is, so it goes faster\n",
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4bf59c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### SQLite Cache"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "5f036236",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# We can do the same thing with a SQLite cache\n",
|
||||
"from langchain.cache import SQLiteCache\n",
|
||||
"langchain.llm_cache = SQLiteCache(database_path=\".langchain.db\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "fa18e3af",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 6.76 ms, sys: 2.6 ms, total: 9.36 ms\n",
|
||||
"Wall time: 7.86 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# The first time, it is not yet in cache, so it should take longer\n",
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "5bf2f6fd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 2.52 ms, sys: 1.47 ms, total: 3.99 ms\n",
|
||||
"Wall time: 2.98 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# The second time it is, so it goes faster\n",
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "934943dc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### SQLAlchemy Cache"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "acccff40",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# You can use SQLAlchemyCache to cache with any SQL database supported by SQLAlchemy.\n",
|
||||
"\n",
|
||||
"# from langchain.cache import SQLAlchemyCache\n",
|
||||
"# from sqlalchemy import create_engine\n",
|
||||
"\n",
|
||||
"# engine = create_engine(\"postgresql://postgres:postgres@localhost:5432/postgres\")\n",
|
||||
"# langchain.llm_cache = SQLAlchemyCache(engine)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0c69d84d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Optional Caching\n",
|
||||
"You can also turn off caching for specific LLMs should you choose. In the example below, even though global caching is enabled, we turn it off for a specific LLM."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "6af46e2b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(model_name=\"text-davinci-002\", n=2, best_of=2, cache=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "26c4fd8f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 5.59 ms, sys: 2.35 ms, total: 7.95 ms\n",
|
||||
"Wall time: 1.46 s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "46846b20",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 5.76 ms, sys: 3.15 ms, total: 8.9 ms\n",
|
||||
"Wall time: 660 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was...two tired!\""
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5da41b77",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Optional Caching in Chains\n",
|
||||
"You can also turn off caching for particular nodes in chains. Note that because of certain interfaces, its often easier to construct the chain first, and then edit the LLM afterwards.\n",
|
||||
"\n",
|
||||
"As an example, we will load a summarizer map-reduce chain. We will cache results for the map-step, but then not freeze it for the combine step."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "9afa3f7a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(model_name=\"text-davinci-002\")\n",
|
||||
"no_cache_llm = OpenAI(model_name=\"text-davinci-002\", cache=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "98a78e8e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.chains.mapreduce import MapReduceChain\n",
|
||||
"\n",
|
||||
"text_splitter = CharacterTextSplitter()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "2bfb099b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open('../state_of_the_union.txt') as f:\n",
|
||||
" state_of_the_union = f.read()\n",
|
||||
"texts = text_splitter.split_text(state_of_the_union)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "f78b7f51",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"docs = [Document(page_content=t) for t in texts[:3]]\n",
|
||||
"from langchain.chains.summarize import load_summarize_chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "a2a30822",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = load_summarize_chain(llm, chain_type=\"map_reduce\", reduce_llm=no_cache_llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "a545b743",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 471 ms, sys: 130 ms, total: 601 ms\n",
|
||||
"Wall time: 5.8 s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"\\n\\nIn response to Vladimir Putin's aggression in Ukraine, the United States has joined with European allies to impose economic sanctions and cut off Russia's access to technology. The Department of Justice is also assembling a task force to go after the crimes of Russian oligarchs.\\n\\nThe sanctions and task force are aimed at punishing Putin and Russian oligarchs for their aggression in Ukraine and deterring future aggression. The long-term goal is to make Russia pay a high price for its aggression, so that it will be less likely to engage in similar behavior in the future.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"chain.run(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3ed85e9d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"When we run it again, we see that it runs substantially faster but the final answer is different. This is due to caching at the map steps, but not at the reduce step."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "39cbb282",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 10.6 ms, sys: 4.25 ms, total: 14.8 ms\n",
|
||||
"Wall time: 2.19 s\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"\\n\\nIn response to Vladimir Putin's aggression in Ukraine, the United States has joined with European allies to impose economic sanctions and cut off Russia's access to technology. The Department of Justice is also assembling a task force to go after the crimes of Russian oligarchs. The goal is to put pressure on Putin and make him pay a high price for his aggression. These initiatives will also help improve infrastructure and provide clean water and high-speed internet access for all Americans.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"chain.run(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9df0dab8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
229
docs/examples/prompts/llm_functionality.ipynb
Normal file
229
docs/examples/prompts/llm_functionality.ipynb
Normal file
@@ -0,0 +1,229 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "20ac6b98",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# LLM Functionality\n",
|
||||
"\n",
|
||||
"This notebook goes over all the different features of the LLM class in LangChain.\n",
|
||||
"\n",
|
||||
"We will work with an OpenAI LLM wrapper, although these functionalities should exist for all LLM types."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "df924055",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "182b484c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(model_name=\"text-ada-001\", n=2, best_of=2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9695ccfc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Generate Text:** The most basic functionality an LLM has is just the ability to call it, passing in a string and getting back a string."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "9d12ac26",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e7d4d42d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Generate:** More broadly, you can call it with a list of inputs, getting back a more complete response than just the text. This complete response includes things like multiple top responses, as well as LLM provider specific information"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "f4dc241a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm_result = llm.generate([\"Tell me a joke\", \"Tell me a poem\"]*15)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "740392f6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"30"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(llm_result.generations)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "ab6cdcf1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Generation(text='\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'),\n",
|
||||
" Generation(text='\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm_result.generations[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "4946a778",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Generation(text=\"\\n\\nA rose by the side of the road\\n\\nIs all I need to find my way\\n\\nTo the place I've been searching for\\n\\nAnd my heart is singing with joy\\n\\nWhen I look at this rose\\n\\nIt reminds me of the love I've found\\n\\nAnd I know that wherever I go\\n\\nI'll always find my rose by the side of the road.\"),\n",
|
||||
" Generation(text=\"\\n\\nWhen I was younger\\nI thought that love\\nI was something like a fairytale\\nI would find my prince and they would be my people\\nI was naïve\\nI thought that\\n\\nLove was a something that happened\\nWhen I was younger\\nI was it for my fairytale prince\\nNow I realize\\nThat love is something that waits\\nFor when my prince comes\\nAnd when I am ready to be his wife\\nI'll tell you a poem\\n\\nWhen I was younger\\nI thought that love\\nI was something like a fairytale\\nI would find my prince and they would be my people\\nI was naïve\\nI thought that\\n\\nLove was a something that happened\\nAnd I would be happy\\nWhen my prince came\\nAnd I was ready to be his wife\")]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm_result.generations[-1]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "242e4527",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'token_usage': {'completion_tokens': 3722,\n",
|
||||
" 'prompt_tokens': 120,\n",
|
||||
" 'total_tokens': 3842}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Provider specific info\n",
|
||||
"llm_result.llm_output"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bde8e04f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Number of Tokens:** You can also estimate how many tokens a piece of text will be in that model. This is useful because models have a context length (and cost more for more tokens), which means you need to be aware of how long the text you are passing in is.\n",
|
||||
"\n",
|
||||
"Notice that by default the tokens are estimated using a HuggingFace tokenizer."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "b623c774",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"3"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm.get_num_tokens(\"what a joke\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "1235b9b19e8e9828b5c1fdb2cd89fe8d3de0fcde5ef5f3db36e4b671adb8660f"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
166
docs/examples/prompts/llm_serialization.ipynb
Normal file
166
docs/examples/prompts/llm_serialization.ipynb
Normal file
@@ -0,0 +1,166 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "73f9bf40",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# LLM Serialization\n",
|
||||
"\n",
|
||||
"This notebook walks how to write and read an LLM Configuration to and from disk. This is useful if you want to save the configuration for a given LLM (eg the provider, the temperature, etc)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "9c9fb6ff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.llms.loading import load_llm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "88ce018b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Loading\n",
|
||||
"First, lets go over loading a LLM from disk. LLMs can be saved on disk in two formats: json or yaml. No matter the extension, they are loaded in the same way."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "f12b28f3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\r\n",
|
||||
" \"model_name\": \"text-davinci-003\",\r\n",
|
||||
" \"temperature\": 0.7,\r\n",
|
||||
" \"max_tokens\": 256,\r\n",
|
||||
" \"top_p\": 1,\r\n",
|
||||
" \"frequency_penalty\": 0,\r\n",
|
||||
" \"presence_penalty\": 0,\r\n",
|
||||
" \"n\": 1,\r\n",
|
||||
" \"best_of\": 1,\r\n",
|
||||
" \"_type\": \"openai\"\r\n",
|
||||
"}"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!cat llm.json"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "9ab709fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = load_llm(\"llm.json\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "095b1d56",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"_type: openai\r\n",
|
||||
"best_of: 1\r\n",
|
||||
"frequency_penalty: 0\r\n",
|
||||
"max_tokens: 256\r\n",
|
||||
"model_name: text-davinci-003\r\n",
|
||||
"n: 1\r\n",
|
||||
"presence_penalty: 0\r\n",
|
||||
"temperature: 0.7\r\n",
|
||||
"top_p: 1\r\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!cat llm.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "8cafaafe",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = load_llm(\"llm.yaml\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ab3e4223",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Saving\n",
|
||||
"If you want to go from a LLM in memory to a serialized version of it, you can do so easily by calling the `.save` method. Again, this supports both json and yaml."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "b38f685d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm.save(\"llm.json\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "b7365503",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm.save(\"llm.yaml\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0e494851",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -71,7 +71,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 11,
|
||||
"id": "094229f4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -151,6 +151,59 @@
|
||||
"multiple_input_prompt.format(adjective=\"funny\", content=\"chickens\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b2dd6154",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Alternative formats\n",
|
||||
"\n",
|
||||
"This section shows how to use alternative formats besides \"f-string\" to format prompts."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "53b41b6a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Jinja2\n",
|
||||
"template = \"\"\"\n",
|
||||
"{% for item in items %}\n",
|
||||
"Question: {{ item.question }}\n",
|
||||
"Answer: {{ item.answer }}\n",
|
||||
"{% endfor %}\n",
|
||||
"\"\"\"\n",
|
||||
"items=[{\"question\": \"foo\", \"answer\": \"bar\"},{\"question\": \"1\", \"answer\": \"2\"}]\n",
|
||||
"jinja2_prompt = PromptTemplate(\n",
|
||||
" input_variables=[\"items\"], \n",
|
||||
" template=template,\n",
|
||||
" template_format=\"jinja2\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "ba8aabd3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nQuestion: foo\\nAnswer: bar\\n\\nQuestion: 1\\nAnswer: 2\\n'"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"jinja2_prompt.format(items=items)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1492b49d",
|
||||
@@ -519,8 +572,8 @@
|
||||
"text": [
|
||||
"Give the antonym of every input\n",
|
||||
"\n",
|
||||
"Input: tall\n",
|
||||
"Output: short\n",
|
||||
"Input: happy\n",
|
||||
"Output: sad\n",
|
||||
"\n",
|
||||
"Input: fat\n",
|
||||
"Output:\n"
|
||||
@@ -544,8 +597,8 @@
|
||||
"text": [
|
||||
"Give the antonym of every input\n",
|
||||
"\n",
|
||||
"Input: enthusiastic\n",
|
||||
"Output: apathetic\n",
|
||||
"Input: happy\n",
|
||||
"Output: sad\n",
|
||||
"\n",
|
||||
"Input: joyful\n",
|
||||
"Output:\n"
|
||||
@@ -558,6 +611,110 @@
|
||||
"print(similar_prompt.format(adjective=\"joyful\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bc35afd0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Maximal Marginal Relevance ExampleSelector\n",
|
||||
"\n",
|
||||
"The MaxMarginalRelevanceExampleSelector selects examples based on a combination of which examples are most similar to the inputs, while also optimizing for diversity. It does this by finding the examples with the embeddings that have the greatest cosine similarity with the inputs, and then iteratively adding them while penalizing them for closeness to already selected examples.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "ac95c968",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts.example_selector import MaxMarginalRelevanceExampleSelector"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "db579bea",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"example_selector = MaxMarginalRelevanceExampleSelector.from_examples(\n",
|
||||
" # This is the list of examples available to select from.\n",
|
||||
" examples, \n",
|
||||
" # This is the embedding class used to produce embeddings which are used to measure semantic similarity.\n",
|
||||
" OpenAIEmbeddings(), \n",
|
||||
" # This is the VectorStore class that is used to store the embeddings and do a similarity search over.\n",
|
||||
" FAISS, \n",
|
||||
" # This is the number of examples to produce.\n",
|
||||
" k=2\n",
|
||||
")\n",
|
||||
"mmr_prompt = FewShotPromptTemplate(\n",
|
||||
" # We provide an ExampleSelector instead of examples.\n",
|
||||
" example_selector=example_selector,\n",
|
||||
" example_prompt=example_prompt,\n",
|
||||
" prefix=\"Give the antonym of every input\",\n",
|
||||
" suffix=\"Input: {adjective}\\nOutput:\", \n",
|
||||
" input_variables=[\"adjective\"],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "cd76e344",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Give the antonym of every input\n",
|
||||
"\n",
|
||||
"Input: happy\n",
|
||||
"Output: sad\n",
|
||||
"\n",
|
||||
"Input: windy\n",
|
||||
"Output: calm\n",
|
||||
"\n",
|
||||
"Input: worried\n",
|
||||
"Output:\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Input is a feeling, so should select the happy/sad example as the first one\n",
|
||||
"print(mmr_prompt.format(adjective=\"worried\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "cf82956b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Give the antonym of every input\n",
|
||||
"\n",
|
||||
"Input: happy\n",
|
||||
"Output: sad\n",
|
||||
"\n",
|
||||
"Input: enthusiastic\n",
|
||||
"Output: apathetic\n",
|
||||
"\n",
|
||||
"Input: worried\n",
|
||||
"Output:\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Let's compare this to what we would just get if we went solely off of similarity\n",
|
||||
"similar_prompt.example_selector.k = 2\n",
|
||||
"print(similar_prompt.format(adjective=\"worried\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "dbc32551",
|
||||
@@ -602,7 +759,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
Agents use an LLM to determine which actions to take and in what order.
|
||||
An action can either be using a tool and observing its output, or returning to the user.
|
||||
For a list of easily loadable tools, see [here](tools.md).
|
||||
Here are the agents available in LangChain.
|
||||
|
||||
For a tutorial on how to load agents, see [here](/getting_started/agents.ipynb).
|
||||
|
||||
128
docs/explanation/combine_docs.md
Normal file
128
docs/explanation/combine_docs.md
Normal file
@@ -0,0 +1,128 @@
|
||||
# Data Augmented Generation
|
||||
|
||||
## Overview
|
||||
|
||||
Language models are trained on large amounts of unstructured data, which makes them really good at general purpose text generation. However, there are many instances where you may want the language model to generate text based not on generic data but rather on specific data. Some common examples of this include:
|
||||
|
||||
- Summarization of a specific piece of text (a website, a private document, etc)
|
||||
- Question answering over a specific piece of text (a website, a private document, etc)
|
||||
- Question answering over multiple pieces of text (multiple websites, multiple private documents, etc)
|
||||
- Using the results of some external call to an API (results from a SQL query, etc)
|
||||
|
||||
All of these examples are instances when you do not want the LLM to generate text based solely on the data it was trained over, but rather you want it to incorporate other external data in some way. At a high level, this process can be broken down into two steps:
|
||||
|
||||
1. Fetching: Fetching the relevant data to include.
|
||||
2. Augmenting: Passing the data in as context to the LLM.
|
||||
|
||||
This guide is intended to provide an overview of how to do this. This includes an overview of the literature, as well as common tools, abstractions and chains for doing this.
|
||||
|
||||
## Related Literature
|
||||
There are a lot of related papers in this area. Most of them are focused on end-to-end methods that optimize the fetching of the relevant data as well as passing it in as context. These are a few of the papers that are particularly relevant:
|
||||
|
||||
**[RAG](https://arxiv.org/abs/2005.11401):** Retrieval Augmented Generation.
|
||||
This paper introduces RAG models where the parametric memory is a pre-trained seq2seq model and the non-parametric memory is a dense vector index of Wikipedia, accessed with a pre-trained neural retriever.
|
||||
|
||||
**[REALM](https://arxiv.org/abs/2002.08909):** Retrieval-Augmented Language Model Pre-Training.
|
||||
To capture knowledge in a more modular and interpretable way, this paper augments language model pre-training with a latent knowledge retriever, which allows the model to retrieve and attend over documents from a large corpus such as Wikipedia, used during pre-training, fine-tuning and inference.
|
||||
|
||||
**[HayStack](https://haystack.deepset.ai/):** This is not a paper, but rather an open source library aimed at semantic search, question answering, summarization, and document ranking for a wide range of NLP applications. The underpinnings of this library are focused on the same `fetching` and `augmenting` concepts discussed here, and incorporate some of the methods in the above papers.
|
||||
|
||||
These papers/open-source projects are centered around retrieval of documents, which is important for question-answering tasks over a large corpus of documents (which is how they are evaluated). However, we use the terminology of `Data Augmented Generation` to highlight that retrieval from some document store is only one possible way of fetching relevant data to include. Other methods to fetch relevant data could involve hitting an API, querying a database, or just working with user provided data (eg a specific document that they want to summarize).
|
||||
|
||||
Let's now take a deep dive into the two steps involved: fetching and augmenting.
|
||||
|
||||
## Fetching
|
||||
There are many ways to fetch relevant data to pass in as context to a LM, and these methods largely depend
|
||||
on the use case.
|
||||
|
||||
**User provided:** In some cases, the user may provide the relevant data, and no algorithm for fetching is needed.
|
||||
An example of this is for summarization of specific documents: the user will provide the document to be summarized,
|
||||
and task the language model with summarizing it.
|
||||
|
||||
**Document Retrieval:** One of the more common use cases involves fetching relevant documents or pieces of text from
|
||||
a large corpus of data. A common example of this is question answering over a private collection of documents.
|
||||
|
||||
**API Querying:** Another common way to fetch data is from an API query. One example of this is a WebGPT-like system,
|
||||
where you first query Google (or another search API) for relevant information, and then those results are used in
|
||||
the generation step. Another example could be querying a structured database (like SQL) and then using a language model
|
||||
to synthesize those results.
|
||||
|
||||
There are two big issues to deal with in fetching:
|
||||
|
||||
1. Fetching small enough pieces of information
|
||||
2. Not fetching too many pieces of information (eg fetching only the most relevant pieces)
|
||||
|
||||
### Text Splitting
|
||||
One big issue with all of these methods is how to make sure you are working with pieces of text that are not too large.
|
||||
This is important because most language models have a context length, and so you cannot (yet) just pass a
|
||||
large document in as context. Therefore, it is important to not only fetch relevant data but also make sure it is
|
||||
in small enough chunks.
|
||||
|
||||
LangChain provides some utilities to help with splitting up larger pieces of data. This comes in the form of the TextSplitter class.
|
||||
The class takes in a document and splits it up into chunks, with several parameters that control the
|
||||
size of the chunks as well as the overlap in the chunks (important for maintaining context).
|
||||
See [this walkthrough](../examples/data_augmented_generation/textsplitter.ipynb) for more information.
|
||||
|
||||
### Relevant Documents
|
||||
A second large issue related to fetching data is to make sure you are not fetching too many documents, and are only fetching
|
||||
the documents that are relevant to the query/question at hand. There are a few ways to deal with this.
|
||||
|
||||
One concrete example of this is vector stores for document retrieval, often used for semantic search or question answering.
|
||||
With this method, larger documents are split up into
|
||||
smaller chunks and then each chunk of text is passed to an embedding function which creates an embedding for that piece of text.
|
||||
Those embeddings are then stored in a database. When a new search query or question comes in, an embedding is
|
||||
created for that query/question and then documents with embeddings most similar to that embedding are fetched.
|
||||
Examples of vector database companies include [Pinecone](https://www.pinecone.io/) and [Weaviate](https://weaviate.io/).
|
||||
|
||||
Although this is perhaps the most common way of document retrieval, people are starting to think about alternative
|
||||
data structures and indexing techniques specifically for working with language models. For a leading example of this,
|
||||
check out [GPT Index](https://github.com/jerryjliu/gpt_index) - a collection of data structures created by and optimized
|
||||
for language models.
|
||||
|
||||
## Augmenting
|
||||
So you've fetched your relevant data - now what? How do you pass them to the language model in a format it can understand?
|
||||
There are a few different methods, or chains, for doing so. LangChain supports three of the more common ones - and
|
||||
we are actively looking to include more, so if you have any ideas please reach out! Note that there is not
|
||||
one best method - the decision of which one to use is often very context specific. In order from simplest to
|
||||
most complex:
|
||||
|
||||
### Stuffing
|
||||
Stuffing is the simplest method, whereby you simply stuff all the related data into the prompt as context
|
||||
to pass to the language model. This is implemented in LangChain as the `StuffDocumentsChain`.
|
||||
|
||||
**Pros:** Only makes a single call to the LLM. When generating text, the LLM has access to all the data at once.
|
||||
|
||||
**Cons:** Most LLMs have a context length, and for large documents (or many documents) this will not work as it will result in a prompt larger than the context length.
|
||||
|
||||
The main downside of this method is that it only works on smaller pieces of data. Once you are working
|
||||
with many pieces of data, this approach is no longer feasible. The next two approaches are designed to help deal with that.
|
||||
|
||||
### Map Reduce
|
||||
This method involves an initial prompt on each chunk of data (for summarization tasks, this
|
||||
could be a summary of that chunk; for question-answering tasks, it could be an answer based solely on that chunk).
|
||||
Then a different prompt is run to combine all the initial outputs. This is implemented in LangChain as the `MapReduceDocumentsChain`.
|
||||
|
||||
**Pros:** Can scale to larger documents (and more documents) than `StuffDocumentsChain`. The calls to the LLM on individual documents are independent and can therefore be parallelized.
|
||||
|
||||
**Cons:** Requires many more calls to the LLM than `StuffDocumentsChain`. Loses some information during the final combining call.
|
||||
|
||||
### Refine
|
||||
This method involves an initial prompt on the first chunk of data, generating some output.
|
||||
For the remaining documents, that output is passed in, along with the next document,
|
||||
asking the LLM to refine the output based on the new document.
|
||||
|
||||
**Pros:** Can pull in more relevant context, and may be less lossy than `MapReduceDocumentsChain`.
|
||||
|
||||
**Cons:** Requires many more calls to the LLM than `StuffDocumentsChain`. The calls are also NOT independent, meaning they cannot be parallelized like `MapReduceDocumentsChain`. There are also some potential dependencies on the ordering of the documents.
|
||||
|
||||
## Use Cases
|
||||
LangChain supports the above three methods of augmenting LLMs with external data.
|
||||
These methods can be used to underpin several common use cases and they are discussed below.
|
||||
For all three of these use cases, all three methods are supported.
|
||||
It is important to note that a large part of these implementations is the prompts
|
||||
that are used. We provide default prompts for all three use cases, but these can be configured.
|
||||
This is in case you discover a prompt that works better for your specific application.
|
||||
|
||||
- [Question-Answering With Sources](../examples/data_augmented_generation/qa_with_sources.ipynb)
|
||||
- [Question-Answering](../examples/data_augmented_generation/question_answering.ipynb)
|
||||
- [Summarization](../examples/data_augmented_generation/summarize.ipynb)
|
||||
@@ -6,6 +6,9 @@ If you see any other demos that you think we should highlight, be sure to let us
|
||||
|
||||
## Open Source
|
||||
|
||||
### [YouTube Transcription Question Answering with Sources](https://colab.research.google.com/drive/1sKSTjt9cPstl_WMZ86JsgEqFG-aSAwkn?usp=sharing)
|
||||
An end-to-end example of doing question answering on YouTube transcripts, returning the timestamps as sources to legitimize the answer.
|
||||
|
||||
### [ThoughtSource](https://github.com/OpenBioLink/ThoughtSource)
|
||||
A central, open resource and community around data and tools related to chain-of-thought reasoning in large language models.
|
||||
|
||||
|
||||
@@ -72,3 +72,10 @@ Encouraging the model to think a certain way by including the start of the model
|
||||
|
||||
Resources:
|
||||
- [Example](https://twitter.com/goodside/status/1583262455207460865?s=20&t=8Hz7XBnK1OF8siQrxxCIGQ)
|
||||
|
||||
### MemPrompt
|
||||
|
||||
MemPrompt maintains a memory of errors and user feedback, and uses them to prevent repetition of mistakes.
|
||||
|
||||
Resources:
|
||||
- [Paper](https://memprompt.com/)
|
||||
|
||||
94
docs/explanation/tools.md
Normal file
94
docs/explanation/tools.md
Normal file
@@ -0,0 +1,94 @@
|
||||
# Tools
|
||||
|
||||
Tools are functions that agents can use to interact with the world.
|
||||
These tools can be generic utilities (eg search), other chains, or even other agents.
|
||||
|
||||
Currently, tools can be loaded with the following snippet:
|
||||
|
||||
```python
|
||||
from langchain.agents import load_tools
|
||||
tool_names = [...]
|
||||
tools = load_tools(tool_names)
|
||||
```
|
||||
|
||||
Some tools (e.g. chains, agents) may require a base LLM to initialize them.
|
||||
In that case, you can pass in an LLM as well:
|
||||
|
||||
```python
|
||||
from langchain.agents import load_tools
|
||||
tool_names = [...]
|
||||
llm = ...
|
||||
tools = load_tools(tool_names, llm=llm)
|
||||
```
|
||||
|
||||
Below is a list of all supported tools and relevant information:
|
||||
- Tool Name: The name the LLM refers to the tool by.
|
||||
- Tool Description: The description of the tool that is passed to the LLM.
|
||||
- Notes: Notes about the tool that are NOT passed to the LLM.
|
||||
- Requires LLM: Whether this tool requires an LLM to be initialized.
|
||||
- (Optional) Extra Parameters: What extra parameters are required to initialize this tool.
|
||||
|
||||
### List of Tools
|
||||
|
||||
**python_repl**
|
||||
- Tool Name: Python REPL
|
||||
- Tool Description: A Python shell. Use this to execute python commands. Input should be a valid python command. If you expect output it should be printed out.
|
||||
- Notes: Maintains state.
|
||||
- Requires LLM: No
|
||||
|
||||
|
||||
**serpapi**
|
||||
- Tool Name: Search
|
||||
- Tool Description: A search engine. Useful for when you need to answer questions about current events. Input should be a search query.
|
||||
- Notes: Calls the Serp API and then parses results.
|
||||
- Requires LLM: No
|
||||
|
||||
**requests**
|
||||
- Tool Name: Requests
|
||||
- Tool Description: A portal to the internet. Use this when you need to get specific content from a site. Input should be a specific url, and the output will be all the text on that page.
|
||||
- Notes: Uses the Python requests module.
|
||||
- Requires LLM: No
|
||||
|
||||
**terminal**
|
||||
- Tool Name: Terminal
|
||||
- Tool Description: Executes commands in a terminal. Input should be valid commands, and the output will be any output from running that command.
|
||||
- Notes: Executes commands with subprocess.
|
||||
- Requires LLM: No
|
||||
|
||||
**pal-math**
|
||||
- Tool Name: PAL-MATH
|
||||
- Tool Description: A language model that is really good at solving complex word math problems. Input should be a fully worded hard word math problem.
|
||||
- Notes: Based on [this paper](https://arxiv.org/pdf/2211.10435.pdf).
|
||||
- Requires LLM: Yes
|
||||
|
||||
**pal-colored-objects**
|
||||
- Tool Name: PAL-COLOR-OBJ
|
||||
- Tool Description: A language model that is really good at reasoning about position and the color attributes of objects. Input should be a fully worded hard reasoning problem. Make sure to include all information about the objects AND the final question you want to answer.
|
||||
- Notes: Based on [this paper](https://arxiv.org/pdf/2211.10435.pdf).
|
||||
- Requires LLM: Yes
|
||||
|
||||
**llm-math**
|
||||
- Tool Name: Calculator
|
||||
- Tool Description: Useful for when you need to answer questions about math.
|
||||
- Notes: An instance of the `LLMMath` chain.
|
||||
- Requires LLM: Yes
|
||||
|
||||
**open-meteo-api**
|
||||
- Tool Name: Open Meteo API
|
||||
- Tool Description: Useful for when you want to get weather information from the OpenMeteo API. The input should be a question in natural language that this API can answer.
|
||||
- Notes: A natural language connection to the Open Meteo API (`https://api.open-meteo.com/`), specifically the `/v1/forecast` endpoint.
|
||||
- Requires LLM: Yes
|
||||
|
||||
**news-api**
|
||||
- Tool Name: News API
|
||||
- Tool Description: Use this when you want to get information about the top headlines of current news stories. The input should be a question in natural language that this API can answer.
|
||||
- Notes: A natural language connection to the News API (`https://newsapi.org`), specifically the `/v2/top-headlines` endpoint.
|
||||
- Requires LLM: Yes
|
||||
- Extra Parameters: `news_api_key` (your API key to access this endpoint)
|
||||
|
||||
**tmdb-api**
|
||||
- Tool Name: TMDB API
|
||||
- Tool Description: Useful for when you want to get information from The Movie Database. The input should be a question in natural language that this API can answer.
|
||||
- Notes: A natural language connection to the TMDB API (`https://api.themoviedb.org/3`), specifically the `/search/movie` endpoint.
|
||||
- Requires LLM: Yes
|
||||
- Extra Parameters: `tmdb_bearer_token` (your Bearer Token to access this endpoint - note that this is different than the API key)
|
||||
@@ -10,7 +10,7 @@
|
||||
"Agents use an LLM to determine which actions to take and in what order.\n",
|
||||
"An action can either be using a tool and observing its output, or returning to the user.\n",
|
||||
"\n",
|
||||
"When used correctly agents can be extremely powerful. The purpose of this notebook is to show you how to easily use agents through the simplest, highest level API. If you want more low level control over various components, check out the documentation for custom agents (coming soon)."
|
||||
"When used correctly agents can be extremely powerful. The purpose of this notebook is to show you how to easily use agents through the simplest, highest level API."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -18,155 +18,141 @@
|
||||
"id": "3c6226b9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Concepts\n",
|
||||
"\n",
|
||||
"In order to load agents, you should understand the following concepts:\n",
|
||||
"\n",
|
||||
"- Tool: A function that performs a specific duty. This can be things like: Google Search, Database lookup, Python REPL, other chains. The interface for a tool is currently a function that is expected to have a string as an input, with a string as an output.\n",
|
||||
"- LLM: The language model powering the agent.\n",
|
||||
"- Agent: The agent to use. This should be a string that references a supported agent class. Because this notebook focuses on the simplest, highest level API, this only covers using the standard supported agents. If you want to implement a custom agent, see the documentation for custom agents (coming soon).\n",
|
||||
"\n",
|
||||
"**For a list of supported agents and their specifications, see [here](../explanation/agents.md)**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "05d4b21e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools\n",
|
||||
"When constructing your own agent, you will need to provide it with a list of Tools that it can use. A Tool is defined as below.\n",
|
||||
"**Agents**: For a list of supported agents and their specifications, see [here](../explanation/agents.md).\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"class Tool(NamedTuple):\n",
|
||||
" \"\"\"Interface for tools.\"\"\"\n",
|
||||
"\n",
|
||||
" name: str\n",
|
||||
" func: Callable[[str], str]\n",
|
||||
" description: Optional[str] = None\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"The two required components of a Tool are the name and then the tool itself. A tool description is optional, as it is needed for some agents but not all."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2558a02d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Loading an agent\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36ed392e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Import things that are needed generically\n",
|
||||
"from langchain.agents import initialize_agent, Tool\n",
|
||||
"from langchain.llms import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "56ff7670",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load the tool configs that are needed.\n",
|
||||
"from langchain import LLMMathChain, SerpAPIWrapper\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"llm_math_chain = LLMMathChain(llm=llm, verbose=True)\n",
|
||||
"tools = [\n",
|
||||
" Tool(\n",
|
||||
" name = \"Search\",\n",
|
||||
" func=search.run,\n",
|
||||
" description=\"useful for when you need to answer questions about current events\"\n",
|
||||
" ),\n",
|
||||
" Tool(\n",
|
||||
" name=\"Calculator\",\n",
|
||||
" func=llm_math_chain.run,\n",
|
||||
" description=\"useful for when you need to answer questions about math\"\n",
|
||||
" )\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "5b93047d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Construct the agent. We will use the default agent type here.\n",
|
||||
"# See documentation for a full list of options.\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"agent = initialize_agent(tools, llm, agent=\"zero-shot-react-description\", verbose=True)"
|
||||
"**Tools**: For a list of predefined tools and their specifications, see [here](../explanation/tools.md)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "6f96a891",
|
||||
"id": "d01216c0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import load_tools\n",
|
||||
"from langchain.agents import initialize_agent\n",
|
||||
"from langchain.llms import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ef965094",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, let's load the language model we're going to use to control the agent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0728f0d9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fb29d592",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, let's load some tools to use. Note that the `llm-math` tool uses an LLM, so we need to pass that in."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "ba4e7618",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0b50fc9b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, let's initialize an agent with the tools, the language model, and the type of agent we want to use."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "03208e2b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = initialize_agent(tools, llm, agent=\"zero-shot-react-description\", verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "373361d5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's test it out!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "244ee75c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"What is the age of Olivia Wilde's boyfriend raised to the 0.23 power?\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to find the age of Olivia Wilde's boyfriend\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to find out who Olivia Wilde's boyfriend is and then calculate his age raised to the 0.23 power.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"Olivia Wilde's boyfriend\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mOlivia Wilde started dating Harry Styles after ending her years-long engagement to Jason Sudeikis — see their relationship timeline.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to find the age of Harry Styles\n",
|
||||
"Action Input: \"Olivia Wilde boyfriend\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mJason Sudeikis\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to find out Jason Sudeikis' age\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"Harry Styles age\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m28 years\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to calculate 28 to the 0.23 power\n",
|
||||
"Action Input: \"Jason Sudeikis age\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m47 years\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to calculate 47 raised to the 0.23 power\n",
|
||||
"Action: Calculator\n",
|
||||
"Action Input: 28^0.23\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"28^0.23\u001b[32;1m\u001b[1;3m\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"print(28**0.23)\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m2.1520202182226886\n",
|
||||
"\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 2.1520202182226886\n",
|
||||
"Action Input: 47^0.23\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 2.4242784855673896\n",
|
||||
"\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: 2.1520202182226886\u001b[0m"
|
||||
"Final Answer: Jason Sudeikis, Olivia Wilde's boyfriend, is 47 years old and his age raised to the 0.23 power is 2.4242784855673896.\u001b[0m\n",
|
||||
"\u001b[1m> Finished AgentExecutor chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'2.1520202182226886'"
|
||||
"\"Jason Sudeikis, Olivia Wilde's boyfriend, is 47 years old and his age raised to the 0.23 power is 2.4242784855673896.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"How old is Olivia Wilde's boyfriend? What is that number raised to the 0.23 power?\")"
|
||||
"agent.run(\"Who is Olivia Wilde's boyfriend? What is his current age raised to the 0.23 power?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2f0852ff",
|
||||
"id": "e7776981",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -188,7 +174,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
216
docs/getting_started/data_augmented_generation.ipynb
Normal file
216
docs/getting_started/data_augmented_generation.ipynb
Normal file
@@ -0,0 +1,216 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7ba0decc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Data Augmented Generation\n",
|
||||
"\n",
|
||||
"This notebook covers getting started with some key concepts of data augmented generation, specifically Documents, Embeddings, and Vectorstores.\n",
|
||||
"\n",
|
||||
"After that, we will cover how to use these concepts to do question/answering over select documents.\n",
|
||||
"\n",
|
||||
"For a more conceptual explanation of what Data Augmented Generation is, see [this](../explanation/combine_docs.md) documentation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "b37c3e1e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch\n",
|
||||
"from langchain.vectorstores.faiss import FAISS"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a8c13318",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, let's load in our private data that we want to use in conjunction with an LLM."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "91d307ed",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open('../examples/state_of_the_union.txt') as f:\n",
|
||||
" state_of_the_union = f.read()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "12f8bc8f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, we need to create smaller chunks of text from this one large document. We want to do this because we cannot (and do not want to) pass this whole large text into the language model in one go - rather, we want to split it up, select the relevant parts, and then pass those into the language model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "10a93bf9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_text(state_of_the_union)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c2f8c006",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We could work with ALL these documents directly, but often we want to find only the most relevant ones. One common way to do that is to create embeddings for each document, store them in a vector database, and then query that database with an incoming query to select the most relevant documents for that query.\n",
|
||||
"\n",
|
||||
"In this example, we use OpenAI embeddings, and a FAISS vector store."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "fa0f3066",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"docsearch = FAISS.from_texts(texts, embeddings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2c6ce83f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now let's give it a go!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "8465b4b7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "611be801",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"In state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \n",
|
||||
"\n",
|
||||
"We cannot let this happen. \n",
|
||||
"\n",
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0b6a48e5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"So we now have a way of selecting the most relevant documents - now what? We can plug this vectorstore into a chain, where we first select these documents, and then send them (along with the original question) to get a final answer."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "b6255b02",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import OpenAI, VectorDBQA"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "ec4eacad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"qa = VectorDBQA.from_llm(llm=OpenAI(), vectorstore=docsearch)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "59c7508d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" The president said that Ketanji Brown Jackson is one of our nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. The president also said that Ketanji Brown Jackson is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"qa.run(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b192c91c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -27,7 +27,7 @@ from langchain.chains import LLMChain
|
||||
chain = LLMChain(llm=llm, prompt=prompt)
|
||||
```
|
||||
|
||||
Now we can run that can only specifying the product!
|
||||
Now we can run that chain only specifying the product!
|
||||
|
||||
```python
|
||||
chain.run("colorful socks")
|
||||
|
||||
@@ -1,333 +1,333 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d31df93e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Memory\n",
|
||||
"So far, all the chains and agents we've gone through have been stateless. But often, you may want a chain or agent to have some concept of \"memory\" so that it may remember information about its previous interactions. The most clear and simple example of this is when designing a chatbot - you want it to remember previous messages so it can use context from that to have a better conversation. This would be a type of \"short-term memory\". On the more complex side, you could imagine a chain/agent remembering key pieces of information over time - this would be a form of \"long-term memory\".\n",
|
||||
"\n",
|
||||
"LangChain provides several specially created chains just for this purpose. This notebook walk throughs using one of those chains (the `ConversationChain`) with two different types of memory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d051c1da",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ConversationChain with default memory\n",
|
||||
"By default, the `ConversationChain` has a simple type of memory which remebers all previes inputs/outputs and adds them to the context that is passed. Let's take a look at using this chain (setting `verbose=True` so we can see the prompt)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "ae046bff",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
"cells": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"\n",
|
||||
"Human: Hi there!\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
"cell_type": "markdown",
|
||||
"id": "d31df93e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Memory\n",
|
||||
"So far, all the chains and agents we've gone through have been stateless. But often, you may want a chain or agent to have some concept of \"memory\" so that it may remember information about its previous interactions. The clearest and simplest example of this is when designing a chatbot - you want it to remember previous messages so it can use context from that to have a better conversation. This would be a type of \"short-term memory\". On the more complex side, you could imagine a chain/agent remembering key pieces of information over time - this would be a form of \"long-term memory\". For more concrete ideas on the latter, see this [awesome paper](https://memprompt.com/).\n",
|
||||
"\n",
|
||||
"LangChain provides several specially created chains just for this purpose. This notebook walks through using one of those chains (the `ConversationChain`) with two different types of memory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' Hello! How are you today?'"
|
||||
"cell_type": "markdown",
|
||||
"id": "d051c1da",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ConversationChain with default memory\n",
|
||||
"By default, the `ConversationChain` has a simple type of memory that remembers all previous inputs/outputs and adds them to the context that is passed. Let's take a look at using this chain (setting `verbose=True` so we can see the prompt)."
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import OpenAI, ConversationChain\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"conversation = ConversationChain(llm=llm, verbose=True)\n",
|
||||
"\n",
|
||||
"conversation.predict(input=\"Hi there!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "d8e2a6ff",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"\n",
|
||||
"Human: Hi there!\n",
|
||||
"AI: Hello! How are you today?\n",
|
||||
"Human: I'm doing well! Just having a conversation with an AI.\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" That's great! What would you like to talk about?\""
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "ae046bff",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"\n",
|
||||
"Human: Hi there!\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' Hello! How are you today?'"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain import OpenAI, ConversationChain\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"conversation = ConversationChain(llm=llm, verbose=True)\n",
|
||||
"\n",
|
||||
"conversation.predict(input=\"Hi there!\")"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"conversation.predict(input=\"I'm doing well! Just having a conversation with an AI.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "15eda316",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"\n",
|
||||
"Human: Hi there!\n",
|
||||
"AI: Hello! How are you today?\n",
|
||||
"Human: I'm doing well! Just having a conversation with an AI.\n",
|
||||
"AI: That's great! What would you like to talk about?\n",
|
||||
"Human: Tell me about yourself.\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' I am an AI created to provide information and support to humans. I enjoy learning and exploring new things.'"
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "d8e2a6ff",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"\n",
|
||||
"Human: Hi there!\n",
|
||||
"AI: Hello! How are you today?\n",
|
||||
"Human: I'm doing well! Just having a conversation with an AI.\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" That's great! What would you like to talk about?\""
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"conversation.predict(input=\"I'm doing well! Just having a conversation with an AI.\")"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"conversation.predict(input=\"Tell me about yourself.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4fad9448",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ConversationChain with ConversationSummaryMemory\n",
|
||||
"Now let's take a look at using a slightly more complex type of memory - `ConversationSummaryMemory`. This type of memory creates a summary of the conversation over time. This can be useful for condensing information from the conversation over time.\n",
|
||||
"\n",
|
||||
"Let's walk through an example, again setting `verbose=True` so we can see the prompt."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "f60a2fe8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.conversation.memory import ConversationSummaryMemory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "b7274f2c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"\n",
|
||||
"Human: Hi, what's up?\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"\\n\\nI'm doing well, thank you for asking. I'm currently working on a project that I'm really excited about.\""
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "15eda316",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"\n",
|
||||
"Human: Hi there!\n",
|
||||
"AI: Hello! How are you today?\n",
|
||||
"Human: I'm doing well! Just having a conversation with an AI.\n",
|
||||
"AI: That's great! What would you like to talk about?\n",
|
||||
"Human: Tell me about yourself.\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' I am an AI created to provide information and support to humans. I enjoy learning and exploring new things.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"conversation.predict(input=\"Tell me about yourself.\")"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"conversation_with_summary = ConversationChain(llm=llm, memory=ConversationSummaryMemory(llm=OpenAI()), verbose=True)\n",
|
||||
"conversation_with_summary.predict(input=\"Hi, what's up?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "a6b6b88f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"\n",
|
||||
"The human and artificial intelligence are talking. The human asked the AI what it is doing, and the AI said that it is working on a project that it is excited about.\n",
|
||||
"Human: Tell me more about it!\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"\\n\\nI'm working on a project that I'm really excited about. It's a lot of work, but I think it's going to be really great when it's finished. I can't wait to show it to you!\""
|
||||
"cell_type": "markdown",
|
||||
"id": "4fad9448",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ConversationChain with ConversationSummaryMemory\n",
|
||||
"Now let's take a look at using a slightly more complex type of memory - `ConversationSummaryMemory`. This type of memory creates a summary of the conversation over time. This can be useful for condensing information from the conversation over time.\n",
|
||||
"\n",
|
||||
"Let's walk through an example, again setting `verbose=True` so we can see the prompt."
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"conversation_with_summary.predict(input=\"Tell me more about it!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "dad869fe",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"The human and artificial intelligence are talking. The human asked the AI what it is doing, and the AI said that it is working on a project that it is excited about. The AI said that the project is a lot of work, but it is going to be great when it is finished.\n",
|
||||
"Human: Very cool -- what is the scope of the project?\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nThe project is quite large in scope. It involves a lot of data analysis and work with artificial intelligence algorithms.'"
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "f60a2fe8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.conversation.memory import ConversationSummaryMemory"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "b7274f2c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"\n",
|
||||
"Human: Hi, what's up?\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"\\n\\nI'm doing well, thank you for asking. I'm currently working on a project that I'm really excited about.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"conversation_with_summary = ConversationChain(llm=llm, memory=ConversationSummaryMemory(llm=OpenAI()), verbose=True)\n",
|
||||
"conversation_with_summary.predict(input=\"Hi, what's up?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "a6b6b88f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"\n",
|
||||
"The human and artificial intelligence are talking. The human asked the AI what it is doing, and the AI said that it is working on a project that it is excited about.\n",
|
||||
"Human: Tell me more about it!\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"\\n\\nI'm working on a project that I'm really excited about. It's a lot of work, but I think it's going to be really great when it's finished. I can't wait to show it to you!\""
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"conversation_with_summary.predict(input=\"Tell me more about it!\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "dad869fe",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"The human and artificial intelligence are talking. The human asked the AI what it is doing, and the AI said that it is working on a project that it is excited about. The AI said that the project is a lot of work, but it is going to be great when it is finished.\n",
|
||||
"Human: Very cool -- what is the scope of the project?\n",
|
||||
"AI:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nThe project is quite large in scope. It involves a lot of data analysis and work with artificial intelligence algorithms.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"conversation_with_summary.predict(input=\"Very cool -- what is the scope of the project?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5c8735cc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### More Resources on Memory\n",
|
||||
"\n",
|
||||
"This just scratches the surface of what you can do with memory. For more examples on things like how to implement custom memory classes, how to add memory to a custom LLM chain and how to use memory with an agent, please see the [How-To: Memory](../../examples/memory) section. For even more advanced ideas on memory (which will hopefully be included in LangChain soon!) see the [MemPrompt](https://memprompt.com/) paper."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "436dda66",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"conversation_with_summary.predict(input=\"Very cool -- what is the scope of the project?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5c8735cc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### More Resources on Memory\n",
|
||||
"\n",
|
||||
"This just scratches the surface of what you can do with memory. For more examples on things like how to implement custom memory classes, how to add memory to a custom LLM chain and how to use memory with an agent, please see the [How-To: Memory](../../examples/memory) section."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "436dda66",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
@@ -9,13 +9,14 @@ combine them with other sources of computation or knowledge.
|
||||
|
||||
This library is aimed at assisting in the development of those types of applications.
|
||||
|
||||
There are three main areas (with a fourth coming soon) that LangChain is designed to help with.
|
||||
There are five main areas that LangChain is designed to help with.
|
||||
These are, in increasing order of complexity:
|
||||
|
||||
1. LLM and Prompts
|
||||
2. Chains
|
||||
3. Agents
|
||||
4. (Coming Soon) Memory
|
||||
3. Data Augmented Generation
|
||||
4. Agents
|
||||
5. Memory
|
||||
|
||||
Let's go through these categories and for each one identify key concepts (to clarify terminology) as well as the problems in this area LangChain helps solve.
|
||||
|
||||
@@ -51,7 +52,25 @@ LangChain provides several parts to help with that.
|
||||
- Standard interface for working with Chains
|
||||
- Easy way to construct chains of LLMs
|
||||
- Lots of integrations with other tools that you may want to use in conjunction with LLMs
|
||||
- End-to-end chains for common workflows (database question/answer, recursive summarization, etc)
|
||||
- End-to-end chains for common workflows (database question/answer, api calling, etc)
|
||||
|
||||
**📚 Data Augmented Generation**
|
||||
|
||||
LLMs have access to all the data they were trained on, but there are still large chunks of data they were not trained on.
|
||||
Data Augmented Generation covers how to use LLMs to generate text conditioning on data outside of what the LLM was trained on.
|
||||
|
||||
*Key Concepts*
|
||||
|
||||
- Documents: A document is a piece of text, along with some associated metadata, that can be inserted into the context of a query to condition generation on that text.
|
||||
- Embeddings: A vector representation of text (or other unstructured data). Useful for being able to numerically compare pieces of text.
|
||||
- Vectorstore: A database which stores embeddings and can be searched over.
|
||||
|
||||
*Problems Solved*
|
||||
|
||||
- Standard interface for working with Documents, Embeddings, and Vectorstores
|
||||
- Lots of integrations with common embedding providers and vectorstores
|
||||
- End-to-end chains for common workflows (recursive summarization, question answering over documents, etc)
|
||||
|
||||
|
||||
**🤖 Agents**
|
||||
|
||||
@@ -102,7 +121,8 @@ The documentation is structured into the following sections:
|
||||
getting_started/environment.md
|
||||
getting_started/llm.md
|
||||
getting_started/llm_chain.md
|
||||
getting_started/sequential_chains.md
|
||||
getting_started/sequential_chains.ipynb
|
||||
getting_started/data_augmented_generation.ipynb
|
||||
getting_started/agents.ipynb
|
||||
getting_started/memory.ipynb
|
||||
|
||||
@@ -117,8 +137,8 @@ Start here if you haven't used LangChain before.
|
||||
:name: examples
|
||||
|
||||
examples/prompts.rst
|
||||
examples/integrations.rst
|
||||
examples/chains.rst
|
||||
examples/data_augmented_generation.rst
|
||||
examples/agents.rst
|
||||
examples/memory.rst
|
||||
examples/model_laboratory.ipynb
|
||||
@@ -134,19 +154,12 @@ common tasks or cool demos.
|
||||
:caption: Reference
|
||||
:name: reference
|
||||
|
||||
installation.md
|
||||
integrations.md
|
||||
modules/prompt
|
||||
modules/example_selector
|
||||
modules/llms
|
||||
modules/embeddings
|
||||
modules/text_splitter
|
||||
modules/python.rst
|
||||
modules/serpapi.rst
|
||||
modules/docstore.rst
|
||||
modules/vectorstore
|
||||
modules/chains
|
||||
modules/agents
|
||||
reference/installation.md
|
||||
reference/integrations.md
|
||||
reference/prompts.rst
|
||||
reference/chains.rst
|
||||
reference/data_augmented_generation.rst
|
||||
reference/modules/agents
|
||||
|
||||
|
||||
Full API documentation. This is the place to look if you want to
|
||||
@@ -159,7 +172,9 @@ see detailed information about the various classes, methods, and APIs.
|
||||
:name: resources
|
||||
|
||||
explanation/core_concepts.md
|
||||
explanation/combine_docs.md
|
||||
explanation/agents.md
|
||||
explanation/tools.md
|
||||
explanation/glossary.md
|
||||
explanation/cool_demos.md
|
||||
Discord <https://discord.gg/6adMQxSpJS>
|
||||
|
||||
21
docs/reference/chains.rst
Normal file
21
docs/reference/chains.rst
Normal file
@@ -0,0 +1,21 @@
|
||||
Chains
|
||||
==============
|
||||
|
||||
One big part of chains is all the utilities that can be used as part of them.
|
||||
Here is some reference documentation for the utilities natively supported by LangChain.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
modules/python
|
||||
modules/serpapi
|
||||
|
||||
|
||||
With those utilities in mind, here are the reference docs for all the chains in LangChain.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
modules/chains
|
||||
13
docs/reference/data_augmented_generation.rst
Normal file
13
docs/reference/data_augmented_generation.rst
Normal file
@@ -0,0 +1,13 @@
|
||||
Data Augmented Generation
|
||||
=========================
|
||||
|
||||
The reference guides here all relate to components necessary for data augmented generation.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
modules/text_splitter
|
||||
modules/docstore
|
||||
modules/embeddings
|
||||
modules/vectorstore
|
||||
@@ -21,4 +21,10 @@ To install all modules needed for all integrations, run:
|
||||
|
||||
```
|
||||
pip install langchain[all]
|
||||
```
|
||||
|
||||
Note that if you are using `zsh`, you'll need to quote square brackets when passing them as an argument to a command, for example:
|
||||
|
||||
```
|
||||
pip install 'langchain[all]'
|
||||
```
|
||||
12
docs/reference/prompts.rst
Normal file
12
docs/reference/prompts.rst
Normal file
@@ -0,0 +1,12 @@
|
||||
LLMs & Prompts
|
||||
==============
|
||||
|
||||
The reference guides here all relate to objects for working with LLMs and Prompts.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
modules/prompt
|
||||
modules/example_selector
|
||||
modules/llms
|
||||
@@ -1,9 +1,14 @@
|
||||
"""Main entrypoint into package."""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from langchain.agents import MRKLChain, ReActChain, SelfAskWithSearchChain
|
||||
from langchain.cache import BaseCache
|
||||
from langchain.chains import (
|
||||
ConversationChain,
|
||||
LLMBashChain,
|
||||
LLMChain,
|
||||
LLMCheckerChain,
|
||||
LLMMathChain,
|
||||
PALChain,
|
||||
QAWithSourcesChain,
|
||||
@@ -13,7 +18,8 @@ from langchain.chains import (
|
||||
)
|
||||
from langchain.docstore import InMemoryDocstore, Wikipedia
|
||||
from langchain.llms import Cohere, HuggingFaceHub, OpenAI
|
||||
from langchain.logger import BaseLogger
|
||||
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
|
||||
from langchain.logger import BaseLogger, StdOutLogger
|
||||
from langchain.prompts import (
|
||||
BasePromptTemplate,
|
||||
FewShotPromptTemplate,
|
||||
@@ -24,10 +30,14 @@ from langchain.serpapi import SerpAPIChain, SerpAPIWrapper
|
||||
from langchain.sql_database import SQLDatabase
|
||||
from langchain.vectorstores import FAISS, ElasticVectorSearch
|
||||
|
||||
logger = BaseLogger()
|
||||
logger: BaseLogger = StdOutLogger()
|
||||
verbose: bool = False
|
||||
llm_cache: Optional[BaseCache] = None
|
||||
|
||||
__all__ = [
|
||||
"LLMChain",
|
||||
"LLMBashChain",
|
||||
"LLMCheckerChain",
|
||||
"LLMMathChain",
|
||||
"SelfAskWithSearchChain",
|
||||
"SerpAPIWrapper",
|
||||
@@ -41,6 +51,7 @@ __all__ = [
|
||||
"ReActChain",
|
||||
"Wikipedia",
|
||||
"HuggingFaceHub",
|
||||
"HuggingFacePipeline",
|
||||
"SQLDatabase",
|
||||
"SQLDatabaseChain",
|
||||
"FAISS",
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
"""Routing chains."""
|
||||
from langchain.agents.agent import Agent
|
||||
"""Interface for agents."""
|
||||
from langchain.agents.agent import Agent, AgentExecutor
|
||||
from langchain.agents.load_tools import get_all_tool_names, load_tools
|
||||
from langchain.agents.loading import initialize_agent
|
||||
from langchain.agents.mrkl.base import MRKLChain, ZeroShotAgent
|
||||
from langchain.agents.react.base import ReActChain, ReActTextWorldAgent
|
||||
@@ -10,9 +11,12 @@ __all__ = [
|
||||
"MRKLChain",
|
||||
"SelfAskWithSearchChain",
|
||||
"ReActChain",
|
||||
"AgentExecutor",
|
||||
"Agent",
|
||||
"Tool",
|
||||
"initialize_agent",
|
||||
"ZeroShotAgent",
|
||||
"ReActTextWorldAgent",
|
||||
"load_tools",
|
||||
"get_all_tool_names",
|
||||
]
|
||||
|
||||
@@ -1,65 +1,36 @@
|
||||
"""Chain that takes in an input and produces an action and action input."""
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, ClassVar, Dict, List, Optional, Tuple
|
||||
import logging
|
||||
from abc import abstractmethod
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, root_validator
|
||||
|
||||
from langchain.agents.input import ChainedInput
|
||||
import langchain
|
||||
from langchain.agents.tools import Tool
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.input import get_color_mapping
|
||||
from langchain.llms.base import LLM
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
from langchain.schema import AgentAction
|
||||
from langchain.prompts.few_shot import FewShotPromptTemplate
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
from langchain.schema import AgentAction, AgentFinish
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
class Agent(Chain, BaseModel, ABC):
|
||||
"""Agent that uses an LLM."""
|
||||
class Agent(BaseModel):
|
||||
"""Class responsible for calling the language model and deciding the action.
|
||||
|
||||
This is driven by an LLMChain. The prompt in the LLMChain MUST include
|
||||
a variable called "agent_scratchpad" where the agent can put its
|
||||
intermediary work.
|
||||
"""
|
||||
|
||||
prompt: ClassVar[BasePromptTemplate]
|
||||
llm_chain: LLMChain
|
||||
tools: List[Tool]
|
||||
input_key: str = "input" #: :meta private:
|
||||
output_key: str = "output" #: :meta private:
|
||||
|
||||
@property
|
||||
def input_keys(self) -> List[str]:
|
||||
"""Return the singular input key.
|
||||
|
||||
:meta private:
|
||||
"""
|
||||
return [self.input_key]
|
||||
|
||||
@property
|
||||
def output_keys(self) -> List[str]:
|
||||
"""Return the singular output key.
|
||||
|
||||
:meta private:
|
||||
"""
|
||||
return [self.output_key]
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def observation_prefix(self) -> str:
|
||||
"""Prefix to append the observation with."""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def llm_prefix(self) -> str:
|
||||
"""Prefix to append the LLM call with."""
|
||||
|
||||
@property
|
||||
def finish_tool_name(self) -> str:
|
||||
"""Name of the tool to use to finish the chain."""
|
||||
return "Final Answer"
|
||||
|
||||
@property
|
||||
def starter_string(self) -> str:
|
||||
"""Put this string after user input but before first LLM call."""
|
||||
return "\n"
|
||||
return_values: List[str] = ["output"]
|
||||
|
||||
@abstractmethod
|
||||
def _extract_tool_and_input(self, text: str) -> Optional[Tuple[str, str]]:
|
||||
@@ -73,84 +44,173 @@ class Agent(Chain, BaseModel, ABC):
|
||||
def _stop(self) -> List[str]:
|
||||
return [f"\n{self.observation_prefix}"]
|
||||
|
||||
def plan(
|
||||
self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any
|
||||
) -> Union[AgentFinish, AgentAction]:
|
||||
"""Given input, decide what to do.
|
||||
|
||||
Args:
|
||||
intermediate_steps: Steps the LLM has taken to date,
|
||||
along with observations
|
||||
**kwargs: User inputs.
|
||||
|
||||
Returns:
|
||||
Action specifying what tool to use.
|
||||
"""
|
||||
thoughts = ""
|
||||
for action, observation in intermediate_steps:
|
||||
thoughts += action.log
|
||||
thoughts += f"\n{self.observation_prefix}{observation}\n{self.llm_prefix}"
|
||||
new_inputs = {"agent_scratchpad": thoughts, "stop": self._stop}
|
||||
full_inputs = {**kwargs, **new_inputs}
|
||||
full_output = self.llm_chain.predict(**full_inputs)
|
||||
parsed_output = self._extract_tool_and_input(full_output)
|
||||
while parsed_output is None:
|
||||
full_output = self._fix_text(full_output)
|
||||
full_inputs["agent_scratchpad"] += full_output
|
||||
output = self.llm_chain.predict(**full_inputs)
|
||||
full_output += output
|
||||
parsed_output = self._extract_tool_and_input(full_output)
|
||||
tool, tool_input = parsed_output
|
||||
if tool == self.finish_tool_name:
|
||||
return AgentFinish({"output": tool_input}, full_output)
|
||||
return AgentAction(tool, tool_input, full_output)
|
||||
|
||||
def prepare_for_new_call(self) -> None:
|
||||
"""Prepare the agent for new call, if needed."""
|
||||
pass
|
||||
|
||||
@property
|
||||
def finish_tool_name(self) -> str:
|
||||
"""Name of the tool to use to finish the chain."""
|
||||
return "Final Answer"
|
||||
|
||||
@property
|
||||
def input_keys(self) -> List[str]:
|
||||
"""Return the input keys.
|
||||
|
||||
:meta private:
|
||||
"""
|
||||
return list(set(self.llm_chain.input_keys) - {"agent_scratchpad"})
|
||||
|
||||
@root_validator()
|
||||
def validate_prompt(cls, values: Dict) -> Dict:
|
||||
"""Validate that prompt matches format."""
|
||||
prompt = values["llm_chain"].prompt
|
||||
if "agent_scratchpad" not in prompt.input_variables:
|
||||
logger.warning(
|
||||
"`agent_scratchpad` should be a variable in prompt.input_variables."
|
||||
" Did not find it, so adding it at the end."
|
||||
)
|
||||
prompt.input_variables.append("agent_scratchpad")
|
||||
if isinstance(prompt, PromptTemplate):
|
||||
prompt.template += "\n{agent_scratchpad}"
|
||||
elif isinstance(prompt, FewShotPromptTemplate):
|
||||
prompt.suffix += "\n{agent_scratchpad}"
|
||||
else:
|
||||
raise ValueError(f"Got unexpected prompt type {type(prompt)}")
|
||||
return values
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def observation_prefix(self) -> str:
|
||||
"""Prefix to append the observation with."""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def llm_prefix(self) -> str:
|
||||
"""Prefix to append the LLM call with."""
|
||||
|
||||
@classmethod
|
||||
@abstractmethod
|
||||
def create_prompt(cls, tools: List[Tool]) -> BasePromptTemplate:
|
||||
"""Create a prompt for this class."""
|
||||
|
||||
@classmethod
|
||||
def _validate_tools(cls, tools: List[Tool]) -> None:
|
||||
"""Validate that appropriate tools are passed in."""
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def create_prompt(cls, tools: List[Tool]) -> BasePromptTemplate:
|
||||
"""Create a prompt for this class."""
|
||||
return cls.prompt
|
||||
|
||||
def _prepare_for_new_call(self) -> None:
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def from_llm_and_tools(cls, llm: LLM, tools: List[Tool], **kwargs: Any) -> Agent:
|
||||
def from_llm_and_tools(cls, llm: BaseLLM, tools: List[Tool]) -> Agent:
|
||||
"""Construct an agent from an LLM and tools."""
|
||||
cls._validate_tools(tools)
|
||||
llm_chain = LLMChain(llm=llm, prompt=cls.create_prompt(tools))
|
||||
return cls(llm_chain=llm_chain, tools=tools, **kwargs)
|
||||
return cls(llm_chain=llm_chain)
|
||||
|
||||
def get_action(self, text: str) -> AgentAction:
|
||||
"""Given input, decide what to do.
|
||||
|
||||
Args:
|
||||
text: input string
|
||||
class AgentExecutor(Chain, BaseModel):
|
||||
"""Consists of an agent using tools."""
|
||||
|
||||
Returns:
|
||||
Action specifying what tool to use.
|
||||
agent: Agent
|
||||
tools: List[Tool]
|
||||
return_intermediate_steps: bool = False
|
||||
|
||||
@classmethod
|
||||
def from_agent_and_tools(
|
||||
cls, agent: Agent, tools: List[Tool], **kwargs: Any
|
||||
) -> AgentExecutor:
|
||||
"""Create from agent and tools."""
|
||||
return cls(agent=agent, tools=tools, **kwargs)
|
||||
|
||||
@property
|
||||
def input_keys(self) -> List[str]:
|
||||
"""Return the input keys.
|
||||
|
||||
:meta private:
|
||||
"""
|
||||
input_key = self.llm_chain.input_keys[0]
|
||||
inputs = {input_key: text, "stop": self._stop}
|
||||
full_output = self.llm_chain.predict(**inputs)
|
||||
parsed_output = self._extract_tool_and_input(full_output)
|
||||
while parsed_output is None:
|
||||
full_output = self._fix_text(full_output)
|
||||
inputs = {input_key: text + full_output, "stop": self._stop}
|
||||
output = self.llm_chain.predict(**inputs)
|
||||
full_output += output
|
||||
parsed_output = self._extract_tool_and_input(full_output)
|
||||
tool, tool_input = parsed_output
|
||||
return AgentAction(tool, tool_input, full_output)
|
||||
return self.agent.input_keys
|
||||
|
||||
def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
|
||||
@property
|
||||
def output_keys(self) -> List[str]:
|
||||
"""Return the singular output key.
|
||||
|
||||
:meta private:
|
||||
"""
|
||||
if self.return_intermediate_steps:
|
||||
return self.agent.return_values + ["intermediate_steps"]
|
||||
else:
|
||||
return self.agent.return_values
|
||||
|
||||
def _call(self, inputs: Dict[str, str]) -> Dict[str, Any]:
|
||||
"""Run text through and get agent response."""
|
||||
text = inputs[self.input_key]
|
||||
# Do any preparation necessary when receiving a new input.
|
||||
self._prepare_for_new_call()
|
||||
self.agent.prepare_for_new_call()
|
||||
# Construct a mapping of tool name to tool for easy lookup
|
||||
name_to_tool_map = {tool.name: tool.func for tool in self.tools}
|
||||
# Construct the initial string to pass into the LLM. This is made up
|
||||
# of the user input, the special starter string, and then the LLM prefix.
|
||||
# The starter string is a special string that may be used by a LLM to
|
||||
# immediately follow the user input. The LLM prefix is a string that
|
||||
# prompts the LLM to take an action.
|
||||
starter_string = text + self.starter_string + self.llm_prefix
|
||||
# We use the ChainedInput class to iteratively add to the input over time.
|
||||
chained_input = ChainedInput(starter_string, verbose=self.verbose)
|
||||
# We construct a mapping from each tool to a color, used for logging.
|
||||
color_mapping = get_color_mapping(
|
||||
[tool.name for tool in self.tools], excluded_colors=["green"]
|
||||
)
|
||||
intermediate_steps: List[Tuple[AgentAction, str]] = []
|
||||
# We now enter the agent loop (until it returns something).
|
||||
while True:
|
||||
# Call the LLM to see what to do.
|
||||
output = self.get_action(chained_input.input)
|
||||
# Add the log to the Chained Input.
|
||||
chained_input.add_action(output, color="green")
|
||||
output = self.agent.plan(intermediate_steps, **inputs)
|
||||
# If the tool chosen is the finishing tool, then we end and return.
|
||||
if output.tool == self.finish_tool_name:
|
||||
return {self.output_key: output.tool_input}
|
||||
# Otherwise we lookup the tool
|
||||
chain = name_to_tool_map[output.tool]
|
||||
# We then call the tool on the tool input to get an observation
|
||||
observation = chain(output.tool_input)
|
||||
# We then log the observation
|
||||
chained_input.add_observation(
|
||||
observation,
|
||||
self.observation_prefix,
|
||||
self.llm_prefix,
|
||||
color=color_mapping[output.tool],
|
||||
)
|
||||
if isinstance(output, AgentFinish):
|
||||
if self.verbose:
|
||||
langchain.logger.log_agent_end(output, color="green")
|
||||
final_output = output.return_values
|
||||
if self.return_intermediate_steps:
|
||||
final_output["intermediate_steps"] = intermediate_steps
|
||||
return final_output
|
||||
if self.verbose:
|
||||
langchain.logger.log_agent_action(output, color="green")
|
||||
# And then we lookup the tool
|
||||
if output.tool in name_to_tool_map:
|
||||
chain = name_to_tool_map[output.tool]
|
||||
# We then call the tool on the tool input to get an observation
|
||||
observation = chain(output.tool_input)
|
||||
color = color_mapping[output.tool]
|
||||
else:
|
||||
observation = f"{output.tool} is not a valid tool, try another one."
|
||||
color = None
|
||||
if self.verbose:
|
||||
langchain.logger.log_agent_observation(
|
||||
observation,
|
||||
color=color,
|
||||
observation_prefix=self.agent.observation_prefix,
|
||||
llm_prefix=self.agent.llm_prefix,
|
||||
)
|
||||
intermediate_steps.append((output, observation))
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
"""Input manager for agents."""
|
||||
from typing import Optional
|
||||
|
||||
import langchain
|
||||
from langchain.schema import AgentAction
|
||||
|
||||
|
||||
class ChainedInput:
    """Growing text buffer built up from successive chain steps.

    The buffer starts as the initial text and is extended by every action
    log and every formatted observation. When the instance is verbose, each
    step is also forwarded to the matching ``langchain.logger`` hook.
    """

    def __init__(self, text: str, verbose: bool = False):
        """Remember the verbosity flag and seed the buffer with ``text``.

        Args:
            text: initial contents of the accumulated input.
            verbose: when truthy, report ``text`` through
                ``langchain.logger.log_agent_start``.
        """
        self._verbose = verbose
        if verbose:
            langchain.logger.log_agent_start(text)
        self._input = text

    def add_action(self, action: AgentAction, color: Optional[str] = None) -> None:
        """Append ``action.log`` to the buffer, logging the action if verbose.

        Args:
            action: the agent action whose ``log`` attribute is appended.
            color: passed through to ``langchain.logger.log_agent_action``.
        """
        if self._verbose:
            langchain.logger.log_agent_action(action, color=color)
        self._input = self._input + action.log

    def add_observation(
        self,
        observation: str,
        observation_prefix: str,
        llm_prefix: str,
        color: Optional[str],
    ) -> None:
        """Append a formatted observation to the buffer, logging it if verbose.

        The appended text is a newline, the observation prefix immediately
        followed by the observation, another newline, and the LLM prefix.

        Args:
            observation: text of the observation.
            observation_prefix: text placed directly before the observation.
            llm_prefix: text placed on the line after the observation.
            color: passed through to ``langchain.logger.log_agent_observation``.
        """
        if self._verbose:
            langchain.logger.log_agent_observation(
                observation,
                color=color,
                observation_prefix=observation_prefix,
                llm_prefix=llm_prefix,
            )
        addition = f"\n{observation_prefix}{observation}\n{llm_prefix}"
        self._input = self._input + addition

    @property
    def input(self) -> str:
        """Everything accumulated so far."""
        return self._input
|
||||
169
langchain/agents/load_tools.py
Normal file
169
langchain/agents/load_tools.py
Normal file
@@ -0,0 +1,169 @@
|
||||
# flake8: noqa
|
||||
"""Load tools."""
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from langchain.agents.tools import Tool
|
||||
from langchain.chains.api import news_docs, open_meteo_docs, tmdb_docs
|
||||
from langchain.chains.api.base import APIChain
|
||||
from langchain.chains.llm_math.base import LLMMathChain
|
||||
from langchain.chains.pal.base import PALChain
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.python import PythonREPL
|
||||
from langchain.requests import RequestsWrapper
|
||||
from langchain.serpapi import SerpAPIWrapper
|
||||
from langchain.utilities.bash import BashProcess
|
||||
|
||||
|
||||
def _get_python_repl() -> Tool:
    """Build the "Python REPL" tool around the ``run`` method of a new ``PythonREPL``."""
    repl = PythonREPL()
    return Tool(
        name="Python REPL",
        func=repl.run,
        description="A Python shell. Use this to execute python commands. Input should be a valid python command. If you expect output it should be printed out.",
    )
|
||||
|
||||
|
||||
def _get_serpapi() -> Tool:
    """Build the "Search" tool around the ``run`` method of a new ``SerpAPIWrapper``."""
    search = SerpAPIWrapper()
    return Tool(
        name="Search",
        func=search.run,
        description="A search engine. Useful for when you need to answer questions about current events. Input should be a search query.",
    )
|
||||
|
||||
|
||||
def _get_requests() -> Tool:
    """Build the "Requests" tool around the ``run`` method of a new ``RequestsWrapper``."""
    fetcher = RequestsWrapper()
    return Tool(
        name="Requests",
        func=fetcher.run,
        description="A portal to the internet. Use this when you need to get specific content from a site. Input should be a specific url, and the output will be all the text on that page.",
    )
|
||||
|
||||
|
||||
def _get_terminal() -> Tool:
    """Build the "Terminal" tool around the ``run`` method of a new ``BashProcess``."""
    shell = BashProcess()
    return Tool(
        name="Terminal",
        func=shell.run,
        description="Executes commands in a terminal. Input should be valid commands, and the output will be any output from running that command.",
    )
|
||||
|
||||
|
||||
# Registry of tool factories that are called with no arguments
# (``load_tools`` invokes them as ``_BASE_TOOLS[name]()``).
_BASE_TOOLS = {
    "python_repl": _get_python_repl,
    "serpapi": _get_serpapi,
    "requests": _get_requests,
    "terminal": _get_terminal,
}
|
||||
|
||||
|
||||
def _get_pal_math(llm: BaseLLM) -> Tool:
    """Build the "PAL-MATH" tool from ``PALChain.from_math_prompt(llm)``.

    Args:
        llm: language model handed to the PAL chain constructor.
    """
    pal_chain = PALChain.from_math_prompt(llm)
    return Tool(
        name="PAL-MATH",
        func=pal_chain.run,
        description="A language model that is really good at solving complex word math problems. Input should be a fully worded hard word math problem.",
    )
|
||||
|
||||
|
||||
def _get_pal_colored_objects(llm: BaseLLM) -> Tool:
    """Build the "PAL-COLOR-OBJ" tool from ``PALChain.from_colored_object_prompt(llm)``.

    Args:
        llm: language model handed to the PAL chain constructor.
    """
    pal_chain = PALChain.from_colored_object_prompt(llm)
    return Tool(
        name="PAL-COLOR-OBJ",
        func=pal_chain.run,
        description="A language model that is really good at reasoning about position and the color attributes of objects. Input should be a fully worded hard reasoning problem. Make sure to include all information about the objects AND the final question you want to answer.",
    )
|
||||
|
||||
|
||||
def _get_llm_math(llm: BaseLLM) -> Tool:
    """Build the "Calculator" tool from an ``LLMMathChain`` using ``llm``.

    Args:
        llm: language model handed to ``LLMMathChain``.
    """
    math_chain = LLMMathChain(llm=llm)
    return Tool(
        name="Calculator",
        func=math_chain.run,
        description="Useful for when you need to answer questions about math.",
    )
|
||||
|
||||
|
||||
def _get_open_meteo_api(llm: BaseLLM) -> Tool:
    """Build the "Open Meteo API" tool from an ``APIChain`` over the Open Meteo docs.

    Args:
        llm: language model handed to ``APIChain.from_llm_and_api_docs``.
    """
    api_docs = open_meteo_docs.OPEN_METEO_DOCS
    weather_chain = APIChain.from_llm_and_api_docs(llm, api_docs)
    return Tool(
        name="Open Meteo API",
        func=weather_chain.run,
        description="Useful for when you want to get weather information from the OpenMeteo API. The input should be a question in natural language that this API can answer.",
    )
|
||||
|
||||
|
||||
# Registry of tool factories that take the language model as their only
# argument (``load_tools`` invokes them as ``_LLM_TOOLS[name](llm)``).
_LLM_TOOLS = {
    "pal-math": _get_pal_math,
    "pal-colored-objects": _get_pal_colored_objects,
    "llm-math": _get_llm_math,
    "open-meteo-api": _get_open_meteo_api,
}
|
||||
|
||||
|
||||
def _get_news_api(llm: BaseLLM, **kwargs: Any) -> Tool:
    """Build the "News API" tool from an ``APIChain`` over the News API docs.

    Args:
        llm: language model handed to ``APIChain.from_llm_and_api_docs``.
        **kwargs: must contain ``news_api_key``; it is sent as the
            ``X-Api-Key`` request header.

    Raises:
        KeyError: if ``news_api_key`` is not supplied.
    """
    api_key = kwargs["news_api_key"]
    request_headers = {"X-Api-Key": api_key}
    news_chain = APIChain.from_llm_and_api_docs(
        llm, news_docs.NEWS_DOCS, headers=request_headers
    )
    return Tool(
        name="News API",
        func=news_chain.run,
        description="Use this when you want to get information about the top headlines of current news stories. The input should be a question in natural language that this API can answer.",
    )
|
||||
|
||||
|
||||
def _get_tmdb_api(llm: BaseLLM, **kwargs: Any) -> Tool:
    """Build the "TMDB API" tool from an ``APIChain`` over the TMDB docs.

    Args:
        llm: language model handed to ``APIChain.from_llm_and_api_docs``.
        **kwargs: must contain ``tmdb_bearer_token``; it is sent in the
            ``Authorization`` request header as a bearer token.

    Raises:
        KeyError: if ``tmdb_bearer_token`` is not supplied.
    """
    bearer_token = kwargs["tmdb_bearer_token"]
    request_headers = {"Authorization": f"Bearer {bearer_token}"}
    tmdb_chain = APIChain.from_llm_and_api_docs(
        llm, tmdb_docs.TMDB_DOCS, headers=request_headers
    )
    return Tool(
        name="TMDB API",
        func=tmdb_chain.run,
        description="Useful for when you want to get information from The Movie Database. The input should be a question in natural language that this API can answer.",
    )
|
||||
|
||||
|
||||
# Registry of tools that need extra keyword arguments besides the LLM.
# Each value is a pair: (factory function, list of required kwarg names).
_EXTRA_TOOLS = {
    "news-api": (_get_news_api, ["news_api_key"]),
    "tmdb-api": (_get_tmdb_api, ["tmdb_bearer_token"]),
}
|
||||
|
||||
|
||||
def load_tools(
    tool_names: List[str], llm: Optional[BaseLLM] = None, **kwargs: Any
) -> List[Tool]:
    """Instantiate tools from their registry names.

    Names are resolved against ``_BASE_TOOLS`` first, then ``_LLM_TOOLS``,
    then ``_EXTRA_TOOLS``.

    Args:
        tool_names: names of the tools to load, in the desired output order.
        llm: optional language model; required by every tool that is not a
            base tool.
        **kwargs: extra parameters for tools in ``_EXTRA_TOOLS``; only the
            keys a tool declares as required are forwarded to its factory.

    Returns:
        List of tools, one per requested name.

    Raises:
        ValueError: if a name is unknown, if an LLM-backed tool is requested
            without ``llm``, or if a required extra parameter is missing.
    """
    loaded = []
    for name in tool_names:
        # Base tools need nothing beyond their own factory call.
        if name in _BASE_TOOLS:
            loaded.append(_BASE_TOOLS[name]())
            continue

        needs_llm_only = name in _LLM_TOOLS
        if not needs_llm_only and name not in _EXTRA_TOOLS:
            raise ValueError(f"Got unknown tool {name}")

        # Both remaining registries require a language model.
        if llm is None:
            raise ValueError(f"Tool {name} requires an LLM to be provided")

        if needs_llm_only:
            loaded.append(_LLM_TOOLS[name](llm))
            continue

        factory, extra_keys = _EXTRA_TOOLS[name]
        missing_keys = set(extra_keys).difference(kwargs)
        if missing_keys:
            raise ValueError(
                f"Tool {name} requires some parameters that were not "
                f"provided: {missing_keys}"
            )
        forwarded = {key: kwargs[key] for key in extra_keys}
        loaded.append(factory(llm=llm, **forwarded))
    return loaded
|
||||
|
||||
|
||||
def get_all_tool_names() -> List[str]:
    """Return every registered tool name: base tools, then extra tools, then LLM tools."""
    return [*_BASE_TOOLS, *_EXTRA_TOOLS, *_LLM_TOOLS]
|
||||
@@ -1,12 +1,12 @@
|
||||
"""Load agent."""
|
||||
from typing import Any, List
|
||||
|
||||
from langchain.agents.agent import Agent
|
||||
from langchain.agents.agent import AgentExecutor
|
||||
from langchain.agents.mrkl.base import ZeroShotAgent
|
||||
from langchain.agents.react.base import ReActDocstoreAgent
|
||||
from langchain.agents.self_ask_with_search.base import SelfAskWithSearchAgent
|
||||
from langchain.agents.tools import Tool
|
||||
from langchain.llms.base import LLM
|
||||
from langchain.llms.base import BaseLLM
|
||||
|
||||
AGENT_TO_CLASS = {
|
||||
"zero-shot-react-description": ZeroShotAgent,
|
||||
@@ -17,10 +17,10 @@ AGENT_TO_CLASS = {
|
||||
|
||||
def initialize_agent(
|
||||
tools: List[Tool],
|
||||
llm: LLM,
|
||||
llm: BaseLLM,
|
||||
agent: str = "zero-shot-react-description",
|
||||
**kwargs: Any,
|
||||
) -> Agent:
|
||||
) -> AgentExecutor:
|
||||
"""Load agent given tools and LLM.
|
||||
|
||||
Args:
|
||||
@@ -39,4 +39,5 @@ def initialize_agent(
|
||||
f"Valid types are: {AGENT_TO_CLASS.keys()}."
|
||||
)
|
||||
agent_cls = AGENT_TO_CLASS[agent]
|
||||
return agent_cls.from_llm_and_tools(llm, tools, **kwargs)
|
||||
agent_obj = agent_cls.from_llm_and_tools(llm, tools)
|
||||
return AgentExecutor.from_agent_and_tools(agent=agent_obj, tools=tools, **kwargs)
|
||||
|
||||
@@ -3,10 +3,10 @@ from __future__ import annotations
|
||||
|
||||
from typing import Any, Callable, List, NamedTuple, Optional, Tuple
|
||||
|
||||
from langchain.agents.agent import Agent
|
||||
from langchain.agents.agent import Agent, AgentExecutor
|
||||
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS, PREFIX, SUFFIX
|
||||
from langchain.agents.tools import Tool
|
||||
from langchain.llms.base import LLM
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.prompts import PromptTemplate
|
||||
|
||||
FINAL_ANSWER_ACTION = "Final Answer: "
|
||||
@@ -85,7 +85,7 @@ class ZeroShotAgent(Agent):
|
||||
format_instructions = FORMAT_INSTRUCTIONS.format(tool_names=tool_names)
|
||||
template = "\n\n".join([prefix, tool_strings, format_instructions, suffix])
|
||||
if input_variables is None:
|
||||
input_variables = ["input"]
|
||||
input_variables = ["input", "agent_scratchpad"]
|
||||
return PromptTemplate(template=template, input_variables=input_variables)
|
||||
|
||||
@classmethod
|
||||
@@ -101,7 +101,7 @@ class ZeroShotAgent(Agent):
|
||||
return get_action_and_input(text)
|
||||
|
||||
|
||||
class MRKLChain(ZeroShotAgent):
|
||||
class MRKLChain(AgentExecutor):
|
||||
"""Chain that implements the MRKL system.
|
||||
|
||||
Example:
|
||||
@@ -116,7 +116,9 @@ class MRKLChain(ZeroShotAgent):
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def from_chains(cls, llm: LLM, chains: List[ChainConfig], **kwargs: Any) -> Agent:
|
||||
def from_chains(
|
||||
cls, llm: BaseLLM, chains: List[ChainConfig], **kwargs: Any
|
||||
) -> AgentExecutor:
|
||||
"""User friendly way to initialize the MRKL chain.
|
||||
|
||||
This is intended to be an easy way to get up and running with the
|
||||
@@ -156,4 +158,5 @@ class MRKLChain(ZeroShotAgent):
|
||||
Tool(name=c.action_name, func=c.action, description=c.action_description)
|
||||
for c in chains
|
||||
]
|
||||
return cls.from_llm_and_tools(llm, tools, **kwargs)
|
||||
agent = ZeroShotAgent.from_llm_and_tools(llm, tools)
|
||||
return cls(agent=agent, tools=tools, **kwargs)
|
||||
|
||||
@@ -12,4 +12,5 @@ Thought: I now know the final answer
|
||||
Final Answer: the final answer to the original input question"""
|
||||
SUFFIX = """Begin!
|
||||
|
||||
Question: {input}"""
|
||||
Question: {input}
|
||||
Thought:{agent_scratchpad}"""
|
||||
|
||||
@@ -1,24 +1,26 @@
|
||||
"""Chain that implements the ReAct paper from https://arxiv.org/pdf/2210.03629.pdf."""
|
||||
import re
|
||||
from typing import Any, ClassVar, List, Optional, Tuple
|
||||
from typing import Any, List, Optional, Tuple
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from langchain.agents.agent import Agent
|
||||
from langchain.agents.agent import Agent, AgentExecutor
|
||||
from langchain.agents.react.textworld_prompt import TEXTWORLD_PROMPT
|
||||
from langchain.agents.react.wiki_prompt import WIKI_PROMPT
|
||||
from langchain.agents.tools import Tool
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.docstore.base import Docstore
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.llms.base import LLM
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
|
||||
|
||||
class ReActDocstoreAgent(Agent, BaseModel):
|
||||
"""Agent for the ReAct chain."""
|
||||
|
||||
prompt: ClassVar[BasePromptTemplate] = WIKI_PROMPT
|
||||
@classmethod
|
||||
def create_prompt(cls, tools: List[Tool]) -> BasePromptTemplate:
|
||||
"""Return default prompt."""
|
||||
return WIKI_PROMPT
|
||||
|
||||
i: int = 1
|
||||
|
||||
@@ -64,7 +66,7 @@ class ReActDocstoreAgent(Agent, BaseModel):
|
||||
|
||||
@property
|
||||
def _stop(self) -> List[str]:
|
||||
return [f"\nObservation {self.i}: "]
|
||||
return [f"\nObservation {self.i}:"]
|
||||
|
||||
@property
|
||||
def llm_prefix(self) -> str:
|
||||
@@ -100,9 +102,10 @@ class DocstoreExplorer:
|
||||
class ReActTextWorldAgent(ReActDocstoreAgent, BaseModel):
|
||||
"""Agent for the ReAct TextWorld chain."""
|
||||
|
||||
prompt: ClassVar[BasePromptTemplate] = TEXTWORLD_PROMPT
|
||||
|
||||
i: int = 1
|
||||
@classmethod
|
||||
def create_prompt(cls, tools: List[Tool]) -> BasePromptTemplate:
|
||||
"""Return default prompt."""
|
||||
return TEXTWORLD_PROMPT
|
||||
|
||||
@classmethod
|
||||
def _validate_tools(cls, tools: List[Tool]) -> None:
|
||||
@@ -113,7 +116,7 @@ class ReActTextWorldAgent(ReActDocstoreAgent, BaseModel):
|
||||
raise ValueError(f"Tool name should be Play, got {tool_names}")
|
||||
|
||||
|
||||
class ReActChain(ReActDocstoreAgent):
|
||||
class ReActChain(AgentExecutor):
|
||||
"""Chain that implements the ReAct paper.
|
||||
|
||||
Example:
|
||||
@@ -123,12 +126,12 @@ class ReActChain(ReActDocstoreAgent):
|
||||
react = ReAct(llm=OpenAI())
|
||||
"""
|
||||
|
||||
def __init__(self, llm: LLM, docstore: Docstore, **kwargs: Any):
|
||||
def __init__(self, llm: BaseLLM, docstore: Docstore, **kwargs: Any):
|
||||
"""Initialize with the LLM and a docstore."""
|
||||
docstore_explorer = DocstoreExplorer(docstore)
|
||||
tools = [
|
||||
Tool(name="Search", func=docstore_explorer.search),
|
||||
Tool(name="Lookup", func=docstore_explorer.lookup),
|
||||
]
|
||||
llm_chain = LLMChain(llm=llm, prompt=WIKI_PROMPT)
|
||||
super().__init__(llm_chain=llm_chain, tools=tools, **kwargs)
|
||||
agent = ReActDocstoreAgent.from_llm_and_tools(llm, tools)
|
||||
super().__init__(agent=agent, tools=tools, **kwargs)
|
||||
|
||||
@@ -44,6 +44,9 @@ Action 4: Finish[yes]
|
||||
|
||||
"""
|
||||
]
|
||||
SUFFIX = """\n\nSetup: {input}"""
|
||||
SUFFIX = """\n\nSetup: {input}
|
||||
{agent_scratchpad}"""
|
||||
|
||||
TEXTWORLD_PROMPT = PromptTemplate.from_examples(EXAMPLES, SUFFIX, ["input"])
|
||||
TEXTWORLD_PROMPT = PromptTemplate.from_examples(
|
||||
EXAMPLES, SUFFIX, ["input", "agent_scratchpad"]
|
||||
)
|
||||
|
||||
@@ -107,6 +107,9 @@ Thought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urys
|
||||
and Leonid Levin have the same type of work.
|
||||
Action 3: Finish[yes]""",
|
||||
]
|
||||
SUFFIX = """\n\nQuestion: {input}"""
|
||||
SUFFIX = """\n\nQuestion: {input}
|
||||
{agent_scratchpad}"""
|
||||
|
||||
WIKI_PROMPT = PromptTemplate.from_examples(EXAMPLES, SUFFIX, ["input"])
|
||||
WIKI_PROMPT = PromptTemplate.from_examples(
|
||||
EXAMPLES, SUFFIX, ["input", "agent_scratchpad"]
|
||||
)
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
"""Chain that does self ask with search."""
|
||||
from typing import Any, ClassVar, List, Optional, Tuple
|
||||
from typing import Any, List, Optional, Tuple
|
||||
|
||||
from langchain.agents.agent import Agent
|
||||
from langchain.agents.agent import Agent, AgentExecutor
|
||||
from langchain.agents.self_ask_with_search.prompt import PROMPT
|
||||
from langchain.agents.tools import Tool
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.llms.base import LLM
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
from langchain.serpapi import SerpAPIWrapper
|
||||
|
||||
@@ -13,7 +12,10 @@ from langchain.serpapi import SerpAPIWrapper
|
||||
class SelfAskWithSearchAgent(Agent):
|
||||
"""Agent for the self-ask-with-search paper."""
|
||||
|
||||
prompt: ClassVar[BasePromptTemplate] = PROMPT
|
||||
@classmethod
|
||||
def create_prompt(cls, tools: List[Tool]) -> BasePromptTemplate:
|
||||
"""Prompt does not depend on tools."""
|
||||
return PROMPT
|
||||
|
||||
@classmethod
|
||||
def _validate_tools(cls, tools: List[Tool]) -> None:
|
||||
@@ -58,10 +60,10 @@ class SelfAskWithSearchAgent(Agent):
|
||||
@property
|
||||
def starter_string(self) -> str:
|
||||
"""Put this string after user input but before first LLM call."""
|
||||
return "\nAre follow up questions needed here:"
|
||||
return "Are follow up questions needed here:"
|
||||
|
||||
|
||||
class SelfAskWithSearchChain(SelfAskWithSearchAgent):
|
||||
class SelfAskWithSearchChain(AgentExecutor):
|
||||
"""Chain that does self ask with search.
|
||||
|
||||
Example:
|
||||
@@ -72,8 +74,8 @@ class SelfAskWithSearchChain(SelfAskWithSearchAgent):
|
||||
self_ask = SelfAskWithSearchChain(llm=OpenAI(), search_chain=search_chain)
|
||||
"""
|
||||
|
||||
def __init__(self, llm: LLM, search_chain: SerpAPIWrapper, **kwargs: Any):
|
||||
def __init__(self, llm: BaseLLM, search_chain: SerpAPIWrapper, **kwargs: Any):
|
||||
"""Initialize with just an LLM and a search chain."""
|
||||
search_tool = Tool(name="Intermediate Answer", func=search_chain.run)
|
||||
llm_chain = LLMChain(llm=llm, prompt=PROMPT)
|
||||
super().__init__(llm_chain=llm_chain, tools=[search_tool], **kwargs)
|
||||
agent = SelfAskWithSearchAgent.from_llm_and_tools(llm, [search_tool])
|
||||
super().__init__(agent=agent, tools=[search_tool], **kwargs)
|
||||
|
||||
@@ -37,5 +37,8 @@ Follow up: Where is Martin Campbell from?
|
||||
Intermediate answer: New Zealand.
|
||||
So the final answer is: No
|
||||
|
||||
Question: {input}"""
|
||||
PROMPT = PromptTemplate(input_variables=["input"], template=_DEFAULT_TEMPLATE)
|
||||
Question: {input}
|
||||
Are followup questions needed here:{agent_scratchpad}"""
|
||||
PROMPT = PromptTemplate(
|
||||
input_variables=["input", "agent_scratchpad"], template=_DEFAULT_TEMPLATE
|
||||
)
|
||||
|
||||
96
langchain/cache.py
Normal file
96
langchain/cache.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""Beta Feature: base interface for cache."""
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
from sqlalchemy import Column, Integer, String, create_engine, select
|
||||
from sqlalchemy.engine.base import Engine
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from langchain.schema import Generation
|
||||
|
||||
# Type of a cached value: the list of generations stored for one
# (prompt, llm_string) pair.
RETURN_VAL_TYPE = List[Generation]
|
||||
|
||||
|
||||
class BaseCache(ABC):
    """Abstract interface every cache implementation must provide.

    Entries are addressed by the pair ``(prompt, llm_string)``.
    """

    @abstractmethod
    def lookup(
        self,
        prompt: str,
        llm_string: str,
    ) -> Optional[RETURN_VAL_TYPE]:
        """Fetch the cached value for ``prompt`` and ``llm_string``, if any."""

    @abstractmethod
    def update(
        self,
        prompt: str,
        llm_string: str,
        return_val: RETURN_VAL_TYPE,
    ) -> None:
        """Store ``return_val`` for ``prompt`` and ``llm_string``."""
|
||||
|
||||
|
||||
class InMemoryCache(BaseCache):
    """Cache backed by a plain dictionary held on the instance."""

    def __init__(self) -> None:
        """Start with no cached entries."""
        self._cache: Dict[Tuple[str, str], RETURN_VAL_TYPE] = {}

    def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
        """Return the value stored for ``(prompt, llm_string)``, or ``None``."""
        key = (prompt, llm_string)
        return self._cache.get(key)

    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
        """Store ``return_val`` under ``(prompt, llm_string)``, replacing any prior value."""
        key = (prompt, llm_string)
        self._cache[key] = return_val
|
||||
|
||||
|
||||
# Declarative base class for the ORM table models defined in this module.
Base = declarative_base()
|
||||
|
||||
|
||||
class FullLLMCache(Base):  # type: ignore
    """SQLite table for full LLM Cache (all generations)."""

    __tablename__ = "full_llm_cache"
    # The primary key is the composite (prompt, llm, idx): one row per
    # generation of a given prompt / LLM pair.
    prompt = Column(String, primary_key=True)
    llm = Column(String, primary_key=True)
    idx = Column(Integer, primary_key=True)
    # Generation text stored for this row.
    response = Column(String)
|
||||
|
||||
|
||||
class SQLAlchemyCache(BaseCache):
    """Cache that uses SQLAlchemy as a backend."""

    def __init__(self, engine: Engine):
        """Initialize by creating all tables.

        Args:
            engine: SQLAlchemy engine used for every lookup and update.
        """
        self.engine = engine
        Base.metadata.create_all(self.engine)

    def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]:
        """Look up based on prompt and llm_string.

        Returns:
            The cached generations ordered by their stored index, or ``None``
            when no row matches.
        """
        stmt = (
            select(FullLLMCache.response)
            .where(FullLLMCache.prompt == prompt)
            .where(FullLLMCache.llm == llm_string)
            .order_by(FullLLMCache.idx)
        )
        with Session(self.engine) as session:
            generations = [
                Generation(text=row[0]) for row in session.execute(stmt)
            ]
        # An empty result means "not cached", which callers see as None.
        return generations or None

    def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None:
        """Update cache based on prompt and llm_string.

        All generations are written in a single transaction, so a failure
        cannot leave a partially written entry behind. Rows already stored
        for the same ``(prompt, llm_string)`` are removed first, which makes
        a repeated update replace the entry instead of violating the
        composite primary key.
        """
        items = [
            FullLLMCache(
                prompt=prompt, llm=llm_string, response=generation.text, idx=i
            )
            for i, generation in enumerate(return_val)
        ]
        with Session(self.engine) as session, session.begin():
            # Drop any previous entry (including rows with a higher idx than
            # the new value has) before inserting the replacement.
            session.query(FullLLMCache).filter(
                FullLLMCache.prompt == prompt,
                FullLLMCache.llm == llm_string,
            ).delete(synchronize_session=False)
            session.add_all(items)
|
||||
|
||||
|
||||
class SQLiteCache(SQLAlchemyCache):
    """SQLAlchemy-backed cache stored in a SQLite database file."""

    def __init__(self, database_path: str = ".langchain.db"):
        """Create a SQLite engine for ``database_path`` and set up the tables.

        Args:
            database_path: path of the SQLite file, inserted into a
                ``sqlite:///`` URL.
        """
        url = f"sqlite:///{database_path}"
        super().__init__(create_engine(url))
|
||||
@@ -2,24 +2,37 @@
|
||||
from langchain.chains.api.base import APIChain
|
||||
from langchain.chains.conversation.base import ConversationChain
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.chains.llm_bash.base import LLMBashChain
|
||||
from langchain.chains.llm_checker.base import LLMCheckerChain
|
||||
from langchain.chains.llm_math.base import LLMMathChain
|
||||
from langchain.chains.llm_requests import LLMRequestsChain
|
||||
from langchain.chains.mapreduce import MapReduceChain
|
||||
from langchain.chains.moderation import OpenAIModerationChain
|
||||
from langchain.chains.pal.base import PALChain
|
||||
from langchain.chains.qa_with_sources.base import QAWithSourcesChain
|
||||
from langchain.chains.qa_with_sources.vector_db import VectorDBQAWithSourcesChain
|
||||
from langchain.chains.sequential import SequentialChain, SimpleSequentialChain
|
||||
from langchain.chains.sql_database.base import SQLDatabaseChain
|
||||
from langchain.chains.transform import TransformChain
|
||||
from langchain.chains.vector_db_qa.base import VectorDBQA
|
||||
|
||||
__all__ = [
|
||||
"APIChain",
|
||||
"ConversationChain",
|
||||
"LLMChain",
|
||||
"LLMBashChain",
|
||||
"LLMCheckerChain",
|
||||
"LLMMathChain",
|
||||
"PALChain",
|
||||
"QAWithSourcesChain",
|
||||
"SQLDatabaseChain",
|
||||
"VectorDBQA",
|
||||
"SequentialChain",
|
||||
"SimpleSequentialChain",
|
||||
"ConversationChain",
|
||||
"QAWithSourcesChain",
|
||||
"VectorDBQA",
|
||||
"VectorDBQAWithSourcesChain",
|
||||
"PALChain",
|
||||
"APIChain",
|
||||
"LLMRequestsChain",
|
||||
"TransformChain",
|
||||
"MapReduceChain",
|
||||
"OpenAIModerationChain",
|
||||
]
|
||||
|
||||
@@ -3,24 +3,14 @@ from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
from pydantic import BaseModel, root_validator
|
||||
|
||||
from langchain.chains.api.prompt import API_RESPONSE_PROMPT, API_URL_PROMPT
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.input import print_text
|
||||
from langchain.llms.base import LLM
|
||||
|
||||
|
||||
class RequestsWrapper(BaseModel):
|
||||
"""Lightweight wrapper to partial out everything except the url to hit."""
|
||||
|
||||
headers: Optional[dict] = None
|
||||
|
||||
def run(self, url: str) -> str:
|
||||
"""Hit the URL and return the text."""
|
||||
return requests.get(url, headers=self.headers).text
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.requests import RequestsWrapper
|
||||
|
||||
|
||||
class APIChain(Chain, BaseModel):
|
||||
@@ -91,7 +81,7 @@ class APIChain(Chain, BaseModel):
|
||||
|
||||
@classmethod
|
||||
def from_llm_and_api_docs(
|
||||
cls, llm: LLM, api_docs: str, headers: Optional[dict] = None, **kwargs: Any
|
||||
cls, llm: BaseLLM, api_docs: str, headers: Optional[dict] = None, **kwargs: Any
|
||||
) -> APIChain:
|
||||
"""Load chain from just an LLM and the api docs."""
|
||||
get_request_chain = LLMChain(llm=llm, prompt=API_URL_PROMPT)
|
||||
|
||||
32
langchain/chains/api/news_docs.py
Normal file
32
langchain/chains/api/news_docs.py
Normal file
@@ -0,0 +1,32 @@
|
||||
# flake8: noqa
|
||||
NEWS_DOCS = """API documentation:
|
||||
Endpoint: https://newsapi.org
|
||||
Top headlines /v2/top-headlines
|
||||
|
||||
This endpoint provides live top and breaking headlines for a country, specific category in a country, single source, or multiple sources. You can also search with keywords. Articles are sorted by the earliest date published first.
|
||||
|
||||
This endpoint is great for retrieving headlines for use with news tickers or similar.
|
||||
Request parameters
|
||||
|
||||
country | The 2-letter ISO 3166-1 code of the country you want to get headlines for. Possible options: ae ar at au be bg br ca ch cn co cu cz de eg fr gb gr hk hu id ie il in it jp kr lt lv ma mx my ng nl no nz ph pl pt ro rs ru sa se sg si sk th tr tw ua us ve za. Note: you can't mix this param with the sources param.
|
||||
category | The category you want to get headlines for. Possible options: business entertainment general health science sports technology. Note: you can't mix this param with the sources param.
|
||||
sources | A comma-seperated string of identifiers for the news sources or blogs you want headlines from. Use the /top-headlines/sources endpoint to locate these programmatically or look at the sources index. Note: you can't mix this param with the country or category params.
|
||||
q | Keywords or a phrase to search for.
|
||||
pageSize | int | The number of results to return per page (request). 20 is the default, 100 is the maximum.
|
||||
page | int | Use this to page through the results if the total results found is greater than the page size.
|
||||
|
||||
Response object
|
||||
status | string | If the request was successful or not. Options: ok, error. In the case of error a code and message property will be populated.
|
||||
totalResults | int | The total number of results available for your request.
|
||||
articles | array[article] | The results of the request.
|
||||
source | object | The identifier id and a display name name for the source this article came from.
|
||||
author | string | The author of the article
|
||||
title | string | The headline or title of the article.
|
||||
description | string | A description or snippet from the article.
|
||||
url | string | The direct URL to the article.
|
||||
urlToImage | string | The URL to a relevant image for the article.
|
||||
publishedAt | string | The date and time that the article was published, in UTC (+000)
|
||||
content | string | The unformatted content of the article, where available. This is truncated to 200 chars.
|
||||
|
||||
Use page size: 2
|
||||
"""
|
||||
33
langchain/chains/api/open_meteo_docs.py
Normal file
33
langchain/chains/api/open_meteo_docs.py
Normal file
@@ -0,0 +1,33 @@
|
||||
# flake8: noqa
|
||||
OPEN_METEO_DOCS = """BASE URL: https://api.open-meteo.com/
|
||||
|
||||
API Documentation
|
||||
The API endpoint /v1/forecast accepts a geographical coordinate, a list of weather variables and responds with a JSON hourly weather forecast for 7 days. Time always starts at 0:00 today and contains 168 hours. All URL parameters are listed below:
|
||||
|
||||
Parameter Format Required Default Description
|
||||
latitude, longitude Floating point Yes Geographical WGS84 coordinate of the location
|
||||
hourly String array No A list of weather variables which should be returned. Values can be comma separated, or multiple &hourly= parameter in the URL can be used.
|
||||
daily String array No A list of daily weather variable aggregations which should be returned. Values can be comma separated, or multiple &daily= parameter in the URL can be used. If daily weather variables are specified, parameter timezone is required.
|
||||
current_weather Bool No false Include current weather conditions in the JSON output.
|
||||
temperature_unit String No celsius If fahrenheit is set, all temperature values are converted to Fahrenheit.
|
||||
windspeed_unit String No kmh Other wind speed units: ms, mph and kn
|
||||
precipitation_unit String No mm Other precipitation amount units: inch
|
||||
timeformat String No iso8601 If format unixtime is selected, all time values are returned in UNIX epoch time in seconds. Please note that all timestamp are in GMT+0! For daily values with unix timestamps, please apply utc_offset_seconds again to get the correct date.
|
||||
timezone String No GMT If timezone is set, all timestamps are returned as local-time and data is returned starting at 00:00 local-time. Any time zone name from the time zone database is supported. If auto is set as a time zone, the coordinates will be automatically resolved to the local time zone.
|
||||
past_days Integer (0-2) No 0 If past_days is set, yesterday or the day before yesterday data are also returned.
|
||||
start_date
|
||||
end_date String (yyyy-mm-dd) No The time interval to get weather data. A day must be specified as an ISO8601 date (e.g. 2022-06-30).
|
||||
models String array No auto Manually select one or more weather models. Per default, the best suitable weather models will be combined.
|
||||
|
||||
Hourly Parameter Definition
|
||||
The parameter &hourly= accepts the following values. Most weather variables are given as an instantaneous value for the indicated hour. Some variables like precipitation are calculated from the preceding hour as an average or sum.
|
||||
|
||||
Variable Valid time Unit Description
|
||||
temperature_2m Instant °C (°F) Air temperature at 2 meters above ground
|
||||
snowfall Preceding hour sum cm (inch) Snowfall amount of the preceding hour in centimeters. For the water equivalent in millimeter, divide by 7. E.g. 7 cm snow = 10 mm precipitation water equivalent
|
||||
rain Preceding hour sum mm (inch) Rain from large scale weather systems of the preceding hour in millimeter
|
||||
showers Preceding hour sum mm (inch) Showers from convective precipitation in millimeters from the preceding hour
|
||||
weathercode Instant WMO code Weather condition as a numeric code. Follow WMO weather interpretation codes. See table below for details.
|
||||
snow_depth Instant meters Snow depth on the ground
|
||||
freezinglevel_height Instant meters Altitude above sea level of the 0°C level
|
||||
visibility Instant meters Viewing distance in meters. Influenced by low clouds, humidity and aerosols. Maximum visibility is approximately 24 km."""
|
||||
37
langchain/chains/api/tmdb_docs.py
Normal file
37
langchain/chains/api/tmdb_docs.py
Normal file
@@ -0,0 +1,37 @@
|
||||
# flake8: noqa
|
||||
TMDB_DOCS = """API documentation:
|
||||
Endpoint: https://api.themoviedb.org/3
|
||||
GET /search/movie
|
||||
|
||||
This API is for searching movies.
|
||||
|
||||
Query parameters table:
|
||||
language | string | Pass a ISO 639-1 value to display translated data for the fields that support it. minLength: 2, pattern: ([a-z]{2})-([A-Z]{2}), default: en-US | optional
|
||||
query | string | Pass a text query to search. This value should be URI encoded. minLength: 1 | required
|
||||
page | integer | Specify which page to query. minimum: 1, maximum: 1000, default: 1 | optional
|
||||
include_adult | boolean | Choose whether to include adult (pornography) content in the results. default | optional
|
||||
region | string | Specify a ISO 3166-1 code to filter release dates. Must be uppercase. pattern: ^[A-Z]{2}$ | optional
|
||||
year | integer | optional
|
||||
primary_release_year | integer | optional
|
||||
|
||||
Response schema (JSON object):
|
||||
page | integer | optional
|
||||
total_results | integer | optional
|
||||
total_pages | integer | optional
|
||||
results | array[object] (Movie List Result Object)
|
||||
|
||||
Each object in the "results" key has the following schema:
|
||||
poster_path | string or null | optional
|
||||
adult | boolean | optional
|
||||
overview | string | optional
|
||||
release_date | string | optional
|
||||
genre_ids | array[integer] | optional
|
||||
id | integer | optional
|
||||
original_title | string | optional
|
||||
original_language | string | optional
|
||||
title | string | optional
|
||||
backdrop_path | string or null | optional
|
||||
popularity | number | optional
|
||||
vote_count | integer | optional
|
||||
video | boolean | optional
|
||||
vote_average | number | optional"""
|
||||
@@ -1,8 +1,10 @@
|
||||
"""Base interface that all chains should implement."""
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from pydantic import BaseModel, Extra
|
||||
from pydantic import BaseModel, Extra, Field
|
||||
|
||||
import langchain
|
||||
|
||||
|
||||
class Memory(BaseModel, ABC):
|
||||
@@ -27,13 +29,21 @@ class Memory(BaseModel, ABC):
|
||||
def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
|
||||
"""Save the context of this model run to memory."""
|
||||
|
||||
@abstractmethod
|
||||
def clear(self) -> None:
|
||||
"""Clear memory contents."""
|
||||
|
||||
|
||||
def _get_verbosity() -> bool:
|
||||
return langchain.verbose
|
||||
|
||||
|
||||
class Chain(BaseModel, ABC):
|
||||
"""Base interface that all chains should implement."""
|
||||
|
||||
memory: Optional[Memory] = None
|
||||
|
||||
verbose: bool = False
|
||||
verbose: bool = Field(default_factory=_get_verbosity)
|
||||
"""Whether to print out response text."""
|
||||
|
||||
@property
|
||||
@@ -64,18 +74,28 @@ class Chain(BaseModel, ABC):
|
||||
"""Run the logic of this chain and return the output."""
|
||||
|
||||
def __call__(
|
||||
self, inputs: Dict[str, Any], return_only_outputs: bool = False
|
||||
) -> Dict[str, str]:
|
||||
self, inputs: Union[Dict[str, Any], Any], return_only_outputs: bool = False
|
||||
) -> Dict[str, Any]:
|
||||
"""Run the logic of this chain and add to output if desired.
|
||||
|
||||
Args:
|
||||
inputs: Dictionary of inputs.
|
||||
inputs: Dictionary of inputs, or single input if chain expects
|
||||
only one param.
|
||||
return_only_outputs: boolean for whether to return only outputs in the
|
||||
response. If True, only new keys generated by this chain will be
|
||||
returned. If False, both input keys and new keys generated by this
|
||||
chain will be returned. Defaults to False.
|
||||
|
||||
"""
|
||||
if not isinstance(inputs, dict):
|
||||
if len(self.input_keys) != 1:
|
||||
raise ValueError(
|
||||
f"A single string input was passed in, but this chain expects "
|
||||
f"multiple inputs ({self.input_keys}). When a chain expects "
|
||||
f"multiple inputs, please call it by passing in a dictionary, "
|
||||
"eg `chain({'foo': 1, 'bar': 2})`"
|
||||
)
|
||||
inputs = {self.input_keys[0]: inputs}
|
||||
if self.memory is not None:
|
||||
external_context = self.memory.load_memory_variables(inputs)
|
||||
inputs = dict(inputs, **external_context)
|
||||
@@ -99,16 +119,23 @@ class Chain(BaseModel, ABC):
|
||||
"""Call the chain on all inputs in the list."""
|
||||
return [self(inputs) for inputs in input_list]
|
||||
|
||||
def run(self, text: str) -> str:
|
||||
"""Run text in, text out (if applicable)."""
|
||||
if len(self.input_keys) != 1:
|
||||
raise ValueError(
|
||||
f"`run` not supported when there is not exactly "
|
||||
f"one input key, got {self.input_keys}."
|
||||
)
|
||||
def run(self, *args: str, **kwargs: str) -> str:
|
||||
"""Run the chain as text in, text out or multiple variables, text out."""
|
||||
if len(self.output_keys) != 1:
|
||||
raise ValueError(
|
||||
f"`run` not supported when there is not exactly "
|
||||
f"one output key, got {self.output_keys}."
|
||||
f"one output key. Got {self.output_keys}."
|
||||
)
|
||||
return self({self.input_keys[0]: text})[self.output_keys[0]]
|
||||
|
||||
if args and not kwargs:
|
||||
if len(args) != 1:
|
||||
raise ValueError("`run` supports only one positional argument.")
|
||||
return self(args[0])[self.output_keys[0]]
|
||||
|
||||
if kwargs and not args:
|
||||
return self(kwargs)[self.output_keys[0]]
|
||||
|
||||
raise ValueError(
|
||||
f"`run` supported with either positional arguments or keyword arguments"
|
||||
f" but not both. Got args: {args} and kwargs: {kwargs}."
|
||||
)
|
||||
|
||||
1
langchain/chains/combine_documents/__init__.py
Normal file
1
langchain/chains/combine_documents/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Different ways to combine documents."""
|
||||
50
langchain/chains/combine_documents/base.py
Normal file
50
langchain/chains/combine_documents/base.py
Normal file
@@ -0,0 +1,50 @@
|
||||
"""Base interface for chains combining documents."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.docstore.document import Document
|
||||
|
||||
|
||||
class BaseCombineDocumentsChain(Chain, BaseModel, ABC):
    """Shared contract for chains that fold a list of documents into one string.

    Subclasses supply ``combine_docs``; this base class provides the
    single-input / single-output key plumbing and the ``_call`` adapter
    that routes chain inputs into ``combine_docs``.
    """

    input_key: str = "input_documents"  #: :meta private:
    output_key: str = "output_text"  #: :meta private:

    @property
    def input_keys(self) -> List[str]:
        """List holding the single key under which documents are passed in.

        :meta private:
        """
        return [self.input_key]

    @property
    def output_keys(self) -> List[str]:
        """List holding the single key under which the combined text is returned.

        :meta private:
        """
        return [self.output_key]

    def prompt_length(self, docs: List[Document], **kwargs: Any) -> Optional[int]:
        """Report the prompt length for the given documents.

        This default implementation always returns None, meaning the
        combination strategy does not depend on the prompt length.
        """
        return None

    @abstractmethod
    def combine_docs(self, docs: List[Document], **kwargs: Any) -> str:
        """Combine documents into a single string."""

    def _call(self, inputs: Dict[str, Any]) -> Dict[str, str]:
        """Pull the documents out of ``inputs`` and combine them.

        Every input other than the documents is forwarded to
        ``combine_docs`` as a keyword argument (assumed to be needed for
        the LLM prediction).
        """
        # Work on a copy so the caller's mapping is left untouched.
        extra_inputs = dict(inputs)
        documents = extra_inputs.pop(self.input_key)
        return {self.output_key: self.combine_docs(documents, **extra_inputs)}
|
||||
137
langchain/chains/combine_documents/map_reduce.py
Normal file
137
langchain/chains/combine_documents/map_reduce.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""Combining documents by mapping a chain over them first, then combining results."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, root_validator
|
||||
|
||||
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.docstore.document import Document
|
||||
|
||||
|
||||
def _split_list_of_docs(
|
||||
docs: List[Document], length_func: Callable, token_max: int, **kwargs: Any
|
||||
) -> List[List[Document]]:
|
||||
new_result_doc_list = []
|
||||
_sub_result_docs = []
|
||||
for doc in docs:
|
||||
_sub_result_docs.append(doc)
|
||||
_num_tokens = length_func(_sub_result_docs, **kwargs)
|
||||
if _num_tokens > token_max:
|
||||
if len(_sub_result_docs) == 1:
|
||||
raise ValueError(
|
||||
"A single document was longer than the context length,"
|
||||
" we cannot handle this."
|
||||
)
|
||||
if len(_sub_result_docs) == 2:
|
||||
raise ValueError(
|
||||
"A single document was so long it could not be combined "
|
||||
"with another document, we cannot handle this."
|
||||
)
|
||||
new_result_doc_list.append(_sub_result_docs[:-1])
|
||||
_sub_result_docs = _sub_result_docs[-1:]
|
||||
new_result_doc_list.append(_sub_result_docs)
|
||||
return new_result_doc_list
|
||||
|
||||
|
||||
def _collapse_docs(
    docs: List[Document],
    combine_document_func: Callable,
    **kwargs: Any,
) -> Document:
    """Merge a group of documents into a single document.

    The page content is whatever ``combine_document_func(docs, **kwargs)``
    returns. Metadata values are stringified; when several documents share a
    metadata key their values are joined with ", " in document order.
    """
    merged_text = combine_document_func(docs, **kwargs)
    # Seed with the first document's metadata, then fold in the rest.
    merged_metadata = {key: str(value) for key, value in docs[0].metadata.items()}
    for later_doc in docs[1:]:
        for key, value in later_doc.metadata.items():
            if key not in merged_metadata:
                merged_metadata[key] = str(value)
                continue
            merged_metadata[key] = f"{merged_metadata[key]}, {value}"
    return Document(page_content=merged_text, metadata=merged_metadata)
|
||||
|
||||
|
||||
class MapReduceDocumentsChain(BaseCombineDocumentsChain, BaseModel):
    """Run a chain over every document, then reduce the per-document outputs."""

    llm_chain: LLMChain
    """Chain to apply to each document individually."""
    combine_document_chain: BaseCombineDocumentsChain
    """Chain to use to combine results of applying llm_chain to documents."""
    collapse_document_chain: Optional[BaseCombineDocumentsChain] = None
    """Chain to use to collapse intermediary results if needed.
    If None, will use the combine_document_chain."""
    document_variable_name: str
    """The variable name in the llm_chain to put the documents in.
    If only one variable in the llm_chain, this need not be provided."""

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    @root_validator(pre=True)
    def get_default_document_variable_name(cls, values: Dict) -> Dict:
        """Fill in or validate ``document_variable_name``.

        When the name is omitted it defaults to the llm_chain prompt's only
        input variable; when given it must be one of the prompt's variables.
        """
        prompt_variables = values["llm_chain"].prompt.input_variables
        if "document_variable_name" in values:
            if values["document_variable_name"] not in prompt_variables:
                raise ValueError(
                    f"document_variable_name {values['document_variable_name']} was "
                    f"not found in llm_chain input_variables: {prompt_variables}"
                )
            return values
        if len(prompt_variables) != 1:
            raise ValueError(
                "document_variable_name must be provided if there are "
                "multiple llm_chain input_variables"
            )
        values["document_variable_name"] = prompt_variables[0]
        return values

    @property
    def _collapse_chain(self) -> BaseCombineDocumentsChain:
        """Chain used for intermediate collapsing (falls back to the combine chain)."""
        configured = self.collapse_document_chain
        return self.combine_document_chain if configured is None else configured

    def combine_docs(
        self, docs: List[Document], token_max: int = 3000, **kwargs: Any
    ) -> str:
        """Combine documents in a map reduce manner.

        The llm_chain is applied to every document; the outputs are then
        handed to the combine chain. While the combine chain reports a prompt
        length above ``token_max``, the intermediate results are split into
        groups and each group is collapsed first.
        """
        # Map step: one input dict per document, with kwargs layered on top.
        map_inputs = []
        for source_doc in docs:
            single_input = {self.document_variable_name: source_doc.page_content}
            single_input.update(kwargs)
            map_inputs.append(single_input)
        map_outputs = self.llm_chain.apply(map_inputs)

        # Pair each textual result with the metadata of the document it came from.
        answer_key = self.llm_chain.output_key
        result_docs = []
        for position, output in enumerate(map_outputs):
            result_docs.append(
                Document(page_content=output[answer_key], metadata=docs[position].metadata)
            )

        # Reduce step: collapse in groups until everything fits in one prompt.
        measure = self.combine_document_chain.prompt_length
        num_tokens = measure(result_docs, **kwargs)
        while num_tokens is not None and num_tokens > token_max:
            groups = _split_list_of_docs(result_docs, measure, token_max, **kwargs)
            result_docs = [
                _collapse_docs(group, self._collapse_chain.combine_docs, **kwargs)
                for group in groups
            ]
            num_tokens = self.combine_document_chain.prompt_length(
                result_docs, **kwargs
            )
        return self.combine_document_chain.combine_docs(result_docs, **kwargs)
|
||||
88
langchain/chains/combine_documents/refine.py
Normal file
88
langchain/chains/combine_documents/refine.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""Combining documents by doing a first pass and then refining on more documents."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from pydantic import BaseModel, Extra, Field, root_validator
|
||||
|
||||
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
|
||||
|
||||
def _get_default_document_prompt() -> PromptTemplate:
    """Build the default per-document prompt, which emits the page content as-is."""
    passthrough_template = "{page_content}"
    return PromptTemplate(
        template=passthrough_template, input_variables=["page_content"]
    )
|
||||
|
||||
|
||||
class RefineDocumentsChain(BaseCombineDocumentsChain, BaseModel):
    """Answer from the first document, then refine the answer with each later one."""

    initial_llm_chain: LLMChain
    """LLM chain to use on initial document."""
    refine_llm_chain: LLMChain
    """LLM chain to use when refining."""
    document_variable_name: str
    """The variable name in the initial_llm_chain to put the documents in.
    If only one variable in the initial_llm_chain, this need not be provided."""
    initial_response_name: str
    """The variable name to format the initial response in when refining."""
    document_prompt: BasePromptTemplate = Field(
        default_factory=_get_default_document_prompt
    )
    """Prompt to use to format each document."""

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    @root_validator(pre=True)
    def get_default_document_variable_name(cls, values: Dict) -> Dict:
        """Fill in or validate ``document_variable_name``.

        When the name is omitted it defaults to the initial_llm_chain prompt's
        only input variable; when given it must be one of that prompt's
        variables.
        """
        prompt_variables = values["initial_llm_chain"].prompt.input_variables
        if "document_variable_name" in values:
            if values["document_variable_name"] not in prompt_variables:
                raise ValueError(
                    f"document_variable_name {values['document_variable_name']} was "
                    f"not found in llm_chain input_variables: {prompt_variables}"
                )
            return values
        if len(prompt_variables) != 1:
            raise ValueError(
                "document_variable_name must be provided if there are "
                "multiple llm_chain input_variables"
            )
        values["document_variable_name"] = prompt_variables[0]
        return values

    def combine_docs(self, docs: List[Document], **kwargs: Any) -> str:
        """Produce an answer from the first document and refine it document by document.

        The first document goes through ``initial_llm_chain``; every later
        document goes through ``refine_llm_chain`` together with the answer so
        far (passed under ``initial_response_name``). ``kwargs`` are forwarded
        to every prediction.
        """

        def render(document: Document) -> str:
            # Expose page content plus metadata, then keep only what the
            # document prompt actually asks for.
            available = {"page_content": document.page_content}
            available.update(document.metadata)
            wanted = {
                name: available[name] for name in self.document_prompt.input_variables
            }
            return self.document_prompt.format(**wanted)

        first_inputs = {self.document_variable_name: render(docs[0]), **kwargs}
        answer = self.initial_llm_chain.predict(**first_inputs)
        for document in docs[1:]:
            refine_inputs = {
                self.document_variable_name: render(document),
                self.initial_response_name: answer,
                **kwargs,
            }
            answer = self.refine_llm_chain.predict(**refine_inputs)
        return answer
|
||||
@@ -1,21 +1,22 @@
|
||||
"""Document combining chain."""
|
||||
"""Chain that combines documents by stuffing into context."""
|
||||
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel, Extra, Field, root_validator
|
||||
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
from langchain.prompts.prompt import Prompt
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
|
||||
|
||||
def _get_default_document_prompt() -> Prompt:
|
||||
return Prompt(input_variables=["page_content"], template="{page_content}")
|
||||
def _get_default_document_prompt() -> PromptTemplate:
|
||||
return PromptTemplate(input_variables=["page_content"], template="{page_content}")
|
||||
|
||||
|
||||
class CombineDocumentsChain(Chain, BaseModel):
|
||||
"""Combine documents."""
|
||||
class StuffDocumentsChain(BaseCombineDocumentsChain, BaseModel):
|
||||
"""Chain that combines documents by stuffing into context."""
|
||||
|
||||
llm_chain: LLMChain
|
||||
"""LLM wrapper to use after formatting documents."""
|
||||
@@ -26,8 +27,6 @@ class CombineDocumentsChain(Chain, BaseModel):
|
||||
document_variable_name: str
|
||||
"""The variable name in the llm_chain to put the documents in.
|
||||
If only one variable in the llm_chain, this need not be provided."""
|
||||
input_key: str = "input_documents" #: :meta private:
|
||||
output_key: str = "output_text" #: :meta private:
|
||||
|
||||
class Config:
|
||||
"""Configuration for this pydantic object."""
|
||||
@@ -35,22 +34,6 @@ class CombineDocumentsChain(Chain, BaseModel):
|
||||
extra = Extra.forbid
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
@property
|
||||
def input_keys(self) -> List[str]:
|
||||
"""Expect input key.
|
||||
|
||||
:meta private:
|
||||
"""
|
||||
return [self.input_key]
|
||||
|
||||
@property
|
||||
def output_keys(self) -> List[str]:
|
||||
"""Return output key.
|
||||
|
||||
:meta private:
|
||||
"""
|
||||
return [self.output_key]
|
||||
|
||||
@root_validator(pre=True)
|
||||
def get_default_document_variable_name(cls, values: Dict) -> Dict:
|
||||
"""Get default document variable name, if not provided."""
|
||||
@@ -72,10 +55,7 @@ class CombineDocumentsChain(Chain, BaseModel):
|
||||
)
|
||||
return values
|
||||
|
||||
def _call(self, inputs: Dict[str, Any]) -> Dict[str, str]:
|
||||
docs = inputs[self.input_key]
|
||||
# Other keys are assumed to be needed for LLM prediction
|
||||
other_keys = {k: v for k, v in inputs.items() if k != self.input_key}
|
||||
def _get_inputs(self, docs: List[Document], **kwargs: Any) -> dict:
|
||||
# Get relevant information from each document.
|
||||
doc_dicts = []
|
||||
for doc in docs:
|
||||
@@ -88,7 +68,18 @@ class CombineDocumentsChain(Chain, BaseModel):
|
||||
# Format each document according to the prompt
|
||||
doc_strings = [self.document_prompt.format(**doc) for doc in doc_dicts]
|
||||
# Join the documents together to put them in the prompt.
|
||||
other_keys[self.document_variable_name] = "\n".join(doc_strings)
|
||||
inputs = kwargs.copy()
|
||||
inputs[self.document_variable_name] = "\n\n".join(doc_strings)
|
||||
return inputs
|
||||
|
||||
def prompt_length(self, docs: List[Document], **kwargs: Any) -> Optional[int]:
|
||||
"""Get the prompt length by formatting the prompt."""
|
||||
inputs = self._get_inputs(docs, **kwargs)
|
||||
prompt = self.llm_chain.prompt.format(**inputs)
|
||||
return self.llm_chain.llm.get_num_tokens(prompt)
|
||||
|
||||
def combine_docs(self, docs: List[Document], **kwargs: Any) -> str:
|
||||
"""Stuff all documents into one prompt and pass to LLM."""
|
||||
inputs = self._get_inputs(docs, **kwargs)
|
||||
# Call predict on the LLM.
|
||||
output = self.llm_chain.predict(**other_keys)
|
||||
return {self.output_key: output}
|
||||
return self.llm_chain.predict(**inputs)
|
||||
@@ -1,12 +1,12 @@
|
||||
"""Memory modules for conversation prompts."""
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from pydantic import BaseModel, root_validator
|
||||
from pydantic import BaseModel, Field, root_validator
|
||||
|
||||
from langchain.chains.base import Memory
|
||||
from langchain.chains.conversation.prompt import SUMMARY_PROMPT
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.llms.base import LLM
|
||||
from langchain.llms.base import BaseLLM
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
|
||||
|
||||
@@ -46,12 +46,49 @@ class ConversationBufferMemory(Memory, BaseModel):
|
||||
ai = "AI: " + outputs[list(outputs.keys())[0]]
|
||||
self.buffer += "\n" + "\n".join([human, ai])
|
||||
|
||||
def clear(self) -> None:
|
||||
"""Clear memory contents."""
|
||||
self.buffer = ""
|
||||
|
||||
|
||||
class ConversationalBufferWindowMemory(Memory, BaseModel):
    """Conversation memory that exposes only the most recent ``k`` exchanges.

    Every saved exchange is appended to ``buffer``; only the window returned
    by ``load_memory_variables`` is limited to the last ``k`` entries.
    """

    buffer: List[str] = Field(default_factory=list)
    memory_key: str = "history"  #: :meta private:
    k: int = 5

    @property
    def memory_variables(self) -> List[str]:
        """Will always return list of memory variables.

        :meta private:
        """
        return [self.memory_key]

    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, str]:
        """Return the last ``k`` exchanges joined by newlines.

        A non-positive ``k`` yields an empty history.
        """
        # Guard the slice: ``buffer[-0:]`` is the WHOLE buffer, and a negative
        # ``k`` would drop the oldest turns instead of keeping the newest.
        recent = self.buffer[-self.k :] if self.k > 0 else []
        return {self.memory_key: "\n".join(recent)}

    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        """Append one Human/AI exchange to the buffer.

        Raises:
            ValueError: If ``outputs`` does not contain exactly one key.
        """
        prompt_input_key = _get_prompt_input_key(inputs, self.memory_variables)
        if len(outputs) != 1:
            raise ValueError(f"One output key expected, got {outputs.keys()}")
        human = "Human: " + inputs[prompt_input_key]
        ai = "AI: " + outputs[list(outputs.keys())[0]]
        self.buffer.append("\n".join([human, ai]))

    def clear(self) -> None:
        """Clear memory contents."""
        self.buffer = []
|
||||
|
||||
|
||||
class ConversationSummaryMemory(Memory, BaseModel):
|
||||
"""Conversation summarizer to memory."""
|
||||
|
||||
buffer: str = ""
|
||||
llm: LLM
|
||||
llm: BaseLLM
|
||||
prompt: BasePromptTemplate = SUMMARY_PROMPT
|
||||
memory_key: str = "history" #: :meta private:
|
||||
|
||||
@@ -89,3 +126,7 @@ class ConversationSummaryMemory(Memory, BaseModel):
|
||||
new_lines = "\n".join([human, ai])
|
||||
chain = LLMChain(llm=self.llm, prompt=self.prompt)
|
||||
self.buffer = chain.predict(summary=self.buffer, new_lines=new_lines)
|
||||
|
||||
def clear(self) -> None:
|
||||
"""Clear memory contents."""
|
||||
self.buffer = ""
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user