mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-05 08:40:36 +00:00
Compare commits
130 Commits
eugene/per
...
v0.0.201
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8e1a7a8646 | ||
|
|
e82687ddf4 | ||
|
|
7d2b946d0b | ||
|
|
c7db9febb0 | ||
|
|
7ad13cdbdb | ||
|
|
c5a46e7435 | ||
|
|
17c4ec4812 | ||
|
|
4a649e3b14 | ||
|
|
8a44c879c6 | ||
|
|
e0e3ef1c57 | ||
|
|
4555ad5d1f | ||
|
|
6ac120f299 | ||
|
|
e41f0b341c | ||
|
|
b3b155d488 | ||
|
|
e74733ab9e | ||
|
|
11ab0be11a | ||
|
|
1281fdf0f2 | ||
|
|
34ebb29726 | ||
|
|
f9edf76e7c | ||
|
|
970b2f9d38 | ||
|
|
292accde2b | ||
|
|
ee3d0513ad | ||
|
|
8fdf88b8e3 | ||
|
|
0c52275bdb | ||
|
|
cde1e8739a | ||
|
|
a9b3b2e327 | ||
|
|
6ac5d80286 | ||
|
|
ec1a2adf9c | ||
|
|
5b6bbf4ab2 | ||
|
|
2f0088039d | ||
|
|
b023f0c0f2 | ||
|
|
2c91f0d750 | ||
|
|
5922742d56 | ||
|
|
681ba6d520 | ||
|
|
7a5e36f3f5 | ||
|
|
289e9aeb9d | ||
|
|
d1561b74eb | ||
|
|
bb7ac9edb5 | ||
|
|
010d0bfeea | ||
|
|
e05997c25e | ||
|
|
c5bce4a465 | ||
|
|
2c9619bc1d | ||
|
|
18f5c985d9 | ||
|
|
a197acfcd3 | ||
|
|
18af149e91 | ||
|
|
614cff89bc | ||
|
|
a7227ee01b | ||
|
|
232faba796 | ||
|
|
d7d629911b | ||
|
|
6e90406e0f | ||
|
|
c868a3eef3 | ||
|
|
20e9ce8a62 | ||
|
|
704d56e241 | ||
|
|
b934677a81 | ||
|
|
2d038b57b2 | ||
|
|
0b740c9baa | ||
|
|
ac3e6e3944 | ||
|
|
d2270a2261 | ||
|
|
1250cd4630 | ||
|
|
f8cf09a230 | ||
|
|
e4224a396b | ||
|
|
21bd16bb59 | ||
|
|
9218684759 | ||
|
|
d5819a7ca7 | ||
|
|
0ca37e613c | ||
|
|
ca1afa7213 | ||
|
|
5f356b9993 | ||
|
|
d6f5d0c6b1 | ||
|
|
62ec10a7f5 | ||
|
|
736a1819aa | ||
|
|
f3e7ac0a2c | ||
|
|
3678cba0be | ||
|
|
7af186fddf | ||
|
|
7cc200766e | ||
|
|
db7ef635c0 | ||
|
|
0eb1bc1a02 | ||
|
|
63fcf41bea | ||
|
|
2791a753bf | ||
|
|
a09a0e3511 | ||
|
|
0ce8745928 | ||
|
|
d8ae925425 | ||
|
|
fe8bbc2da7 | ||
|
|
77c286cf02 | ||
|
|
3ec6400d70 | ||
|
|
a6ebffb695 | ||
|
|
767fa91eae | ||
|
|
5f74db4500 | ||
|
|
511c12dd39 | ||
|
|
893d20f735 | ||
|
|
35cfd25db3 | ||
|
|
658f8bdee7 | ||
|
|
5518f24ec3 | ||
|
|
b93638ef1e | ||
|
|
a1549901ce | ||
|
|
78aa59c68b | ||
|
|
ec0dd6e34a | ||
|
|
3294774148 | ||
|
|
cef79ca579 | ||
|
|
106364a45c | ||
|
|
9355e3f5f5 | ||
|
|
f15763518a | ||
|
|
137da7e4b6 | ||
|
|
9f4b720a63 | ||
|
|
76fcd96dae | ||
|
|
11fec7d4d1 | ||
|
|
0b4a51930c | ||
|
|
c66755b661 | ||
|
|
4d8cda1c3b | ||
|
|
3af36943e8 | ||
|
|
8ef7274ee6 | ||
|
|
d9fcc45d05 | ||
|
|
ce7c11625f | ||
|
|
5a207cce8f | ||
|
|
b3ae6bcd3f | ||
|
|
5468528748 | ||
|
|
69f4ffb851 | ||
|
|
2be4fbb835 | ||
|
|
062c3c00a2 | ||
|
|
92b87c2fec | ||
|
|
3954bcf396 | ||
|
|
b7999a9bc1 | ||
|
|
a0d847f636 | ||
|
|
217b5cc72d | ||
|
|
4092fd21dc | ||
|
|
2a4b32dee2 | ||
|
|
daf3e99b96 | ||
|
|
b177a29d3f | ||
|
|
65111eb2b3 | ||
|
|
0cfaa76e45 | ||
|
|
2ae2d6cd1d |
2
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
2
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
@@ -46,7 +46,7 @@ body:
|
||||
- @agola11
|
||||
|
||||
Tools / Toolkits
|
||||
- @vowelparrot
|
||||
- ...
|
||||
|
||||
placeholder: "@Username ..."
|
||||
|
||||
|
||||
2
.github/PULL_REQUEST_TEMPLATE.md
vendored
2
.github/PULL_REQUEST_TEMPLATE.md
vendored
@@ -48,7 +48,7 @@ Tag maintainers/contributors who might be interested:
|
||||
- @agola11
|
||||
|
||||
Agents / Tools / Toolkits
|
||||
- @vowelparrot
|
||||
- @hwchase17
|
||||
|
||||
VectorStores / Retrievers / Memory
|
||||
- @dev2049
|
||||
|
||||
1
docs/_static/js/mendablesearch.js
vendored
1
docs/_static/js/mendablesearch.js
vendored
@@ -37,6 +37,7 @@ document.addEventListener('DOMContentLoaded', () => {
|
||||
style: { darkMode: false, accentColor: '#010810' },
|
||||
floatingButtonStyle: { color: '#ffffff', backgroundColor: '#010810' },
|
||||
anon_key: '82842b36-3ea6-49b2-9fb8-52cfc4bde6bf', // Mendable Search Public ANON key, ok to be public
|
||||
cmdShortcutKey:'j',
|
||||
messageSettings: {
|
||||
openSourcesInNewTab: false,
|
||||
prettySources: true // Prettify the sources displayed now
|
||||
|
||||
@@ -24,9 +24,9 @@ This guide aims to provide a comprehensive overview of the requirements for depl
|
||||
|
||||
Understanding these components is crucial when assessing serving systems. LangChain integrates with several open-source projects designed to tackle these issues, providing a robust framework for productionizing your LLM applications. Some notable frameworks include:
|
||||
|
||||
- `Ray Serve <../../../ecosystem/ray_serve.html>`_
|
||||
- `Ray Serve <../integrations/ray_serve.html>`_
|
||||
- `BentoML <https://github.com/ssheng/BentoChain>`_
|
||||
- `Modal <../../../ecosystem/modal.html>`_
|
||||
- `Modal <../integrations/modal.html>`_
|
||||
|
||||
These links will provide further information on each ecosystem, assisting you in finding the best fit for your LLM deployment needs.
|
||||
|
||||
|
||||
25
docs/ecosystem/baseten.md
Normal file
25
docs/ecosystem/baseten.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# Baseten
|
||||
|
||||
Learn how to use LangChain with models deployed on Baseten.
|
||||
|
||||
## Installation and setup
|
||||
|
||||
- Create a [Baseten](https://baseten.co) account and [API key](https://docs.baseten.co/settings/api-keys).
|
||||
- Install the Baseten Python client with `pip install baseten`
|
||||
- Use your API key to authenticate with `baseten login`
|
||||
|
||||
## Invoking a model
|
||||
|
||||
Baseten integrates with LangChain through the LLM module, which provides a standardized and interoperable interface for models that are deployed on your Baseten workspace.
|
||||
|
||||
You can deploy foundation models like WizardLM and Alpaca with one click from the [Baseten model library](https://app.baseten.co/explore/) or if you have your own model, [deploy it with this tutorial](https://docs.baseten.co/deploying-models/deploy).
|
||||
|
||||
In this example, we'll work with WizardLM. [Deploy WizardLM here](https://app.baseten.co/explore/wizardlm) and follow along with the deployed [model's version ID](https://docs.baseten.co/managing-models/manage).
|
||||
|
||||
```python
|
||||
from langchain.llms import Baseten
|
||||
|
||||
wizardlm = Baseten(model="MODEL_VERSION_ID", verbose=True)
|
||||
|
||||
wizardlm("What is the difference between a Wizard and a Sorcerer?")
|
||||
```
|
||||
21
docs/integrations/awadb.md
Normal file
21
docs/integrations/awadb.md
Normal file
@@ -0,0 +1,21 @@
|
||||
# AwaDB
|
||||
|
||||
>[AwaDB](https://github.com/awa-ai/awadb) is an AI Native database for the search and storage of embedding vectors used by LLM Applications.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
```bash
|
||||
pip install awadb
|
||||
```
|
||||
|
||||
|
||||
## VectorStore
|
||||
|
||||
There exists a wrapper around AwaDB vector databases, allowing you to use it as a vectorstore,
|
||||
whether for semantic search or example selection.
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import AwaDB
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the AwaDB wrapper, see [this notebook](../modules/indexes/vectorstores/examples/awadb.ipynb)
|
||||
36
docs/integrations/databricks.md
Normal file
36
docs/integrations/databricks.md
Normal file
@@ -0,0 +1,36 @@
|
||||
Databricks
|
||||
==========
|
||||
|
||||
The [Databricks](https://www.databricks.com/) Lakehouse Platform unifies data, analytics, and AI on one platform.
|
||||
|
||||
Databricks embraces the LangChain ecosystem in various ways:
|
||||
|
||||
1. Databricks connector for the SQLDatabase Chain: SQLDatabase.from_databricks() provides an easy way to query your data on Databricks through LangChain
|
||||
2. Databricks-managed MLflow integrates with LangChain: Tracking and serving LangChain applications with fewer steps
|
||||
3. Databricks as an LLM provider: Deploy your fine-tuned LLMs on Databricks via serving endpoints or cluster driver proxy apps, and query it as langchain.llms.Databricks
|
||||
4. Databricks Dolly: Databricks open-sourced Dolly which allows for commercial use, and can be accessed through the Hugging Face Hub
|
||||
|
||||
Databricks connector for the SQLDatabase Chain
|
||||
----------------------------------------------
|
||||
You can connect to [Databricks runtimes](https://docs.databricks.com/runtime/index.html) and [Databricks SQL](https://www.databricks.com/product/databricks-sql) using the SQLDatabase wrapper of LangChain. See the notebook [Connect to Databricks](./databricks/databricks.html) for details.
|
||||
|
||||
Databricks-managed MLflow integrates with LangChain
|
||||
---------------------------------------------------
|
||||
|
||||
MLflow is an open source platform to manage the ML lifecycle, including experimentation, reproducibility, deployment, and a central model registry. See the notebook [MLflow Callback Handler](./mlflow_tracking.ipynb) for details about MLflow's integration with LangChain.
|
||||
|
||||
Databricks provides a fully managed and hosted version of MLflow integrated with enterprise security features, high availability, and other Databricks workspace features such as experiment and run management and notebook revision capture. MLflow on Databricks offers an integrated experience for tracking and securing machine learning model training runs and running machine learning projects. See [MLflow guide](https://docs.databricks.com/mlflow/index.html) for more details.
|
||||
|
||||
Databricks-managed MLflow makes it more convenient to develop LangChain applications on Databricks. For MLflow tracking, you don't need to set the tracking uri. For MLflow Model Serving, you can save LangChain Chains in the MLflow langchain flavor, and then register and serve the Chain with a few clicks on Databricks, with credentials securely managed by MLflow Model Serving.
|
||||
|
||||
Databricks as an LLM provider
|
||||
-----------------------------
|
||||
|
||||
The notebook [Wrap Databricks endpoints as LLMs](../modules/models/llms/integrations/databricks.html) illustrates the method to wrap Databricks endpoints as LLMs in LangChain. It supports two types of endpoints: the serving endpoint, which is recommended for both production and development, and the cluster driver proxy app, which is recommended for interactive development.
|
||||
|
||||
Databricks endpoints support Dolly, but are also great for hosting models like MPT-7B or any other models from the Hugging Face ecosystem. Databricks endpoints can also be used with proprietary models like OpenAI to provide a governance layer for enterprises.
|
||||
|
||||
Databricks Dolly
|
||||
----------------
|
||||
|
||||
Databricks’ Dolly is an instruction-following large language model trained on the Databricks machine learning platform that is licensed for commercial use. The model is available on Hugging Face Hub as databricks/dolly-v2-12b. See the notebook [Hugging Face Hub](../modules/models/llms/integrations/huggingface_hub.html) for instructions to access it through the Hugging Face Hub integration with LangChain.
|
||||
@@ -58,7 +58,7 @@
|
||||
"### Optional Parameters\n",
|
||||
"There following parameters are optional. When executing the method in a Databricks notebook, you don't need to provide them in most of the cases.\n",
|
||||
"* `host`: The Databricks workspace hostname, excluding 'https://' part. Defaults to 'DATABRICKS_HOST' environment variable or current workspace if in a Databricks notebook.\n",
|
||||
"* `api_token`: The Databricks personal access token for accessing the Databricks SQL warehouse or the cluster. Defaults to 'DATABRICKS_API_TOKEN' environment variable or a temporary one is generated if in a Databricks notebook.\n",
|
||||
"* `api_token`: The Databricks personal access token for accessing the Databricks SQL warehouse or the cluster. Defaults to 'DATABRICKS_TOKEN' environment variable or a temporary one is generated if in a Databricks notebook.\n",
|
||||
"* `warehouse_id`: The warehouse ID in the Databricks SQL.\n",
|
||||
"* `cluster_id`: The cluster ID in the Databricks Runtime. If running in a Databricks notebook and both 'warehouse_id' and 'cluster_id' are None, it uses the ID of the cluster the notebook is attached to.\n",
|
||||
"* `engine_args`: The arguments to be used when connecting Databricks.\n",
|
||||
368
docs/integrations/langchain_decorators.md
Normal file
368
docs/integrations/langchain_decorators.md
Normal file
@@ -0,0 +1,368 @@
|
||||
# LangChain Decorators ✨
|
||||
|
||||
lanchchain decorators is a layer on the top of LangChain that provides syntactic sugar 🍭 for writing custom langchain prompts and chains
|
||||
|
||||
For Feedback, Issues, Contributions - please raise an issue here:
|
||||
[ju-bezdek/langchain-decorators](https://github.com/ju-bezdek/langchain-decorators)
|
||||
|
||||
|
||||
|
||||
Main principles and benefits:
|
||||
|
||||
- more `pythonic` way of writing code
|
||||
- write multiline prompts that wont break your code flow with indentation
|
||||
- making use of IDE in-built support for **hinting**, **type checking** and **popup with docs** to quickly peek in the function to see the prompt, parameters it consumes etc.
|
||||
- leverage all the power of 🦜🔗 LangChain ecosystem
|
||||
- adding support for **optional parameters**
|
||||
- easily share parameters between the prompts by binding them to one class
|
||||
|
||||
|
||||
|
||||
Here is a simple example of a code written with **LangChain Decorators ✨**
|
||||
|
||||
``` python
|
||||
|
||||
@llm_prompt
|
||||
def write_me_short_post(topic:str, platform:str="twitter", audience:str = "developers")->str:
|
||||
"""
|
||||
Write me a short header for my post about {topic} for {platform} platform.
|
||||
It should be for {audience} audience.
|
||||
(Max 15 words)
|
||||
"""
|
||||
return
|
||||
|
||||
# run it naturaly
|
||||
write_me_short_post(topic="starwars")
|
||||
# or
|
||||
write_me_short_post(topic="starwars", platform="redit")
|
||||
```
|
||||
|
||||
# Quick start
|
||||
## Installation
|
||||
```bash
|
||||
pip install langchain_decorators
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
Good idea on how to start is to review the examples here:
|
||||
- [jupyter notebook](https://github.com/ju-bezdek/langchain-decorators/blob/main/example_notebook.ipynb)
|
||||
- [colab notebook](https://colab.research.google.com/drive/1no-8WfeP6JaLD9yUtkPgym6x0G9ZYZOG#scrollTo=N4cf__D0E2Yk)
|
||||
|
||||
# Defining other parameters
|
||||
Here we are just marking a function as a prompt with `llm_prompt` decorator, turning it effectively into a LLMChain. Instead of running it
|
||||
|
||||
|
||||
Standard LLMchain takes much more init parameter than just inputs_variables and prompt... here is this implementation detail hidden in the decorator.
|
||||
Here is how it works:
|
||||
|
||||
1. Using **Global settings**:
|
||||
|
||||
``` python
|
||||
# define global settings for all prompty (if not set - chatGPT is the current default)
|
||||
from langchain_decorators import GlobalSettings
|
||||
|
||||
GlobalSettings.define_settings(
|
||||
default_llm=ChatOpenAI(temperature=0.0), this is default... can change it here globally
|
||||
default_streaming_llm=ChatOpenAI(temperature=0.0,streaming=True), this is default... can change it here for all ... will be used for streaming
|
||||
)
|
||||
```
|
||||
|
||||
2. Using predefined **prompt types**
|
||||
|
||||
``` python
|
||||
#You can change the default prompt types
|
||||
from langchain_decorators import PromptTypes, PromptTypeSettings
|
||||
|
||||
PromptTypes.AGENT_REASONING.llm = ChatOpenAI()
|
||||
|
||||
# Or you can just define your own ones:
|
||||
class MyCustomPromptTypes(PromptTypes):
|
||||
GPT4=PromptTypeSettings(llm=ChatOpenAI(model="gpt-4"))
|
||||
|
||||
@llm_prompt(prompt_type=MyCustomPromptTypes.GPT4)
|
||||
def write_a_complicated_code(app_idea:str)->str:
|
||||
...
|
||||
|
||||
```
|
||||
|
||||
3. Define the settings **directly in the decorator**
|
||||
|
||||
``` python
|
||||
from langchain.llms import OpenAI
|
||||
|
||||
@llm_prompt(
|
||||
llm=OpenAI(temperature=0.7),
|
||||
stop_tokens=["\nObservation"],
|
||||
...
|
||||
)
|
||||
def creative_writer(book_title:str)->str:
|
||||
...
|
||||
```
|
||||
|
||||
## Passing a memory and/or callbacks:
|
||||
|
||||
To pass any of these, just declare them in the function (or use kwargs to pass anything)
|
||||
|
||||
```python
|
||||
|
||||
@llm_prompt()
|
||||
async def write_me_short_post(topic:str, platform:str="twitter", memory:SimpleMemory = None):
|
||||
"""
|
||||
{history_key}
|
||||
Write me a short header for my post about {topic} for {platform} platform.
|
||||
It should be for {audience} audience.
|
||||
(Max 15 words)
|
||||
"""
|
||||
pass
|
||||
|
||||
await write_me_short_post(topic="old movies")
|
||||
|
||||
```
|
||||
|
||||
# Simplified streaming
|
||||
|
||||
If we wan't to leverage streaming:
|
||||
- we need to define prompt as async function
|
||||
- turn on the streaming on the decorator, or we can define PromptType with streaming on
|
||||
- capture the stream using StreamingContext
|
||||
|
||||
This way we just mark which prompt should be streamed, not needing to tinker with what LLM should we use, passing around the creating and distribute streaming handler into particular part of our chain... just turn the streaming on/off on prompt/prompt type...
|
||||
|
||||
The streaming will happen only if we call it in streaming context ... there we can define a simple function to handle the stream
|
||||
|
||||
``` python
|
||||
# this code example is complete and should run as it is
|
||||
|
||||
from langchain_decorators import StreamingContext, llm_prompt
|
||||
|
||||
# this will mark the prompt for streaming (useful if we want stream just some prompts in our app... but don't want to pass distribute the callback handlers)
|
||||
# note that only async functions can be streamed (will get an error if it's not)
|
||||
@llm_prompt(capture_stream=True)
|
||||
async def write_me_short_post(topic:str, platform:str="twitter", audience:str = "developers"):
|
||||
"""
|
||||
Write me a short header for my post about {topic} for {platform} platform.
|
||||
It should be for {audience} audience.
|
||||
(Max 15 words)
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
|
||||
# just an arbitrary function to demonstrate the streaming... wil be some websockets code in the real world
|
||||
tokens=[]
|
||||
def capture_stream_func(new_token:str):
|
||||
tokens.append(new_token)
|
||||
|
||||
# if we want to capture the stream, we need to wrap the execution into StreamingContext...
|
||||
# this will allow us to capture the stream even if the prompt call is hidden inside higher level method
|
||||
# only the prompts marked with capture_stream will be captured here
|
||||
with StreamingContext(stream_to_stdout=True, callback=capture_stream_func):
|
||||
result = await run_prompt()
|
||||
print("Stream finished ... we can distinguish tokens thanks to alternating colors")
|
||||
|
||||
|
||||
print("\nWe've captured",len(tokens),"tokens🎉\n")
|
||||
print("Here is the result:")
|
||||
print(result)
|
||||
```
|
||||
|
||||
|
||||
# Prompt declarations
|
||||
By default the prompt is is the whole function docs, unless you mark your prompt
|
||||
|
||||
## Documenting your prompt
|
||||
|
||||
We can specify what part of our docs is the prompt definition, by specifying a code block with **<prompt>** language tag
|
||||
|
||||
``` python
|
||||
@llm_prompt
|
||||
def write_me_short_post(topic:str, platform:str="twitter", audience:str = "developers"):
|
||||
"""
|
||||
Here is a good way to write a prompt as part of a function docstring, with additional documentation for devs.
|
||||
|
||||
It needs to be a code block, marked as a `<prompt>` language
|
||||
```<prompt>
|
||||
Write me a short header for my post about {topic} for {platform} platform.
|
||||
It should be for {audience} audience.
|
||||
(Max 15 words)
|
||||
```
|
||||
|
||||
Now only to code block above will be used as a prompt, and the rest of the docstring will be used as a description for developers.
|
||||
(It has also a nice benefit that IDE (like VS code) will display the prompt properly (not trying to parse it as markdown, and thus not showing new lines properly))
|
||||
"""
|
||||
return
|
||||
```
|
||||
|
||||
## Chat messages prompt
|
||||
|
||||
For chat models is very useful to define prompt as a set of message templates... here is how to do it:
|
||||
|
||||
``` python
|
||||
@llm_prompt
|
||||
def simulate_conversation(human_input:str, agent_role:str="a pirate"):
|
||||
"""
|
||||
## System message
|
||||
- note the `:system` sufix inside the <prompt:_role_> tag
|
||||
|
||||
|
||||
```<prompt:system>
|
||||
You are a {agent_role} hacker. You mus act like one.
|
||||
You reply always in code, using python or javascript code block...
|
||||
for example:
|
||||
|
||||
... do not reply with anything else.. just with code - respecting your role.
|
||||
```
|
||||
|
||||
# human message
|
||||
(we are using the real role that are enforced by the LLM - GPT supports system, assistant, user)
|
||||
``` <prompt:user>
|
||||
Helo, who are you
|
||||
```
|
||||
a reply:
|
||||
|
||||
|
||||
``` <prompt:assistant>
|
||||
\``` python <<- escaping inner code block with \ that should be part of the prompt
|
||||
def hello():
|
||||
print("Argh... hello you pesky pirate")
|
||||
\```
|
||||
```
|
||||
|
||||
we can also add some history using placeholder
|
||||
```<prompt:placeholder>
|
||||
{history}
|
||||
```
|
||||
```<prompt:user>
|
||||
{human_input}
|
||||
```
|
||||
|
||||
Now only to code block above will be used as a prompt, and the rest of the docstring will be used as a description for developers.
|
||||
(It has also a nice benefit that IDE (like VS code) will display the prompt properly (not trying to parse it as markdown, and thus not showing new lines properly))
|
||||
"""
|
||||
pass
|
||||
|
||||
```
|
||||
|
||||
the roles here are model native roles (assistant, user, system for chatGPT)
|
||||
|
||||
|
||||
|
||||
# Optional sections
|
||||
- you can define a whole sections of your prompt that should be optional
|
||||
- if any input in the section is missing, the whole section wont be rendered
|
||||
|
||||
the syntax for this is as follows:
|
||||
|
||||
``` python
|
||||
@llm_prompt
|
||||
def prompt_with_optional_partials():
|
||||
"""
|
||||
this text will be rendered always, but
|
||||
|
||||
{? anything inside this block will be rendered only if all the {value}s parameters are not empty (None | "") ?}
|
||||
|
||||
you can also place it in between the words
|
||||
this too will be rendered{? , but
|
||||
this block will be rendered only if {this_value} and {this_value}
|
||||
is not empty?} !
|
||||
"""
|
||||
```
|
||||
|
||||
|
||||
# Output parsers
|
||||
|
||||
- llm_prompt decorator natively tries to detect the best output parser based on the output type. (if not set, it returns the raw string)
|
||||
- list, dict and pydantic outputs are also supported natively (automaticaly)
|
||||
|
||||
``` python
|
||||
# this code example is complete and should run as it is
|
||||
|
||||
from langchain_decorators import llm_prompt
|
||||
|
||||
@llm_prompt
|
||||
def write_name_suggestions(company_business:str, count:int)->list:
|
||||
""" Write me {count} good name suggestions for company that {company_business}
|
||||
"""
|
||||
pass
|
||||
|
||||
write_name_suggestions(company_business="sells cookies", count=5)
|
||||
```
|
||||
|
||||
## More complex structures
|
||||
|
||||
for dict / pydantic you need to specify the formatting instructions...
|
||||
this can be tedious, that's why you can let the output parser gegnerate you the instructions based on the model (pydantic)
|
||||
|
||||
``` python
|
||||
from langchain_decorators import llm_prompt
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class TheOutputStructureWeExpect(BaseModel):
|
||||
name:str = Field (description="The name of the company")
|
||||
headline:str = Field( description="The description of the company (for landing page)")
|
||||
employees:list[str] = Field(description="5-8 fake employee names with their positions")
|
||||
|
||||
@llm_prompt()
|
||||
def fake_company_generator(company_business:str)->TheOutputStructureWeExpect:
|
||||
""" Generate a fake company that {company_business}
|
||||
{FORMAT_INSTRUCTIONS}
|
||||
"""
|
||||
return
|
||||
|
||||
company = fake_company_generator(company_business="sells cookies")
|
||||
|
||||
# print the result nicely formatted
|
||||
print("Company name: ",company.name)
|
||||
print("company headline: ",company.headline)
|
||||
print("company employees: ",company.employees)
|
||||
|
||||
```
|
||||
|
||||
|
||||
# Binding the prompt to an object
|
||||
|
||||
``` python
|
||||
from pydantic import BaseModel
|
||||
from langchain_decorators import llm_prompt
|
||||
|
||||
class AssistantPersonality(BaseModel):
|
||||
assistant_name:str
|
||||
assistant_role:str
|
||||
field:str
|
||||
|
||||
@property
|
||||
def a_property(self):
|
||||
return "whatever"
|
||||
|
||||
def hello_world(self, function_kwarg:str=None):
|
||||
"""
|
||||
We can reference any {field} or {a_property} inside our prompt... and combine it with {function_kwarg} in the method
|
||||
"""
|
||||
|
||||
|
||||
@llm_prompt
|
||||
def introduce_your_self(self)->str:
|
||||
"""
|
||||
``` <prompt:system>
|
||||
You are an assistant named {assistant_name}.
|
||||
Your role is to act as {assistant_role}
|
||||
```
|
||||
```<prompt:user>
|
||||
Introduce your self (in less than 20 words)
|
||||
```
|
||||
"""
|
||||
|
||||
|
||||
|
||||
personality = AssistantPersonality(assistant_name="John", assistant_role="a pirate")
|
||||
|
||||
print(personality.introduce_your_self(personality))
|
||||
```
|
||||
|
||||
|
||||
# More examples:
|
||||
|
||||
- these and few more examples are also available in the [colab notebook here](https://colab.research.google.com/drive/1no-8WfeP6JaLD9yUtkPgym6x0G9ZYZOG#scrollTo=N4cf__D0E2Yk)
|
||||
- including the [ReAct Agent re-implementation](https://colab.research.google.com/drive/1no-8WfeP6JaLD9yUtkPgym6x0G9ZYZOG#scrollTo=3bID5fryE2Yp) using purely langchain decorators
|
||||
43
docs/integrations/shaleprotocol.md
Normal file
43
docs/integrations/shaleprotocol.md
Normal file
@@ -0,0 +1,43 @@
|
||||
# Shale Protocol
|
||||
|
||||
[Shale Protocol](https://shaleprotocol.com) provides production-ready inference APIs for open LLMs. It's a Plug & Play API as it's hosted on a highly scalable GPU cloud infrastructure.
|
||||
|
||||
Our free tier supports up to 1K daily requests per key as we want to eliminate the barrier for anyone to start building genAI apps with LLMs.
|
||||
|
||||
With Shale Protocol, developers/researchers can create apps and explore the capabilities of open LLMs at no cost.
|
||||
|
||||
This page covers how Shale-Serve API can be incorporated with LangChain.
|
||||
|
||||
As of June 2023, the API supports Vicuna-13B by default. We are going to support more LLMs such as Falcon-40B in future releases.
|
||||
|
||||
|
||||
## How to
|
||||
|
||||
### 1. Find the link to our Discord on https://shaleprotocol.com. Generate an API key through the "Shale Bot" on our Discord. No credit card is required and no free trials. It's a forever free tier with 1K limit per day per API key.
|
||||
|
||||
### 2. Use https://shale.live/v1 as OpenAI API drop-in replacement
|
||||
|
||||
For example
|
||||
```python
|
||||
from langchain.llms import OpenAI
|
||||
from langchain import PromptTemplate, LLMChain
|
||||
|
||||
import os
|
||||
os.environ['OPENAI_API_BASE'] = "https://shale.live/v1"
|
||||
os.environ['OPENAI_API_KEY'] = "ENTER YOUR API KEY"
|
||||
|
||||
llm = OpenAI()
|
||||
|
||||
template = """Question: {question}
|
||||
|
||||
# Answer: Let's think step by step."""
|
||||
|
||||
prompt = PromptTemplate(template=template, input_variables=["question"])
|
||||
|
||||
llm_chain = LLMChain(prompt=prompt, llm=llm)
|
||||
|
||||
question = "What NFL team won the Super Bowl in the year Justin Beiber was born?"
|
||||
|
||||
llm_chain.run(question)
|
||||
|
||||
```
|
||||
@@ -4,7 +4,7 @@
|
||||
What is Vectara?
|
||||
|
||||
**Vectara Overview:**
|
||||
- Vectara is developer-first API platform for building conversational search applications
|
||||
- Vectara is developer-first API platform for building GenAI applications
|
||||
- To use Vectara - first [sign up](https://console.vectara.com/signup) and create an account. Then create a corpus and an API key for indexing and searching.
|
||||
- You can use Vectara's [indexing API](https://docs.vectara.com/docs/indexing-apis/indexing) to add documents into Vectara's index
|
||||
- You can use Vectara's [Search API](https://docs.vectara.com/docs/search-apis/search) to query Vectara's index (which also supports Hybrid search implicitly).
|
||||
@@ -13,6 +13,13 @@ What is Vectara?
|
||||
## Installation and Setup
|
||||
To use Vectara with LangChain no special installation steps are required. You just have to provide your customer_id, corpus ID, and an API key created within the Vectara console to enable indexing and searching.
|
||||
|
||||
Alternatively these can be provided as environment variables
|
||||
- export `VECTARA_CUSTOMER_ID`="your_customer_id"
|
||||
- export `VECTARA_CORPUS_ID`="your_corpus_id"
|
||||
- export `VECTARA_API_KEY`="your-vectara-api-key"
|
||||
|
||||
## Usage
|
||||
|
||||
### VectorStore
|
||||
|
||||
There exists a wrapper around the Vectara platform, allowing you to use it as a vectorstore, whether for semantic search or example selection.
|
||||
@@ -32,8 +39,21 @@ vectara = Vectara(
|
||||
```
|
||||
The customer_id, corpus_id and api_key are optional, and if they are not supplied will be read from the environment variables `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY`, respectively.
|
||||
|
||||
To query the vectorstore, you can use the `similarity_search` method (or `similarity_search_with_score`), which takes a query string and returns a list of results:
|
||||
```python
|
||||
results = vectara.similarity_score("what is LangChain?")
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the Vectara wrapper, see one of the two example notebooks:
|
||||
`similarity_search_with_score` also supports the following additional arguments:
|
||||
- `k`: number of results to return (defaults to 5)
|
||||
- `lambda_val`: the [lexical matching](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) factor for hybrid search (defaults to 0.025)
|
||||
- `filter`: a [filter](https://docs.vectara.com/docs/common-use-cases/filtering-by-metadata/filter-overview) to apply to the results (default None)
|
||||
- `n_sentence_context`: number of sentences to include before/after the actual matching segment when returning results. This defaults to 0 so as to return the exact text segment that matches, but can be used with other values e.g. 2 or 3 to return adjacent text segments.
|
||||
|
||||
The results are returned as a list of relevant documents, and a relevance score of each document.
|
||||
|
||||
|
||||
For a more detailed examples of using the Vectara wrapper, see one of these two sample notebooks:
|
||||
* [Chat Over Documents with Vectara](./vectara/vectara_chat.html)
|
||||
* [Vectara Text Generation](./vectara/vectara_text_generation.html)
|
||||
|
||||
|
||||
@@ -102,21 +102,11 @@
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'langchain.vectorstores.vectara.Vectara'>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"openai_api_key = os.environ['OPENAI_API_KEY']\n",
|
||||
"llm = OpenAI(openai_api_key=openai_api_key, temperature=0)\n",
|
||||
"retriever = VectaraRetriever(vectorstore, alpha=0.025, k=5, filter=None)\n",
|
||||
"\n",
|
||||
"print(type(vectorstore))\n",
|
||||
"retriever = vectorstore.as_retriever(lambda_val=0.025, k=5, filter=None)\n",
|
||||
"d = retriever.get_relevant_documents('What did the president say about Ketanji Brown Jackson')\n",
|
||||
"\n",
|
||||
"qa = ConversationalRetrievalChain.from_llm(llm, retriever, memory=memory)"
|
||||
@@ -142,7 +132,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, and a former federal public defender.\""
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
@@ -174,7 +164,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' Justice Stephen Breyer.'"
|
||||
"' Justice Stephen Breyer'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
@@ -241,7 +231,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, and a former federal public defender.\""
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
@@ -286,7 +276,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' Justice Stephen Breyer.'"
|
||||
"' Justice Stephen Breyer'"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
@@ -344,7 +334,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. A former top litigator in private practice. A former federal public defender.', metadata={'source': '../../modules/state_of_the_union.txt'})"
|
||||
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
@@ -392,6 +382,24 @@
|
||||
"result = qa({\"question\": query, \"chat_history\": chat_history, \"vectordbkwargs\": vectordbkwargs})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"id": "24ebdaec",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(result['answer'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "99b96dae",
|
||||
@@ -459,7 +467,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' The president did not mention Ketanji Brown Jackson.'"
|
||||
"\" The president said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson, who he described as one of the nation's top legal minds, to continue Justice Breyer's legacy of excellence.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 23,
|
||||
@@ -538,7 +546,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' The president did not mention Ketanji Brown Jackson.\\nSOURCES: ../../modules/state_of_the_union.txt'"
|
||||
"\" The president said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson, who he described as one of the nation's top legal minds, and that she will continue Justice Breyer's legacy of excellence.\\nSOURCES: ../../../state_of_the_union.txt\""
|
||||
]
|
||||
},
|
||||
"execution_count": 27,
|
||||
@@ -598,7 +606,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, and a former federal public defender."
|
||||
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence."
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -620,7 +628,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Justice Stephen Breyer."
|
||||
" Justice Stephen Breyer"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -681,7 +689,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, and a former federal public defender.\""
|
||||
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 33,
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"source": [
|
||||
"# Vectara Text Generation\n",
|
||||
"\n",
|
||||
"This notebook is based on [chat_vector_db](https://github.com/hwchase17/langchain/blob/master/docs/modules/chains/index_examples/question_answering.ipynb) and adapted to Vectara."
|
||||
"This notebook is based on [text generation](https://github.com/hwchase17/langchain/blob/master/docs/modules/chains/index_examples/vector_db_text_generation.ipynb) notebook and adapted to Vectara."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -24,6 +24,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"import requests\n",
|
||||
@@ -159,7 +160,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[{'text': '\\n\\nEnvironment variables are an essential part of any development workflow. They provide a way to store and access information that is specific to the environment in which the code is running. This can be especially useful when working with different versions of a language or framework, or when running code on different machines.\\n\\nThe Deno CLI tasks extension provides a way to easily manage environment variables when running Deno commands. This extension provides a task definition for allowing you to create tasks that execute the `deno` CLI from within the editor. The template for the Deno CLI tasks has the following interface, which can be configured in a `tasks.json` within your workspace:\\n\\nThe task definition includes the `type` field, which should be set to `deno`, and the `command` field, which is the `deno` command to run (e.g. `run`, `test`, `cache`, etc.). Additionally, you can specify additional arguments to pass on the command line, the current working directory to execute the command, and any environment variables.\\n\\nUsing environment variables with the Deno CLI tasks extension is a great way to ensure that your code is running in the correct environment. For example, if you are running a test suite,'}, {'text': '\\n\\nEnvironment variables are an important part of any programming language, and they can be used to store and access data in a variety of ways. In this blog post, we\\'ll be taking a look at environment variables specifically for the shell.\\n\\nShell variables are similar to environment variables, but they won\\'t be exported to spawned commands. They are defined with the following syntax:\\n\\n```sh\\nVAR_NAME=value\\n```\\n\\nShell variables can be used to store and access data in a variety of ways. For example, you can use them to store values that you want to re-use, but don\\'t want to be available in any spawned processes.\\n\\nFor example, if you wanted to store a value and then use it in a command, you could do something like this:\\n\\n```sh\\nVAR=hello && echo $VAR && deno eval \"console.log(\\'Deno: \\' + Deno.env.get(\\'VAR\\'))\"\\n```\\n\\nThis would output the following:\\n\\n```\\nhello\\nDeno: undefined\\n```\\n\\nAs you can see, the value stored in the shell variable is not available in the spawned process.\\n\\n'}, {'text': '\\n\\nWhen it comes to developing applications, environment variables are an essential part of the process. Environment variables are used to store information that can be used by applications and scripts to customize their behavior. This is especially important when it comes to developing applications with Deno, as there are several environment variables that can impact the behavior of Deno.\\n\\nThe most important environment variable for Deno is `DENO_AUTH_TOKENS`. This environment variable is used to store authentication tokens that are used to access remote resources. This is especially important when it comes to accessing remote APIs or databases. Without the proper authentication tokens, Deno will not be able to access the remote resources.\\n\\nAnother important environment variable for Deno is `DENO_DIR`. This environment variable is used to store the directory where Deno will store its files. This includes the Deno executable, the Deno cache, and the Deno configuration files. By setting this environment variable, you can ensure that Deno will always be able to find the files it needs.\\n\\nFinally, there is the `DENO_PLUGINS` environment variable. 
This environment variable is used to store the list of plugins that Deno will use. This is important for customizing the'}, {'text': '\\n\\nEnvironment variables are a great way to store and access sensitive information in your Deno applications. Deno offers built-in support for environment variables with `Deno.env`, and you can also use a `.env` file to store and access environment variables. In this blog post, we\\'ll explore both of these options and how to use them in your Deno applications.\\n\\n## Built-in `Deno.env`\\n\\nThe Deno runtime offers built-in support for environment variables with [`Deno.env`](https://deno.land/api@v1.25.3?s=Deno.env). `Deno.env` has getter and setter methods. Here is example usage:\\n\\n```ts\\nDeno.env.set(\"FIREBASE_API_KEY\", \"examplekey123\");\\nDeno.env.set(\"FIREBASE_AUTH_DOMAIN\", \"firebasedomain.com\");\\n\\nconsole.log(Deno.env.get(\"FIREBASE_API_KEY\")); // examplekey123\\nconsole.log(Deno.env.get(\"FIREBASE_AUTH_'}]\n"
|
||||
"[{'text': '\\n\\nEnvironment variables are a powerful tool for managing configuration settings in your applications. They allow you to store and access values from anywhere in your code, making it easier to keep your codebase organized and maintainable.\\n\\nHowever, there are times when you may want to use environment variables specifically for a single command. This is where shell variables come in. Shell variables are similar to environment variables, but they won\\'t be exported to spawned commands. They are defined with the following syntax:\\n\\n```sh\\nVAR_NAME=value\\n```\\n\\nFor example, if you wanted to use a shell variable instead of an environment variable in a command, you could do something like this:\\n\\n```sh\\nVAR=hello && echo $VAR && deno eval \"console.log(\\'Deno: \\' + Deno.env.get(\\'VAR\\'))\"\\n```\\n\\nThis would output the following:\\n\\n```\\nhello\\nDeno: undefined\\n```\\n\\nShell variables can be useful when you want to re-use a value, but don\\'t want it available in any spawned processes.\\n\\nAnother way to use environment variables is through pipelines. Pipelines provide a way to pipe the'}, {'text': '\\n\\nEnvironment variables are a great way to store and access sensitive information in your applications. They are also useful for configuring applications and managing different environments. In Deno, there are two ways to use environment variables: the built-in `Deno.env` and the `.env` file.\\n\\nThe `Deno.env` is a built-in feature of the Deno runtime that allows you to set and get environment variables. It has getter and setter methods that you can use to access and set environment variables. For example, you can set the `FIREBASE_API_KEY` and `FIREBASE_AUTH_DOMAIN` environment variables like this:\\n\\n```ts\\nDeno.env.set(\"FIREBASE_API_KEY\", \"examplekey123\");\\nDeno.env.set(\"FIREBASE_AUTH_DOMAIN\", \"firebasedomain.com\");\\n\\nconsole.log(Deno.env.get(\"FIREBASE_API_KEY\")); // examplekey123\\nconsole.log(Deno.env.get(\"FIREBASE_AUTH_DOMAIN\")); // firebasedomain'}, {'text': \"\\n\\nEnvironment variables are a powerful tool for managing configuration and settings in your applications. They allow you to store and access values that can be used in your code, and they can be set and changed without having to modify your code.\\n\\nIn Deno, environment variables are defined using the `export` command. For example, to set a variable called `VAR_NAME` to the value `value`, you would use the following command:\\n\\n```sh\\nexport VAR_NAME=value\\n```\\n\\nYou can then access the value of the environment variable in your code using the `Deno.env.get()` method. For example, if you wanted to log the value of the `VAR_NAME` variable, you could use the following code:\\n\\n```js\\nconsole.log(Deno.env.get('VAR_NAME'));\\n```\\n\\nYou can also set environment variables for a single command. To do this, you can list the environment variables before the command, like so:\\n\\n```\\nVAR=hello VAR2=bye deno run main.ts\\n```\\n\\nThis will set the environment variables `VAR` and `V\"}, {'text': \"\\n\\nEnvironment variables are a powerful tool for managing settings and configuration in your applications. They can be used to store information such as user preferences, application settings, and even passwords. In this blog post, we'll discuss how to make Deno scripts executable with a hashbang (shebang).\\n\\nA hashbang is a line of code that is placed at the beginning of a script. It tells the system which interpreter to use when running the script. 
In the case of Deno, the hashbang should be `#!/usr/bin/env -S deno run --allow-env`. This tells the system to use the Deno interpreter and to allow the script to access environment variables.\\n\\nOnce the hashbang is in place, you may need to give the script execution permissions. On Linux, this can be done with the command `sudo chmod +x hashbang.ts`. After that, you can execute the script by calling it like any other command: `./hashbang.ts`.\\n\\nIn the example program, we give the context permission to access the environment variables and print the Deno installation path. This is done by using the `Deno.env.get()` function, which returns the value of the specified environment\"}]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "9b22020a",
|
||||
"metadata": {},
|
||||
@@ -139,6 +140,7 @@
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "c0a6c031",
|
||||
"metadata": {},
|
||||
@@ -229,7 +231,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"What did biden say about ketanji brown jackson is the state of the union address?\")"
|
||||
"agent.run(\"What did biden say about ketanji brown jackson in the state of the union address?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -271,6 +273,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "787a9b5e",
|
||||
"metadata": {},
|
||||
@@ -279,6 +282,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "9161ba91",
|
||||
"metadata": {},
|
||||
@@ -396,6 +400,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "49a0cbbe",
|
||||
"metadata": {},
|
||||
|
||||
150
docs/modules/agents/agents/examples/openai_functions_agent.ipynb
Normal file
150
docs/modules/agents/agents/examples/openai_functions_agent.ipynb
Normal file
@@ -0,0 +1,150 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9502d5b0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# OpenAI Functions Agent\n",
|
||||
"\n",
|
||||
"This notebook showcases using an agent that uses the OpenAI functions ability"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "c0a83623",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import LLMMathChain, OpenAI, SerpAPIWrapper, SQLDatabase, SQLDatabaseChain\n",
|
||||
"from langchain.agents import initialize_agent, Tool\n",
|
||||
"from langchain.agents import AgentType\n",
|
||||
"from langchain.chat_models import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "6fefaba2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"llm_math_chain = LLMMathChain.from_llm(llm=llm, verbose=True)\n",
|
||||
"db = SQLDatabase.from_uri(\"sqlite:///../../../../../notebooks/Chinook.db\")\n",
|
||||
"db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)\n",
|
||||
"tools = [\n",
|
||||
" Tool(\n",
|
||||
" name = \"Search\",\n",
|
||||
" func=search.run,\n",
|
||||
" description=\"useful for when you need to answer questions about current events. You should ask targeted questions\"\n",
|
||||
" ),\n",
|
||||
" Tool(\n",
|
||||
" name=\"Calculator\",\n",
|
||||
" func=llm_math_chain.run,\n",
|
||||
" description=\"useful for when you need to answer questions about math\"\n",
|
||||
" ),\n",
|
||||
" Tool(\n",
|
||||
" name=\"FooBar-DB\",\n",
|
||||
" func=db_chain.run,\n",
|
||||
" description=\"useful for when you need to answer questions about FooBar. Input should be in the form of a question containing full context\"\n",
|
||||
" )\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "9ff6cee9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mrkl = initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "ba8e4cbe",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `Search` with `{'query': 'Leo DiCaprio girlfriend'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3mAmidst his casual romance with Gigi, Leo allegedly entered a relationship with 19-year old model, Eden Polani, in February 2023.\u001b[0m\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `Calculator` with `{'expression': '19^0.43'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new chain...\u001b[0m\n",
|
||||
"19^0.43\u001b[32;1m\u001b[1;3m```text\n",
|
||||
"19**0.43\n",
|
||||
"```\n",
|
||||
"...numexpr.evaluate(\"19**0.43\")...\n",
|
||||
"\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m3.547023357958959\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\u001b[33;1m\u001b[1;3mAnswer: 3.547023357958959\u001b[0m\u001b[32;1m\u001b[1;3mLeo DiCaprio's girlfriend is reportedly Eden Polani. Her current age raised to the power of 0.43 is approximately 3.55.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Leo DiCaprio's girlfriend is reportedly Eden Polani. Her current age raised to the power of 0.43 is approximately 3.55.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"mrkl.run(\"Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9f5f6743",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -30,38 +30,83 @@
    "metadata": {},
    "outputs": [],
    "source": [
    "from langchain.llms import OpenAI"
    "from langchain.llms import OpenAI\n",
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.agents.agent_types import AgentType"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bd806175",
   "metadata": {},
   "source": [
    "## Using ZERO_SHOT_REACT_DESCRIPTION\n",
    "\n",
    "This shows how to initialize the agent using the ZERO_SHOT_REACT_DESCRIPTION agent type. Note that this is an alternative to the above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "a1717204",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent = create_csv_agent(\n",
    "    OpenAI(temperature=0), \n",
    "    'titanic.csv', \n",
    "    verbose=True, \n",
    "    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c31bb8a6",
   "metadata": {},
   "source": [
    "## Using OpenAI Functions\n",
    "\n",
    "This shows how to initialize the agent using the OPENAI_FUNCTIONS agent type. Note that this is an alternative to the above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "16c4dc59",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent = create_csv_agent(\n",
    "    ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\"), \n",
    "    'titanic.csv', \n",
    "    verbose=True, \n",
    "    agent_type=AgentType.OPENAI_FUNCTIONS\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "16c4dc59",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent = create_csv_agent(OpenAI(temperature=0), 'titanic.csv', verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "46b9489d",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Error in on_chain_start callback: 'name'\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[32;1m\u001b[1;3m\n",
      "Invoking: `python_repl_ast` with `df.shape[0]`\n",
      "\n",
      "\n",
      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3mThought: I need to count the number of rows\n",
      "Action: python_repl_ast\n",
      "Action Input: df.shape[0]\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3m891\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
      "Final Answer: There are 891 rows.\u001b[0m\n",
      "\u001b[0m\u001b[36;1m\u001b[1;3m891\u001b[0m\u001b[32;1m\u001b[1;3mThere are 891 rows in the dataframe.\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
@@ -69,10 +114,10 @@
    {
     "data": {
      "text/plain": [
      "'There are 891 rows.'"
      "'There are 891 rows in the dataframe.'"
      ]
     },
     "execution_count": 5,
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -83,23 +128,28 @@
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "execution_count": 5,
   "id": "a96309be",
   "metadata": {},
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Error in on_chain_start callback: 'name'\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[32;1m\u001b[1;3m\n",
      "Invoking: `python_repl_ast` with `df[df['SibSp'] > 3]['PassengerId'].count()`\n",
      "\n",
      "\n",
      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3mThought: I need to count the number of people with more than 3 siblings\n",
      "Action: python_repl_ast\n",
      "Action Input: df[df['SibSp'] > 3].shape[0]\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3m30\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
      "Final Answer: 30 people have more than 3 siblings.\u001b[0m\n",
      "\u001b[0m\u001b[36;1m\u001b[1;3m30\u001b[0m\u001b[32;1m\u001b[1;3mThere are 30 people in the dataframe who have more than 3 siblings.\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
@@ -107,10 +157,10 @@
    {
     "data": {
      "text/plain": [
      "'30 people have more than 3 siblings.'"
      "'There are 30 people in the dataframe who have more than 3 siblings.'"
      ]
     },
     "execution_count": 6,
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -121,35 +171,39 @@
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "execution_count": 6,
   "id": "964a09f7",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Error in on_chain_start callback: 'name'\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[32;1m\u001b[1;3m\n",
      "Invoking: `python_repl_ast` with `import pandas as pd\n",
      "import math\n",
      "\n",
      "# Create a dataframe\n",
      "data = {'Age': [22, 38, 26, 35, 35]}\n",
      "df = pd.DataFrame(data)\n",
      "\n",
      "# Calculate the average age\n",
      "average_age = df['Age'].mean()\n",
      "\n",
      "# Calculate the square root of the average age\n",
      "square_root = math.sqrt(average_age)\n",
      "\n",
      "square_root`\n",
      "\n",
      "\n",
      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3mThought: I need to calculate the average age first\n",
      "Action: python_repl_ast\n",
      "Action Input: df['Age'].mean()\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3m29.69911764705882\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now need to calculate the square root of the average age\n",
      "Action: python_repl_ast\n",
      "Action Input: math.sqrt(df['Age'].mean())\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3mNameError(\"name 'math' is not defined\")\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I need to import the math library\n",
      "Action: python_repl_ast\n",
      "Action Input: import math\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now need to calculate the square root of the average age\n",
      "Action: python_repl_ast\n",
      "Action Input: math.sqrt(df['Age'].mean())\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3m5.449689683556195\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
      "Final Answer: 5.449689683556195\u001b[0m\n",
      "\u001b[0m\u001b[36;1m\u001b[1;3m5.585696017507576\u001b[0m\u001b[32;1m\u001b[1;3mThe square root of the average age is approximately 5.59.\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
@@ -157,10 +211,10 @@
    {
     "data": {
      "text/plain": [
      "'5.449689683556195'"
      "'The square root of the average age is approximately 5.59.'"
      ]
     },
     "execution_count": 7,
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -181,23 +235,26 @@
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "execution_count": 8,
   "id": "15f11fbd",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Error in on_chain_start callback: 'name'\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[32;1m\u001b[1;3m\n",
      "Invoking: `python_repl_ast` with `df1['Age'].nunique() - df2['Age'].nunique()`\n",
      "\n",
      "\n",
      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3mThought: I need to compare the age columns in both dataframes\n",
      "Action: python_repl_ast\n",
      "Action Input: len(df1[df1['Age'] != df2['Age']])\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3m177\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
      "Final Answer: 177 rows in the age column are different.\u001b[0m\n",
      "\u001b[0m\u001b[36;1m\u001b[1;3m-1\u001b[0m\u001b[32;1m\u001b[1;3mThere is 1 row in the age column that is different between the two dataframes.\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
@@ -205,17 +262,17 @@
    {
     "data": {
      "text/plain": [
      "'177 rows in the age column are different.'"
      "'There is 1 row in the age column that is different between the two dataframes.'"
      ]
     },
     "execution_count": 3,
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agent = create_csv_agent(OpenAI(temperature=0), ['titanic.csv', 'titanic_age_fillna.csv'], verbose=True)\n",
    "agent = create_csv_agent(ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\"), ['titanic.csv', 'titanic_age_fillna.csv'], verbose=True, agent_type=AgentType.OPENAI_FUNCTIONS)\n",
    "agent.run(\"how many rows in the age column are different?\")"
    "agent.run(\"how many rows in the age column are different between the two dfs?\")"
   ]
  },
  {

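The hunks above migrate the CSV-agent notebook to the new `AgentType.OPENAI_FUNCTIONS` option. Pulled together as one minimal runnable sketch of the pattern the diff introduces (assuming, as the notebook does, that `OPENAI_API_KEY` is set and a local `titanic.csv` exists):

```python
from langchain.agents import create_csv_agent
from langchain.agents.agent_types import AgentType
from langchain.chat_models import ChatOpenAI

# gpt-3.5-turbo-0613 is the function-calling snapshot used throughout this diff.
agent = create_csv_agent(
    ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"),
    "titanic.csv",
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
)
agent.run("how many rows are there?")
```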
@@ -19,7 +19,9 @@
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.agents import create_pandas_dataframe_agent"
    "from langchain.agents import create_pandas_dataframe_agent\n",
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.agents.agent_types import AgentType"
   ]
  },
  {
@@ -35,6 +37,16 @@
    "df = pd.read_csv('titanic.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a62858e2",
   "metadata": {},
   "source": [
    "## Using ZERO_SHOT_REACT_DESCRIPTION\n",
    "\n",
    "This shows how to initialize the agent using the ZERO_SHOT_REACT_DESCRIPTION agent type. Note that this is an alternative to the above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
@@ -45,9 +57,34 @@
    "agent = create_pandas_dataframe_agent(OpenAI(temperature=0), df, verbose=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7233ab56",
   "metadata": {},
   "source": [
    "## Using OpenAI Functions\n",
    "\n",
    "This shows how to initialize the agent using the OPENAI_FUNCTIONS agent type. Note that this is an alternative to the above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a8ea710e",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent = create_pandas_dataframe_agent(\n",
    "    ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\"), \n",
    "    df, \n",
    "    verbose=True, \n",
    "    agent_type=AgentType.OPENAI_FUNCTIONS\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a9207a2e",
   "metadata": {},
   "outputs": [
@@ -57,13 +94,12 @@
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3mThought: I need to count the number of rows\n",
      "Action: python_repl_ast\n",
      "Action Input: df.shape[0]\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3m891\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
      "Final Answer: There are 891 rows.\u001b[0m\n",
      "\u001b[1m> Entering new  chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3m\n",
      "Invoking: `python_repl_ast` with `df.shape[0]`\n",
      "\n",
      "\n",
      "\u001b[0m\u001b[36;1m\u001b[1;3m891\u001b[0m\u001b[32;1m\u001b[1;3mThere are 891 rows in the dataframe.\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
@@ -71,10 +107,10 @@
    {
     "data": {
      "text/plain": [
      "'There are 891 rows.'"
      "'There are 891 rows in the dataframe.'"
      ]
     },
     "execution_count": 4,
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -172,7 +208,6 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "c4bc0584",
   "metadata": {},
@@ -257,7 +292,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
   "version": "3.9.1"
  }
 },
 "nbformat": 4,

@@ -12,7 +12,7 @@
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "execution_count": 1,
   "id": "f98e9c90-5c37-4fb9-af3e-d09693af8543",
   "metadata": {
    "tags": []
@@ -22,12 +22,24 @@
    "from langchain.agents.agent_toolkits import create_python_agent\n",
    "from langchain.tools.python.tool import PythonREPLTool\n",
    "from langchain.python import PythonREPL\n",
    "from langchain.llms.openai import OpenAI"
    "from langchain.llms.openai import OpenAI\n",
    "from langchain.agents.agent_types import AgentType\n",
    "from langchain.chat_models import ChatOpenAI"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ca30d64c",
   "metadata": {},
   "source": [
    "## Using ZERO_SHOT_REACT_DESCRIPTION\n",
    "\n",
    "This shows how to initialize the agent using the ZERO_SHOT_REACT_DESCRIPTION agent type. Note that this is an alternative to the above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "execution_count": 2,
   "id": "cc422f53-c51c-4694-a834-72ecd1e68363",
   "metadata": {
    "tags": []
@@ -37,7 +49,34 @@
    "agent_executor = create_python_agent(\n",
    "    llm=OpenAI(temperature=0, max_tokens=1000),\n",
    "    tool=PythonREPLTool(),\n",
    "    verbose=True\n",
    "    verbose=True,\n",
    "    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bb487e8e",
   "metadata": {},
   "source": [
    "## Using OpenAI Functions\n",
    "\n",
    "This shows how to initialize the agent using the OPENAI_FUNCTIONS agent type. Note that this is an alternative to the above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "6e651822",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent_executor = create_python_agent(\n",
    "    llm=ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\"),\n",
    "    tool=PythonREPLTool(),\n",
    "    verbose=True,\n",
    "    agent_type=AgentType.OPENAI_FUNCTIONS,\n",
    "    agent_executor_kwargs={\"handle_parsing_errors\": True},\n",
    ")"
   ]
  },
@@ -52,7 +91,7 @@
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "execution_count": 4,
   "id": "25cd4f92-ea9b-4fe6-9838-a4f85f81eebe",
   "metadata": {
    "tags": []
@@ -64,23 +103,20 @@
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3m I need to calculate the 10th fibonacci number\n",
      "Action: Python REPL\n",
      "Action Input: def fibonacci(n):\n",
      "    if n == 0:\n",
      "\u001b[1m> Entering new  chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3m\n",
      "Invoking: `Python_REPL` with `def fibonacci(n):\n",
      "    if n <= 0:\n",
      "        return 0\n",
      "    elif n == 1:\n",
      "        return 1\n",
      "    else:\n",
      "        return fibonacci(n-1) + fibonacci(n-2)\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I need to call the function with 10 as the argument\n",
      "Action: Python REPL\n",
      "Action Input: fibonacci(10)\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
      "Final Answer: 55\u001b[0m\n",
      "        return fibonacci(n-1) + fibonacci(n-2)\n",
      "\n",
      "fibonacci(10)`\n",
      "\n",
      "\n",
      "\u001b[0m\u001b[36;1m\u001b[1;3m\u001b[0m\u001b[32;1m\u001b[1;3mThe 10th Fibonacci number is 55.\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
@@ -88,10 +124,10 @@
    {
     "data": {
      "text/plain": [
      "'55'"
      "'The 10th Fibonacci number is 55.'"
      ]
     },
     "execution_count": 3,
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -111,9 +147,11 @@
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "execution_count": 5,
   "id": "4b9f60e7-eb6a-4f14-8604-498d863d4482",
   "metadata": {},
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
@@ -121,59 +159,70 @@
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3m I need to write a neural network in PyTorch and train it on the given data.\n",
      "Action: Python REPL\n",
      "Action Input: \n",
      "import torch\n",
      "\u001b[1m> Entering new  chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3mCould not parse tool input: {'name': 'python', 'arguments': 'import torch\\nimport torch.nn as nn\\nimport torch.optim as optim\\n\\n# Define the neural network\\nclass SingleNeuron(nn.Module):\\n    def __init__(self):\\n        super(SingleNeuron, self).__init__()\\n        self.linear = nn.Linear(1, 1)\\n    \\n    def forward(self, x):\\n        return self.linear(x)\\n\\n# Create the synthetic data\\nx_train = torch.tensor([[1.0], [2.0], [3.0], [4.0]], dtype=torch.float32)\\ny_train = torch.tensor([[2.0], [4.0], [6.0], [8.0]], dtype=torch.float32)\\n\\n# Create the neural network\\nmodel = SingleNeuron()\\n\\n# Define the loss function and optimizer\\ncriterion = nn.MSELoss()\\noptimizer = optim.SGD(model.parameters(), lr=0.01)\\n\\n# Train the neural network\\nfor epoch in range(1, 1001):\\n    # Forward pass\\n    y_pred = model(x_train)\\n    \\n    # Compute loss\\n    loss = criterion(y_pred, y_train)\\n    \\n    # Backward pass and optimization\\n    optimizer.zero_grad()\\n    loss.backward()\\n    optimizer.step()\\n    \\n    # Print the loss every 100 epochs\\n    if epoch % 100 == 0:\\n        print(f\"Epoch {epoch}: Loss = {loss.item()}\")\\n\\n# Make a prediction for x = 5\\nx_test = torch.tensor([[5.0]], dtype=torch.float32)\\ny_pred = model(x_test)\\ny_pred.item()'} because the `arguments` is not valid JSON.\u001b[0mInvalid or incomplete response\u001b[32;1m\u001b[1;3m\n",
      "Invoking: `Python_REPL` with `import torch\n",
      "import torch.nn as nn\n",
      "import torch.optim as optim\n",
      "\n",
      "# Define the model\n",
      "model = torch.nn.Sequential(\n",
      "    torch.nn.Linear(1, 1)\n",
      ")\n",
      "# Define the neural network\n",
      "class SingleNeuron(nn.Module):\n",
      "    def __init__(self):\n",
      "        super(SingleNeuron, self).__init__()\n",
      "        self.linear = nn.Linear(1, 1)\n",
      "    \n",
      "    def forward(self, x):\n",
      "        return self.linear(x)\n",
      "\n",
      "# Define the loss\n",
      "loss_fn = torch.nn.MSELoss()\n",
      "# Create the synthetic data\n",
      "x_train = torch.tensor([[1.0], [2.0], [3.0], [4.0]], dtype=torch.float32)\n",
      "y_train = torch.tensor([[2.0], [4.0], [6.0], [8.0]], dtype=torch.float32)\n",
      "\n",
      "# Define the optimizer\n",
      "optimizer = torch.optim.SGD(model.parameters(), lr=0.01)\n",
      "# Create the neural network\n",
      "model = SingleNeuron()\n",
      "\n",
      "# Define the data\n",
      "x_data = torch.tensor([[1.0], [2.0], [3.0], [4.0]])\n",
      "y_data = torch.tensor([[2.0], [4.0], [6.0], [8.0]])\n",
      "# Define the loss function and optimizer\n",
      "criterion = nn.MSELoss()\n",
      "optimizer = optim.SGD(model.parameters(), lr=0.01)\n",
      "\n",
      "# Train the model\n",
      "for epoch in range(1000):\n",
      "# Train the neural network\n",
      "for epoch in range(1, 1001):\n",
      "    # Forward pass\n",
      "    y_pred = model(x_data)\n",
      "\n",
      "    # Compute and print loss\n",
      "    loss = loss_fn(y_pred, y_data)\n",
      "    if (epoch+1) % 100 == 0:\n",
      "        print(f'Epoch {epoch+1}: loss = {loss.item():.4f}')\n",
      "\n",
      "    # Zero the gradients\n",
      "    y_pred = model(x_train)\n",
      "    \n",
      "    # Compute loss\n",
      "    loss = criterion(y_pred, y_train)\n",
      "    \n",
      "    # Backward pass and optimization\n",
      "    optimizer.zero_grad()\n",
      "\n",
      "    # Backward pass\n",
      "    loss.backward()\n",
      "\n",
      "    # Update the weights\n",
      "    optimizer.step()\n",
      "\u001b[0m\n",
      "Observation: \u001b[36;1m\u001b[1;3mEpoch 100: loss = 0.0013\n",
      "Epoch 200: loss = 0.0007\n",
      "Epoch 300: loss = 0.0004\n",
      "Epoch 400: loss = 0.0002\n",
      "Epoch 500: loss = 0.0001\n",
      "Epoch 600: loss = 0.0001\n",
      "Epoch 700: loss = 0.0000\n",
      "Epoch 800: loss = 0.0000\n",
      "Epoch 900: loss = 0.0000\n",
      "Epoch 1000: loss = 0.0000\n",
      "\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
      "Final Answer: The prediction for x = 5 is 10.0.\u001b[0m\n",
      "    \n",
      "    # Print the loss every 100 epochs\n",
      "    if epoch % 100 == 0:\n",
      "        print(f\"Epoch {epoch}: Loss = {loss.item()}\")\n",
      "\n",
      "# Make a prediction for x = 5\n",
      "x_test = torch.tensor([[5.0]], dtype=torch.float32)\n",
      "y_pred = model(x_test)\n",
      "y_pred.item()`\n",
      "\n",
      "\n",
      "\u001b[0m\u001b[36;1m\u001b[1;3mEpoch 100: Loss = 0.03825576975941658\n",
      "Epoch 200: Loss = 0.02100197970867157\n",
      "Epoch 300: Loss = 0.01152981910854578\n",
      "Epoch 400: Loss = 0.006329738534986973\n",
      "Epoch 500: Loss = 0.0034749575424939394\n",
      "Epoch 600: Loss = 0.0019077073084190488\n",
      "Epoch 700: Loss = 0.001047312980517745\n",
      "Epoch 800: Loss = 0.0005749554838985205\n",
      "Epoch 900: Loss = 0.0003156439634039998\n",
      "Epoch 1000: Loss = 0.00017328384274151176\n",
      "\u001b[0m\u001b[32;1m\u001b[1;3m\n",
      "Invoking: `Python_REPL` with `x_test.item()`\n",
      "\n",
      "\n",
      "\u001b[0m\u001b[36;1m\u001b[1;3m\u001b[0m\u001b[32;1m\u001b[1;3mThe prediction for x = 5 is 10.000173568725586.\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
@@ -181,10 +230,10 @@
    {
     "data": {
      "text/plain": [
      "'The prediction for x = 5 is 10.0.'"
      "'The prediction for x = 5 is 10.000173568725586.'"
      ]
     },
     "execution_count": 4,
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -206,9 +255,9 @@
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "LangChain",
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "langchain"
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
@@ -220,7 +269,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
   "version": "3.9.1"
  }
 },
 "nbformat": 4,

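The Python-agent hunks show why the diff adds `agent_executor_kwargs={"handle_parsing_errors": True}`: in the trace above, the model first emits a malformed `function_call` ("Could not parse tool input ... not valid JSON"), the executor feeds back "Invalid or incomplete response", and the model retries successfully. A minimal sketch of the same setup, assembled from the imports and arguments in this diff:

```python
from langchain.agents.agent_toolkits import create_python_agent
from langchain.agents.agent_types import AgentType
from langchain.chat_models import ChatOpenAI
from langchain.tools.python.tool import PythonREPLTool

# handle_parsing_errors lets the executor surface parse failures back to
# the model as an observation instead of raising, enabling the retry seen
# in the notebook output above.
agent_executor = create_python_agent(
    llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"),
    tool=PythonREPLTool(),
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    agent_executor_kwargs={"handle_parsing_errors": True},
)
agent_executor.run("What is the 10th fibonacci number?")
```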
@@ -37,7 +37,30 @@
    "from langchain.agents.agent_toolkits import SQLDatabaseToolkit\n",
    "from langchain.sql_database import SQLDatabase\n",
    "from langchain.llms.openai import OpenAI\n",
    "from langchain.agents import AgentExecutor"
    "from langchain.agents import AgentExecutor\n",
    "from langchain.agents.agent_types import AgentType\n",
    "from langchain.chat_models import ChatOpenAI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "65ec5bb3",
   "metadata": {},
   "outputs": [],
   "source": [
    "db = SQLDatabase.from_uri(\"sqlite:///../../../../../notebooks/Chinook.db\")\n",
    "toolkit = SQLDatabaseToolkit(db=db, llm=OpenAI(temperature=0))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f74d1792",
   "metadata": {},
   "source": [
    "## Using ZERO_SHOT_REACT_DESCRIPTION\n",
    "\n",
    "This shows how to initialize the agent using the ZERO_SHOT_REACT_DESCRIPTION agent type. Note that this is an alternative to the above."
   ]
  },
  {
@@ -49,16 +72,39 @@
   },
   "outputs": [],
   "source": [
    "db = SQLDatabase.from_uri(\"sqlite:///../../../../notebooks/Chinook.db\")\n",
    "toolkit = SQLDatabaseToolkit(db=db)\n",
    "\n",
    "agent_executor = create_sql_agent(\n",
    "    llm=OpenAI(temperature=0),\n",
    "    toolkit=toolkit,\n",
    "    verbose=True\n",
    "    verbose=True,\n",
    "    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "971cc455",
   "metadata": {},
   "source": [
    "## Using OpenAI Functions\n",
    "\n",
    "This shows how to initialize the agent using the OPENAI_FUNCTIONS agent type. Note that this is an alternative to the above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "6426a27d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# agent_executor = create_sql_agent(\n",
    "#     llm=ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\"),\n",
    "#     toolkit=toolkit,\n",
    "#     verbose=True,\n",
    "#     agent_type=AgentType.OPENAI_FUNCTIONS\n",
    "# )"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "36ae48c7-cb08-4fef-977e-c7d4b96a464b",
@@ -69,7 +115,7 @@
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "execution_count": 4,
   "id": "ff70e83d-5ad0-4fc7-bb96-27d82ac166d7",
   "metadata": {
    "tags": []
@@ -81,14 +127,16 @@
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3mAction: list_tables_sql_db\n",
      "Action Input: \"\"\u001b[0m\n",
      "Observation: \u001b[38;5;200m\u001b[1;3mArtist, Invoice, Playlist, Genre, Album, PlaylistTrack, Track, InvoiceLine, MediaType, Employee, Customer\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I should look at the schema of the playlisttrack table\n",
      "Action: schema_sql_db\n",
      "Action Input: \"PlaylistTrack\"\u001b[0m\n",
      "Observation: \u001b[33;1m\u001b[1;3m\n",
      "\u001b[1m> Entering new  chain...\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3m\n",
      "Invoking: `list_tables_sql_db` with `{}`\n",
      "\n",
      "\n",
      "\u001b[0m\u001b[38;5;200m\u001b[1;3mAlbum, Artist, Track, PlaylistTrack, InvoiceLine, sales_table, Playlist, Genre, Employee, Customer, Invoice, MediaType\u001b[0m\u001b[32;1m\u001b[1;3m\n",
      "Invoking: `schema_sql_db` with `PlaylistTrack`\n",
      "\n",
      "\n",
      "\u001b[0m\u001b[33;1m\u001b[1;3m\n",
      "CREATE TABLE \"PlaylistTrack\" (\n",
      "\t\"PlaylistId\" INTEGER NOT NULL, \n",
      "\t\"TrackId\" INTEGER NOT NULL, \n",
@@ -97,13 +145,36 @@
      "\tFOREIGN KEY(\"PlaylistId\") REFERENCES \"Playlist\" (\"PlaylistId\")\n",
      ")\n",
      "\n",
      "SELECT * FROM 'PlaylistTrack' LIMIT 3;\n",
      "PlaylistId TrackId\n",
      "1 3402\n",
      "1 3389\n",
      "1 3390\u001b[0m\n",
      "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
      "Final Answer: The PlaylistTrack table has two columns, PlaylistId and TrackId, and is linked to the Playlist and Track tables.\u001b[0m\n",
      "/*\n",
      "3 rows from PlaylistTrack table:\n",
      "PlaylistId\tTrackId\n",
      "1\t3402\n",
      "1\t3389\n",
      "1\t3390\n",
      "*/\u001b[0m\u001b[32;1m\u001b[1;3mThe `PlaylistTrack` table has two columns: `PlaylistId` and `TrackId`. It is a junction table that represents the relationship between playlists and tracks. \n",
      "\n",
      "Here is the schema of the `PlaylistTrack` table:\n",
      "\n",
      "```\n",
      "CREATE TABLE \"PlaylistTrack\" (\n",
      "\t\"PlaylistId\" INTEGER NOT NULL, \n",
      "\t\"TrackId\" INTEGER NOT NULL, \n",
      "\tPRIMARY KEY (\"PlaylistId\", \"TrackId\"), \n",
      "\tFOREIGN KEY(\"TrackId\") REFERENCES \"Track\" (\"TrackId\"), \n",
      "\tFOREIGN KEY(\"PlaylistId\") REFERENCES \"Playlist\" (\"PlaylistId\")\n",
      ")\n",
      "```\n",
      "\n",
      "Here are three sample rows from the `PlaylistTrack` table:\n",
      "\n",
      "```\n",
      "PlaylistId TrackId\n",
      "1 3402\n",
      "1 3389\n",
      "1 3390\n",
      "```\n",
      "\n",
      "Please let me know if there is anything else I can help you with.\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
@@ -111,10 +182,10 @@
    {
     "data": {
      "text/plain": [
      "'The PlaylistTrack table has two columns, PlaylistId and TrackId, and is linked to the Playlist and Track tables.'"
      "'The `PlaylistTrack` table has two columns: `PlaylistId` and `TrackId`. It is a junction table that represents the relationship between playlists and tracks. \\n\\nHere is the schema of the `PlaylistTrack` table:\\n\\n```\\nCREATE TABLE \"PlaylistTrack\" (\\n\\t\"PlaylistId\" INTEGER NOT NULL, \\n\\t\"TrackId\" INTEGER NOT NULL, \\n\\tPRIMARY KEY (\"PlaylistId\", \"TrackId\"), \\n\\tFOREIGN KEY(\"TrackId\") REFERENCES \"Track\" (\"TrackId\"), \\n\\tFOREIGN KEY(\"PlaylistId\") REFERENCES \"Playlist\" (\"PlaylistId\")\\n)\\n```\\n\\nHere are three sample rows from the `PlaylistTrack` table:\\n\\n```\\nPlaylistId TrackId\\n1 3402\\n1 3389\\n1 3390\\n```\\n\\nPlease let me know if there is anything else I can help you with.'"
      ]
     },
     "execution_count": 3,
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -519,7 +590,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
   "version": "3.9.1"
  }
 },
 "nbformat": 4,

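Note that the SQL-agent notebook leaves the OPENAI_FUNCTIONS variant commented out. Uncommented, the pattern the diff records would run as the sketch below (the Chinook.db path is an assumption; point it at wherever your copy of the sample database lives):

```python
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.agents.agent_types import AgentType
from langchain.chat_models import ChatOpenAI
from langchain.sql_database import SQLDatabase

db = SQLDatabase.from_uri("sqlite:///Chinook.db")  # assumed local path
toolkit = SQLDatabaseToolkit(db=db, llm=ChatOpenAI(temperature=0))

agent_executor = create_sql_agent(
    llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613"),
    toolkit=toolkit,
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
)
agent_executor.run("Describe the playlisttrack table")
```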
@@ -1,6 +1,7 @@
 {
  "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "18ada398-dce6-4049-9b56-fc0ede63da9c",
   "metadata": {},
@@ -11,6 +12,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "eecb683b-3a46-4b9d-81a3-7caefbfec1a1",
   "metadata": {},
@@ -88,6 +90,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "f4814175-964d-42f1-aa9d-22801ce1e912",
   "metadata": {},
@@ -123,6 +126,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "8a38ad10",
   "metadata": {},
@@ -165,7 +169,7 @@
    }
   ],
   "source": [
    "agent_executor.run(\"What did biden say about ketanji brown jackson is the state of the union address?\")"
    "agent_executor.run(\"What did biden say about ketanji brown jackson in the state of the union address?\")"
   ]
  },
  {
@@ -203,10 +207,11 @@
    }
   ],
   "source": [
    "agent_executor.run(\"What did biden say about ketanji brown jackson is the state of the union address? List the source.\")"
    "agent_executor.run(\"What did biden say about ketanji brown jackson in the state of the union address? List the source.\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "7ca07707",
   "metadata": {},
@@ -255,6 +260,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "71680984-edaf-4a63-90f5-94edbd263550",
   "metadata": {},
@@ -299,7 +305,7 @@
    }
   ],
   "source": [
    "agent_executor.run(\"What did biden say about ketanji brown jackson is the state of the union address?\")"
    "agent_executor.run(\"What did biden say about ketanji brown jackson in the state of the union address?\")"
   ]
  },
  {

@@ -25,6 +25,15 @@ Next, we have some examples of customizing and generically working with tools
   ./tools/custom_tools.ipynb
   ./tools/multi_input_tool.ipynb
   ./tools/tool_input_validation.ipynb
   ./tools/human_approval.ipynb

Tools are also usable outside of the LangChain ecosystem! Here are examples of doing so

.. toctree::
   :maxdepth: 1
   :glob:

   ./tools/tools_as_openai_functions.ipynb


In this documentation we cover generic tooling functionality (eg how to create your own)

@@ -160,3 +160,9 @@ Below is a list of all supported tools and relevant information:
- Notes: A connection to the OpenWeatherMap API (https://api.openweathermap.org), specifically the `/data/2.5/weather` endpoint.
- Requires LLM: No
- Extra Parameters: `openweathermap_api_key` (your API key to access this endpoint)

**sleep**

- Tool Name: Sleep
- Tool Description: Make agent sleep for some time.
- Requires LLM: No

138 docs/modules/agents/tools/tools_as_openai_functions.ipynb (new file)
@@ -0,0 +1,138 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "4111c9d4",
   "metadata": {},
   "source": [
    "# Tools as OpenAI Functions\n",
    "\n",
    "This notebook goes over how to use LangChain tools as OpenAI functions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d65d8a60",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.schema import HumanMessage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "abd8dc72",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = ChatOpenAI(model=\"gpt-3.5-turbo-0613\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "dce2cdb7",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.tools import MoveFileTool, format_tool_to_openai_function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "3b3dc766",
   "metadata": {},
   "outputs": [],
   "source": [
    "tools = [MoveFileTool()]\n",
    "functions = [format_tool_to_openai_function(t) for t in tools]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "230a7939",
   "metadata": {},
   "outputs": [],
   "source": [
    "message = model.predict_messages([HumanMessage(content='move file foo to bar')], functions=functions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c118c940",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AIMessage(content='', additional_kwargs={'function_call': {'name': 'move_file', 'arguments': '{\\n  \"source_path\": \"foo\",\\n  \"destination_path\": \"bar\"\\n}'}}, example=False)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "message"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "d618e3d8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'name': 'move_file',\n",
       " 'arguments': '{\\n  \"source_path\": \"foo\",\\n  \"destination_path\": \"bar\"\\n}'}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "message.additional_kwargs['function_call']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "751da79f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
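The new notebook stops after inspecting `message.additional_kwargs['function_call']`. A natural follow-up, sketched here rather than taken from the diff, is to dispatch that call back to the matching LangChain tool (assuming the `message` and `tools` objects from the cells above; with non-existent paths like "foo", the tool will simply report an error string):

```python
import json

# Look up the tool the model asked for and execute it with the
# JSON-decoded arguments from the function_call payload.
call = message.additional_kwargs["function_call"]
tool = {t.name: t for t in tools}[call["name"]]   # e.g. "move_file"
result = tool.run(json.loads(call["arguments"]))  # performs the move
print(result)
```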
@@ -1,6 +1,7 @@
 {
  "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "23234b50-e6c6-4c87-9f97-259c15f36894",
   "metadata": {
@@ -11,6 +12,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "29dd6333-307c-43df-b848-65001c01733b",
   "metadata": {},
@@ -28,6 +30,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "2b6d7dba-cd20-472a-ae05-f68675cc9ea4",
   "metadata": {},
@@ -36,6 +39,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "e4592215-6604-47e2-89ff-5db3af6d1e40",
   "metadata": {
@@ -50,6 +54,11 @@
    "        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any\n",
    "    ) -> Any:\n",
    "        \"\"\"Run when LLM starts running.\"\"\"\n",
    "    \n",
    "    def on_chat_model_start(\n",
    "        self, serialized: Dict[str, Any], messages: List[List[BaseMessage]], **kwargs: Any\n",
    "    ) -> Any:\n",
    "        \"\"\"Run when Chat Model starts running.\"\"\"\n",
    "\n",
    "    def on_llm_new_token(self, token: str, **kwargs: Any) -> Any:\n",
    "        \"\"\"Run on new LLM token. Only available when streaming is enabled.\"\"\"\n",
@@ -100,6 +109,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "cbccd7d1",
   "metadata": {},
@@ -108,8 +118,8 @@
    "\n",
    "The `callbacks` argument is available on most objects throughout the API (Chains, Models, Tools, Agents, etc.) in two different places:\n",
    "\n",
    "- **Constructor callbacks**: defined in the constructor, eg. `LLMChain(callbacks=[handler])`, which will be used for all calls made on that object, and will be scoped to that object only, eg. if you pass a handler to the `LLMChain` constructor, it will not be used by the Model attached to that chain.\n",
    "- **Request callbacks**: defined in the `call()`/`run()`/`apply()` methods used for issuing a request, eg. `chain.call(inputs, callbacks=[handler])`, which will be used for that specific request only, and all sub-requests that it contains (eg. a call to an LLMChain triggers a call to a Model, which uses the same handler passed in the `call()` method).\n",
    "- **Constructor callbacks**: defined in the constructor, eg. `LLMChain(callbacks=[handler], tags=['a-tag'])`, which will be used for all calls made on that object, and will be scoped to that object only, eg. if you pass a handler to the `LLMChain` constructor, it will not be used by the Model attached to that chain.\n",
    "- **Request callbacks**: defined in the `call()`/`run()`/`apply()` methods used for issuing a request, eg. `chain.call(inputs, callbacks=[handler], tags=['a-tag'])`, which will be used for that specific request only, and all sub-requests that it contains (eg. a call to an LLMChain triggers a call to a Model, which uses the same handler passed in the `call()` method).\n",
    "\n",
    "The `verbose` argument is available on most objects throughout the API (Chains, Models, Tools, Agents, etc.) as a constructor argument, eg. `LLMChain(verbose=True)`, and it is equivalent to passing a `ConsoleCallbackHandler` to the `callbacks` argument of that object and all child objects. This is useful for debugging, as it will log all events to the console.\n",
    "\n",
@@ -120,6 +130,18 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "46128fb4",
   "metadata": {},
   "source": [
    "## Tags\n",
    "\n",
    "You can add tags to your callbacks by passing a `tags` argument to the `call()`/`run()`/`apply()` methods. This is useful for filtering your logs, eg. if you want to log all requests made to a specific LLMChain, you can add a tag, and then filter your logs by that tag. You can pass tags to both constructor and request callbacks, see the examples above for details. These tags are then passed to the `tags` argument of the \"start\" callback methods, ie. `on_llm_start`, `on_chat_model_start`, `on_chain_start`, `on_tool_start`."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "d3bf3304-43fb-47ad-ae50-0637a17018a2",
   "metadata": {},
@@ -201,6 +223,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "389c8448-5283-49e3-8c04-dbe1522e202c",
   "metadata": {},
@@ -268,6 +291,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "bc9785fa-4f71-4797-91a3-4fe7e57d0429",
   "metadata": {
@@ -363,6 +387,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "d26dbb34-fcc3-401c-a115-39c7620d2d65",
   "metadata": {},
@@ -548,6 +573,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "32b29135-f852-4492-88ed-547275c72c53",
   "metadata": {},
@@ -556,6 +582,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "fbb606d6-2863-46c5-8347-9f0bdb3805bb",
   "metadata": {},
@@ -564,6 +591,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "f62cd10c-494c-47d6-aa98-6e926cb9c456",
   "metadata": {},
@@ -572,6 +600,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "d5a74b3f-3769-4a4f-99c7-b6a3b20a94e2",
   "metadata": {},
@@ -829,6 +858,7 @@
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "254fef1b-6b6e-4352-9cf4-363fba895ac7",
   "metadata": {},

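The new "Tags" cell added by this diff describes the feature in prose only. A minimal sketch of what it implies, using the constructor/request split the surrounding text defines (the `StdOutCallbackHandler`/`LLMChain` names come from the callbacks docs generally, not from this diff):

```python
from langchain import LLMChain, OpenAI, PromptTemplate
from langchain.callbacks import StdOutCallbackHandler

handler = StdOutCallbackHandler()
llm = OpenAI(temperature=0)
prompt = PromptTemplate.from_template("1 + {number} = ")

# Constructor-scoped callbacks/tags: apply to every call on this chain,
# but not to the model attached to it.
chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler], tags=["a-tag"])

# Request-scoped callbacks/tags: apply to this call and all of its
# sub-requests; the tags reach the on_*_start callback methods.
chain.run(number=2, callbacks=[handler], tags=["request-tag"])
```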
@@ -177,7 +177,7 @@
      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
      "RETURN a.name\u001b[0m\n",
      "Full Context:\n",
      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Tom Cruise'}, {'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}]\u001b[0m\n",
      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Tom Cruise'}]\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
@@ -185,7 +185,7 @@
    {
     "data": {
      "text/plain": [
      "'Tom Cruise, Val Kilmer, Anthony Edwards, and Meg Ryan played in Top Gun.'"
      "'Val Kilmer, Anthony Edwards, Meg Ryan, and Tom Cruise played in Top Gun.'"
      ]
     },
     "execution_count": 7,
@@ -197,10 +197,180 @@
    "chain.run(\"Who played in Top Gun?\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2d28c4df",
   "metadata": {},
   "source": [
    "## Limit the number of results\n",
    "You can limit the number of results from the Cypher QA Chain using the `top_k` parameter.\n",
    "The default is 10."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "df230946",
   "metadata": {},
   "outputs": [],
   "source": [
    "chain = GraphCypherQAChain.from_llm(\n",
    "    ChatOpenAI(temperature=0), graph=graph, verbose=True, top_k=2\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "3f1600ee",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
      "Generated Cypher:\n",
      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
      "RETURN a.name\u001b[0m\n",
      "Full Context:\n",
      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}]\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'Val Kilmer and Anthony Edwards played in Top Gun.'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chain.run(\"Who played in Top Gun?\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "88c16206",
   "metadata": {},
   "source": [
    "## Return intermediate results\n",
    "You can return intermediate steps from the Cypher QA Chain using the `return_intermediate_steps` parameter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "e412f36b",
   "metadata": {},
   "outputs": [],
   "source": [
    "chain = GraphCypherQAChain.from_llm(\n",
    "    ChatOpenAI(temperature=0), graph=graph, verbose=True, return_intermediate_steps=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "4f4699dc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
      "Generated Cypher:\n",
      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
      "RETURN a.name\u001b[0m\n",
      "Full Context:\n",
      "\u001b[32;1m\u001b[1;3m[{'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Tom Cruise'}]\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n",
      "Intermediate steps: [{'query': \"MATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\\nRETURN a.name\"}, {'context': [{'a.name': 'Val Kilmer'}, {'a.name': 'Anthony Edwards'}, {'a.name': 'Meg Ryan'}, {'a.name': 'Tom Cruise'}]}]\n",
      "Final answer: Val Kilmer, Anthony Edwards, Meg Ryan, and Tom Cruise played in Top Gun.\n"
     ]
    }
   ],
   "source": [
    "result = chain(\"Who played in Top Gun?\")\n",
    "print(f\"Intermediate steps: {result['intermediate_steps']}\")\n",
    "print(f\"Final answer: {result['result']}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d6e1b054",
   "metadata": {},
   "source": [
    "## Return direct results\n",
    "You can return direct results from the Cypher QA Chain using the `return_direct` parameter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "2d3acf10",
   "metadata": {},
   "outputs": [],
   "source": [
    "chain = GraphCypherQAChain.from_llm(\n",
    "    ChatOpenAI(temperature=0), graph=graph, verbose=True, return_direct=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "b0a9d143",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new GraphCypherQAChain chain...\u001b[0m\n",
      "Generated Cypher:\n",
      "\u001b[32;1m\u001b[1;3mMATCH (a:Actor)-[:ACTED_IN]->(m:Movie {name: 'Top Gun'})\n",
      "RETURN a.name\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[{'a.name': 'Val Kilmer'},\n",
       " {'a.name': 'Anthony Edwards'},\n",
       " {'a.name': 'Meg Ryan'},\n",
       " {'a.name': 'Tom Cruise'}]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chain.run(\"Who played in Top Gun?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4825316",
   "id": "74d0a36f",
   "metadata": {},
   "outputs": [],
   "source": []
@@ -222,7 +392,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1"
   "version": "3.8.8"
  }
 },
 "nbformat": 4,

270 docs/modules/chains/examples/graph_nebula_qa.ipynb (new file)
@@ -0,0 +1,270 @@
{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "c94240f5",
   "metadata": {},
   "source": [
    "# NebulaGraphQAChain\n",
    "\n",
    "This notebook shows how to use LLMs to provide a natural language interface to NebulaGraph database."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "dbc0ee68",
   "metadata": {},
   "source": [
    "You will need to have a running NebulaGraph cluster, for which you can run a containerized cluster by running the following script:\n",
    "\n",
    "```bash\n",
    "curl -fsSL nebula-up.siwei.io/install.sh | bash\n",
    "```\n",
    "\n",
    "Other options are:\n",
    "- Install as a [Docker Desktop Extension](https://www.docker.com/blog/distributed-cloud-native-graph-database-nebulagraph-docker-extension/). See [here](https://docs.nebula-graph.io/3.5.0/2.quick-start/1.quick-start-workflow/)\n",
    "- NebulaGraph Cloud Service. See [here](https://www.nebula-graph.io/cloud)\n",
    "- Deploy from package, source code, or via Kubernetes. See [here](https://docs.nebula-graph.io/)\n",
    "\n",
    "Once the cluster is running, we could create the SPACE and SCHEMA for the database."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c82f4141",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install ipython-ngql\n",
    "%load_ext ngql\n",
    "\n",
    "# connect ngql jupyter extension to nebulagraph\n",
    "%ngql --address 127.0.0.1 --port 9669 --user root --password nebula\n",
    "# create a new space\n",
    "%ngql CREATE SPACE IF NOT EXISTS langchain(partition_num=1, replica_factor=1, vid_type=fixed_string(128));\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eda0809a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wait for a few seconds for the space to be created.\n",
    "%ngql USE langchain;"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "119fe35c",
   "metadata": {},
   "source": [
    "Create the schema, for full dataset, refer [here](https://www.siwei.io/en/nebulagraph-etl-dbt/)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5aa796ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%ngql\n",
    "CREATE TAG IF NOT EXISTS movie(name string);\n",
    "CREATE TAG IF NOT EXISTS person(name string, birthdate string);\n",
    "CREATE EDGE IF NOT EXISTS acted_in();\n",
    "CREATE TAG INDEX IF NOT EXISTS person_index ON person(name(128));\n",
    "CREATE TAG INDEX IF NOT EXISTS movie_index ON movie(name(128));"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "66e4799a",
   "metadata": {},
   "source": [
    "Wait for schema creation to complete, then we can insert some data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d8eea530",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "UsageError: Cell magic `%%ngql` not found.\n"
     ]
    }
   ],
   "source": [
    "%%ngql\n",
    "INSERT VERTEX person(name, birthdate) VALUES \"Al Pacino\":(\"Al Pacino\", \"1940-04-25\");\n",
    "INSERT VERTEX movie(name) VALUES \"The Godfather II\":(\"The Godfather II\");\n",
    "INSERT VERTEX movie(name) VALUES \"The Godfather Coda: The Death of Michael Corleone\":(\"The Godfather Coda: The Death of Michael Corleone\");\n",
    "INSERT EDGE acted_in() VALUES \"Al Pacino\"->\"The Godfather II\":();\n",
    "INSERT EDGE acted_in() VALUES \"Al Pacino\"->\"The Godfather Coda: The Death of Michael Corleone\":();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "62812aad",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.chains import NebulaGraphQAChain\n",
    "from langchain.graphs import NebulaGraph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "0928915d",
   "metadata": {},
   "outputs": [],
   "source": [
    "graph = NebulaGraph(\n",
    "    space=\"langchain\",\n",
    "    username=\"root\",\n",
    "    password=\"nebula\",\n",
    "    address=\"127.0.0.1\",\n",
    "    port=9669,\n",
    "    session_pool_size=30,\n",
    ")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "58c1a8ea",
   "metadata": {},
   "source": [
    "## Refresh graph schema information\n",
    "\n",
    "If the schema of database changes, you can refresh the schema information needed to generate nGQL statements."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4e3de44f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# graph.refresh_schema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1fe76ccd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Node properties: [{'tag': 'movie', 'properties': [('name', 'string')]}, {'tag': 'person', 'properties': [('name', 'string'), ('birthdate', 'string')]}]\n",
      "Edge properties: [{'edge': 'acted_in', 'properties': []}]\n",
      "Relationships: ['(:person)-[:acted_in]->(:movie)']\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(graph.get_schema)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "68a3c677",
   "metadata": {},
   "source": [
    "## Querying the graph\n",
    "\n",
    "We can now use the graph cypher QA chain to ask question of the graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "7476ce98",
   "metadata": {},
   "outputs": [],
   "source": [
    "chain = NebulaGraphQAChain.from_llm(\n",
    "    ChatOpenAI(temperature=0), graph=graph, verbose=True\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "ef8ee27b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new NebulaGraphQAChain chain...\u001b[0m\n",
      "Generated nGQL:\n",
      "\u001b[32;1m\u001b[1;3mMATCH (p:`person`)-[:acted_in]->(m:`movie`) WHERE m.`movie`.`name` == 'The Godfather II'\n",
      "RETURN p.`person`.`name`\u001b[0m\n",
      "Full Context:\n",
      "\u001b[32;1m\u001b[1;3m{'p.person.name': ['Al Pacino']}\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'Al Pacino played in The Godfather II.'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chain.run(\"Who played in The Godfather II?\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
@@ -350,7 +350,7 @@
    "\"\"\"\n",
    "\n",
    "\n",
    "reduce_template_string = \"\"\"Give the following following python fuctions name and their descritpion, answer the following question\n",
    "reduce_template_string = \"\"\"Give the following python fuctions name and their descritpion, answer the following question\n",
    "{code_description}\n",
    "Question: {question}\n",
    "Answer:\n",

@@ -30,6 +30,7 @@ For detailed instructions on how to get set up with Unstructured, see installati
   :maxdepth: 1
   :glob:

   ./document_loaders/examples/airtable.ipynb
   ./document_loaders/examples/audio.ipynb
   ./document_loaders/examples/conll-u.ipynb
   ./document_loaders/examples/copypaste.ipynb
@@ -37,6 +38,7 @@ For detailed instructions on how to get set up with Unstructured, see installati
   ./document_loaders/examples/email.ipynb
   ./document_loaders/examples/epub.ipynb
   ./document_loaders/examples/evernote.ipynb
   ./document_loaders/examples/excel.ipynb
   ./document_loaders/examples/facebook_chat.ipynb
   ./document_loaders/examples/file_directory.ipynb
   ./document_loaders/examples/html.ipynb
@@ -115,6 +117,7 @@ We need access tokens and sometime other parameters to get access to these datas
   ./document_loaders/examples/discord_loader.ipynb
   ./document_loaders/examples/docugami.ipynb
   ./document_loaders/examples/duckdb.ipynb
   ./document_loaders/examples/fauna.ipynb
   ./document_loaders/examples/figma.ipynb
   ./document_loaders/examples/gitbook.ipynb
   ./document_loaders/examples/git.ipynb
@@ -136,6 +139,7 @@ We need access tokens and sometime other parameters to get access to these datas
   ./document_loaders/examples/reddit.ipynb
   ./document_loaders/examples/roam.ipynb
   ./document_loaders/examples/slack.ipynb
   ./document_loaders/examples/snowflake.ipynb
   ./document_loaders/examples/spreedly.ipynb
   ./document_loaders/examples/stripe.ipynb
   ./document_loaders/examples/tomarkdown.ipynb

142
docs/modules/indexes/document_loaders/examples/airtable.ipynb
Normal file
@@ -0,0 +1,142 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7ae421e6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Airtable"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "98aea00d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install pyairtable"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "592483eb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import AirtableLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "637e1205",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* Get your API key [here](https://support.airtable.com/docs/creating-and-using-api-keys-and-access-tokens).\n",
|
||||
"* Get ID of your base [here](https://airtable.com/developers/web/api/introduction).\n",
|
||||
"* Get your table ID from the table url as shown [here](https://www.highviewapps.com/kb/where-can-i-find-the-airtable-base-id-and-table-id/#:~:text=Both%20the%20Airtable%20Base%20ID,URL%20that%20begins%20with%20tbl)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c12a7aff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"api_key=\"xxx\"\n",
|
||||
"base_id=\"xxx\"\n",
|
||||
"table_id=\"xxx\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "ccddd5a6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = AirtableLoader(api_key,table_id,base_id)\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae76c25c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Returns each table row as `dict`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "7abec7ce",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"3"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "403c95da",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'id': 'recF3GbGZCuh9sXIQ',\n",
|
||||
" 'createdTime': '2023-06-09T04:47:21.000Z',\n",
|
||||
" 'fields': {'Priority': 'High',\n",
|
||||
" 'Status': 'In progress',\n",
|
||||
" 'Name': 'Document Splitters'}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"eval(docs[0].page_content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
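A side note on the `eval(docs[0].page_content)` call above: `eval` executes arbitrary Python, which is risky on loader output you don't control. A safer sketch, assuming the page content is the `repr` of a `dict` as shown in the output above, is `ast.literal_eval`:

```python
import ast

# Parse the record without executing arbitrary code;
# literal_eval only accepts Python literals (dicts, lists, strings, numbers).
record = ast.literal_eval(docs[0].page_content)
print(record["fields"]["Name"])
```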
|
||||
@@ -29,7 +29,6 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
@@ -45,7 +44,6 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
@@ -76,7 +74,6 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
@@ -96,7 +93,6 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
@@ -152,6 +148,211 @@
|
||||
"source": [
|
||||
"print(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## `UnstructuredCSVLoader`\n",
|
||||
"\n",
|
||||
"You can also load the table using the `UnstructuredCSVLoader`. One advantage of using `UnstructuredCSVLoader` is that if you use it in `\"elements\"` mode, an HTML representation of the table will be available in the metadata."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.csv_loader import UnstructuredCSVLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = UnstructuredCSVLoader(file_path='example_data/mlb_teams_2012.csv', mode=\"elements\")\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <td>Nationals</td>\n",
|
||||
" <td>81.34</td>\n",
|
||||
" <td>98</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Reds</td>\n",
|
||||
" <td>82.20</td>\n",
|
||||
" <td>97</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Yankees</td>\n",
|
||||
" <td>197.96</td>\n",
|
||||
" <td>95</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Giants</td>\n",
|
||||
" <td>117.62</td>\n",
|
||||
" <td>94</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Braves</td>\n",
|
||||
" <td>83.31</td>\n",
|
||||
" <td>94</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Athletics</td>\n",
|
||||
" <td>55.37</td>\n",
|
||||
" <td>94</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Rangers</td>\n",
|
||||
" <td>120.51</td>\n",
|
||||
" <td>93</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Orioles</td>\n",
|
||||
" <td>81.43</td>\n",
|
||||
" <td>93</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Rays</td>\n",
|
||||
" <td>64.17</td>\n",
|
||||
" <td>90</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Angels</td>\n",
|
||||
" <td>154.49</td>\n",
|
||||
" <td>89</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Tigers</td>\n",
|
||||
" <td>132.30</td>\n",
|
||||
" <td>88</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Cardinals</td>\n",
|
||||
" <td>110.30</td>\n",
|
||||
" <td>88</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Dodgers</td>\n",
|
||||
" <td>95.14</td>\n",
|
||||
" <td>86</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>White Sox</td>\n",
|
||||
" <td>96.92</td>\n",
|
||||
" <td>85</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Brewers</td>\n",
|
||||
" <td>97.65</td>\n",
|
||||
" <td>83</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Phillies</td>\n",
|
||||
" <td>174.54</td>\n",
|
||||
" <td>81</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Diamondbacks</td>\n",
|
||||
" <td>74.28</td>\n",
|
||||
" <td>81</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Pirates</td>\n",
|
||||
" <td>63.43</td>\n",
|
||||
" <td>79</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Padres</td>\n",
|
||||
" <td>55.24</td>\n",
|
||||
" <td>76</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Mariners</td>\n",
|
||||
" <td>81.97</td>\n",
|
||||
" <td>75</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Mets</td>\n",
|
||||
" <td>93.35</td>\n",
|
||||
" <td>74</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Blue Jays</td>\n",
|
||||
" <td>75.48</td>\n",
|
||||
" <td>73</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Royals</td>\n",
|
||||
" <td>60.91</td>\n",
|
||||
" <td>72</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Marlins</td>\n",
|
||||
" <td>118.07</td>\n",
|
||||
" <td>69</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Red Sox</td>\n",
|
||||
" <td>173.18</td>\n",
|
||||
" <td>69</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Indians</td>\n",
|
||||
" <td>78.43</td>\n",
|
||||
" <td>68</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Twins</td>\n",
|
||||
" <td>94.08</td>\n",
|
||||
" <td>66</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Rockies</td>\n",
|
||||
" <td>78.06</td>\n",
|
||||
" <td>64</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Cubs</td>\n",
|
||||
" <td>88.19</td>\n",
|
||||
" <td>61</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <td>Astros</td>\n",
|
||||
" <td>60.65</td>\n",
|
||||
" <td>55</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata[\"text_as_html\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -170,7 +371,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.8.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
167
docs/modules/indexes/document_loaders/examples/embaas.ipynb
Normal file
@@ -0,0 +1,167 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# Embaas\n",
|
||||
"[embaas](https://embaas.io) is a fully managed NLP API service that offers features like embedding generation, document text extraction, document to embeddings and more. You can choose a [variety of pre-trained models](https://embaas.io/docs/models/embeddings).\n",
|
||||
"\n",
|
||||
"### Prerequisites\n",
|
||||
"Create a free embaas account at [https://embaas.io/register](https://embaas.io/register) and generate an [API key](https://embaas.io/dashboard/api-keys)\n",
|
||||
"\n",
|
||||
"### Document Text Extraction API\n",
|
||||
"The document text extraction API allows you to extract the text from a given document. The API supports a variety of document formats, including PDF, mp3, mp4 and more. For a full list of supported formats, check out the API docs (link below)."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set API key\n",
|
||||
"embaas_api_key = \"YOUR_API_KEY\"\n",
|
||||
"# or set environment variable\n",
|
||||
"os.environ[\"EMBAAS_API_KEY\"] = \"YOUR_API_KEY\""
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"#### Using a blob (bytes)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.embaas import EmbaasBlobLoader\n",
|
||||
"from langchain.document_loaders.blob_loaders import Blob"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"blob_loader = EmbaasBlobLoader()\n",
|
||||
"blob = Blob.from_path(\"example.pdf\")\n",
|
||||
"documents = blob_loader.load(blob)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# You can also directly create embeddings with your preferred embeddings model\n",
|
||||
"blob_loader = EmbaasBlobLoader(params={\"model\": \"e5-large-v2\", \"should_embed\": True})\n",
|
||||
"blob = Blob.from_path(\"example.pdf\")\n",
|
||||
"documents = blob_loader.load(blob)\n",
|
||||
"\n",
|
||||
"print(documents[0][\"metadata\"][\"embedding\"])"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"start_time": "2023-06-12T22:19:48.366886Z",
|
||||
"end_time": "2023-06-12T22:19:48.380467Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"#### Using a file"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.embaas import EmbaasLoader"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"file_loader = EmbaasLoader(file_path=\"example.pdf\")\n",
|
||||
"documents = file_loader.load()"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Disable automatic text splitting\n",
|
||||
"file_loader = EmbaasLoader(file_path=\"example.mp3\", params={\"should_chunk\": False})\n",
|
||||
"documents = file_loader.load()"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"start_time": "2023-06-12T22:24:31.880857Z",
|
||||
"end_time": "2023-06-12T22:24:31.894665Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"For more detailed information about the embaas document text extraction API, please refer to [the official embaas API documentation](https://embaas.io/api-reference)."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
@@ -9,7 +9,7 @@
|
||||
"\n",
|
||||
">[EPUB](https://en.wikipedia.org/wiki/EPUB) is an e-book file format that uses the \".epub\" file extension. The term is short for electronic publication and is sometimes styled ePub. `EPUB` is supported by many e-readers, and compatible software is available for most smartphones, tablets, and computers.\n",
|
||||
"\n",
|
||||
"This covers how to load `.epub` documents into the Document format that we can use downstream. You'll need to install the [`pandocs`](https://pandoc.org/installing.html) package for this loader to work."
|
||||
"This covers how to load `.epub` documents into the Document format that we can use downstream. You'll need to install the [`pandoc`](https://pandoc.org/installing.html) package for this loader to work."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -19,7 +19,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install pandocs"
|
||||
"#!pip install pandoc"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<factbook>
|
||||
<country>
|
||||
<name>United States</name>
|
||||
<capital>Washington, DC</capital>
|
||||
<leader>Joe Biden</leader>
|
||||
<sport>Baseball</sport>
|
||||
</country>
|
||||
<country>
|
||||
<name>Canada</name>
|
||||
<capital>Ottawa</capital>
|
||||
<leader>Justin Trudeau</leader>
|
||||
<sport>Hockey</sport>
|
||||
</country>
|
||||
<country>
|
||||
<name>France</name>
|
||||
<capital>Paris</capital>
|
||||
<leader>Emmanuel Macron</leader>
|
||||
<sport>Soccer</sport>
|
||||
</country>
|
||||
<country>
|
||||
<name>Trinidad & Tobago</name>
|
||||
<capital>Port of Spain</capital>
|
||||
<leader>Keith Rowley</leader>
|
||||
<sport>Track & Field</sport>
|
||||
</country>
|
||||
</factbook>
|
||||
84
docs/modules/indexes/document_loaders/examples/fauna.ipynb
Normal file
@@ -0,0 +1,84 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Fauna\n",
|
||||
"\n",
|
||||
">[Fauna](https://fauna.com/) is a Document Database.\n",
|
||||
"\n",
|
||||
"Query `Fauna` documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install fauna"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query data example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.fauna import FaunaLoader\n",
|
||||
"\n",
|
||||
"secret = \"<enter-valid-fauna-secret>\"\n",
|
||||
"query = \"Item.all()\" # Fauna query. Assumes that the collection is called \"Item\"\n",
|
||||
"field = \"text\" # The field that contains the page content. Assumes that the field is called \"text\"\n",
|
||||
"\n",
|
||||
"loader = FaunaLoader(query, field, secret)\n",
|
||||
"docs = loader.lazy_load()\n",
|
||||
"\n",
|
||||
"for value in docs:\n",
|
||||
" print(value)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Query with Pagination\n",
|
||||
"You get a `after` value if there are more data. You can get values after the curcor by passing in the `after` string in query. \n",
|
||||
"\n",
|
||||
"To learn more following [this link](https://fqlx-beta--fauna-docs.netlify.app/fqlx/beta/reference/schema_entities/set/static-paginate)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"\"\"\n",
|
||||
"Item.paginate(\"hs+DzoPOg ... aY1hOohozrV7A\")\n",
|
||||
"Item.all()\n",
|
||||
"\"\"\"\n",
|
||||
"loader = FaunaLoader(query, field, secret)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -22,6 +22,16 @@
|
||||
"Load .docx using `Docx2txt` into a document."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "7b80ea891",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install docx2txt "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!wget -r -A.html -P rtdocs https://langchain.readthedocs.io/en/latest/"
|
||||
"#!wget -r -A.html -P rtdocs https://python.langchain.com/en/latest/"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -146,6 +146,73 @@
|
||||
"documents[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Add custom scraping rules\n",
|
||||
"\n",
|
||||
"The `SitemapLoader` uses `beautifulsoup4` for the scraping process, and it scrapes every element on the page by default. The `SitemapLoader` constructor accepts a custom scraping function. This feature can be helpful to tailor the scraping process to your specific needs; for example, you might want to avoid scraping headers or navigation elements.\n",
|
||||
"\n",
|
||||
" The following example shows how to develop and use a custom function to avoid navigation and header elements."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Import the `beautifulsoup4` library and define the custom function."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pip install beautifulsoup4"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"\n",
|
||||
"def remove_nav_and_header_elements(content: BeautifulSoup) -> str:\n",
|
||||
" # Find all 'nav' and 'header' elements in the BeautifulSoup object\n",
|
||||
" nav_elements = content.find_all('nav')\n",
|
||||
" header_elements = content.find_all('header')\n",
|
||||
"\n",
|
||||
" # Remove each 'nav' and 'header' element from the BeautifulSoup object\n",
|
||||
" for element in nav_elements + header_elements:\n",
|
||||
" element.decompose()\n",
|
||||
"\n",
|
||||
" return str(content.get_text())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Add your custom function to the `SitemapLoader` object."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = SitemapLoader(\n",
|
||||
" \"https://langchain.readthedocs.io/sitemap.xml\",\n",
|
||||
" filter_urls=[\"https://python.langchain.com/en/latest/\"],\n",
|
||||
" parsing_function=remove_nav_and_header_elements\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Snowflake\n",
|
||||
"\n",
|
||||
"This notebooks goes over how to load documents from Snowflake"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install snowflake-connector-python"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import settings as s\n",
|
||||
"from langchain.document_loaders import SnowflakeLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"QUERY = \"select text, survey_id from CLOUD_DATA_SOLUTIONS.HAPPY_OR_NOT.OPEN_FEEDBACK limit 10\"\n",
|
||||
"snowflake_loader = SnowflakeLoader(\n",
|
||||
" query=QUERY,\n",
|
||||
" user=s.SNOWFLAKE_USER,\n",
|
||||
" password=s.SNOWFLAKE_PASS,\n",
|
||||
" account=s.SNOWFLAKE_ACCOUNT,\n",
|
||||
" warehouse=s.SNOWFLAKE_WAREHOUSE,\n",
|
||||
" role=s.SNOWFLAKE_ROLE,\n",
|
||||
" database=s.SNOWFLAKE_DATABASE,\n",
|
||||
" schema=s.SNOWFLAKE_SCHEMA\n",
|
||||
")\n",
|
||||
"snowflake_documents = snowflake_loader.load()\n",
|
||||
"print(snowflake_documents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from snowflakeLoader import SnowflakeLoader\n",
|
||||
"import settings as s\n",
|
||||
"QUERY = \"select text, survey_id as source from CLOUD_DATA_SOLUTIONS.HAPPY_OR_NOT.OPEN_FEEDBACK limit 10\"\n",
|
||||
"snowflake_loader = SnowflakeLoader(\n",
|
||||
" query=QUERY,\n",
|
||||
" user=s.SNOWFLAKE_USER,\n",
|
||||
" password=s.SNOWFLAKE_PASS,\n",
|
||||
" account=s.SNOWFLAKE_ACCOUNT,\n",
|
||||
" warehouse=s.SNOWFLAKE_WAREHOUSE,\n",
|
||||
" role=s.SNOWFLAKE_ROLE,\n",
|
||||
" database=s.SNOWFLAKE_DATABASE,\n",
|
||||
" schema=s.SNOWFLAKE_SCHEMA,\n",
|
||||
" metadata_columns=['source']\n",
|
||||
")\n",
|
||||
"snowflake_documents = snowflake_loader.load()\n",
|
||||
"print(snowflake_documents)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
78
docs/modules/indexes/document_loaders/examples/xml.ipynb
Normal file
@@ -0,0 +1,78 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "22a849cc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# XML\n",
|
||||
"\n",
|
||||
"The `UnstructuredXMLLoader` is used to load `XML` files. The loader works with `.xml` files. The page content will be the text extracted from the XML tags."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e6616e3a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import UnstructuredXMLLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a654e4d9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='United States\\n\\nWashington, DC\\n\\nJoe Biden\\n\\nBaseball\\n\\nCanada\\n\\nOttawa\\n\\nJustin Trudeau\\n\\nHockey\\n\\nFrance\\n\\nParis\\n\\nEmmanuel Macron\\n\\nSoccer\\n\\nTrinidad & Tobado\\n\\nPort of Spain\\n\\nKeith Rowley\\n\\nTrack & Field', metadata={'source': 'example_data/factbook.xml'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = UnstructuredXMLLoader(\n",
|
||||
" \"example_data/factbook.xml\",\n",
|
||||
")\n",
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a54342bb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,296 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e48afb8d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Loading documents from a YouTube url\n",
|
||||
"\n",
|
||||
"Building chat or QA applications on YouTube videos is a topic of high interest.\n",
|
||||
"\n",
|
||||
"Below we show how to easily go from a YouTube url to text to chat!\n",
|
||||
"\n",
|
||||
"We wil use the `OpenAIWhisperParser`, which will use the OpenAI Whisper API to transcribe audio to text.\n",
|
||||
"\n",
|
||||
"Note: You will need to have an `OPENAI_API_KEY` supplied."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "5f34e934",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.generic import GenericLoader\n",
|
||||
"from langchain.document_loaders.parsers import OpenAIWhisperParser\n",
|
||||
"from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "85fc12bd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We will use `yt_dlp` to download audio for YouTube urls.\n",
|
||||
"\n",
|
||||
"We will use `pydub` to split downloaded audio files (such that we adhere to Whisper API's 25MB file size limit)."
|
||||
]
|
||||
},
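For intuition, this is roughly what size-based splitting looks like with `pydub` on its own. A hedged sketch, not the parser's actual implementation; the input file name, the ten-minute segment length, and the `chunk_*.mp3` output names are illustrative assumptions:

```python
from pydub import AudioSegment

audio = AudioSegment.from_file("lecture.m4a")  # illustrative file name

# AudioSegment slicing is in milliseconds; ten-minute pieces keep each
# exported file comfortably under the Whisper API's 25MB limit.
ten_minutes = 10 * 60 * 1000
for i, start in enumerate(range(0, len(audio), ten_minutes)):
    audio[start:start + ten_minutes].export(f"chunk_{i}.mp3", format="mp3")
```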
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fb5a6606",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install yt_dlp\n",
|
||||
"! pip install pydub"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b0e119f4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### YouTube url to text\n",
|
||||
"\n",
|
||||
"Use `YoutubeAudioLoader` to fetch / download the audio files.\n",
|
||||
"\n",
|
||||
"Then, ues `OpenAIWhisperParser()` to transcribe them to text.\n",
|
||||
"\n",
|
||||
"Let's take the first lecture of Andrej Karpathy's YouTube course as an example! "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "23e1e134",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[youtube] Extracting URL: https://youtu.be/kCc8FmEb1nY\n",
|
||||
"[youtube] kCc8FmEb1nY: Downloading webpage\n",
|
||||
"[youtube] kCc8FmEb1nY: Downloading android player API JSON\n",
|
||||
"[info] kCc8FmEb1nY: Downloading 1 format(s): 140\n",
|
||||
"[dashsegments] Total fragments: 11\n",
|
||||
"[download] Destination: /Users/31treehaus/Desktop/AI/langchain-fork/docs/modules/indexes/document_loaders/examples/Let's build GPT: from scratch, in code, spelled out..m4a\n",
|
||||
"[download] 100% of 107.73MiB in 00:00:18 at 5.92MiB/s \n",
|
||||
"[FixupM4a] Correcting container of \"/Users/31treehaus/Desktop/AI/langchain-fork/docs/modules/indexes/document_loaders/examples/Let's build GPT: from scratch, in code, spelled out..m4a\"\n",
|
||||
"[ExtractAudio] Not converting audio /Users/31treehaus/Desktop/AI/langchain-fork/docs/modules/indexes/document_loaders/examples/Let's build GPT: from scratch, in code, spelled out..m4a; file is already in target format m4a\n",
|
||||
"[youtube] Extracting URL: https://youtu.be/VMj-3S1tku0\n",
|
||||
"[youtube] VMj-3S1tku0: Downloading webpage\n",
|
||||
"[youtube] VMj-3S1tku0: Downloading android player API JSON\n",
|
||||
"[info] VMj-3S1tku0: Downloading 1 format(s): 140\n",
|
||||
"[download] /Users/31treehaus/Desktop/AI/langchain-fork/docs/modules/indexes/document_loaders/examples/The spelled-out intro to neural networks and backpropagation: building micrograd.m4a has already been downloaded\n",
|
||||
"[download] 100% of 134.98MiB\n",
|
||||
"[ExtractAudio] Not converting audio /Users/31treehaus/Desktop/AI/langchain-fork/docs/modules/indexes/document_loaders/examples/The spelled-out intro to neural networks and backpropagation: building micrograd.m4a; file is already in target format m4a\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Two Karpathy lecture videos\n",
|
||||
"urls = [\"https://youtu.be/kCc8FmEb1nY\",\n",
|
||||
" \"https://youtu.be/VMj-3S1tku0\"]\n",
|
||||
"\n",
|
||||
"# Directory to save audio files \n",
|
||||
"save_dir = \"~/Downloads/YouTube\"\n",
|
||||
"\n",
|
||||
"# Transcribe the videos to text\n",
|
||||
"loader = GenericLoader(YoutubeAudioLoader(urls,save_dir),OpenAIWhisperParser())\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "72a94fd8",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Hello, my name is Andrej and I've been training deep neural networks for a bit more than a decade. And in this lecture I'd like to show you what neural network training looks like under the hood. So in particular we are going to start with a blank Jupyter notebook and by the end of this lecture we will define and train a neural net and you'll get to see everything that goes on under the hood and exactly sort of how that works on an intuitive level. Now specifically what I would like to do is I w\""
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Returns a list of Documents, which can be easily viewed or parsed\n",
|
||||
"docs[0].page_content[0:500]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "93be6b49",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Building a chat app from YouTube video\n",
|
||||
"\n",
|
||||
"Given `Documents`, we can easily enable chat / question+answering."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "1823f042",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import RetrievalQA\n",
|
||||
"from langchain.vectorstores import FAISS\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import RecursiveCharacterTextSplitter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "7257cda1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Combine doc\n",
|
||||
"combined_docs = [doc.page_content for doc in docs]\n",
|
||||
"text = \" \".join(combined_docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "147c0c55",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Split them\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1500, chunk_overlap = 150)\n",
|
||||
"splits = text_splitter.split_text(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "f3556703",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Build an index\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"vectordb = FAISS.from_texts(splits,embeddings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "beaa99db",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Build a QA chain\n",
|
||||
"qa_chain = RetrievalQA.from_chain_type(llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0),\n",
|
||||
" chain_type=\"stuff\",\n",
|
||||
" retriever=vectordb.as_retriever())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "f2239a62",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"We need to zero out the gradient before backprop at each step because the backward pass accumulates gradients in the grad attribute of each parameter. If we don't reset the grad to zero before each backward pass, the gradients will accumulate and add up, leading to incorrect updates and slower convergence. By resetting the grad to zero before each backward pass, we ensure that the gradients are calculated correctly and that the optimization process works as intended.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Ask a question!\n",
|
||||
"query = \"Why do we need to zero out the gradient before backprop at each step?\"\n",
|
||||
"qa_chain.run(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "a8d01098",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'In the context of transformers, an encoder is a component that reads in a sequence of input tokens and generates a sequence of hidden representations. On the other hand, a decoder is a component that takes in a sequence of hidden representations and generates a sequence of output tokens. The main difference between the two is that the encoder is used to encode the input sequence into a fixed-length representation, while the decoder is used to decode the fixed-length representation into an output sequence. In machine translation, for example, the encoder reads in the source language sentence and generates a fixed-length representation, which is then used by the decoder to generate the target language sentence.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What is the difference between an encoder and decoder?\"\n",
|
||||
"qa_chain.run(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "fe1e77dd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'For any token, x is the input vector that contains the private information of that token, k and q are the key and query vectors respectively, which are produced by forwarding linear modules on x, and v is the vector that is calculated by propagating the same linear module on x again. The key vector represents what the token contains, and the query vector represents what the token is looking for. The vector v is the information that the token will communicate to other tokens if it finds them interesting, and it gets aggregated for the purposes of the self-attention mechanism.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"For any token, what are x, k, v, and q?\"\n",
|
||||
"qa_chain.run(query)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,90 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AWS Kendra\n",
|
||||
"\n",
|
||||
"> AWS Kendra is an intelligent search service provided by Amazon Web Services (AWS). It utilizes advanced natural language processing (NLP) and machine learning algorithms to enable powerful search capabilities across various data sources within an organization. Kendra is designed to help users find the information they need quickly and accurately, improving productivity and decision-making.\n",
|
||||
"\n",
|
||||
"> With Kendra, users can search across a wide range of content types, including documents, FAQs, knowledge bases, manuals, and websites. It supports multiple languages and can understand complex queries, synonyms, and contextual meanings to provide highly relevant search results."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using the AWS Kendra Index Retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install boto3"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import boto3\n",
|
||||
"from langchain.retrievers import AwsKendraIndexRetriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create New Retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"kclient = boto3.client('kendra', region_name=\"us-east-1\")\n",
|
||||
"\n",
|
||||
"retriever = AwsKendraIndexRetriever(\n",
|
||||
" kclient=kclient,\n",
|
||||
" kendraindex=\"kendraindex\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now you can use retrieved documents from AWS Kendra Index"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever.get_relevant_documents(\"what is langchain\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
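To go from retrieval to question answering, the retriever can be dropped into a standard chain. A minimal sketch, assuming the `retriever` defined above and an `OPENAI_API_KEY` in the environment:

```python
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# "stuff" places the retrieved Kendra passages directly into the prompt as context.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(temperature=0),
    chain_type="stuff",
    retriever=retriever,
)
print(qa.run("what is langchain"))
```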
|
||||
121
docs/modules/indexes/retrievers/examples/merger_retriever.ipynb
Normal file
@@ -0,0 +1,121 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "fc0db1bc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# LOTR (Merger Retriever)\n",
|
||||
"\n",
|
||||
"Lord of the Retrievers, also known as MergerRetriever, takes a list of retrievers as input and merges the results of their get_relevant_documents() methods into a single list. The merged results will be a list of documents that are relevant to the query and that have been ranked by the different retrievers.\n",
|
||||
"\n",
|
||||
"The MergerRetriever class can be used to improve the accuracy of document retrieval in a number of ways. First, it can combine the results of multiple retrievers, which can help to reduce the risk of bias in the results. Second, it can rank the results of the different retrievers, which can help to ensure that the most relevant documents are returned first."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9fbcc58f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import chromadb\n",
|
||||
"from langchain.retrievers.merger_retriever import MergerRetriever\n",
|
||||
"from langchain.vectorstores import Chroma\n",
|
||||
"from langchain.embeddings import HuggingFaceEmbeddings\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.document_transformers import EmbeddingsRedundantFilter\n",
|
||||
"from langchain.retrievers.document_compressors import DocumentCompressorPipeline\n",
|
||||
"from langchain.retrievers import ContextualCompressionRetriever\n",
|
||||
"\n",
|
||||
"# Get 3 diff embeddings.\n",
|
||||
"all_mini = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n",
|
||||
"multi_qa_mini = HuggingFaceEmbeddings(model_name=\"multi-qa-MiniLM-L6-dot-v1\")\n",
|
||||
"filter_embeddings = OpenAIEmbeddings()\n",
|
||||
"\n",
|
||||
"ABS_PATH = os.path.dirname(os.path.abspath(__file__))\n",
|
||||
"DB_DIR = os.path.join(ABS_PATH, \"db\")\n",
|
||||
"\n",
|
||||
"# Instantiate 2 diff cromadb indexs, each one with a diff embedding.\n",
|
||||
"client_settings = chromadb.config.Settings(\n",
|
||||
" chroma_db_impl=\"duckdb+parquet\",\n",
|
||||
" persist_directory=DB_DIR,\n",
|
||||
" anonymized_telemetry=False,\n",
|
||||
")\n",
|
||||
"db_all = Chroma(\n",
|
||||
" collection_name=\"project_store_all\",\n",
|
||||
" persist_directory=DB_DIR,\n",
|
||||
" client_settings=client_settings,\n",
|
||||
" embedding_function=all_mini,\n",
|
||||
")\n",
|
||||
"db_multi_qa = Chroma(\n",
|
||||
" collection_name=\"project_store_multi\",\n",
|
||||
" persist_directory=DB_DIR,\n",
|
||||
" client_settings=client_settings,\n",
|
||||
" embedding_function=multi_qa_mini,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Define 2 diff retrievers with 2 diff embeddings and diff search type.\n",
|
||||
"retriever_all = db_all.as_retriever(\n",
|
||||
" search_type=\"similarity\", search_kwargs={\"k\": 5, \"include_metadata\": True}\n",
|
||||
")\n",
|
||||
"retriever_multi_qa = db_multi_qa.as_retriever(\n",
|
||||
" search_type=\"mmr\", search_kwargs={\"k\": 5, \"include_metadata\": True}\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# The Lord of the Retrievers will hold the ouput of boths retrievers and can be used as any other \n",
|
||||
"# retriever on different types of chains.\n",
|
||||
"lotr = MergerRetriever(retrievers=[retriever_all, retriever_multi_qa])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "c152339d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Remove redundant results from the merged retrievers."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "039faea6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"# We can remove redundant results from both retrievers using yet another embedding. \n",
|
||||
"# Using multiples embeddings in diff steps could help reduce biases.\n",
|
||||
"filter = EmbeddingsRedundantFilter(embeddings=filter_embeddings)\n",
|
||||
"pipeline = DocumentCompressorPipeline(transformers=[filter])\n",
|
||||
"compression_retriever = ContextualCompressionRetriever(\n",
|
||||
" base_compressor=pipeline, base_retriever=lotr\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
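Once assembled, the compression retriever behaves like any other retriever. A short usage sketch, assuming the objects defined above and indexes that already contain documents; the query string is illustrative:

```python
# Returns the merged, de-duplicated results from both underlying retrievers.
docs = compression_retriever.get_relevant_documents("What is in the project store?")
for doc in docs:
    print(doc.metadata, doc.page_content[:80])
```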
|
||||
@@ -99,13 +99,14 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "2d958271",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity Score Threshold Retrieval\n",
|
||||
"\n",
|
||||
"You can also a retrieval method that sets a similarity score threshold and only returns documents with a score above that threshold"
|
||||
"You can also use a retrieval method that sets a similarity score threshold and only returns documents with a score above that threshold"
|
||||
]
|
||||
},
|
||||
{
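A hedged sketch of what that retrieval method looks like, assuming a vector store `db` built as elsewhere in the notebook; the `0.5` threshold and the query are illustrative assumptions, not values from the notebook:

```python
# Only documents whose similarity score clears the threshold are returned.
retriever = db.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.5},
)
docs = retriever.get_relevant_documents("what did the president say about the economy")
```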
|
||||
|
||||
@@ -1,413 +1,457 @@
|
||||
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# CodeTextSplitter\n",
"\n",
"CodeTextSplitter allows you to split your code with multiple language support. Import enum `Language` and specify the language. "
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from langchain.text_splitter import (\n",
" RecursiveCharacterTextSplitter,\n",
" Language,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['cpp',\n",
" 'go',\n",
" 'java',\n",
" 'js',\n",
" 'php',\n",
" 'proto',\n",
" 'python',\n",
" 'rst',\n",
" 'ruby',\n",
" 'rust',\n",
" 'scala',\n",
" 'swift',\n",
" 'markdown',\n",
" 'latex',\n",
" 'html',\n",
" 'sol']"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Full list of supported languages\n",
"[e.value for e in Language]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['\\nclass ', '\\ndef ', '\\n\\tdef ', '\\n\\n', '\\n', ' ', '']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# You can also see the separators used for a given language\n",
"RecursiveCharacterTextSplitter.get_separators_for_language(Language.PYTHON)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Python\n",
"\n",
"Here's an example using the PythonTextSplitter"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='def hello_world():\\n print(\"Hello, World!\")', metadata={}),\n",
" Document(page_content='# Call the function\\nhello_world()', metadata={})]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"PYTHON_CODE = \"\"\"\n",
"def hello_world():\n",
" print(\"Hello, World!\")\n",
"\n",
"# Call the function\n",
"hello_world()\n",
"\"\"\"\n",
"python_splitter = RecursiveCharacterTextSplitter.from_language(\n",
" language=Language.PYTHON, chunk_size=50, chunk_overlap=0\n",
")\n",
"python_docs = python_splitter.create_documents([PYTHON_CODE])\n",
"python_docs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## JS\n",
"Here's an example using the JS text splitter"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='function helloWorld() {\\n console.log(\"Hello, World!\");\\n}', metadata={}),\n",
" Document(page_content='// Call the function\\nhelloWorld();', metadata={})]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"JS_CODE = \"\"\"\n",
"function helloWorld() {\n",
" console.log(\"Hello, World!\");\n",
"}\n",
"\n",
"// Call the function\n",
"helloWorld();\n",
"\"\"\"\n",
"\n",
"js_splitter = RecursiveCharacterTextSplitter.from_language(\n",
" language=Language.JS, chunk_size=60, chunk_overlap=0\n",
")\n",
"js_docs = js_splitter.create_documents([JS_CODE])\n",
"js_docs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Markdown\n",
"\n",
"Here's an example using the Markdown text splitter."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"markdown_text = \"\"\"\n",
"# 🦜️🔗 LangChain\n",
"\n",
"⚡ Building applications with LLMs through composability ⚡\n",
"\n",
"## Quick Install\n",
"\n",
"```bash\n",
"# Hopefully this code block isn't split\n",
"pip install langchain\n",
"```\n",
"\n",
"As an open source project in a rapidly developing field, we are extremely open to contributions.\n",
"\"\"\"\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='# 🦜️🔗 LangChain', metadata={}),\n",
" Document(page_content='⚡ Building applications with LLMs through composability ⚡', metadata={}),\n",
" Document(page_content='## Quick Install', metadata={}),\n",
" Document(page_content=\"```bash\\n# Hopefully this code block isn't split\", metadata={}),\n",
" Document(page_content='pip install langchain', metadata={}),\n",
" Document(page_content='```', metadata={}),\n",
" Document(page_content='As an open source project in a rapidly developing field, we', metadata={}),\n",
" Document(page_content='are extremely open to contributions.', metadata={})]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"md_splitter = RecursiveCharacterTextSplitter.from_language(\n",
" language=Language.MARKDOWN, chunk_size=60, chunk_overlap=0\n",
")\n",
"md_docs = md_splitter.create_documents([markdown_text])\n",
"md_docs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Latex\n",
"\n",
"Here's an example on Latex text"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"latex_text = \"\"\"\n",
"\\documentclass{article}\n",
"\n",
"\\begin{document}\n",
"\n",
"\\maketitle\n",
"\n",
"\\section{Introduction}\n",
"Large language models (LLMs) are a type of machine learning model that can be trained on vast amounts of text data to generate human-like language. In recent years, LLMs have made significant advances in a variety of natural language processing tasks, including language translation, text generation, and sentiment analysis.\n",
"\n",
"\\subsection{History of LLMs}\n",
"The earliest LLMs were developed in the 1980s and 1990s, but they were limited by the amount of data that could be processed and the computational power available at the time. In the past decade, however, advances in hardware and software have made it possible to train LLMs on massive datasets, leading to significant improvements in performance.\n",
"\n",
"\\subsection{Applications of LLMs}\n",
"LLMs have many applications in industry, including chatbots, content creation, and virtual assistants. They can also be used in academia for research in linguistics, psychology, and computational linguistics.\n",
"\n",
"\\end{document}\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='\\\\documentclass{article}\\n\\n\\x08egin{document}\\n\\n\\\\maketitle', metadata={}),\n",
" Document(page_content='\\\\section{Introduction}', metadata={}),\n",
" Document(page_content='Large language models (LLMs) are a type of machine learning', metadata={}),\n",
" Document(page_content='model that can be trained on vast amounts of text data to', metadata={}),\n",
" Document(page_content='generate human-like language. In recent years, LLMs have', metadata={}),\n",
" Document(page_content='made significant advances in a variety of natural language', metadata={}),\n",
" Document(page_content='processing tasks, including language translation, text', metadata={}),\n",
" Document(page_content='generation, and sentiment analysis.', metadata={}),\n",
" Document(page_content='\\\\subsection{History of LLMs}', metadata={}),\n",
" Document(page_content='The earliest LLMs were developed in the 1980s and 1990s,', metadata={}),\n",
" Document(page_content='but they were limited by the amount of data that could be', metadata={}),\n",
" Document(page_content='processed and the computational power available at the', metadata={}),\n",
" Document(page_content='time. In the past decade, however, advances in hardware and', metadata={}),\n",
" Document(page_content='software have made it possible to train LLMs on massive', metadata={}),\n",
" Document(page_content='datasets, leading to significant improvements in', metadata={}),\n",
" Document(page_content='performance.', metadata={}),\n",
" Document(page_content='\\\\subsection{Applications of LLMs}', metadata={}),\n",
" Document(page_content='LLMs have many applications in industry, including', metadata={}),\n",
" Document(page_content='chatbots, content creation, and virtual assistants. They', metadata={}),\n",
" Document(page_content='can also be used in academia for research in linguistics,', metadata={}),
" Document(page_content='can also be used in academia for research in linguistics,', metadata={}),\n",
|
||||
" Document(page_content='psychology, and computational linguistics.', metadata={}),\n",
|
||||
" Document(page_content='\\\\end{document}', metadata={})]"
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='def hello_world():\\n print(\"Hello, World!\")', metadata={}),\n",
|
||||
" Document(page_content='# Call the function\\nhello_world()', metadata={})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"PYTHON_CODE = \"\"\"\n",
|
||||
"def hello_world():\n",
|
||||
" print(\"Hello, World!\")\n",
|
||||
"\n",
|
||||
"# Call the function\n",
|
||||
"hello_world()\n",
|
||||
"\"\"\"\n",
|
||||
"python_splitter = RecursiveCharacterTextSplitter.from_language(\n",
|
||||
" language=Language.PYTHON, chunk_size=50, chunk_overlap=0\n",
|
||||
")\n",
|
||||
"python_docs = python_splitter.create_documents([PYTHON_CODE])\n",
|
||||
"python_docs"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"latex_splitter = RecursiveCharacterTextSplitter.from_language(\n",
|
||||
" language=Language.MARKDOWN, chunk_size=60, chunk_overlap=0\n",
|
||||
")\n",
|
||||
"latex_docs = latex_splitter.create_documents([latex_text])\n",
|
||||
"latex_docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## HTML\n",
|
||||
"\n",
|
||||
"Here's an example using an HTML text splitter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"html_text = \"\"\"\n",
|
||||
"<!DOCTYPE html>\n",
|
||||
"<html>\n",
|
||||
" <head>\n",
|
||||
" <title>🦜️🔗 LangChain</title>\n",
|
||||
" <style>\n",
|
||||
" body {\n",
|
||||
" font-family: Arial, sans-serif;\n",
|
||||
" }\n",
|
||||
" h1 {\n",
|
||||
" color: darkblue;\n",
|
||||
" }\n",
|
||||
" </style>\n",
|
||||
" </head>\n",
|
||||
" <body>\n",
|
||||
" <div>\n",
|
||||
" <h1>🦜️🔗 LangChain</h1>\n",
|
||||
" <p>⚡ Building applications with LLMs through composability ⚡</p>\n",
|
||||
" </div>\n",
|
||||
" <div>\n",
|
||||
" As an open source project in a rapidly developing field, we are extremely open to contributions.\n",
|
||||
" </div>\n",
|
||||
" </body>\n",
|
||||
"</html>\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='<!DOCTYPE html>\\n<html>\\n <head>', metadata={}),\n",
|
||||
" Document(page_content='<title>🦜️🔗 LangChain</title>\\n <style>', metadata={}),\n",
|
||||
" Document(page_content='body {', metadata={}),\n",
|
||||
" Document(page_content='font-family: Arial, sans-serif;', metadata={}),\n",
|
||||
" Document(page_content='}\\n h1 {', metadata={}),\n",
|
||||
" Document(page_content='color: darkblue;\\n }', metadata={}),\n",
|
||||
" Document(page_content='</style>\\n </head>\\n <body>\\n <div>', metadata={}),\n",
|
||||
" Document(page_content='<h1>🦜️🔗 LangChain</h1>', metadata={}),\n",
|
||||
" Document(page_content='<p>⚡ Building applications with LLMs through', metadata={}),\n",
|
||||
" Document(page_content='composability ⚡</p>', metadata={}),\n",
|
||||
" Document(page_content='</div>\\n <div>', metadata={}),\n",
|
||||
" Document(page_content='As an open source project in a rapidly', metadata={}),\n",
|
||||
" Document(page_content='developing field, we are extremely open to contributions.', metadata={}),\n",
|
||||
" Document(page_content='</div>\\n </body>\\n</html>', metadata={})]"
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## JS\n",
|
||||
"Here's an example using the JS text splitter"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"html_splitter = RecursiveCharacterTextSplitter.from_language(\n",
|
||||
" language=Language.MARKDOWN, chunk_size=60, chunk_overlap=0\n",
|
||||
")\n",
|
||||
"html_docs = html_splitter.create_documents([html_text])\n",
|
||||
"html_docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='function helloWorld() {\\n console.log(\"Hello, World!\");\\n}', metadata={}),\n",
|
||||
" Document(page_content='// Call the function\\nhelloWorld();', metadata={})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"JS_CODE = \"\"\"\n",
|
||||
"function helloWorld() {\n",
|
||||
" console.log(\"Hello, World!\");\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"// Call the function\n",
|
||||
"helloWorld();\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"js_splitter = RecursiveCharacterTextSplitter.from_language(\n",
|
||||
" language=Language.JS, chunk_size=60, chunk_overlap=0\n",
|
||||
")\n",
|
||||
"js_docs = js_splitter.create_documents([JS_CODE])\n",
|
||||
"js_docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Solidity\n",
|
||||
"Here's an example using the Solidity text splitter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='pragma solidity ^0.8.20;', metadata={}),\n",
|
||||
" Document(page_content='contract HelloWorld {\\n function add(uint a, uint b) pure public returns(uint) {\\n return a + b;\\n }\\n}', metadata={})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"SOL_CODE = \"\"\"\n",
|
||||
"pragma solidity ^0.8.20;",
|
||||
"\n",
|
||||
"contract HelloWorld {\n",
|
||||
" function add(uint a, uint b) pure public returns(uint) {\n",
|
||||
" return a + b;\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"sol_splitter = RecursiveCharacterTextSplitter.from_language(\n",
|
||||
" language=Language.SOL, chunk_size=128, chunk_overlap=0\n",
|
||||
")\n",
|
||||
"sol_docs = sol_splitter.create_documents([SOL_CODE])\n",
|
||||
"sol_docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Markdown\n",
|
||||
"\n",
|
||||
"Here's an example using the Markdown text splitter."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"markdown_text = \"\"\"\n",
|
||||
"# 🦜️🔗 LangChain\n",
|
||||
"\n",
|
||||
"⚡ Building applications with LLMs through composability ⚡\n",
|
||||
"\n",
|
||||
"## Quick Install\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"# Hopefully this code block isn't split\n",
|
||||
"pip install langchain\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"As an open source project in a rapidly developing field, we are extremely open to contributions.\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='# 🦜️🔗 LangChain', metadata={}),\n",
|
||||
" Document(page_content='⚡ Building applications with LLMs through composability ⚡', metadata={}),\n",
|
||||
" Document(page_content='## Quick Install', metadata={}),\n",
|
||||
" Document(page_content=\"```bash\\n# Hopefully this code block isn't split\", metadata={}),\n",
|
||||
" Document(page_content='pip install langchain', metadata={}),\n",
|
||||
" Document(page_content='```', metadata={}),\n",
|
||||
" Document(page_content='As an open source project in a rapidly developing field, we', metadata={}),\n",
|
||||
" Document(page_content='are extremely open to contributions.', metadata={})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"md_splitter = RecursiveCharacterTextSplitter.from_language(\n",
|
||||
" language=Language.MARKDOWN, chunk_size=60, chunk_overlap=0\n",
|
||||
")\n",
|
||||
"md_docs = md_splitter.create_documents([markdown_text])\n",
|
||||
"md_docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Latex\n",
|
||||
"\n",
|
||||
"Here's an example on Latex text"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"latex_text = \"\"\"\n",
|
||||
"\\documentclass{article}\n",
|
||||
"\n",
|
||||
"\\begin{document}\n",
|
||||
"\n",
|
||||
"\\maketitle\n",
|
||||
"\n",
|
||||
"\\section{Introduction}\n",
|
||||
"Large language models (LLMs) are a type of machine learning model that can be trained on vast amounts of text data to generate human-like language. In recent years, LLMs have made significant advances in a variety of natural language processing tasks, including language translation, text generation, and sentiment analysis.\n",
|
||||
"\n",
|
||||
"\\subsection{History of LLMs}\n",
|
||||
"The earliest LLMs were developed in the 1980s and 1990s, but they were limited by the amount of data that could be processed and the computational power available at the time. In the past decade, however, advances in hardware and software have made it possible to train LLMs on massive datasets, leading to significant improvements in performance.\n",
|
||||
"\n",
|
||||
"\\subsection{Applications of LLMs}\n",
|
||||
"LLMs have many applications in industry, including chatbots, content creation, and virtual assistants. They can also be used in academia for research in linguistics, psychology, and computational linguistics.\n",
|
||||
"\n",
|
||||
"\\end{document}\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='\\\\documentclass{article}\\n\\n\\x08egin{document}\\n\\n\\\\maketitle', metadata={}),\n",
|
||||
" Document(page_content='\\\\section{Introduction}', metadata={}),\n",
|
||||
" Document(page_content='Large language models (LLMs) are a type of machine learning', metadata={}),\n",
|
||||
" Document(page_content='model that can be trained on vast amounts of text data to', metadata={}),\n",
|
||||
" Document(page_content='generate human-like language. In recent years, LLMs have', metadata={}),\n",
|
||||
" Document(page_content='made significant advances in a variety of natural language', metadata={}),\n",
|
||||
" Document(page_content='processing tasks, including language translation, text', metadata={}),\n",
|
||||
" Document(page_content='generation, and sentiment analysis.', metadata={}),\n",
|
||||
" Document(page_content='\\\\subsection{History of LLMs}', metadata={}),\n",
|
||||
" Document(page_content='The earliest LLMs were developed in the 1980s and 1990s,', metadata={}),\n",
|
||||
" Document(page_content='but they were limited by the amount of data that could be', metadata={}),\n",
|
||||
" Document(page_content='processed and the computational power available at the', metadata={}),\n",
|
||||
" Document(page_content='time. In the past decade, however, advances in hardware and', metadata={}),\n",
|
||||
" Document(page_content='software have made it possible to train LLMs on massive', metadata={}),\n",
|
||||
" Document(page_content='datasets, leading to significant improvements in', metadata={}),\n",
|
||||
" Document(page_content='performance.', metadata={}),\n",
|
||||
" Document(page_content='\\\\subsection{Applications of LLMs}', metadata={}),\n",
|
||||
" Document(page_content='LLMs have many applications in industry, including', metadata={}),\n",
|
||||
" Document(page_content='chatbots, content creation, and virtual assistants. They', metadata={}),\n",
|
||||
" Document(page_content='can also be used in academia for research in linguistics,', metadata={}),\n",
|
||||
" Document(page_content='psychology, and computational linguistics.', metadata={}),\n",
|
||||
" Document(page_content='\\\\end{document}', metadata={})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"latex_splitter = RecursiveCharacterTextSplitter.from_language(\n",
|
||||
" language=Language.MARKDOWN, chunk_size=60, chunk_overlap=0\n",
|
||||
")\n",
|
||||
"latex_docs = latex_splitter.create_documents([latex_text])\n",
|
||||
"latex_docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## HTML\n",
|
||||
"\n",
|
||||
"Here's an example using an HTML text splitter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"html_text = \"\"\"\n",
|
||||
"<!DOCTYPE html>\n",
|
||||
"<html>\n",
|
||||
" <head>\n",
|
||||
" <title>🦜️🔗 LangChain</title>\n",
|
||||
" <style>\n",
|
||||
" body {\n",
|
||||
" font-family: Arial, sans-serif;\n",
|
||||
" }\n",
|
||||
" h1 {\n",
|
||||
" color: darkblue;\n",
|
||||
" }\n",
|
||||
" </style>\n",
|
||||
" </head>\n",
|
||||
" <body>\n",
|
||||
" <div>\n",
|
||||
" <h1>🦜️🔗 LangChain</h1>\n",
|
||||
" <p>⚡ Building applications with LLMs through composability ⚡</p>\n",
|
||||
" </div>\n",
|
||||
" <div>\n",
|
||||
" As an open source project in a rapidly developing field, we are extremely open to contributions.\n",
|
||||
" </div>\n",
|
||||
" </body>\n",
|
||||
"</html>\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='<!DOCTYPE html>\\n<html>\\n <head>', metadata={}),\n",
|
||||
" Document(page_content='<title>🦜️🔗 LangChain</title>\\n <style>', metadata={}),\n",
|
||||
" Document(page_content='body {', metadata={}),\n",
|
||||
" Document(page_content='font-family: Arial, sans-serif;', metadata={}),\n",
|
||||
" Document(page_content='}\\n h1 {', metadata={}),\n",
|
||||
" Document(page_content='color: darkblue;\\n }', metadata={}),\n",
|
||||
" Document(page_content='</style>\\n </head>\\n <body>\\n <div>', metadata={}),\n",
|
||||
" Document(page_content='<h1>🦜️🔗 LangChain</h1>', metadata={}),\n",
|
||||
" Document(page_content='<p>⚡ Building applications with LLMs through', metadata={}),\n",
|
||||
" Document(page_content='composability ⚡</p>', metadata={}),\n",
|
||||
" Document(page_content='</div>\\n <div>', metadata={}),\n",
|
||||
" Document(page_content='As an open source project in a rapidly', metadata={}),\n",
|
||||
" Document(page_content='developing field, we are extremely open to contributions.', metadata={}),\n",
|
||||
" Document(page_content='</div>\\n </body>\\n</html>', metadata={})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"html_splitter = RecursiveCharacterTextSplitter.from_language(\n",
|
||||
" language=Language.MARKDOWN, chunk_size=60, chunk_overlap=0\n",
|
||||
")\n",
|
||||
"html_docs = html_splitter.create_documents([html_text])\n",
|
||||
"html_docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
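For quick reference, a minimal sketch (using only the `Language` enum already imported in this notebook) that lists every language shipping with a dedicated separator set:

```python
from langchain.text_splitter import Language

# Enumerate the languages that have built-in separator sets.
print([e.value for e in Language])
```

Any of these values can be passed as `language=` to `RecursiveCharacterTextSplitter.from_language`.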
|
||||
@@ -0,0 +1,145 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "70e9b619",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MarkdownHeaderTextSplitter\n",
|
||||
"\n",
|
||||
"This splits a markdown file by a specified set of headers. For example, if we want to split this markdown:\n",
|
||||
"```\n",
|
||||
"md = '# Foo\\n\\n ## Bar\\n\\nHi this is Jim \\nHi this is Joe\\n\\n ## Baz\\n\\n Hi this is Molly' \n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Headers to split on:\n",
|
||||
"```\n",
|
||||
"[(\"#\", \"Header 1\"),(\"##\", \"Header 2\")]\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Expected output:\n",
|
||||
"```\n",
|
||||
"{'content': 'Hi this is Jim \\nHi this is Joe', 'metadata': {'Header 1': 'Foo', 'Header 2': 'Bar'}}\n",
|
||||
"{'content': 'Hi this is Molly', 'metadata': {'Header 1': 'Foo', 'Header 2': 'Baz'}}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Optionally, this also includes `return_each_line` in case a user want to perform other types of aggregation. \n",
|
||||
"\n",
|
||||
"If `return_each_line=True`, each line and associated header metadata are simply returned. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "19c044f0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.text_splitter import MarkdownHeaderTextSplitter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "2ae3649b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'content': 'Hi this is Jim \\nHi this is Joe', 'metadata': {'Header 1': 'Foo', 'Header 2': 'Bar'}}\n",
|
||||
"{'content': 'Hi this is Lance', 'metadata': {'Header 1': 'Foo', 'Header 2': 'Bar', 'Header 3': 'Boo'}}\n",
|
||||
"{'content': 'Hi this is Molly', 'metadata': {'Header 1': 'Foo', 'Header 2': 'Baz'}}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"markdown_document = '# Foo\\n\\n ## Bar\\n\\nHi this is Jim\\n\\nHi this is Joe\\n\\n ### Boo \\n\\n Hi this is Lance \\n\\n ## Baz\\n\\n Hi this is Molly' \n",
|
||||
" \n",
|
||||
"headers_to_split_on = [\n",
|
||||
" (\"#\", \"Header 1\"),\n",
|
||||
" (\"##\", \"Header 2\"),\n",
|
||||
" (\"###\", \"Header 3\"),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)\n",
|
||||
"splits = markdown_splitter.split_text(markdown_document)\n",
|
||||
"for split in splits:\n",
|
||||
" print(split)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2a32026a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here's an example on a larger file with `return_each_line=True` passed, allowing each line to be examined."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "8af8f9a2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'content': 'Markdown[9] is a lightweight markup language for creating formatted text using a plain-text editor. John Gruber created Markdown in 2004 as a markup language that is appealing to human readers in its source code form.[9]', 'metadata': {'Header 1': 'Intro', 'Header 2': 'History'}}\n",
|
||||
"{'content': 'Markdown is widely used in blogging, instant messaging, online forums, collaborative software, documentation pages, and readme files.', 'metadata': {'Header 1': 'Intro', 'Header 2': 'History'}}\n",
|
||||
"{'content': 'As Markdown popularity grew rapidly, many Markdown implementations appeared, driven mostly by the need for', 'metadata': {'Header 1': 'Intro', 'Header 2': 'Rise and divergence'}}\n",
|
||||
"{'content': 'additional features such as tables, footnotes, definition lists,[note 1] and Markdown inside HTML blocks.', 'metadata': {'Header 1': 'Intro', 'Header 2': 'Rise and divergence'}}\n",
|
||||
"{'content': 'From 2012, a group of people, including Jeff Atwood and John MacFarlane, launched what Atwood characterised as a standardisation effort.', 'metadata': {'Header 1': 'Intro', 'Header 2': 'Rise and divergence', 'Header 4': 'Standardization'}}\n",
|
||||
"{'content': 'Implementations of Markdown are available for over a dozen programming languages.', 'metadata': {'Header 1': 'Intro', 'Header 2': 'Implementations'}}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"markdown_document = '# Intro \\n\\n ## History \\n\\n Markdown[9] is a lightweight markup language for creating formatted text using a plain-text editor. John Gruber created Markdown in 2004 as a markup language that is appealing to human readers in its source code form.[9] \\n\\n Markdown is widely used in blogging, instant messaging, online forums, collaborative software, documentation pages, and readme files. \\n\\n ## Rise and divergence \\n\\n As Markdown popularity grew rapidly, many Markdown implementations appeared, driven mostly by the need for \\n\\n additional features such as tables, footnotes, definition lists,[note 1] and Markdown inside HTML blocks. \\n\\n #### Standardization \\n\\n From 2012, a group of people, including Jeff Atwood and John MacFarlane, launched what Atwood characterised as a standardisation effort. \\n\\n ## Implementations \\n\\n Implementations of Markdown are available for over a dozen programming languages.'\n",
|
||||
" \n",
|
||||
"headers_to_split_on = [\n",
|
||||
" (\"#\", \"Header 1\"),\n",
|
||||
" (\"##\", \"Header 2\"),\n",
|
||||
" (\"###\", \"Header 3\"),\n",
|
||||
" (\"####\", \"Header 4\"),\n",
|
||||
"]\n",
|
||||
" \n",
|
||||
"markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on,return_each_line=True)\n",
|
||||
"splits = markdown_splitter.split_text(markdown_document)\n",
|
||||
"for line in splits:\n",
|
||||
" print(line)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "987183f2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
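Header-level groups can still exceed a desired chunk size. Below is a minimal sketch, assuming the dict-based return of `split_text` shown above, of chaining `MarkdownHeaderTextSplitter` with `RecursiveCharacterTextSplitter` so that each group's header metadata is carried onto its size-based chunks:

```python
from langchain.text_splitter import (
    MarkdownHeaderTextSplitter,
    RecursiveCharacterTextSplitter,
)

markdown_document = '# Foo\n\n ## Bar\n\nHi this is Jim\n\nHi this is Joe\n\n ## Baz\n\n Hi this is Molly'

headers_to_split_on = [("#", "Header 1"), ("##", "Header 2")]
md_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
header_splits = md_splitter.split_text(markdown_document)

# Split each header group further by size, re-attaching its header metadata.
char_splitter = RecursiveCharacterTextSplitter(chunk_size=20, chunk_overlap=0)
docs = char_splitter.create_documents(
    [s["content"] for s in header_splits],
    metadatas=[s["metadata"] for s in header_splits],
)
for doc in docs:
    print(doc)
```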
|
||||
@@ -12,7 +12,8 @@
|
||||
"\n",
|
||||
"- `length_function`: how the length of chunks is calculated. Defaults to just counting number of characters, but it's pretty common to pass a token counter here.\n",
|
||||
"- `chunk_size`: the maximum size of your chunks (as measured by the length function).\n",
|
||||
"- `chunk_overlap`: the maximum overlap between chunks. It can be nice to have some overlap to maintain some continuity between chunks (eg do a sliding window)."
|
||||
"- `chunk_overlap`: the maximum overlap between chunks. It can be nice to have some overlap to maintain some continuity between chunks (eg do a sliding window).\n",
|
||||
"- `add_start_index` : wether to include the starting position of each chunk within the original document in the metadata. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -49,6 +50,7 @@
|
||||
" chunk_size = 100,\n",
|
||||
" chunk_overlap = 20,\n",
|
||||
" length_function = len,\n",
|
||||
" add_start_index = True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -62,8 +64,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and' lookup_str='' metadata={} lookup_index=0\n",
|
||||
"page_content='of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans.' lookup_str='' metadata={} lookup_index=0\n"
|
||||
"page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and' metadata={'start_index': 0}\n",
|
||||
"page_content='of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans.' metadata={'start_index': 82}\n"
|
||||
]
|
||||
}
|
||||
],
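As a self-contained sketch of the new `add_start_index` flag, with an inline string standing in for the loaded `state_of_the_union.txt`:

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter

some_text = (
    "Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. "
    "Members of Congress and the Cabinet. Justices of the Supreme Court. "
    "My fellow Americans."
)

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len,
    add_start_index=True,  # record each chunk's character offset in the source
)

docs = text_splitter.create_documents([some_text])
for doc in docs:
    print(doc.metadata["start_index"], repr(doc.page_content[:40]))
```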
|
||||
@@ -90,7 +92,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.9.16"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
docs/modules/indexes/vectorstores/examples/awadb.ipynb (new file, 194 lines)
@@ -0,0 +1,194 @@
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "833c4789",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AwaDB\n",
|
||||
"[AwaDB](https://github.com/awa-ai/awadb) is an AI Native database for the search and storage of embedding vectors used by LLM Applications.\n",
|
||||
"This notebook shows how to use functionality related to the AwaDB."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "252930ea",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install awadb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f2b71a47",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import AwaDB\n",
|
||||
"from langchain.document_loaders import TextLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "49be0bac",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size= 100, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "18714278",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = AwaDB.from_documents(docs)\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = db.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "62b7a4c5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a9b4be48",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "87fec6b5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity search with score"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "17231924",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The returned distance score is between 0-1. 0 is dissimilar, 1 is the most similar"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f40ddae1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = db.similarity_search_with_score(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f0045583",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(docs[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8c2da99d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"(Document(page_content='And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'}), 0.561813814013747)"
|
||||
]
|
||||
},
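Because the score is normalized to the 0-1 range with 1 being most similar, one natural use is thresholding. A minimal sketch, re-using `db` and `query` from above (the 0.5 cutoff is an arbitrary choice, not an AwaDB recommendation):

```python
# Keep only reasonably strong matches; 0.5 is an arbitrary cutoff.
results = db.similarity_search_with_score(query)
strong_matches = [(doc, score) for doc, score in results if score >= 0.5]
for doc, score in strong_matches:
    print(round(score, 3), doc.page_content[:60])
```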
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0b49fb59",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Restore the table created and added data before"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1bfa6e25",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"AwaDB automatically persists added document data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2a0f3b35",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you can restore the table you created and added before, you can just do this as below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1fd4b5b0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"awadb_client = awadb.Client()\n",
|
||||
"ret = awadb_client.Load('langchain_awadb')\n",
|
||||
"if ret : print('awadb load table success')\n",
|
||||
"else:\n",
|
||||
" print('awadb load table failed')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5ae9a9dd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"awadb load table success"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
docs/modules/indexes/vectorstores/examples/azuresearch.ipynb (new file, 245 lines)
@@ -0,0 +1,245 @@
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure Cognitive Search"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Install Azure Cognitive Search SDK"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install --index-url=https://pkgs.dev.azure.com/azure-sdk/public/_packaging/azure-sdk-for-python/pypi/simple/ azure-search-documents==11.4.0a20230509004\n",
|
||||
"!pip install azure-identity"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Import required libraries"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os, json\n",
|
||||
"import openai\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.schema import BaseRetriever\n",
|
||||
"from langchain.vectorstores.azuresearch import AzureSearch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure OpenAI settings\n",
|
||||
"Configure the OpenAI settings to use Azure OpenAI or OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load environment variables from a .env file using load_dotenv():\n",
|
||||
"load_dotenv()\n",
|
||||
"\n",
|
||||
"openai.api_type = \"azure\"\n",
|
||||
"openai.api_base = \"YOUR_OPENAI_ENDPOINT\"\n",
|
||||
"openai.api_version = \"2023-05-15\"\n",
|
||||
"openai.api_key = \"YOUR_OPENAI_API_KEY\"\n",
|
||||
"model: str = \"text-embedding-ada-002\""
|
||||
]
|
||||
},
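For completeness, a minimal sketch of the non-Azure variant, assuming the API key is available in the `OPENAI_API_KEY` environment variable (`openai.api_type` and `openai.api_base` are left at their OpenAI defaults):

```python
import os

import openai

# Plain OpenAI (non-Azure): only the API key is required.
openai.api_key = os.environ["OPENAI_API_KEY"]
model: str = "text-embedding-ada-002"
```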
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure vector store settings\n",
|
||||
" \n",
|
||||
"Set up the vector store settings using environment variables:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store_address: str = 'YOUR_AZURE_SEARCH_ENDPOINT'\n",
|
||||
"vector_store_password: str = 'YOUR_AZURE_SEARCH_ADMIN_KEY'\n",
|
||||
"index_name: str = \"langchain-vector-demo\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create embeddings and vector store instances\n",
|
||||
" \n",
|
||||
"Create instances of the OpenAIEmbeddings and AzureSearch classes:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings: OpenAIEmbeddings = OpenAIEmbeddings(model=model, chunk_size=1) \n",
|
||||
"vector_store: AzureSearch = AzureSearch(azure_search_endpoint=vector_store_address, \n",
|
||||
" azure_search_key=vector_store_password, \n",
|
||||
" index_name=index_name, \n",
|
||||
" embedding_function=embeddings.embed_query) \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Insert text and embeddings into vector store\n",
|
||||
" \n",
|
||||
"Add texts and metadata from the JSON data to the vector store:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"loader = TextLoader('../../../state_of_the_union.txt', encoding='utf-8')\n",
|
||||
"\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(documents=docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Perform a vector similarity search\n",
|
||||
" \n",
|
||||
"Execute a pure vector similarity search using the similarity_search() method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Perform a similarity search\n",
|
||||
"docs = vector_store.similarity_search(query=\"What did the president say about Ketanji Brown Jackson\", k=3, search_type='similarity')\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Perform a Hybrid Search\n",
|
||||
"\n",
|
||||
"Execute hybrid search using the hybrid_search() method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Perform a hybrid search \n",
|
||||
"docs = vector_store.similarity_search(query=\"What did the president say about Ketanji Brown Jackson\", k=3)\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
}
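The search type can also be requested explicitly instead of relying on the default. A minimal sketch re-using `vector_store` from above; `'similarity'` performs pure vector search, while `'hybrid'` combines vector and keyword search:

```python
# Hybrid search, requested explicitly.
docs = vector_store.similarity_search(
    query="What did the president say about Ketanji Brown Jackson",
    k=3,
    search_type="hybrid",
)
print(docs[0].page_content)
```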
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.9.13 ('.venv': venv)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "645053d6307d413a1a75681b5ebb6449bb2babba4bcb0bf65a1ddc3dbefb108a"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -40,20 +40,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 2,
|
||||
"id": "47f9b495-88f1-4286-8d5d-1416103931a7",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"OpenAI API Key: ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
@@ -66,7 +58,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "aac9563e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -81,7 +73,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 10,
|
||||
"id": "a3c3999a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -99,7 +91,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 11,
|
||||
"id": "5eabdb75",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -114,7 +106,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 12,
|
||||
"id": "4b172de8",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -150,7 +142,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 13,
|
||||
"id": "186ee1d8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -160,18 +152,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 14,
|
||||
"id": "284e04b5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(Document(page_content='In state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \\n\\nWe cannot let this happen. \\n\\nTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', lookup_str='', metadata={'source': '../../state_of_the_union.txt'}, lookup_index=0),\n",
|
||||
" 0.3914415)"
|
||||
"(Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'}),\n",
|
||||
" 0.36913747)"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -191,7 +183,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 15,
|
||||
"id": "b558ebb7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -212,7 +204,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 16,
|
||||
"id": "428a6816",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -222,7 +214,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 17,
|
||||
"id": "56d1841c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -232,7 +224,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 18,
|
||||
"id": "39055525",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -242,17 +234,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 19,
|
||||
"id": "98378c4e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='In state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \\n\\nWe cannot let this happen. \\n\\nTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', lookup_str='', metadata={'source': '../../state_of_the_union.txt'}, lookup_index=0)"
|
||||
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -273,7 +265,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 20,
|
||||
"id": "6dfd2b78",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -284,17 +276,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 21,
|
||||
"id": "29960da7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'e0b74348-6c93-4893-8764-943139ec1d17': Document(page_content='foo', lookup_str='', metadata={}, lookup_index=0)}"
|
||||
"{'068c473b-d420-487a-806b-fb0ccea7f711': Document(page_content='foo', metadata={})}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -305,17 +297,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 22,
|
||||
"id": "83392605",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'bdc50ae3-a1bb-4678-9260-1b0979578f40': Document(page_content='bar', lookup_str='', metadata={}, lookup_index=0)}"
|
||||
"{'807e0c63-13f6-4070-9774-5c6f0fbb9866': Document(page_content='bar', metadata={})}"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -326,7 +318,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 23,
|
||||
"id": "a3fcc1c7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -336,18 +328,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 24,
|
||||
"id": "41c51f89",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'e0b74348-6c93-4893-8764-943139ec1d17': Document(page_content='foo', lookup_str='', metadata={}, lookup_index=0),\n",
|
||||
" 'd5211050-c777-493d-8825-4800e74cfdb6': Document(page_content='bar', lookup_str='', metadata={}, lookup_index=0)}"
|
||||
"{'068c473b-d420-487a-806b-fb0ccea7f711': Document(page_content='foo', metadata={}),\n",
|
||||
" '807e0c63-13f6-4070-9774-5c6f0fbb9866': Document(page_content='bar', metadata={})}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
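The merge step itself is collapsed in the diff above; as a minimal sketch of what it looks like, re-using `embeddings` from earlier in the notebook:

```python
from langchain.vectorstores import FAISS

db1 = FAISS.from_texts(["foo"], embeddings)
db2 = FAISS.from_texts(["bar"], embeddings)

db1.merge_from(db2)        # db1 now holds both documents
print(db1.docstore._dict)  # maps generated UUIDs to Documents
```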
|
||||
@@ -357,12 +349,139 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f80b60de",
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "f4294b96",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"## Similarity Search with filtering\n",
|
||||
"FAISS vectorstore can also support filtering, since the FAISS does not natively support filtering we have to do it manually. This is done by first fetching more results than `k` and then filtering them. You can filter the documents based on metadata. You can also set the `fetch_k` parameter when calling any search method to set how many documents you want to fetch before filtering. Here is a small example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "d5bf812c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Content: foo, Metadata: {'page': 1}, Score: 5.159960813797904e-15\n",
|
||||
"Content: foo, Metadata: {'page': 2}, Score: 5.159960813797904e-15\n",
|
||||
"Content: foo, Metadata: {'page': 3}, Score: 5.159960813797904e-15\n",
|
||||
"Content: foo, Metadata: {'page': 4}, Score: 5.159960813797904e-15\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.schema import Document\n",
|
||||
"list_of_documents = [\n",
|
||||
" Document(page_content=\"foo\", metadata=dict(page=1)),\n",
|
||||
" Document(page_content=\"bar\", metadata=dict(page=1)),\n",
|
||||
" Document(page_content=\"foo\", metadata=dict(page=2)),\n",
|
||||
" Document(page_content=\"barbar\", metadata=dict(page=2)),\n",
|
||||
" Document(page_content=\"foo\", metadata=dict(page=3)),\n",
|
||||
" Document(page_content=\"bar burr\", metadata=dict(page=3)),\n",
|
||||
" Document(page_content=\"foo\", metadata=dict(page=4)),\n",
|
||||
" Document(page_content=\"bar bruh\", metadata=dict(page=4))\n",
|
||||
"]\n",
|
||||
"db = FAISS.from_documents(list_of_documents, embeddings)\n",
|
||||
"results_with_scores = db.similarity_search_with_score(\"foo\")\n",
|
||||
"for doc, score in results_with_scores:\n",
|
||||
" print(f\"Content: {doc.page_content}, Metadata: {doc.metadata}, Score: {score}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "3d33c126",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now we make the same query call but we filter for only `page = 1` "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "83159330",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Content: foo, Metadata: {'page': 1}, Score: 5.159960813797904e-15\n",
|
||||
"Content: bar, Metadata: {'page': 1}, Score: 0.3131446838378906\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results_with_scores = db.similarity_search_with_score(\"foo\", filter=dict(page=1))\n",
|
||||
"for doc, score in results_with_scores:\n",
|
||||
" print(f\"Content: {doc.page_content}, Metadata: {doc.metadata}, Score: {score}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "0be136e0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Same thing can be done with the `max_marginal_relevance_search` as well."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "432c6980",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Content: foo, Metadata: {'page': 1}\n",
|
||||
"Content: bar, Metadata: {'page': 1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = db.max_marginal_relevance_search(\"foo\", filter=dict(page=1))\n",
|
||||
"for doc in results:\n",
|
||||
" print(f\"Content: {doc.page_content}, Metadata: {doc.metadata}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1b4ecd86",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here is an example of how to set `fetch_k` parameter when calling `similarity_search`. Usually you would want the `fetch_k` parameter >> `k` parameter. This is because the `fetch_k` parameter is the number of documents that will be fetched before filtering. If you set `fetch_k` to a low number, you might not get enough documents to filter from."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "1fd60fd1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Content: foo, Metadata: {'page': 1}, Score: 5.159960813797904e-15\n",
|
||||
"Content: bar, Metadata: {'page': 1}, Score: 0.3131446838378906\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = db.similarity_search(\"foo\", filter=dict(page=1), k=1, fetch_k=4)\n",
|
||||
"for doc, score in results_with_scores:\n",
|
||||
" print(f\"Content: {doc.page_content}, Metadata: {doc.metadata}, Score: {score}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -381,7 +500,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Binary file not shown.
157
docs/modules/indexes/vectorstores/examples/hologres.ipynb
Normal file
@@ -0,0 +1,157 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Hologres\n",
|
||||
"\n",
|
||||
">[Hologres](https://www.alibabacloud.com/help/en/hologres/latest/introduction) is a unified real-time data warehousing service developed by Alibaba Cloud. You can use Hologres to write, update, process, and analyze large amounts of data in real time. \n",
|
||||
">Hologres supports standard SQL syntax, is compatible with PostgreSQL, and supports most PostgreSQL functions. Hologres supports online analytical processing (OLAP) and ad hoc analysis for up to petabytes of data, and provides high-concurrency and low-latency online data services. \n",
|
||||
"\n",
|
||||
">Hologres provides **vector database** functionality by adopting [Proxima](https://www.alibabacloud.com/help/en/hologres/latest/vector-processing).\n",
|
||||
">Proxima is a high-performance software library developed by Alibaba DAMO Academy. It allows you to search for the nearest neighbors of vectors. Proxima provides higher stability and performance than similar open source software such as Faiss. Proxima allows you to search for similar text or image embeddings with high throughput and low latency. Hologres is deeply integrated with Proxima to provide a high-performance vector search service.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `Hologres Proxima` vector database.\n",
|
||||
"Click [here](https://www.alibabacloud.com/zh/product/hologres) to fast deploy a Hologres cloud instance."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import Hologres"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Split documents and get embeddings by call OpenAI API"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Connect to Hologres by setting related ENVIRONMENTS.\n",
|
||||
"```\n",
|
||||
"export PG_HOST={host}\n",
|
||||
"export PG_PORT={port} # Optional, default is 80\n",
|
||||
"export PG_DATABASE={db_name} # Optional, default is postgres\n",
|
||||
"export PG_USER={username}\n",
|
||||
"export PG_PASSWORD={password}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Then store your embeddings and documents into Hologres"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"connection_string = Hologres.connection_string_from_db_params(\n",
|
||||
" host=os.environ.get(\"PGHOST\", \"localhost\"),\n",
|
||||
" port=int(os.environ.get(\"PGPORT\", \"80\")),\n",
|
||||
" database=os.environ.get(\"PGDATABASE\", \"postgres\"),\n",
|
||||
" user=os.environ.get(\"PGUSER\", \"postgres\"),\n",
|
||||
" password=os.environ.get(\"PGPASSWORD\", \"postgres\"),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"vector_db = Hologres.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" connection_string=connection_string,\n",
|
||||
" table_name=\"langchain_example_embeddings\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Query and retrieve data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = vector_db.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -5,16 +5,18 @@
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MongoDB Atlas Vector Search\n",
|
||||
"Commented out until further notice\n",
|
||||
"\n",
|
||||
">[MongoDB Atlas](https://www.mongodb.com/docs/atlas/) is a document database managed in the cloud. It also enables Lucene and its vector search feature.\n",
|
||||
"MongoDB Atlas Vector Search\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the functionality related to the `MongoDB Atlas Vector Search` feature where you can store your embeddings in MongoDB documents and create a Lucene vector index to perform a KNN search.\n",
|
||||
">[MongoDB Atlas](https://www.mongodb.com/docs/atlas/) is a fully-managed cloud database available in AWS , Azure, and GCP. It now has support for native Vector Search on your MongoDB document data.\n",
|
||||
"\n",
|
||||
"It uses the [knnBeta Operator](https://www.mongodb.com/docs/atlas/atlas-search/knn-beta) available in MongoDB Atlas Search. This feature is in early access and available only for evaluation purposes, to validate functionality, and to gather feedback from a small closed group of early access users. It is not recommended for production deployments as we may introduce breaking changes.\n",
|
||||
"This notebook shows how to use `MongoDB Atlas Vector Search` to store your embeddings in MongoDB documents, create a vector search index, and perform KNN search with an approximate nearest neighbor algorithm.\n",
|
||||
"\n",
|
||||
"To use MongoDB Atlas, you must have first deployed a cluster. Free clusters are available. \n",
|
||||
"Here is the MongoDB Atlas [quick start](https://www.mongodb.com/docs/atlas/getting-started/)."
|
||||
"It uses the [knnBeta Operator](https://www.mongodb.com/docs/atlas/atlas-search/knn-beta) available in MongoDB Atlas Search. This feature is in Public Preview and available for evaluation purposes, to validate functionality, and to gather feedback from public preview users. It is not recommended for production deployments as we may introduce breaking changes.\n",
|
||||
"\n",
|
||||
"To use MongoDB Atlas, you must first deploy a cluster. We have a Forever-Free tier of clusters available. \n",
|
||||
"To get started head over to Atlas here: [quick start](https://www.mongodb.com/docs/atlas/getting-started/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -37,16 +39,29 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"MONGODB_ATLAS_URI = os.environ['MONGODB_ATLAS_URI']"
|
||||
"MONGODB_ATLAS_CLUSTER_URI = getpass.getpass('MongoDB Atlas Cluster URI:')\n",
|
||||
"MONGODB_ATLAS_CLUSTER_URI = os.environ['MONGODB_ATLAS_CLUSTER_URI']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "320af802-9271-46ee-948f-d2453933d44b",
|
||||
"id": "457ace44-1d95-4001-9dd5-78811ab208ad",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key. Make sure the environment variable `OPENAI_API_KEY` is set up before proceeding."
|
||||
"We want to use `OpenAIEmbeddings` so we need to set up our OpenAI API Key. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2d8f240d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')\n",
|
||||
"OPENAI_API_KEY = os.environ['OPENAI_API_KEY']"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -54,8 +69,8 @@
|
||||
"id": "1f3ecc42",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's create a Lucene vector index on your cluster. In the below example, `embedding` is the name of the field that contains the embedding vector. Please refer to the [documentation](https://www.mongodb.com/docs/atlas/atlas-search/define-field-mappings-for-vector-search) to get more details on how to define an Atlas Search index.\n",
|
||||
"You can name the index `langchain_demo` and create the index on the namespace `lanchain_db.langchain_col`. Finally, write the following definition in the JSON editor:\n",
|
||||
"Now, let's create a vector search index on your cluster. In the below example, `embedding` is the name of the field that contains the embedding vector. Please refer to the [documentation](https://www.mongodb.com/docs/atlas/atlas-search/define-field-mappings-for-vector-search) to get more details on how to define an Atlas Vector Search index.\n",
|
||||
"You can name the index `langchain_demo` and create the index on the namespace `lanchain_db.langchain_col`. Finally, write the following definition in the JSON editor on MongoDB Atlas:\n",
|
||||
"\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
@@ -114,7 +129,7 @@
|
||||
"from pymongo import MongoClient\n",
|
||||
"\n",
|
||||
"# initialize MongoDB python client\n",
|
||||
"client = MongoClient(MONGODB_ATLAS_CONNECTION_STRING)\n",
|
||||
"client = MongoClient(MONGODB_ATLAS_CLUSTER_URI)\n",
|
||||
"\n",
|
||||
"db_name = \"lanchain_db\"\n",
|
||||
"collection_name = \"langchain_col\"\n",
|
||||
@@ -143,6 +158,47 @@
|
||||
"source": [
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "851a2ec9-9390-49a4-8412-3e132c9f789d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can reuse the vector search index you created, make sure the `OPENAI_API_KEY` environment variable is set up, then execute another query."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6336fe79-3e73-48be-b20a-0ff1bb6a4399",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pymongo import MongoClient\n",
|
||||
"from langchain.vectorstores import MongoDBAtlasVectorSearch\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"MONGODB_ATLAS_URI = os.environ['MONGODB_ATLAS_URI']\n",
|
||||
"\n",
|
||||
"# initialize MongoDB python client\n",
|
||||
"client = MongoClient(MONGODB_ATLAS_URI)\n",
|
||||
"\n",
|
||||
"db_name = \"langchain_db\"\n",
|
||||
"collection_name = \"langchain_col\"\n",
|
||||
"collection = client[db_name][collection_name]\n",
|
||||
"index_name = \"langchain_index\"\n",
|
||||
"\n",
|
||||
"# initialize vector store\n",
|
||||
"vectorStore = MongoDBAtlasVectorSearch(\n",
|
||||
" collection, OpenAIEmbeddings(), index_name=index_name)\n",
|
||||
"\n",
|
||||
"# perform a similarity search between the embedding of the query and the embeddings of the documents\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = vectorStore.similarity_search(query)\n",
|
||||
"\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -161,7 +217,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install pinecone-client"
|
||||
"!pip install pinecone-client openai tiktoken"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -70,7 +70,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"id": "aac9563e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -85,7 +85,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"id": "a3c3999a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -135,13 +135,51 @@
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "d46d1452",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Maximal Marginal Relevance Searches\n",
|
||||
"\n",
|
||||
"In addition to using similarity search in the retriever object, you can also use `mmr` as retriever.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a359ed74",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"retriever = docsearch.as_retriever(search_type=\"mmr\")\n",
|
||||
"matched_docs = retriever.get_relevant_documents(query)\n",
|
||||
"for i, d in enumerate(matched_docs):\n",
|
||||
" print(f\"\\n## Document {i}\\n\")\n",
|
||||
" print(d.page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "7c477287",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Or use `max_marginal_relevance_search` directly:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9ca82740",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"found_docs = docsearch.max_marginal_relevance_search(query, k=2, fetch_k=10)\n",
|
||||
"for i, doc in enumerate(found_docs):\n",
|
||||
" print(f\"{i + 1}.\", doc.page_content, \"\\n\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
139
docs/modules/indexes/vectorstores/examples/singlestoredb.ipynb
Normal file
@@ -0,0 +1,139 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2b9582dc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SingleStoreDB vector search\n",
|
||||
"[SingleStore DB](https://singlestore.com) is a high-performance distributed database that supports deployment both in the [cloud](https://www.singlestore.com/cloud/) and on-premises. For a significant duration, it has provided support for vector functions such as [dot_product](https://docs.singlestore.com/managed-service/en/reference/sql-reference/vector-functions/dot_product.html), thereby positioning itself as an ideal solution for AI applications that require text similarity matching. \n",
|
||||
"This tutorial illustrates how to utilize the features of the SingleStore DB Vector Store."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e4a61a4d",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Establishing a connection to the database is facilitated through the singlestoredb Python connector.\n",
|
||||
"# Please ensure that this connector is installed in your working environment.\n",
|
||||
"!pip install singlestoredb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "39a0132a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"# We want to use OpenAIEmbeddings so we have to get the OpenAI API Key.\n",
|
||||
"os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6104fde8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain.vectorstores import SingleStoreDB\n",
|
||||
"from langchain.document_loaders import TextLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7b45113c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load text samples \n",
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"loader = TextLoader('../../../state_of_the_union.txt')\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "535b2687",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"There are several ways to establish a [connection](https://singlestoredb-python.labs.singlestore.com/generated/singlestoredb.connect.html) to the database. You can either set up environment variables or pass named parameters to the `SingleStoreDB constructor`. Alternatively, you may provide these parameters to the `from_documents` and `from_texts` methods."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d0b316bf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Setup connection url as environment variable\n",
|
||||
"os.environ['SINGLESTOREDB_URL'] = 'root:pass@localhost:3306/db'\n",
|
||||
"\n",
|
||||
"# Load documents to the store\n",
|
||||
"docsearch = SingleStoreDB.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" table_name = \"noteook\", # use table with a custom name \n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0eaa4297",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = docsearch.similarity_search(query) # Find documents that correspond to the query\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "86efff90",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -101,7 +101,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"id": "8429667e",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -133,7 +133,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"id": "a8c513ab",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -145,12 +145,12 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"found_docs = vectara.similarity_search(query)"
|
||||
"found_docs = vectara.similarity_search(query, n_sentence_context=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"id": "fc516993",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -164,7 +164,13 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. A former top litigator in private practice. A former federal public defender.\n"
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -185,7 +191,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"id": "8804a21d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -201,7 +207,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 8,
|
||||
"id": "756a6887",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -214,9 +220,15 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. A former top litigator in private practice. A former federal public defender.\n",
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Score: 1.0046461\n"
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
|
||||
"\n",
|
||||
"Score: 0.7129974\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -239,7 +251,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 9,
|
||||
"id": "9427195f",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -251,10 +263,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"VectorStoreRetriever(vectorstore=<langchain.vectorstores.vectara.Vectara object at 0x156d3e830>, search_type='similarity', search_kwargs={})"
|
||||
"VectaraRetriever(vectorstore=<langchain.vectorstores.vectara.Vectara object at 0x122db2830>, search_type='similarity', search_kwargs={'lambda_val': 0.025, 'k': 5, 'filter': '', 'n_sentence_context': '0'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -266,7 +278,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 10,
|
||||
"id": "f3c70c31",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -278,10 +290,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. A former top litigator in private practice. A former federal public defender.', metadata={'source': '../../modules/state_of_the_union.txt'})"
|
||||
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -316,7 +328,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -209,7 +209,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "8fc3487b",
|
||||
"metadata": {},
|
||||
@@ -218,7 +217,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "281c0fcc",
|
||||
"metadata": {},
|
||||
@@ -236,7 +234,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "503e2e75",
|
||||
"metadata": {},
|
||||
@@ -273,7 +270,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "fbd7a6cb",
|
||||
"metadata": {},
|
||||
@@ -282,7 +278,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "f349acb9",
|
||||
"metadata": {},
|
||||
@@ -384,7 +379,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -121,7 +121,7 @@
|
||||
"\n",
|
||||
"Human: Hi there my friend\n",
|
||||
"AI: Hi there, how are you doing today?\n",
|
||||
"Human: Not to bad - how are you?\n",
|
||||
"Human: Not too bad - how are you?\n",
|
||||
"Chatbot:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished LLMChain chain.\u001b[0m\n"
|
||||
|
||||
@@ -118,6 +118,29 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "955f1b15",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## DynamoDBChatMessageHistory with Custom Endpoint URL\n",
|
||||
"\n",
|
||||
"Sometimes it is useful to specify the URL to the AWS endpoint to connect to. For instance, when you are running locally against [Localstack](https://localstack.cloud/). For those cases you can specify the URL via the `endpoint_url` parameter in the constructor."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "225713c8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory.chat_message_histories import DynamoDBChatMessageHistory\n",
|
||||
"\n",
|
||||
"history = DynamoDBChatMessageHistory(table_name=\"SessionTable\", session_id=\"0\", endpoint_url=\"http://localhost.localstack.cloud:4566\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "3b33c988",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "d31df93e",
|
||||
"metadata": {},
|
||||
@@ -9,7 +10,7 @@
|
||||
"\n",
|
||||
"This notebook walks through how LangChain thinks about memory. \n",
|
||||
"\n",
|
||||
"Memory involves keeping a concept of state around throughout a user's interactions with an language model. A user's interactions with a language model are captured in the concept of ChatMessages, so this boils down to ingesting, capturing, transforming and extracting knowledge from a sequence of chat messages. There are many different ways to do this, each of which exists as its own memory type.\n",
|
||||
"Memory involves keeping a concept of state around throughout a user's interactions with a language model. A user's interactions with a language model are captured in the concept of ChatMessages, so this boils down to ingesting, capturing, transforming and extracting knowledge from a sequence of chat messages. There are many different ways to do this, each of which exists as its own memory type.\n",
|
||||
"\n",
|
||||
"In general, for each type of memory there are two ways to understanding using memory. These are the standalone functions which extract information from a sequence of messages, and then there is the way you can use this type of memory in a chain. \n",
|
||||
"\n",
|
||||
@@ -25,7 +26,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "87235cf1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -41,18 +42,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 3,
|
||||
"id": "be030822",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[HumanMessage(content='hi!', additional_kwargs={}),\n",
|
||||
" AIMessage(content='whats up?', additional_kwargs={})]"
|
||||
"[HumanMessage(content='hi!', additional_kwargs={}, example=False),\n",
|
||||
" AIMessage(content='whats up?', additional_kwargs={}, example=False)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -75,7 +76,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 4,
|
||||
"id": "a382b160",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -85,7 +86,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 5,
|
||||
"id": "a280d337",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -97,7 +98,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 7,
|
||||
"id": "1b739c0a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -107,7 +108,7 @@
|
||||
"{'history': 'Human: hi!\\nAI: whats up?'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -126,7 +127,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 8,
|
||||
"id": "798ceb1c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -138,18 +139,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 9,
|
||||
"id": "698688fd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'history': [HumanMessage(content='hi!', additional_kwargs={}),\n",
|
||||
" AIMessage(content='whats up?', additional_kwargs={})]}"
|
||||
"{'history': [HumanMessage(content='hi!', additional_kwargs={}, example=False),\n",
|
||||
" AIMessage(content='whats up?', additional_kwargs={}, example=False)]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -169,7 +170,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 10,
|
||||
"id": "54301321",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -188,7 +189,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 11,
|
||||
"id": "ae046bff",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -216,7 +217,7 @@
|
||||
"\" Hi there! It's nice to meet you. How can I help you today?\""
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -227,7 +228,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 12,
|
||||
"id": "d8e2a6ff",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -256,7 +257,7 @@
|
||||
"\" That's great! It's always nice to have a conversation with someone new. What would you like to talk about?\""
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -267,7 +268,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 13,
|
||||
"id": "15eda316",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -298,7 +299,7 @@
|
||||
"\" Sure! I'm an AI created to help people with their everyday tasks. I'm programmed to understand natural language and provide helpful information. I'm also constantly learning and updating my knowledge base so I can provide more accurate and helpful answers.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -319,7 +320,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 14,
|
||||
"id": "b5acbc4b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -338,7 +339,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 15,
|
||||
"id": "7812ee21",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -348,18 +349,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 16,
|
||||
"id": "3ed6e6a0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'type': 'human', 'data': {'content': 'hi!', 'additional_kwargs': {}}},\n",
|
||||
" {'type': 'ai', 'data': {'content': 'whats up?', 'additional_kwargs': {}}}]"
|
||||
"[{'type': 'human',\n",
|
||||
" 'data': {'content': 'hi!', 'additional_kwargs': {}, 'example': False}},\n",
|
||||
" {'type': 'ai',\n",
|
||||
" 'data': {'content': 'whats up?', 'additional_kwargs': {}, 'example': False}}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -370,7 +373,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 17,
|
||||
"id": "cdf4ebd2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -380,18 +383,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 18,
|
||||
"id": "9724e24b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[HumanMessage(content='hi!', additional_kwargs={}),\n",
|
||||
" AIMessage(content='whats up?', additional_kwargs={})]"
|
||||
"[HumanMessage(content='hi!', additional_kwargs={}, example=False),\n",
|
||||
" AIMessage(content='whats up?', additional_kwargs={}, example=False)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -407,14 +410,6 @@
|
||||
"source": [
|
||||
"And that's it for the getting started! There are plenty of different types of memory, check out our examples to see them all"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3dd37d93",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -433,7 +428,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -631,7 +631,7 @@
|
||||
"id": "56ea6a08",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You'll need to get a Momemto auth token to use this class. This can either be passed in to a momento.CacheClient if you'd like to instantiate that directly, as a named parameter `auth_token` to `MomentoChatMessageHistory.from_client_params`, or can just be set as an environment variable `MOMENTO_AUTH_TOKEN`."
|
||||
"You'll need to get a Momento auth token to use this class. This can either be passed in to a momento.CacheClient if you'd like to instantiate that directly, as a named parameter `auth_token` to `MomentoChatMessageHistory.from_client_params`, or can just be set as an environment variable `MOMENTO_AUTH_TOKEN`."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
196
docs/modules/models/llms/integrations/baseten.ipynb
Normal file
@@ -0,0 +1,196 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Baseten\n",
|
||||
"\n",
|
||||
"[Baseten](https://baseten.co) provides all the infrastructure you need to deploy and serve ML models performantly, scalably, and cost-efficiently.\n",
|
||||
"\n",
|
||||
"This example demonstrates using Langchain with models deployed on Baseten."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"\n",
|
||||
"To run this notebook, you'll need a [Baseten account](https://baseten.co) and an [API key](https://docs.baseten.co/settings/api-keys).\n",
|
||||
"\n",
|
||||
"You'll also need to install the Baseten Python package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install baseten"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import baseten\n",
|
||||
"\n",
|
||||
"baseten.login(\"YOUR_API_KEY\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Single model call\n",
|
||||
"\n",
|
||||
"First, you'll need to deploy a model to Baseten.\n",
|
||||
"\n",
|
||||
"You can deploy foundation models like WizardLM and Alpaca with one click from the [Baseten model library](https://app.baseten.co/explore/) or if you have your own model, [deploy it with this tutorial](https://docs.baseten.co/deploying-models/deploy).\n",
|
||||
"\n",
|
||||
"In this example, we'll work with WizardLM. [Deploy WizardLM here](https://app.baseten.co/explore/llama) and follow along with the deployed [model's version ID](https://docs.baseten.co/managing-models/manage)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import Baseten"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load the model\n",
|
||||
"wizardlm = Baseten(model=\"MODEL_VERSION_ID\", verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Prompt the model\n",
|
||||
"\n",
|
||||
"wizardlm(\"What is the difference between a Wizard and a Sorcerer?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Chained model calls\n",
|
||||
"\n",
|
||||
"We can chain together multiple calls to one or multiple models, which is the whole point of Langchain!\n",
|
||||
"\n",
|
||||
"This example uses WizardLM to plan a meal with an entree, three sides, and an alcoholic and non-alcoholic beverage pairing."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import SimpleSequentialChain\n",
|
||||
"from langchain import PromptTemplate, LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Build the first link in the chain\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate(\n",
|
||||
" input_variables=[\"cuisine\"],\n",
|
||||
" template=\"Name a complex entree for a {cuisine} dinner. Respond with just the name of a single dish.\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"link_one = LLMChain(llm=wizardlm, prompt=prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Build the second link in the chain\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate(\n",
|
||||
" input_variables=[\"entree\"],\n",
|
||||
" template=\"What are three sides that would go with {entree}. Respond with only a list of the sides.\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"link_two = LLMChain(llm=wizardlm, prompt=prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Build the third link in the chain\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate(\n",
|
||||
" input_variables=[\"sides\"],\n",
|
||||
" template=\"What is one alcoholic and one non-alcoholic beverage that would go well with this list of sides: {sides}. Respond with only the names of the beverages.\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"link_three = LLMChain(llm=wizardlm, prompt=prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Run the full chain!\n",
|
||||
"\n",
|
||||
"menu_maker = SimpleSequentialChain(chains=[link_one, link_two, link_three], verbose=True)\n",
|
||||
"menu_maker.run(\"South Indian\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"application/vnd.databricks.v1+cell": {
|
||||
@@ -43,6 +44,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"application/vnd.databricks.v1+cell": {
|
||||
@@ -163,14 +165,14 @@
|
||||
],
|
||||
"source": [
|
||||
"# Otherwise, you can manually specify the Databricks workspace hostname and personal access token \n",
|
||||
"# or set `DATABRICKS_HOST` and `DATABRICKS_API_TOKEN` environment variables, respectively.\n",
|
||||
"# or set `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables, respectively.\n",
|
||||
"# See https://docs.databricks.com/dev-tools/auth.html#databricks-personal-access-tokens\n",
|
||||
"# We strongly recommend not exposing the API token explicitly inside a notebook.\n",
|
||||
"# You can use Databricks secret manager to store your API token securely.\n",
|
||||
"# See https://docs.databricks.com/dev-tools/databricks-utils.html#secrets-utility-dbutilssecrets\n",
|
||||
"\n",
|
||||
"import os\n",
|
||||
"os.environ[\"DATABRICKS_API_TOKEN\"] = dbutils.secrets.get(\"myworkspace\", \"api_token\")\n",
|
||||
"os.environ[\"DATABRICKS_TOKEN\"] = dbutils.secrets.get(\"myworkspace\", \"api_token\")\n",
|
||||
"\n",
|
||||
"llm = Databricks(host=\"myworkspace.cloud.databricks.com\", endpoint_name=\"dolly\")\n",
|
||||
"\n",
|
||||
@@ -257,6 +259,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"application/vnd.databricks.v1+cell": {
|
||||
@@ -273,7 +276,7 @@
|
||||
"Prerequisites:\n",
|
||||
"* An LLM loaded on a Databricks interactive cluster in \"single user\" or \"no isolation shared\" mode.\n",
|
||||
"* A local HTTP server running on the driver node to serve the model at `\"/\"` using HTTP POST with JSON input/output.\n",
|
||||
"* It uses a port number between `[3000, 8000]` and litens to the driver IP address or simply `0.0.0.0` instead of localhost only.\n",
|
||||
"* It uses a port number between `[3000, 8000]` and listens to the driver IP address or simply `0.0.0.0` instead of localhost only.\n",
|
||||
"* You have \"Can Attach To\" permission to the cluster.\n",
|
||||
"\n",
|
||||
"The expected server schema (using JSON schema) is:\n",
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "959300d4",
|
||||
"metadata": {},
|
||||
@@ -13,6 +14,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "4c1b8450-5eaf-4d34-8341-2d785448a1ff",
|
||||
"metadata": {
|
||||
@@ -60,6 +62,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "84dd44c1-c428-41f3-a911-520281386c94",
|
||||
"metadata": {},
|
||||
@@ -104,6 +107,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "ddaa06cf-95ec-48ce-b0ab-d892a7909693",
|
||||
"metadata": {},
|
||||
@@ -114,6 +118,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "4fa9337e-ccb5-4c52-9b7c-1653148bc256",
|
||||
"metadata": {},
|
||||
@@ -158,13 +163,14 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1a5c97af-89bc-4e59-95c1-223742a9160b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Dolly, by DataBricks\n",
|
||||
"### Dolly, by Databricks\n",
|
||||
"\n",
|
||||
"See [DataBricks](https://huggingface.co/databricks) organization page for a list of available models."
|
||||
"See [Databricks](https://huggingface.co/databricks) organization page for a list of available models."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -196,6 +202,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "03f6ae52-b5f9-4de6-832c-551cb3fa11ae",
|
||||
"metadata": {},
|
||||
@@ -233,6 +240,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "2bf838eb-1083-402f-b099-b07c452418c8",
|
||||
"metadata": {},
|
||||
|
||||
83
docs/modules/models/text_embedding/examples/dashscope.ipynb
Normal file
@@ -0,0 +1,83 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DashScope\n",
|
||||
"\n",
|
||||
"Let's load the DashScope Embedding class."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings import DashScopeEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = DashScopeEmbeddings(model='text-embedding-v1', dashscope_api_key='your-dashscope-api-key')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text = \"This is a test document.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_result = embeddings.embed_query(text)\n",
|
||||
"print(query_result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"doc_results = embeddings.embed_documents([\"foo\"])\n",
|
||||
"print(doc_results)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "chatgpt",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
133
docs/modules/models/text_embedding/examples/deepinfra.ipynb
Normal file
@@ -0,0 +1,133 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DeepInfra\n",
|
||||
"\n",
|
||||
"[DeepInfra](https://deepinfra.com/?utm_source=langchain) is a serverless inference as a service that provides access to a [variety of LLMs](https://deepinfra.com/models?utm_source=langchain) and [embeddings models](https://deepinfra.com/models?type=embeddings&utm_source=langchain). This notebook goes over how to use LangChain with DeepInfra for text embeddings."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# sign up for an account: https://deepinfra.com/login?utm_source=langchain\n",
|
||||
"\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"DEEPINFRA_API_TOKEN = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"DEEPINFRA_API_TOKEN\"] = DEEPINFRA_API_TOKEN"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings import DeepInfraEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = DeepInfraEmbeddings(\n",
|
||||
" model_id=\"sentence-transformers/clip-ViT-B-32\",\n",
|
||||
" query_instruction=\"\",\n",
|
||||
" embed_instruction=\"\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = [\"Dog is not a cat\",\n",
|
||||
" \"Beta is the second letter of Greek alphabet\"]\n",
|
||||
"document_result = embeddings.embed_documents(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What is the first letter of Greek alphabet\"\n",
|
||||
"query_result = embeddings.embed_query(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Cosine similarity between \"Dog is not a cat\" and query: 0.7489097144129355\n",
|
||||
"Cosine similarity between \"Beta is the second letter of Greek alphabet\" and query: 0.9519380640702013\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"query_numpy = np.array(query_result)\n",
|
||||
"for doc_res, doc in zip(document_result, docs):\n",
|
||||
" document_numpy = np.array(doc_res)\n",
|
||||
" similarity = np.dot(query_numpy, document_numpy) / (np.linalg.norm(query_numpy)*np.linalg.norm(document_numpy))\n",
|
||||
" print(f\"Cosine similarity between \\\"{doc}\\\" and query: {similarity}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
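The loop in the last cell is easy to factor into a reusable helper. A minimal sketch, assuming only numpy and the `query_result`/`document_result` values from the notebook; the `rank_by_similarity` name is ours, not part of LangChain:

import numpy as np

def rank_by_similarity(query_vec, doc_vecs, docs):
    # Rank documents by cosine similarity to the query, highest first.
    q = np.array(query_vec)
    q = q / np.linalg.norm(q)
    scored = []
    for vec, doc in zip(doc_vecs, docs):
        v = np.array(vec)
        scored.append((float(q @ (v / np.linalg.norm(v))), doc))
    return sorted(scored, reverse=True)

# rank_by_similarity(query_result, document_result, docs)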
144 docs/modules/models/text_embedding/examples/embaas.ipynb (new file)
@@ -0,0 +1,144 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Embaas\n",
    "\n",
    "[embaas](https://embaas.io) is a fully managed NLP API service that offers features like embedding generation, document text extraction, document-to-embedding conversion, and more. You can choose from a [variety of pre-trained models](https://embaas.io/docs/models/embeddings).\n",
    "\n",
    "In this tutorial, we will show you how to use the embaas Embeddings API to generate embeddings for a given text.\n",
    "\n",
    "### Prerequisites\n",
    "Create your free embaas account at [https://embaas.io/register](https://embaas.io/register) and generate an [API key](https://embaas.io/dashboard/api-keys)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Set API key\n",
    "embaas_api_key = \"YOUR_API_KEY\"\n",
    "# or set environment variable\n",
    "os.environ[\"EMBAAS_API_KEY\"] = \"YOUR_API_KEY\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.embeddings import EmbaasEmbeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "embeddings = EmbaasEmbeddings()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-06-10T11:17:55.940265Z",
     "start_time": "2023-06-10T11:17:55.938517Z"
    }
   },
   "outputs": [],
   "source": [
    "# Create embeddings for a single document\n",
    "doc_text = \"This is a test document.\"\n",
    "doc_text_embedding = embeddings.embed_query(doc_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print created embedding\n",
    "print(doc_text_embedding)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-06-10T11:19:25.237161Z",
     "start_time": "2023-06-10T11:19:25.235320Z"
    }
   },
   "outputs": [],
   "source": [
    "# Create embeddings for multiple documents\n",
    "doc_texts = [\"This is a test document.\", \"This is another test document.\"]\n",
    "doc_texts_embeddings = embeddings.embed_documents(doc_texts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print created embeddings\n",
    "for i, doc_text_embedding in enumerate(doc_texts_embeddings):\n",
    "    print(f\"Embedding for document {i + 1}: {doc_text_embedding}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2023-06-10T11:22:26.139769Z",
     "start_time": "2023-06-10T11:22:26.138357Z"
    }
   },
   "outputs": [],
   "source": [
    "# Using a different model and/or custom instruction\n",
    "embeddings = EmbaasEmbeddings(model=\"instructor-large\", instruction=\"Represent the Wikipedia document for retrieval\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For more detailed information about the embaas Embeddings API, please refer to [the official embaas API documentation](https://embaas.io/api-reference)."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
@@ -864,7 +864,11 @@ class AgentExecutor(Chain):
                raise e
            text = str(e)
            if isinstance(self.handle_parsing_errors, bool):
                observation = "Invalid or incomplete response"
                if e.send_to_llm:
                    observation = str(e.observation)
                    text = str(e.llm_output)
                else:
                    observation = "Invalid or incomplete response"
            elif isinstance(self.handle_parsing_errors, str):
                observation = self.handle_parsing_errors
            elif callable(self.handle_parsing_errors):
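The three branches above correspond to the three accepted types for `handle_parsing_errors` on `AgentExecutor`. A minimal sketch of wiring each variant through `initialize_agent`, assuming `tools` and `llm` are already defined; the error strings are illustrative:

from langchain.agents import AgentType, initialize_agent

# 1. bool: swallow the error and send a canned observation back to the LLM.
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                         handle_parsing_errors=True)

# 2. str: always use this fixed string as the observation.
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                         handle_parsing_errors="Check your output and try again.")

# 3. callable: build the observation from the exception itself.
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                         handle_parsing_errors=lambda e: f"Could not parse: {e}"[:200])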
@@ -1,19 +1,26 @@
"""Agent for working with pandas objects."""
from typing import Any, Dict, List, Optional, Tuple

from langchain.agents.agent import AgentExecutor
from langchain.agents.agent import AgentExecutor, BaseSingleActionAgent
from langchain.agents.agent_toolkits.pandas.prompt import (
    FUNCTIONS_WITH_DF,
    FUNCTIONS_WITH_MULTI_DF,
    MULTI_DF_PREFIX,
    MULTI_DF_PREFIX_FUNCTIONS,
    PREFIX,
    PREFIX_FUNCTIONS,
    SUFFIX_NO_DF,
    SUFFIX_WITH_DF,
    SUFFIX_WITH_MULTI_DF,
)
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.agents.types import AgentType
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.prompts.base import BasePromptTemplate
from langchain.schema import SystemMessage
from langchain.tools.python.tool import PythonAstREPLTool


@@ -137,9 +144,108 @@ def _get_prompt_and_tools(
    )


def _get_functions_single_prompt(
    df: Any,
    prefix: Optional[str] = None,
    suffix: Optional[str] = None,
    include_df_in_prompt: Optional[bool] = True,
) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]:
    if suffix is not None:
        suffix_to_use = suffix
        if include_df_in_prompt:
            suffix_to_use = suffix_to_use.format(df_head=str(df.head().to_markdown()))
    elif include_df_in_prompt:
        suffix_to_use = FUNCTIONS_WITH_DF.format(df_head=str(df.head().to_markdown()))
    else:
        suffix_to_use = ""

    if prefix is None:
        prefix = PREFIX_FUNCTIONS

    tools = [PythonAstREPLTool(locals={"df": df})]
    system_message = SystemMessage(content=prefix + suffix_to_use)
    prompt = OpenAIFunctionsAgent.create_prompt(system_message=system_message)
    return prompt, tools


def _get_functions_multi_prompt(
    dfs: Any,
    prefix: Optional[str] = None,
    suffix: Optional[str] = None,
    include_df_in_prompt: Optional[bool] = True,
) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]:
    if suffix is not None:
        suffix_to_use = suffix
        if include_df_in_prompt:
            dfs_head = "\n\n".join([d.head().to_markdown() for d in dfs])
            suffix_to_use = suffix_to_use.format(
                dfs_head=dfs_head,
            )
    elif include_df_in_prompt:
        dfs_head = "\n\n".join([d.head().to_markdown() for d in dfs])
        suffix_to_use = FUNCTIONS_WITH_MULTI_DF.format(
            dfs_head=dfs_head,
        )
    else:
        suffix_to_use = ""

    if prefix is None:
        prefix = MULTI_DF_PREFIX_FUNCTIONS
    prefix = prefix.format(num_dfs=str(len(dfs)))

    df_locals = {}
    for i, dataframe in enumerate(dfs):
        df_locals[f"df{i + 1}"] = dataframe
    tools = [PythonAstREPLTool(locals=df_locals)]
    system_message = SystemMessage(content=prefix + suffix_to_use)
    prompt = OpenAIFunctionsAgent.create_prompt(system_message=system_message)
    return prompt, tools


def _get_functions_prompt_and_tools(
    df: Any,
    prefix: Optional[str] = None,
    suffix: Optional[str] = None,
    input_variables: Optional[List[str]] = None,
    include_df_in_prompt: Optional[bool] = True,
) -> Tuple[BasePromptTemplate, List[PythonAstREPLTool]]:
    try:
        import pandas as pd
    except ImportError:
        raise ValueError(
            "pandas package not found, please install with `pip install pandas`"
        )
    if input_variables is not None:
        raise ValueError("`input_variables` is not supported at the moment.")

    if include_df_in_prompt is not None and suffix is not None:
        raise ValueError("If suffix is specified, include_df_in_prompt should not be.")

    if isinstance(df, list):
        for item in df:
            if not isinstance(item, pd.DataFrame):
                raise ValueError(f"Expected pandas object, got {type(df)}")
        return _get_functions_multi_prompt(
            df,
            prefix=prefix,
            suffix=suffix,
            include_df_in_prompt=include_df_in_prompt,
        )
    else:
        if not isinstance(df, pd.DataFrame):
            raise ValueError(f"Expected pandas object, got {type(df)}")
        return _get_functions_single_prompt(
            df,
            prefix=prefix,
            suffix=suffix,
            include_df_in_prompt=include_df_in_prompt,
        )


def create_pandas_dataframe_agent(
    llm: BaseLanguageModel,
    df: Any,
    agent_type: AgentType = AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    callback_manager: Optional[BaseCallbackManager] = None,
    prefix: Optional[str] = None,
    suffix: Optional[str] = None,
@@ -154,26 +260,44 @@ def create_pandas_dataframe_agent(
    **kwargs: Dict[str, Any],
) -> AgentExecutor:
    """Construct a pandas agent from an LLM and dataframe."""

    prompt, tools = _get_prompt_and_tools(
        df,
        prefix=prefix,
        suffix=suffix,
        input_variables=input_variables,
        include_df_in_prompt=include_df_in_prompt,
    )
    llm_chain = LLMChain(
        llm=llm,
        prompt=prompt,
        callback_manager=callback_manager,
    )
    tool_names = [tool.name for tool in tools]
    agent = ZeroShotAgent(
        llm_chain=llm_chain,
        allowed_tools=tool_names,
        callback_manager=callback_manager,
        **kwargs,
    )
    agent: BaseSingleActionAgent
    if agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION:
        prompt, tools = _get_prompt_and_tools(
            df,
            prefix=prefix,
            suffix=suffix,
            input_variables=input_variables,
            include_df_in_prompt=include_df_in_prompt,
        )
        llm_chain = LLMChain(
            llm=llm,
            prompt=prompt,
            callback_manager=callback_manager,
        )
        tool_names = [tool.name for tool in tools]
        agent = ZeroShotAgent(
            llm_chain=llm_chain,
            allowed_tools=tool_names,
            callback_manager=callback_manager,
            **kwargs,
        )
    elif agent_type == AgentType.OPENAI_FUNCTIONS:
        _prompt, tools = _get_functions_prompt_and_tools(
            df,
            prefix=prefix,
            suffix=suffix,
            input_variables=input_variables,
            include_df_in_prompt=include_df_in_prompt,
        )
        agent = OpenAIFunctionsAgent(
            llm=llm,
            prompt=_prompt,
            tools=tools,
            callback_manager=callback_manager,
            **kwargs,
        )
    else:
        raise ValueError(f"Agent type {agent_type} not supported at the moment.")
    return AgentExecutor.from_agent_and_tools(
        agent=agent,
        tools=tools,
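With these helpers in place, the pandas agent can run on OpenAI's function-calling models. A minimal usage sketch; the dataframe and model name are illustrative:

import pandas as pd
from langchain.agents import AgentType, create_pandas_dataframe_agent
from langchain.chat_models import ChatOpenAI

df = pd.DataFrame({"city": ["Berlin", "Paris"], "population_m": [3.6, 2.1]})
agent = create_pandas_dataframe_agent(
    ChatOpenAI(model="gpt-3.5-turbo-0613", temperature=0),
    df,
    agent_type=AgentType.OPENAI_FUNCTIONS,
)
# agent.run("Which city has the larger population?")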
@@ -28,3 +28,17 @@ This is the result of `print(df.head())` for each dataframe:
Begin!
Question: {input}
{agent_scratchpad}"""

PREFIX_FUNCTIONS = """
You are working with a pandas dataframe in Python. The name of the dataframe is `df`."""

MULTI_DF_PREFIX_FUNCTIONS = """
You are working with {num_dfs} pandas dataframes in Python named df1, df2, etc."""

FUNCTIONS_WITH_DF = """
This is the result of `print(df.head())`:
{df_head}"""

FUNCTIONS_WITH_MULTI_DF = """
This is the result of `print(df.head())` for each dataframe:
{dfs_head}"""
@@ -2,18 +2,22 @@

from typing import Any, Dict, Optional

from langchain.agents.agent import AgentExecutor
from langchain.agents.agent import AgentExecutor, BaseSingleActionAgent
from langchain.agents.agent_toolkits.python.prompt import PREFIX
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.agents.types import AgentType
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.schema import SystemMessage
from langchain.tools.python.tool import PythonREPLTool


def create_python_agent(
    llm: BaseLanguageModel,
    tool: PythonREPLTool,
    agent_type: AgentType = AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    callback_manager: Optional[BaseCallbackManager] = None,
    verbose: bool = False,
    prefix: str = PREFIX,
@@ -22,14 +26,29 @@ def create_python_agent(
) -> AgentExecutor:
    """Construct a Python agent from an LLM and tool."""
    tools = [tool]
    prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix)
    llm_chain = LLMChain(
        llm=llm,
        prompt=prompt,
        callback_manager=callback_manager,
    )
    tool_names = [tool.name for tool in tools]
    agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
    agent: BaseSingleActionAgent

    if agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION:
        prompt = ZeroShotAgent.create_prompt(tools, prefix=prefix)
        llm_chain = LLMChain(
            llm=llm,
            prompt=prompt,
            callback_manager=callback_manager,
        )
        tool_names = [tool.name for tool in tools]
        agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
    elif agent_type == AgentType.OPENAI_FUNCTIONS:
        system_message = SystemMessage(content=prefix)
        _prompt = OpenAIFunctionsAgent.create_prompt(system_message=system_message)
        agent = OpenAIFunctionsAgent(
            llm=llm,
            prompt=_prompt,
            tools=tools,
            callback_manager=callback_manager,
            **kwargs,
        )
    else:
        raise ValueError(f"Agent type {agent_type} not supported at the moment.")
    return AgentExecutor.from_agent_and_tools(
        agent=agent,
        tools=tools,
|
||||
"""SQL agent."""
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from langchain.agents.agent import AgentExecutor
|
||||
from langchain.agents.agent_toolkits.sql.prompt import SQL_PREFIX, SQL_SUFFIX
|
||||
from langchain.agents.agent import AgentExecutor, BaseSingleActionAgent
|
||||
from langchain.agents.agent_toolkits.sql.prompt import (
|
||||
SQL_FUNCTIONS_SUFFIX,
|
||||
SQL_PREFIX,
|
||||
SQL_SUFFIX,
|
||||
)
|
||||
from langchain.agents.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
|
||||
from langchain.agents.agent_types import AgentType
|
||||
from langchain.agents.mrkl.base import ZeroShotAgent
|
||||
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS
|
||||
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.base import BaseCallbackManager
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.prompts.chat import (
|
||||
ChatPromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
MessagesPlaceholder,
|
||||
)
|
||||
from langchain.schema import AIMessage, SystemMessage
|
||||
|
||||
|
||||
def create_sql_agent(
|
||||
llm: BaseLanguageModel,
|
||||
toolkit: SQLDatabaseToolkit,
|
||||
agent_type: AgentType = AgentType.ZERO_SHOT_REACT_DESCRIPTION,
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
prefix: str = SQL_PREFIX,
|
||||
suffix: str = SQL_SUFFIX,
|
||||
suffix: Optional[str] = None,
|
||||
format_instructions: str = FORMAT_INSTRUCTIONS,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
top_k: int = 10,
|
||||
@@ -30,20 +43,44 @@ def create_sql_agent(
|
||||
"""Construct a sql agent from an LLM and tools."""
|
||||
tools = toolkit.get_tools()
|
||||
prefix = prefix.format(dialect=toolkit.dialect, top_k=top_k)
|
||||
prompt = ZeroShotAgent.create_prompt(
|
||||
tools,
|
||||
prefix=prefix,
|
||||
suffix=suffix,
|
||||
format_instructions=format_instructions,
|
||||
input_variables=input_variables,
|
||||
)
|
||||
llm_chain = LLMChain(
|
||||
llm=llm,
|
||||
prompt=prompt,
|
||||
callback_manager=callback_manager,
|
||||
)
|
||||
tool_names = [tool.name for tool in tools]
|
||||
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
|
||||
agent: BaseSingleActionAgent
|
||||
|
||||
if agent_type == AgentType.ZERO_SHOT_REACT_DESCRIPTION:
|
||||
prompt = ZeroShotAgent.create_prompt(
|
||||
tools,
|
||||
prefix=prefix,
|
||||
suffix=suffix or SQL_SUFFIX,
|
||||
format_instructions=format_instructions,
|
||||
input_variables=input_variables,
|
||||
)
|
||||
llm_chain = LLMChain(
|
||||
llm=llm,
|
||||
prompt=prompt,
|
||||
callback_manager=callback_manager,
|
||||
)
|
||||
tool_names = [tool.name for tool in tools]
|
||||
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names, **kwargs)
|
||||
|
||||
elif agent_type == AgentType.OPENAI_FUNCTIONS:
|
||||
messages = [
|
||||
SystemMessage(content=prefix),
|
||||
HumanMessagePromptTemplate.from_template("{input}"),
|
||||
AIMessage(content=suffix or SQL_FUNCTIONS_SUFFIX),
|
||||
MessagesPlaceholder(variable_name="agent_scratchpad"),
|
||||
]
|
||||
input_variables = ["input", "agent_scratchpad"]
|
||||
_prompt = ChatPromptTemplate(input_variables=input_variables, messages=messages)
|
||||
|
||||
agent = OpenAIFunctionsAgent(
|
||||
llm=llm,
|
||||
prompt=_prompt,
|
||||
tools=tools,
|
||||
callback_manager=callback_manager,
|
||||
**kwargs,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Agent type {agent_type} not supported at the moment.")
|
||||
|
||||
return AgentExecutor.from_agent_and_tools(
|
||||
agent=agent,
|
||||
tools=tools,
|
||||
|
||||
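A minimal sketch of the function-calling SQL agent; the SQLite URI is illustrative:

from langchain.agents import AgentType, create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.chat_models import ChatOpenAI
from langchain.sql_database import SQLDatabase

db = SQLDatabase.from_uri("sqlite:///chinook.db")
llm = ChatOpenAI(model="gpt-3.5-turbo-0613", temperature=0)
agent = create_sql_agent(
    llm,
    toolkit=SQLDatabaseToolkit(db=db, llm=llm),
    agent_type=AgentType.OPENAI_FUNCTIONS,
)
# agent.run("How many employees are there?")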
@@ -19,3 +19,5 @@ SQL_SUFFIX = """Begin!
Question: {input}
Thought: I should look at the tables in the database to see what I can query. Then I should query the schema of the most relevant tables.
{agent_scratchpad}"""

SQL_FUNCTIONS_SUFFIX = """I should look at the tables in the database to see what I can query. Then I should query the schema of the most relevant tables."""
@@ -11,3 +11,4 @@ class AgentType(str, Enum):
    STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION = (
        "structured-chat-zero-shot-react-description"
    )
    OPENAI_FUNCTIONS = "openai-functions"
@@ -34,6 +34,7 @@ from langchain.tools.requests.tool import (
from langchain.tools.scenexplain.tool import SceneXplainTool
from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun
from langchain.tools.shell.tool import ShellTool
from langchain.tools.sleep.tool import SleepTool
from langchain.tools.wikipedia.tool import WikipediaQueryRun
from langchain.tools.wolfram_alpha.tool import WolframAlphaQueryRun
from langchain.tools.openweathermap.tool import OpenWeatherMapQueryRun
@@ -82,6 +83,10 @@ def _get_terminal() -> BaseTool:
    return ShellTool()


def _get_sleep() -> BaseTool:
    return SleepTool()


_BASE_TOOLS: Dict[str, Callable[[], BaseTool]] = {
    "python_repl": _get_python_repl,
    "requests": _get_tools_requests_get,  # preserved for backwards compatibility
@@ -91,6 +96,7 @@ _BASE_TOOLS: Dict[str, Callable[[], BaseTool]] = {
    "requests_put": _get_tools_requests_put,
    "requests_delete": _get_tools_requests_delete,
    "terminal": _get_terminal,
    "sleep": _get_sleep,
}
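With the registry entry in place, the sleep tool loads like any other built-in. A minimal sketch:

from langchain.agents import load_tools

# The sleep tool pauses execution for a requested duration, which is handy
# for rate-limited or time-dependent agent workflows.
tools = load_tools(["sleep"])
print(tools[0].name, "-", tools[0].description)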
251 langchain/agents/openai_functions_agent/base.py (new file)
@@ -0,0 +1,251 @@
"""Module implements an agent that uses OpenAI's function-enabled API."""
import json
from dataclasses import dataclass
from json import JSONDecodeError
from typing import Any, List, Optional, Sequence, Tuple, Union

from langchain.agents import BaseSingleActionAgent
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.base import BaseCallbackManager
from langchain.callbacks.manager import Callbacks
from langchain.chat_models.openai import ChatOpenAI
from langchain.prompts.base import BasePromptTemplate
from langchain.prompts.chat import (
    BaseMessagePromptTemplate,
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain.schema import (
    AgentAction,
    AgentFinish,
    AIMessage,
    BaseMessage,
    FunctionMessage,
    OutputParserException,
    SystemMessage,
)
from langchain.tools import BaseTool
from langchain.tools.convert_to_openai import format_tool_to_openai_function


@dataclass
class _FunctionsAgentAction(AgentAction):
    message_log: List[BaseMessage]


def _convert_agent_action_to_messages(
    agent_action: AgentAction, observation: str
) -> List[BaseMessage]:
    """Convert an agent action to a message.

    This code is used to reconstruct the original AI message from the agent action.

    Args:
        agent_action: Agent action to convert.

    Returns:
        AIMessage that corresponds to the original tool invocation.
    """
    if isinstance(agent_action, _FunctionsAgentAction):
        return agent_action.message_log + [
            _create_function_message(agent_action, observation)
        ]
    else:
        return [AIMessage(content=agent_action.log)]


def _create_function_message(
    agent_action: AgentAction, observation: str
) -> FunctionMessage:
    """Convert agent action and observation into a function message.
    Args:
        agent_action: the tool invocation request from the agent
        observation: the result of the tool invocation
    Returns:
        FunctionMessage that corresponds to the original tool invocation
    """
    if not isinstance(observation, str):
        try:
            content = json.dumps(observation)
        except Exception:
            content = str(observation)
    else:
        content = observation
    return FunctionMessage(
        name=agent_action.tool,
        content=content,
    )


def _format_intermediate_steps(
    intermediate_steps: List[Tuple[AgentAction, str]],
) -> List[BaseMessage]:
    """Format intermediate steps.
    Args:
        intermediate_steps: Steps the LLM has taken to date, along with observations
    Returns:
        list of messages to send to the LLM for the next prediction
    """
    messages = []

    for intermediate_step in intermediate_steps:
        agent_action, observation = intermediate_step
        messages.extend(_convert_agent_action_to_messages(agent_action, observation))

    return messages


def _parse_ai_message(message: BaseMessage) -> Union[AgentAction, AgentFinish]:
    """Parse an AI message."""
    if not isinstance(message, AIMessage):
        raise TypeError(f"Expected an AI message, got {type(message)}")

    function_call = message.additional_kwargs.get("function_call", {})

    if function_call:
        function_call = message.additional_kwargs["function_call"]
        function_name = function_call["name"]
        try:
            _tool_input = json.loads(function_call["arguments"])
        except JSONDecodeError:
            raise OutputParserException(
                f"Could not parse tool input: {function_call} because "
                f"the `arguments` is not valid JSON."
            )

        # HACK HACK HACK:
        # The code that encodes tool input into Open AI uses a special variable
        # name called `__arg1` to handle old style tools that do not expose a
        # schema and expect a single string argument as an input.
        # We unpack the argument here if it exists.
        # Open AI does not support passing in a JSON array as an argument.
        if "__arg1" in _tool_input:
            tool_input = _tool_input["__arg1"]
        else:
            tool_input = _tool_input

        content_msg = "responded: {content}\n" if message.content else "\n"

        return _FunctionsAgentAction(
            tool=function_name,
            tool_input=tool_input,
            log=f"\nInvoking: `{function_name}` with `{tool_input}`\n{content_msg}\n",
            message_log=[message],
        )

    return AgentFinish(return_values={"output": message.content}, log=message.content)


class OpenAIFunctionsAgent(BaseSingleActionAgent):
    """An agent driven by OpenAI's function-powered API."""

    llm: BaseLanguageModel
    tools: Sequence[BaseTool]
    prompt: BasePromptTemplate

    def get_allowed_tools(self) -> List[str]:
        """Get allowed tools."""
        return list([t.name for t in self.tools])

    @property
    def input_keys(self) -> List[str]:
        """Get input keys. Input refers to user input here."""
        return ["input"]

    @property
    def functions(self) -> List[dict]:
        return [dict(format_tool_to_openai_function(t)) for t in self.tools]

    def plan(
        self,
        intermediate_steps: List[Tuple[AgentAction, str]],
        callbacks: Callbacks = None,
        **kwargs: Any,
    ) -> Union[AgentAction, AgentFinish]:
        """Given input, decide what to do.
        Args:
            intermediate_steps: Steps the LLM has taken to date, along with observations
            **kwargs: User inputs.
        Returns:
            Action specifying what tool to use.
        """
        user_input = kwargs["input"]
        agent_scratchpad = _format_intermediate_steps(intermediate_steps)
        prompt = self.prompt.format_prompt(
            input=user_input, agent_scratchpad=agent_scratchpad
        )
        messages = prompt.to_messages()
        predicted_message = self.llm.predict_messages(
            messages, functions=self.functions, callbacks=callbacks
        )
        agent_decision = _parse_ai_message(predicted_message)
        return agent_decision

    async def aplan(
        self,
        intermediate_steps: List[Tuple[AgentAction, str]],
        callbacks: Callbacks = None,
        **kwargs: Any,
    ) -> Union[AgentAction, AgentFinish]:
        """Given input, decide what to do.
        Args:
            intermediate_steps: Steps the LLM has taken to date,
                along with observations
            **kwargs: User inputs.
        Returns:
            Action specifying what tool to use.
        """
        user_input = kwargs["input"]
        agent_scratchpad = _format_intermediate_steps(intermediate_steps)
        prompt = self.prompt.format_prompt(
            input=user_input, agent_scratchpad=agent_scratchpad
        )
        messages = prompt.to_messages()
        predicted_message = await self.llm.apredict_messages(
            messages, functions=self.functions
        )
        agent_decision = _parse_ai_message(predicted_message)
        return agent_decision

    @classmethod
    def create_prompt(
        cls,
        system_message: Optional[SystemMessage] = SystemMessage(
            content="You are a helpful AI assistant."
        ),
    ) -> BasePromptTemplate:
        messages: List[Union[BaseMessagePromptTemplate, BaseMessage]]
        if system_message:
            messages = [system_message]
        else:
            messages = []

        messages.extend(
            [
                HumanMessagePromptTemplate.from_template("{input}"),
                MessagesPlaceholder(variable_name="agent_scratchpad"),
            ]
        )
        input_variables = ["input", "agent_scratchpad"]
        return ChatPromptTemplate(input_variables=input_variables, messages=messages)

    @classmethod
    def from_llm_and_tools(
        cls,
        llm: BaseLanguageModel,
        tools: Sequence[BaseTool],
        callback_manager: Optional[BaseCallbackManager] = None,
        **kwargs: Any,
    ) -> BaseSingleActionAgent:
        """Construct an agent from an LLM and tools."""
        if not isinstance(llm, ChatOpenAI):
            raise ValueError("Only supported with OpenAI models.")
        prompt = cls.create_prompt()
        return cls(
            llm=llm,
            prompt=prompt,
            tools=tools,
            callback_manager=callback_manager,
            **kwargs,
        )
|
||||
from langchain.agents.conversational.base import ConversationalAgent
|
||||
from langchain.agents.conversational_chat.base import ConversationalChatAgent
|
||||
from langchain.agents.mrkl.base import ZeroShotAgent
|
||||
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
|
||||
from langchain.agents.react.base import ReActDocstoreAgent
|
||||
from langchain.agents.self_ask_with_search.base import SelfAskWithSearchAgent
|
||||
from langchain.agents.structured_chat.base import StructuredChatAgent
|
||||
@@ -18,4 +19,5 @@ AGENT_TO_CLASS: Dict[AgentType, Type[BaseSingleActionAgent]] = {
|
||||
AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION: ChatAgent,
|
||||
AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION: ConversationalChatAgent,
|
||||
AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION: StructuredChatAgent,
|
||||
AgentType.OPENAI_FUNCTIONS: OpenAIFunctionsAgent,
|
||||
}
|
||||
|
||||
@@ -2,11 +2,10 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import List, Optional, Sequence, Set

from pydantic import BaseModel
from typing import Any, List, Optional, Sequence, Set

from langchain.callbacks.manager import Callbacks
from langchain.load.serializable import Serializable
from langchain.schema import BaseMessage, LLMResult, PromptValue, get_buffer_string


@@ -29,13 +28,14 @@ def _get_token_ids_default_method(text: str) -> List[int]:
    return tokenizer.encode(text)


class BaseLanguageModel(BaseModel, ABC):
class BaseLanguageModel(Serializable, ABC):
    @abstractmethod
    def generate_prompt(
        self,
        prompts: List[PromptValue],
        stop: Optional[List[str]] = None,
        callbacks: Callbacks = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Take in a list of prompt values and return an LLMResult."""

@@ -45,26 +45,39 @@ class BaseLanguageModel(BaseModel, ABC):
        prompts: List[PromptValue],
        stop: Optional[List[str]] = None,
        callbacks: Callbacks = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Take in a list of prompt values and return an LLMResult."""

    @abstractmethod
    def predict(self, text: str, *, stop: Optional[Sequence[str]] = None) -> str:
    def predict(
        self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
    ) -> str:
        """Predict text from text."""

    @abstractmethod
    def predict_messages(
        self, messages: List[BaseMessage], *, stop: Optional[Sequence[str]] = None
        self,
        messages: List[BaseMessage],
        *,
        stop: Optional[Sequence[str]] = None,
        **kwargs: Any,
    ) -> BaseMessage:
        """Predict message from messages."""

    @abstractmethod
    async def apredict(self, text: str, *, stop: Optional[Sequence[str]] = None) -> str:
    async def apredict(
        self, text: str, *, stop: Optional[Sequence[str]] = None, **kwargs: Any
    ) -> str:
        """Predict text from text."""

    @abstractmethod
    async def apredict_messages(
        self, messages: List[BaseMessage], *, stop: Optional[Sequence[str]] = None
        self,
        messages: List[BaseMessage],
        *,
        stop: Optional[Sequence[str]] = None,
        **kwargs: Any,
    ) -> BaseMessage:
        """Predict message from messages."""
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
tags: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> Any:
|
||||
"""Run when LLM starts running."""
|
||||
@@ -135,6 +136,7 @@ class CallbackManagerMixin:
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
tags: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> Any:
|
||||
"""Run when a chat model starts running."""
|
||||
@@ -149,6 +151,7 @@ class CallbackManagerMixin:
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
tags: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> Any:
|
||||
"""Run when chain starts running."""
|
||||
@@ -160,6 +163,7 @@ class CallbackManagerMixin:
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
tags: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> Any:
|
||||
"""Run when tool starts running."""
|
||||
@@ -221,6 +225,7 @@ class AsyncCallbackHandler(BaseCallbackHandler):
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
tags: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Run when LLM starts running."""
|
||||
@@ -232,6 +237,7 @@ class AsyncCallbackHandler(BaseCallbackHandler):
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
tags: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> Any:
|
||||
"""Run when a chat model starts running."""
|
||||
@@ -276,6 +282,7 @@ class AsyncCallbackHandler(BaseCallbackHandler):
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
tags: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Run when chain starts running."""
|
||||
@@ -307,6 +314,7 @@ class AsyncCallbackHandler(BaseCallbackHandler):
|
||||
*,
|
||||
run_id: UUID,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
tags: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Run when tool starts running."""
|
||||
@@ -370,6 +378,9 @@ class BaseCallbackManager(CallbackManagerMixin):
|
||||
handlers: List[BaseCallbackHandler],
|
||||
inheritable_handlers: Optional[List[BaseCallbackHandler]] = None,
|
||||
parent_run_id: Optional[UUID] = None,
|
||||
*,
|
||||
tags: Optional[List[str]] = None,
|
||||
inheritable_tags: Optional[List[str]] = None,
|
||||
) -> None:
|
||||
"""Initialize callback manager."""
|
||||
self.handlers: List[BaseCallbackHandler] = handlers
|
||||
@@ -377,6 +388,8 @@ class BaseCallbackManager(CallbackManagerMixin):
|
||||
inheritable_handlers or []
|
||||
)
|
||||
self.parent_run_id: Optional[UUID] = parent_run_id
|
||||
self.tags = tags or []
|
||||
self.inheritable_tags = inheritable_tags or []
|
||||
|
||||
@property
|
||||
def is_async(self) -> bool:
|
||||
@@ -406,3 +419,16 @@ class BaseCallbackManager(CallbackManagerMixin):
|
||||
def set_handler(self, handler: BaseCallbackHandler, inherit: bool = True) -> None:
|
||||
"""Set handler as the only handler on the callback manager."""
|
||||
self.set_handlers([handler], inherit=inherit)
|
||||
|
||||
def add_tags(self, tags: List[str], inherit: bool = True) -> None:
|
||||
for tag in tags:
|
||||
if tag in self.tags:
|
||||
self.remove_tags([tag])
|
||||
self.tags.extend(tags)
|
||||
if inherit:
|
||||
self.inheritable_tags.extend(tags)
|
||||
|
||||
def remove_tags(self, tags: List[str]) -> None:
|
||||
for tag in tags:
|
||||
self.tags.remove(tag)
|
||||
self.inheritable_tags.remove(tag)
|
||||
|
||||
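Tags added with `inherit=True` propagate to every child run manager, so one label can mark an entire trace. A minimal sketch of the flow; the handler and tag names are illustrative:

from langchain.callbacks.base import BaseCallbackHandler
from langchain.callbacks.manager import CallbackManager

class TagPrinter(BaseCallbackHandler):
    def on_chain_start(self, serialized, inputs, *, run_id, parent_run_id=None, tags=None, **kwargs):
        # `tags` now arrives on every *_start callback.
        print("chain started with tags:", tags)

cm = CallbackManager(handlers=[TagPrinter()])
cm.add_tags(["experiment-1"])                      # inheritable by default
run_manager = cm.on_chain_start({"name": "demo"}, {"input": "hi"})
child = run_manager.get_child(tag="step-1")        # local tag on the child only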
@@ -133,15 +133,11 @@ def trace_as_chain_group(
    *,
    session_name: Optional[str] = None,
    example_id: Optional[Union[str, UUID]] = None,
    tenant_id: Optional[str] = None,
    session_extra: Optional[Dict[str, Any]] = None,
) -> Generator[CallbackManager, None, None]:
    """Get a callback manager for a chain group in a context manager."""
    cb = LangChainTracer(
        tenant_id=tenant_id,
        session_name=session_name,
        example_id=example_id,
        session_extra=session_extra,
    )
    cm = CallbackManager.configure(
        inheritable_callbacks=[cb],
@@ -158,15 +154,11 @@ async def atrace_as_chain_group(
    *,
    session_name: Optional[str] = None,
    example_id: Optional[Union[str, UUID]] = None,
    tenant_id: Optional[str] = None,
    session_extra: Optional[Dict[str, Any]] = None,
) -> AsyncGenerator[AsyncCallbackManager, None]:
    """Get a callback manager for a chain group in a context manager."""
    cb = LangChainTracer(
        tenant_id=tenant_id,
        session_name=session_name,
        example_id=example_id,
        session_extra=session_extra,
    )
    cm = AsyncCallbackManager.configure(
        inheritable_callbacks=[cb],
@@ -212,7 +204,7 @@ def _handle_event(
        except Exception as e:
            if handler.raise_error:
                raise e
            logging.warning(f"Error in {event_name} callback: {e}")
            logger.warning(f"Error in {event_name} callback: {e}")


async def _ahandle_event_for_handler(
@@ -246,6 +238,8 @@ async def _ahandle_event_for_handler(
            else:
                logger.warning(f"Error in {event_name} callback: {e}")
    except Exception as e:
        if handler.raise_error:
            raise e
        logger.warning(f"Error in {event_name} callback: {e}")


@@ -275,21 +269,32 @@ class BaseRunManager(RunManagerMixin):

    def __init__(
        self,
        *,
        run_id: UUID,
        handlers: List[BaseCallbackHandler],
        inheritable_handlers: List[BaseCallbackHandler],
        parent_run_id: Optional[UUID] = None,
        tags: List[str],
        inheritable_tags: List[str],
    ) -> None:
        """Initialize run manager."""
        self.run_id = run_id
        self.handlers = handlers
        self.inheritable_handlers = inheritable_handlers
        self.tags = tags
        self.inheritable_tags = inheritable_tags
        self.parent_run_id = parent_run_id

    @classmethod
    def get_noop_manager(cls: Type[BRM]) -> BRM:
        """Return a manager that doesn't perform any operations."""
        return cls(uuid4(), [], [])
        return cls(
            run_id=uuid4(),
            handlers=[],
            inheritable_handlers=[],
            tags=[],
            inheritable_tags=[],
        )


class RunManager(BaseRunManager):
@@ -431,10 +436,13 @@ class AsyncCallbackManagerForLLMRun(AsyncRunManager, LLMManagerMixin):
class CallbackManagerForChainRun(RunManager, ChainManagerMixin):
    """Callback manager for chain run."""

    def get_child(self) -> CallbackManager:
    def get_child(self, tag: Optional[str] = None) -> CallbackManager:
        """Get a child callback manager."""
        manager = CallbackManager([], parent_run_id=self.run_id)
        manager = CallbackManager(handlers=[], parent_run_id=self.run_id)
        manager.set_handlers(self.inheritable_handlers)
        manager.add_tags(self.inheritable_tags)
        if tag is not None:
            manager.add_tags([tag], False)
        return manager

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
@@ -493,10 +501,13 @@ class CallbackManagerForChainRun(RunManager, ChainManagerMixin):
class AsyncCallbackManagerForChainRun(AsyncRunManager, ChainManagerMixin):
    """Async callback manager for chain run."""

    def get_child(self) -> AsyncCallbackManager:
    def get_child(self, tag: Optional[str] = None) -> AsyncCallbackManager:
        """Get a child callback manager."""
        manager = AsyncCallbackManager([], parent_run_id=self.run_id)
        manager = AsyncCallbackManager(handlers=[], parent_run_id=self.run_id)
        manager.set_handlers(self.inheritable_handlers)
        manager.add_tags(self.inheritable_tags)
        if tag is not None:
            manager.add_tags([tag], False)
        return manager

    async def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
@@ -555,10 +566,13 @@ class AsyncCallbackManagerForChainRun(AsyncRunManager, ChainManagerMixin):
class CallbackManagerForToolRun(RunManager, ToolManagerMixin):
    """Callback manager for tool run."""

    def get_child(self) -> CallbackManager:
    def get_child(self, tag: Optional[str] = None) -> CallbackManager:
        """Get a child callback manager."""
        manager = CallbackManager([], parent_run_id=self.run_id)
        manager = CallbackManager(handlers=[], parent_run_id=self.run_id)
        manager.set_handlers(self.inheritable_handlers)
        manager.add_tags(self.inheritable_tags)
        if tag is not None:
            manager.add_tags([tag], False)
        return manager

    def on_tool_end(
@@ -597,10 +611,13 @@ class CallbackManagerForToolRun(RunManager, ToolManagerMixin):
class AsyncCallbackManagerForToolRun(AsyncRunManager, ToolManagerMixin):
    """Async callback manager for tool run."""

    def get_child(self) -> AsyncCallbackManager:
    def get_child(self, tag: Optional[str] = None) -> AsyncCallbackManager:
        """Get a child callback manager."""
        manager = AsyncCallbackManager([], parent_run_id=self.run_id)
        manager = AsyncCallbackManager(handlers=[], parent_run_id=self.run_id)
        manager.set_handlers(self.inheritable_handlers)
        manager.add_tags(self.inheritable_tags)
        if tag is not None:
            manager.add_tags([tag], False)
        return manager

    async def on_tool_end(self, output: str, **kwargs: Any) -> None:
@@ -654,11 +671,17 @@ class CallbackManager(BaseCallbackManager):
            prompts,
            run_id=run_id,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            **kwargs,
        )

        return CallbackManagerForLLMRun(
            run_id, self.handlers, self.inheritable_handlers, self.parent_run_id
            run_id=run_id,
            handlers=self.handlers,
            inheritable_handlers=self.inheritable_handlers,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            inheritable_tags=self.inheritable_tags,
        )

    def on_chat_model_start(
@@ -679,13 +702,19 @@ class CallbackManager(BaseCallbackManager):
            messages,
            run_id=run_id,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            **kwargs,
        )

        # Re-use the LLM Run Manager since the outputs are treated
        # the same for now
        return CallbackManagerForLLMRun(
            run_id, self.handlers, self.inheritable_handlers, self.parent_run_id
            run_id=run_id,
            handlers=self.handlers,
            inheritable_handlers=self.inheritable_handlers,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            inheritable_tags=self.inheritable_tags,
        )

    def on_chain_start(
@@ -707,11 +736,17 @@ class CallbackManager(BaseCallbackManager):
            inputs,
            run_id=run_id,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            **kwargs,
        )

        return CallbackManagerForChainRun(
            run_id, self.handlers, self.inheritable_handlers, self.parent_run_id
            run_id=run_id,
            handlers=self.handlers,
            inheritable_handlers=self.inheritable_handlers,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            inheritable_tags=self.inheritable_tags,
        )

    def on_tool_start(
@@ -734,11 +769,17 @@ class CallbackManager(BaseCallbackManager):
            input_str,
            run_id=run_id,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            **kwargs,
        )

        return CallbackManagerForToolRun(
            run_id, self.handlers, self.inheritable_handlers, self.parent_run_id
            run_id=run_id,
            handlers=self.handlers,
            inheritable_handlers=self.inheritable_handlers,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            inheritable_tags=self.inheritable_tags,
        )

    @classmethod
@@ -747,9 +788,18 @@ class CallbackManager(BaseCallbackManager):
        inheritable_callbacks: Callbacks = None,
        local_callbacks: Callbacks = None,
        verbose: bool = False,
        inheritable_tags: Optional[List[str]] = None,
        local_tags: Optional[List[str]] = None,
    ) -> CallbackManager:
        """Configure the callback manager."""
        return _configure(cls, inheritable_callbacks, local_callbacks, verbose)
        return _configure(
            cls,
            inheritable_callbacks,
            local_callbacks,
            verbose,
            inheritable_tags,
            local_tags,
        )


class AsyncCallbackManager(BaseCallbackManager):
@@ -779,11 +829,17 @@ class AsyncCallbackManager(BaseCallbackManager):
            prompts,
            run_id=run_id,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            **kwargs,
        )

        return AsyncCallbackManagerForLLMRun(
            run_id, self.handlers, self.inheritable_handlers, self.parent_run_id
            run_id=run_id,
            handlers=self.handlers,
            inheritable_handlers=self.inheritable_handlers,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            inheritable_tags=self.inheritable_tags,
        )

    async def on_chat_model_start(
@@ -804,11 +860,17 @@ class AsyncCallbackManager(BaseCallbackManager):
            messages,
            run_id=run_id,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            **kwargs,
        )

        return AsyncCallbackManagerForLLMRun(
            run_id, self.handlers, self.inheritable_handlers, self.parent_run_id
            run_id=run_id,
            handlers=self.handlers,
            inheritable_handlers=self.inheritable_handlers,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            inheritable_tags=self.inheritable_tags,
        )

    async def on_chain_start(
@@ -830,11 +892,17 @@ class AsyncCallbackManager(BaseCallbackManager):
            inputs,
            run_id=run_id,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            **kwargs,
        )

        return AsyncCallbackManagerForChainRun(
            run_id, self.handlers, self.inheritable_handlers, self.parent_run_id
            run_id=run_id,
            handlers=self.handlers,
            inheritable_handlers=self.inheritable_handlers,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            inheritable_tags=self.inheritable_tags,
        )

    async def on_tool_start(
@@ -857,11 +925,17 @@ class AsyncCallbackManager(BaseCallbackManager):
            input_str,
            run_id=run_id,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            **kwargs,
        )

        return AsyncCallbackManagerForToolRun(
            run_id, self.handlers, self.inheritable_handlers, self.parent_run_id
            run_id=run_id,
            handlers=self.handlers,
            inheritable_handlers=self.inheritable_handlers,
            parent_run_id=self.parent_run_id,
            tags=self.tags,
            inheritable_tags=self.inheritable_tags,
        )

    @classmethod
@@ -870,22 +944,43 @@ class AsyncCallbackManager(BaseCallbackManager):
        inheritable_callbacks: Callbacks = None,
        local_callbacks: Callbacks = None,
        verbose: bool = False,
        inheritable_tags: Optional[List[str]] = None,
        local_tags: Optional[List[str]] = None,
    ) -> AsyncCallbackManager:
        """Configure the callback manager."""
        return _configure(cls, inheritable_callbacks, local_callbacks, verbose)
        return _configure(
            cls,
            inheritable_callbacks,
            local_callbacks,
            verbose,
            inheritable_tags,
            local_tags,
        )


T = TypeVar("T", CallbackManager, AsyncCallbackManager)


def env_var_is_set(env_var: str) -> bool:
    """Check if an environment variable is set."""
    return env_var in os.environ and os.environ[env_var] not in (
        "",
        "0",
        "false",
        "False",
    )


def _configure(
    callback_manager_cls: Type[T],
    inheritable_callbacks: Callbacks = None,
    local_callbacks: Callbacks = None,
    verbose: bool = False,
    inheritable_tags: Optional[List[str]] = None,
    local_tags: Optional[List[str]] = None,
) -> T:
    """Configure the callback manager."""
    callback_manager = callback_manager_cls([])
    callback_manager = callback_manager_cls(handlers=[])
    if inheritable_callbacks or local_callbacks:
        if isinstance(inheritable_callbacks, list) or inheritable_callbacks is None:
            inheritable_callbacks_ = inheritable_callbacks or []
@@ -898,6 +993,8 @@ def _configure(
            handlers=inheritable_callbacks.handlers,
            inheritable_handlers=inheritable_callbacks.inheritable_handlers,
            parent_run_id=inheritable_callbacks.parent_run_id,
            tags=inheritable_callbacks.tags,
            inheritable_tags=inheritable_callbacks.inheritable_tags,
        )
        local_handlers_ = (
            local_callbacks
@@ -906,23 +1003,25 @@ def _configure(
        )
        for handler in local_handlers_:
            callback_manager.add_handler(handler, False)
    if inheritable_tags or local_tags:
        callback_manager.add_tags(inheritable_tags or [])
        callback_manager.add_tags(local_tags or [], False)

    tracer = tracing_callback_var.get()
    wandb_tracer = wandb_tracing_callback_var.get()
    open_ai = openai_callback_var.get()
    tracing_enabled_ = (
        os.environ.get("LANGCHAIN_TRACING") is not None
        env_var_is_set("LANGCHAIN_TRACING")
        or tracer is not None
        or os.environ.get("LANGCHAIN_HANDLER") is not None
        or env_var_is_set("LANGCHAIN_HANDLER")
    )
    wandb_tracing_enabled_ = (
        os.environ.get("LANGCHAIN_WANDB_TRACING") is not None
        or wandb_tracer is not None
        env_var_is_set("LANGCHAIN_WANDB_TRACING") or wandb_tracer is not None
    )

    tracer_v2 = tracing_v2_callback_var.get()
    tracing_v2_enabled_ = (
        os.environ.get("LANGCHAIN_TRACING_V2") is not None or tracer_v2 is not None
        env_var_is_set("LANGCHAIN_TRACING_V2") or tracer_v2 is not None
    )
    tracer_session = os.environ.get("LANGCHAIN_SESSION")
    debug = _get_debug()
|
||||
self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
|
||||
) -> None:
|
||||
"""Print out that we are entering a chain."""
|
||||
class_name = serialized["name"]
|
||||
class_name = serialized.get("name", "")
|
||||
print(f"\n\n\033[1m> Entering new {class_name} chain...\033[0m")
|
||||
|
||||
def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
|
||||
|
||||
@@ -85,6 +85,7 @@ class BaseTracer(BaseCallbackHandler, ABC):
        prompts: List[str],
        *,
        run_id: UUID,
        tags: Optional[List[str]] = None,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
@@ -93,7 +94,6 @@ class BaseTracer(BaseCallbackHandler, ABC):
        execution_order = self._get_execution_order(parent_run_id_)
        llm_run = Run(
            id=run_id,
            name=serialized.get("name"),
            parent_run_id=parent_run_id,
            serialized=serialized,
            inputs={"prompts": prompts},
@@ -102,6 +102,7 @@ class BaseTracer(BaseCallbackHandler, ABC):
            execution_order=execution_order,
            child_execution_order=execution_order,
            run_type=RunTypeEnum.llm,
            tags=tags or [],
        )
        self._start_trace(llm_run)
        self._on_llm_start(llm_run)
@@ -146,6 +147,7 @@ class BaseTracer(BaseCallbackHandler, ABC):
        inputs: Dict[str, Any],
        *,
        run_id: UUID,
        tags: Optional[List[str]] = None,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
@@ -154,7 +156,6 @@ class BaseTracer(BaseCallbackHandler, ABC):
        execution_order = self._get_execution_order(parent_run_id_)
        chain_run = Run(
            id=run_id,
            name=serialized.get("name"),
            parent_run_id=parent_run_id,
            serialized=serialized,
            inputs=inputs,
@@ -164,6 +165,7 @@ class BaseTracer(BaseCallbackHandler, ABC):
            child_execution_order=execution_order,
            child_runs=[],
            run_type=RunTypeEnum.chain,
            tags=tags or [],
        )
        self._start_trace(chain_run)
        self._on_chain_start(chain_run)
@@ -208,6 +210,7 @@ class BaseTracer(BaseCallbackHandler, ABC):
        input_str: str,
        *,
        run_id: UUID,
        tags: Optional[List[str]] = None,
        parent_run_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
@@ -216,7 +219,6 @@ class BaseTracer(BaseCallbackHandler, ABC):
        execution_order = self._get_execution_order(parent_run_id_)
        tool_run = Run(
            id=run_id,
            name=serialized.get("name"),
            parent_run_id=parent_run_id,
            serialized=serialized,
            inputs={"input": input_str},
@@ -226,6 +228,7 @@ class BaseTracer(BaseCallbackHandler, ABC):
            child_execution_order=execution_order,
            child_runs=[],
            run_type=RunTypeEnum.tool,
            tags=tags or [],
        )
        self._start_trace(tool_run)
        self._on_tool_start(tool_run)
```diff
@@ -8,59 +8,28 @@ from datetime import datetime
 from typing import Any, Dict, List, Optional, Union
 from uuid import UUID
 
-import requests
-from requests.exceptions import HTTPError
-from tenacity import (
-    before_sleep_log,
-    retry,
-    retry_if_exception_type,
-    stop_after_attempt,
-    wait_exponential,
-)
+from langchainplus_sdk import LangChainPlusClient
 
 from langchain.callbacks.tracers.base import BaseTracer
 from langchain.callbacks.tracers.schemas import (
     Run,
-    RunCreate,
     RunTypeEnum,
-    RunUpdate,
     TracerSession,
 )
+from langchain.env import get_runtime_environment
 from langchain.schema import BaseMessage, messages_to_dict
 
 logger = logging.getLogger(__name__)
+_LOGGED = set()
 
 
-def get_headers() -> Dict[str, Any]:
-    """Get the headers for the LangChain API."""
-    headers: Dict[str, Any] = {"Content-Type": "application/json"}
-    if os.getenv("LANGCHAIN_API_KEY"):
-        headers["x-api-key"] = os.getenv("LANGCHAIN_API_KEY")
-    return headers
-
-
-def get_endpoint() -> str:
-    return os.getenv("LANGCHAIN_ENDPOINT", "http://localhost:1984")
-
-
-class LangChainTracerAPIError(Exception):
-    """An error occurred while communicating with the LangChain API."""
-
-
-class LangChainTracerUserError(Exception):
-    """An error occurred while communicating with the LangChain API."""
-
-
-class LangChainTracerError(Exception):
-    """An error occurred while communicating with the LangChain API."""
-
-
-retry_decorator = retry(
-    stop=stop_after_attempt(3),
-    wait=wait_exponential(multiplier=1, min=4, max=10),
-    retry=retry_if_exception_type(LangChainTracerAPIError),
-    before_sleep=before_sleep_log(logger, logging.WARNING),
-)
+def log_error_once(method: str, exception: Exception) -> None:
+    """Log an error once."""
+    global _LOGGED
+    if (method, type(exception)) in _LOGGED:
+        return
+    _LOGGED.add((method, type(exception)))
+    logger.error(exception)
 
 
 class LangChainTracer(BaseTracer):
@@ -70,19 +39,19 @@ class LangChainTracer(BaseTracer):
         self,
         example_id: Optional[Union[UUID, str]] = None,
         session_name: Optional[str] = None,
+        client: Optional[LangChainPlusClient] = None,
         **kwargs: Any,
     ) -> None:
         """Initialize the LangChain tracer."""
         super().__init__(**kwargs)
         self.session: Optional[TracerSession] = None
-        self._endpoint = get_endpoint()
-        self._headers = get_headers()
         self.example_id = (
             UUID(example_id) if isinstance(example_id, str) else example_id
         )
         self.session_name = session_name or os.getenv("LANGCHAIN_SESSION", "default")
+        # set max_workers to 1 to process tasks in order
         self.executor = ThreadPoolExecutor(max_workers=1)
+        self.client = client or LangChainPlusClient()
 
     def on_chat_model_start(
         self,
@@ -90,6 +59,7 @@ class LangChainTracer(BaseTracer):
         messages: List[List[BaseMessage]],
         *,
         run_id: UUID,
+        tags: Optional[List[str]] = None,
         parent_run_id: Optional[UUID] = None,
         **kwargs: Any,
     ) -> None:
@@ -98,7 +68,6 @@ class LangChainTracer(BaseTracer):
         execution_order = self._get_execution_order(parent_run_id_)
         chat_model_run = Run(
             id=run_id,
-            name=serialized.get("name"),
             parent_run_id=parent_run_id,
             serialized=serialized,
             inputs={"messages": [messages_to_dict(batch) for batch in messages]},
@@ -107,6 +76,7 @@ class LangChainTracer(BaseTracer):
             execution_order=execution_order,
             child_execution_order=execution_order,
             run_type=RunTypeEnum.llm,
+            tags=tags,
         )
         self._start_trace(chat_model_run)
         self._on_chat_model_start(chat_model_run)
@@ -114,60 +84,29 @@ class LangChainTracer(BaseTracer):
     def _persist_run(self, run: Run) -> None:
         """The Langchain Tracer uses Post/Patch rather than persist."""
 
-    @retry_decorator
     def _persist_run_single(self, run: Run) -> None:
         """Persist a run."""
         if run.parent_run_id is None:
             run.reference_example_id = self.example_id
-        run_dict = run.dict()
-        del run_dict["child_runs"]
-        run_create = RunCreate(**run_dict, session_name=self.session_name)
-        response = None
+        run_dict = run.dict(exclude={"child_runs"})
+        extra = run_dict.get("extra", {})
+        extra["runtime"] = get_runtime_environment()
+        run_dict["extra"] = extra
         try:
-            # TODO: Add retries when async
-            response = requests.post(
-                f"{self._endpoint}/runs",
-                data=run_create.json(),
-                headers=self._headers,
-            )
-            response.raise_for_status()
-        except HTTPError as e:
-            if response is not None and response.status_code == 500:
-                raise LangChainTracerAPIError(
-                    f"Failed to upsert persist run to LangChain API. {e}"
-                )
-            else:
-                raise LangChainTracerUserError(
-                    f"Failed to persist run to LangChain API. {e}"
-                )
+            run = self.client.create_run(**run_dict, session_name=self.session_name)
         except Exception as e:
-            raise LangChainTracerError(
-                f"Failed to persist run to LangChain API. {e}"
-            ) from e
+            # Errors are swallowed by the thread executor so we need to log them here
+            log_error_once("post", e)
+            raise
 
-    @retry_decorator
     def _update_run_single(self, run: Run) -> None:
         """Update a run."""
-        run_update = RunUpdate(**run.dict())
-        response = None
         try:
-            response = requests.patch(
-                f"{self._endpoint}/runs/{run.id}",
-                data=run_update.json(),
-                headers=self._headers,
-            )
-            response.raise_for_status()
-        except HTTPError as e:
-            if response is not None and response.status_code == 500:
-                raise LangChainTracerAPIError(
-                    f"Failed to update run to LangChain API. {e}"
-                )
-            else:
-                raise LangChainTracerUserError(f"Failed to run to LangChain API. {e}")
+            self.client.update_run(run.id, **run.dict())
         except Exception as e:
-            raise LangChainTracerError(
-                f"Failed to update run to LangChain API. {e}"
-            ) from e
+            # Errors are swallowed by the thread executor so we need to log them here
+            log_error_once("patch", e)
+            raise
 
     def _on_llm_start(self, run: Run) -> None:
         """Persist an LLM run."""
```
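The direct `requests` calls, tenacity retries, and bespoke exception hierarchy are replaced by `LangChainPlusClient`, with failures surfaced through a log-once helper so the single-worker executor does not flood the log. The dedup pattern, restated as a standalone sketch:

```python
import logging
from typing import Set, Tuple, Type

logger = logging.getLogger(__name__)
_LOGGED: Set[Tuple[str, Type[BaseException]]] = set()


def log_error_once(method: str, exception: Exception) -> None:
    """Log each (method, exception type) pair only once."""
    key = (method, type(exception))
    if key in _LOGGED:
        return  # already reported this failure mode
    _LOGGED.add(key)
    logger.error(exception)
```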
```diff
@@ -2,12 +2,11 @@ from __future__ import annotations
 
 import logging
 import os
-from typing import Any, Optional, Union
+from typing import Any, Dict, Optional, Union
 
 import requests
 
 from langchain.callbacks.tracers.base import BaseTracer
-from langchain.callbacks.tracers.langchain import get_headers
 from langchain.callbacks.tracers.schemas import (
     ChainRun,
     LLMRun,
@@ -21,6 +20,14 @@ from langchain.schema import get_buffer_string
 from langchain.utils import raise_for_status_with_text
 
 
+def get_headers() -> Dict[str, Any]:
+    """Get the headers for the LangChain API."""
+    headers: Dict[str, Any] = {"Content-Type": "application/json"}
+    if os.getenv("LANGCHAIN_API_KEY"):
+        headers["x-api-key"] = os.getenv("LANGCHAIN_API_KEY")
+    return headers
+
+
 def _get_endpoint() -> str:
     return os.getenv("LANGCHAIN_ENDPOINT", "http://localhost:8000")
 
 
```
langchain/callbacks/tracers/run_collector.py (new file, +20)
```diff
@@ -0,0 +1,20 @@
+"""A tracer that collects all nested runs in a list."""
+from typing import Any, List
+
+from langchain.callbacks.tracers.base import BaseTracer
+from langchain.callbacks.tracers.schemas import Run
+
+
+class RunCollectorCallbackHandler(BaseTracer):
+    """A tracer that collects all nested runs in a list.
+
+    Useful for inspection and for evaluation."""
+
+    name = "run-collector_callback_handler"
+
+    def __init__(self, **kwargs: Any) -> None:
+        super().__init__(**kwargs)
+        self.traced_runs: List[Run] = []
+
+    def _persist_run(self, run: Run) -> None:
+        self.traced_runs.append(run)
```
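A hedged usage sketch for the new collector: pass it as a callback, run any chain, then inspect the captured `Run` objects (the chain invocation is illustrative and left commented out):

```python
from langchain.callbacks.tracers.run_collector import RunCollectorCallbackHandler

collector = RunCollectorCallbackHandler()
# chain.run("What is 2 + 2?", callbacks=[collector])  # any chain or LLM call
for run in collector.traced_runs:
    print(run.name, run.run_type, run.error)
```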
```diff
@@ -2,13 +2,13 @@
 from __future__ import annotations
 
 import datetime
-from enum import Enum
 from typing import Any, Dict, List, Optional
 from uuid import UUID
 
+from langchainplus_sdk.schemas import RunBase as BaseRunV2
+from langchainplus_sdk.schemas import RunTypeEnum
 from pydantic import BaseModel, Field, root_validator
 
-from langchain.env import get_runtime_environment
 from langchain.schema import LLMResult
 
 
@@ -88,66 +88,38 @@ class ToolRun(BaseRun):
 # Begin V2 API Schemas
 
 
-class RunTypeEnum(str, Enum):
-    """Enum for run types."""
+class Run(BaseRunV2):
+    """Run schema for the V2 API in the Tracer."""
 
-    tool = "tool"
-    chain = "chain"
-    llm = "llm"
-
-
-class RunBase(BaseModel):
-    """Base Run schema."""
-
-    id: Optional[UUID]
-    start_time: datetime.datetime = Field(default_factory=datetime.datetime.utcnow)
-    end_time: datetime.datetime = Field(default_factory=datetime.datetime.utcnow)
-    extra: Optional[Dict[str, Any]] = None
-    error: Optional[str]
-    execution_order: int
-    child_execution_order: Optional[int]
-    serialized: dict
-    inputs: dict
-    outputs: Optional[dict]
-    reference_example_id: Optional[UUID]
-    run_type: RunTypeEnum
-    parent_run_id: Optional[UUID]
-
-
-class Run(RunBase):
-    """Run schema when loading from the DB."""
-
     name: str
     child_execution_order: int
     child_runs: List[Run] = Field(default_factory=list)
+    tags: Optional[List[str]] = Field(default_factory=list)
 
     @root_validator(pre=True)
     def assign_name(cls, values: dict) -> dict:
         """Assign name to the run."""
-        if "name" not in values:
-            values["name"] = values["serialized"]["name"]
+        if values.get("name") is None:
+            if "name" in values["serialized"]:
+                values["name"] = values["serialized"]["name"]
+            elif "id" in values["serialized"]:
+                values["name"] = values["serialized"]["id"][-1]
         return values
 
 
-class RunCreate(RunBase):
-    name: str
-    session_name: Optional[str] = None
-
-    @root_validator(pre=True)
-    def add_runtime_env(cls, values: Dict[str, Any]) -> Dict[str, Any]:
-        """Add env info to the run."""
-        extra = values.get("extra", {})
-        extra["runtime"] = get_runtime_environment()
-        values["extra"] = extra
-        return values
-
-
-class RunUpdate(BaseModel):
-    end_time: Optional[datetime.datetime]
-    error: Optional[str]
-    outputs: Optional[dict]
-    parent_run_id: Optional[UUID]
-    reference_example_id: Optional[UUID]
-
-
 ChainRun.update_forward_refs()
 ToolRun.update_forward_refs()
 
+__all__ = [
+    "BaseRun",
+    "ChainRun",
+    "LLMRun",
+    "Run",
+    "RunTypeEnum",
+    "ToolRun",
+    "TracerSession",
+    "TracerSessionBase",
+    "TracerSessionV1",
+    "TracerSessionV1Base",
+    "TracerSessionV1Create",
+]
```
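The reworked `assign_name` validator matters for serializable chains: when `dumpd` produces a `serialized` dict with an `"id"` path instead of a `"name"` key, the run name now falls back to the last path segment. The fallback logic, restated as a plain function:

```python
def assign_name(values: dict) -> dict:
    """Mirror of the validator above: derive a run name from `serialized`."""
    if values.get("name") is None:
        if "name" in values["serialized"]:
            values["name"] = values["serialized"]["name"]
        elif "id" in values["serialized"]:
            values["name"] = values["serialized"]["id"][-1]
    return values


print(assign_name({"serialized": {"id": ["langchain", "chains", "llm", "LLMChain"]}})["name"])
# -> LLMChain
```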
```diff
@@ -8,7 +8,7 @@ from langchain.input import get_bolded_text, get_colored_text
 
 def try_json_stringify(obj: Any, fallback: str) -> str:
     try:
-        return json.dumps(obj, indent=2)
+        return json.dumps(obj, indent=2, ensure_ascii=False)
     except Exception:
         return fallback
 
```
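The `ensure_ascii=False` change keeps non-ASCII inputs readable in console traces instead of escaping them, for example:

```python
import json

print(json.dumps({"q": "héllo 世界"}, indent=2))
# "h\u00e9llo \u4e16\u754c"  <- escaped without ensure_ascii=False
print(json.dumps({"q": "héllo 世界"}, indent=2, ensure_ascii=False))
# "héllo 世界"
```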
```diff
@@ -60,10 +60,20 @@ def _convert_llm_run_to_wb_span(trace_tree: Any, run: Run) -> trace_tree.Span:
     return base_span
 
 
+def _serialize_inputs(run_inputs: dict) -> Union[dict, list]:
+    if "input_documents" in run_inputs:
+        docs = run_inputs["input_documents"]
+        return [doc.json() for doc in docs]
+    else:
+        return run_inputs
+
+
 def _convert_chain_run_to_wb_span(trace_tree: Any, run: Run) -> trace_tree.Span:
     base_span = _convert_run_to_wb_span(trace_tree, run)
 
-    base_span.results = [trace_tree.Result(inputs=run.inputs, outputs=run.outputs)]
+    base_span.results = [
+        trace_tree.Result(inputs=_serialize_inputs(run.inputs), outputs=run.outputs)
+    ]
     base_span.child_spans = [
         _convert_lc_run_to_wb_span(trace_tree, child_run)
         for child_run in run.child_runs
@@ -79,7 +89,9 @@ def _convert_chain_run_to_wb_span(trace_tree: Any, run: Run) -> trace_tree.Span:
 
 def _convert_tool_run_to_wb_span(trace_tree: Any, run: Run) -> trace_tree.Span:
     base_span = _convert_run_to_wb_span(trace_tree, run)
-    base_span.results = [trace_tree.Result(inputs=run.inputs, outputs=run.outputs)]
+    base_span.results = [
+        trace_tree.Result(inputs=_serialize_inputs(run.inputs), outputs=run.outputs)
+    ]
     base_span.child_spans = [
         _convert_lc_run_to_wb_span(trace_tree, child_run)
         for child_run in run.child_runs
```
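`_serialize_inputs` exists because combine-documents chains put `Document` objects in `run.inputs`, which the W&B trace tree cannot serialize directly. A small standalone demonstration of the conversion, mirroring the helper above:

```python
from langchain.schema import Document

run_inputs = {"input_documents": [Document(page_content="hello", metadata={"src": "a.txt"})]}
docs = run_inputs["input_documents"]
print([doc.json() for doc in docs])
# a list of JSON strings, e.g. '{"page_content": "hello", ...}'
```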
```diff
@@ -11,6 +11,7 @@ from langchain.chains.conversational_retrieval.base import (
 from langchain.chains.flare.base import FlareChain
 from langchain.chains.graph_qa.base import GraphQAChain
 from langchain.chains.graph_qa.cypher import GraphCypherQAChain
+from langchain.chains.graph_qa.nebulagraph import NebulaGraphQAChain
 from langchain.chains.hyde.base import HypotheticalDocumentEmbedder
 from langchain.chains.llm import LLMChain
 from langchain.chains.llm_bash.base import LLMBashChain
@@ -67,4 +68,5 @@ __all__ = [
     "ConversationalRetrievalChain",
     "OpenAPIEndpointChain",
     "FlareChain",
+    "NebulaGraphQAChain",
 ]
```
```diff
@@ -78,6 +78,7 @@ class APIChain(Chain):
             callbacks=_run_manager.get_child(),
         )
         _run_manager.on_text(api_url, color="green", end="\n", verbose=self.verbose)
+        api_url = api_url.strip()
         api_response = self.requests_wrapper.get(api_url)
         _run_manager.on_text(
             api_response, color="yellow", end="\n", verbose=self.verbose
@@ -106,6 +107,7 @@ class APIChain(Chain):
         await _run_manager.on_text(
             api_url, color="green", end="\n", verbose=self.verbose
         )
+        api_url = api_url.strip()
         api_response = await self.requests_wrapper.aget(api_url)
         await _run_manager.on_text(
             api_response, color="yellow", end="\n", verbose=self.verbose
```
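The new `strip()` guards against a common failure: the LLM emits the URL with a leading newline or trailing whitespace, and the raw string then breaks the HTTP request. For instance:

```python
llm_output = "\nhttps://api.open-meteo.com/v1/forecast?latitude=48.85&longitude=2.35 "
api_url = llm_output.strip()
print(repr(api_url))  # clean URL, safe to pass to the requests wrapper
```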
```diff
@@ -7,7 +7,7 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
 import yaml
-from pydantic import BaseModel, Field, root_validator, validator
+from pydantic import Field, root_validator, validator
 
 import langchain
 from langchain.callbacks.base import BaseCallbackManager
@@ -18,14 +18,16 @@ from langchain.callbacks.manager import (
     CallbackManagerForChainRun,
     Callbacks,
 )
-from langchain.schema import BaseMemory
+from langchain.load.dump import dumpd
+from langchain.load.serializable import Serializable
+from langchain.schema import RUN_KEY, BaseMemory, RunInfo
 
 
 def _get_verbosity() -> bool:
     return langchain.verbose
 
 
-class Chain(BaseModel, ABC):
+class Chain(Serializable, ABC):
     """Base interface that all chains should implement."""
 
     memory: Optional[BaseMemory] = None
@@ -34,6 +36,7 @@ class Chain(BaseModel, ABC):
     verbose: bool = Field(
         default_factory=_get_verbosity
     )  # Whether to print the response text
+    tags: Optional[List[str]] = None
 
     class Config:
         """Configuration for this pydantic object."""
@@ -108,6 +111,9 @@ class Chain(BaseModel, ABC):
         inputs: Union[Dict[str, Any], Any],
         return_only_outputs: bool = False,
         callbacks: Callbacks = None,
+        *,
+        tags: Optional[List[str]] = None,
+        include_run_info: bool = False,
     ) -> Dict[str, Any]:
         """Run the logic of this chain and add to output if desired.
 
@@ -118,15 +124,18 @@ class Chain(BaseModel, ABC):
             response. If True, only new keys generated by this chain will be
             returned. If False, both input keys and new keys generated by this
             chain will be returned. Defaults to False.
 
             callbacks: Callbacks to use for this chain run. If not provided, will
                 use the callbacks provided to the chain.
+            include_run_info: Whether to include run info in the response. Defaults
+                to False.
         """
         inputs = self.prep_inputs(inputs)
         callback_manager = CallbackManager.configure(
-            callbacks, self.callbacks, self.verbose
+            callbacks, self.callbacks, self.verbose, tags, self.tags
         )
         new_arg_supported = inspect.signature(self._call).parameters.get("run_manager")
         run_manager = callback_manager.on_chain_start(
-            {"name": self.__class__.__name__},
+            dumpd(self),
             inputs,
         )
         try:
@@ -139,13 +148,21 @@ class Chain(BaseModel, ABC):
             run_manager.on_chain_error(e)
             raise e
         run_manager.on_chain_end(outputs)
-        return self.prep_outputs(inputs, outputs, return_only_outputs)
+        final_outputs: Dict[str, Any] = self.prep_outputs(
+            inputs, outputs, return_only_outputs
+        )
+        if include_run_info:
+            final_outputs[RUN_KEY] = RunInfo(run_id=run_manager.run_id)
+        return final_outputs
 
     async def acall(
         self,
         inputs: Union[Dict[str, Any], Any],
         return_only_outputs: bool = False,
         callbacks: Callbacks = None,
+        *,
+        tags: Optional[List[str]] = None,
+        include_run_info: bool = False,
     ) -> Dict[str, Any]:
         """Run the logic of this chain and add to output if desired.
 
@@ -156,15 +173,18 @@ class Chain(BaseModel, ABC):
             response. If True, only new keys generated by this chain will be
             returned. If False, both input keys and new keys generated by this
             chain will be returned. Defaults to False.
 
             callbacks: Callbacks to use for this chain run. If not provided, will
                 use the callbacks provided to the chain.
+            include_run_info: Whether to include run info in the response. Defaults
+                to False.
         """
         inputs = self.prep_inputs(inputs)
         callback_manager = AsyncCallbackManager.configure(
-            callbacks, self.callbacks, self.verbose
+            callbacks, self.callbacks, self.verbose, tags, self.tags
         )
         new_arg_supported = inspect.signature(self._acall).parameters.get("run_manager")
         run_manager = await callback_manager.on_chain_start(
-            {"name": self.__class__.__name__},
+            dumpd(self),
             inputs,
         )
         try:
@@ -177,7 +197,12 @@ class Chain(BaseModel, ABC):
             await run_manager.on_chain_error(e)
             raise e
         await run_manager.on_chain_end(outputs)
-        return self.prep_outputs(inputs, outputs, return_only_outputs)
+        final_outputs: Dict[str, Any] = self.prep_outputs(
+            inputs, outputs, return_only_outputs
+        )
+        if include_run_info:
+            final_outputs[RUN_KEY] = RunInfo(run_id=run_manager.run_id)
+        return final_outputs
 
     def prep_outputs(
         self,
@@ -222,7 +247,13 @@ class Chain(BaseModel, ABC):
         """Call the chain on all inputs in the list."""
         return [self(inputs, callbacks=callbacks) for inputs in input_list]
 
-    def run(self, *args: Any, callbacks: Callbacks = None, **kwargs: Any) -> str:
+    def run(
+        self,
+        *args: Any,
+        callbacks: Callbacks = None,
+        tags: Optional[List[str]] = None,
+        **kwargs: Any,
+    ) -> str:
         """Run the chain as text in, text out or multiple variables, text out."""
         if len(self.output_keys) != 1:
             raise ValueError(
@@ -233,10 +264,10 @@ class Chain(BaseModel, ABC):
         if args and not kwargs:
             if len(args) != 1:
                 raise ValueError("`run` supports only one positional argument.")
-            return self(args[0], callbacks=callbacks)[self.output_keys[0]]
+            return self(args[0], callbacks=callbacks, tags=tags)[self.output_keys[0]]
 
         if kwargs and not args:
-            return self(kwargs, callbacks=callbacks)[self.output_keys[0]]
+            return self(kwargs, callbacks=callbacks, tags=tags)[self.output_keys[0]]
 
         if not kwargs and not args:
             raise ValueError(
@@ -249,7 +280,13 @@ class Chain(BaseModel, ABC):
             f" but not both. Got args: {args} and kwargs: {kwargs}."
         )
 
-    async def arun(self, *args: Any, callbacks: Callbacks = None, **kwargs: Any) -> str:
+    async def arun(
+        self,
+        *args: Any,
+        callbacks: Callbacks = None,
+        tags: Optional[List[str]] = None,
+        **kwargs: Any,
+    ) -> str:
         """Run the chain as text in, text out or multiple variables, text out."""
         if len(self.output_keys) != 1:
             raise ValueError(
@@ -260,10 +297,14 @@ class Chain(BaseModel, ABC):
         if args and not kwargs:
             if len(args) != 1:
                 raise ValueError("`run` supports only one positional argument.")
-            return (await self.acall(args[0], callbacks=callbacks))[self.output_keys[0]]
+            return (await self.acall(args[0], callbacks=callbacks, tags=tags))[
+                self.output_keys[0]
+            ]
 
         if kwargs and not args:
-            return (await self.acall(kwargs, callbacks=callbacks))[self.output_keys[0]]
+            return (await self.acall(kwargs, callbacks=callbacks, tags=tags))[
+                self.output_keys[0]
+            ]
 
         raise ValueError(
             f"`run` supported with either positional arguments or keyword arguments"
```
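A hedged usage sketch of the two new `__call__` arguments, using a fake LLM so it runs offline (the prompt and tag names are illustrative): `tags` labels the traced run, and `include_run_info=True` returns a `RunInfo` under `RUN_KEY`:

```python
from langchain import LLMChain, PromptTemplate
from langchain.llms.fake import FakeListLLM
from langchain.schema import RUN_KEY

llm = FakeListLLM(responses=["blue"])
prompt = PromptTemplate.from_template("Best color for {thing}?")
chain = LLMChain(llm=llm, prompt=prompt)

outputs = chain({"thing": "socks"}, tags=["experiment-1"], include_run_info=True)
print(outputs["text"])          # -> "blue"
print(outputs[RUN_KEY].run_id)  # UUID of this run, usable with the tracer APIs
```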
```diff
@@ -98,7 +98,7 @@ class ConstitutionalChain(Chain):
         _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
         response = self.chain.run(
             **inputs,
-            callbacks=_run_manager.get_child(),
+            callbacks=_run_manager.get_child("original"),
         )
         initial_response = response
         input_prompt = self.chain.prompt.format(**inputs)
@@ -116,7 +116,7 @@ class ConstitutionalChain(Chain):
                 input_prompt=input_prompt,
                 output_from_model=response,
                 critique_request=constitutional_principle.critique_request,
-                callbacks=_run_manager.get_child(),
+                callbacks=_run_manager.get_child("critique"),
             )
             critique = self._parse_critique(
                 output_string=raw_critique,
@@ -137,7 +137,7 @@ class ConstitutionalChain(Chain):
                 critique_request=constitutional_principle.critique_request,
                 critique=critique,
                 revision_request=constitutional_principle.revision_request,
-                callbacks=_run_manager.get_child(),
+                callbacks=_run_manager.get_child("revision"),
             ).strip()
             response = revision
             critiques_and_revisions.append((critique, revision))
```
```diff
@@ -12,6 +12,7 @@ from langchain.base_language import BaseLanguageModel
 from langchain.callbacks.manager import (
     AsyncCallbackManagerForChainRun,
     CallbackManagerForChainRun,
+    Callbacks,
 )
 from langchain.chains.base import Chain
 from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
@@ -204,6 +205,7 @@ class ConversationalRetrievalChain(BaseConversationalRetrievalChain):
         verbose: bool = False,
         condense_question_llm: Optional[BaseLanguageModel] = None,
         combine_docs_chain_kwargs: Optional[Dict] = None,
+        callbacks: Callbacks = None,
         **kwargs: Any,
     ) -> BaseConversationalRetrievalChain:
         """Load chain from LLM."""
@@ -212,17 +214,22 @@ class ConversationalRetrievalChain(BaseConversationalRetrievalChain):
             llm,
             chain_type=chain_type,
             verbose=verbose,
+            callbacks=callbacks,
             **combine_docs_chain_kwargs,
         )
 
         _llm = condense_question_llm or llm
         condense_question_chain = LLMChain(
-            llm=_llm, prompt=condense_question_prompt, verbose=verbose
+            llm=_llm,
+            prompt=condense_question_prompt,
+            verbose=verbose,
+            callbacks=callbacks,
         )
         return cls(
             retriever=retriever,
             combine_docs_chain=doc_chain,
             question_generator=condense_question_chain,
+            callbacks=callbacks,
             **kwargs,
         )
 
@@ -264,6 +271,7 @@ class ChatVectorDBChain(BaseConversationalRetrievalChain):
         condense_question_prompt: BasePromptTemplate = CONDENSE_QUESTION_PROMPT,
         chain_type: str = "stuff",
         combine_docs_chain_kwargs: Optional[Dict] = None,
+        callbacks: Callbacks = None,
         **kwargs: Any,
     ) -> BaseConversationalRetrievalChain:
         """Load chain from LLM."""
@@ -271,12 +279,16 @@ class ChatVectorDBChain(BaseConversationalRetrievalChain):
         doc_chain = load_qa_chain(
             llm,
             chain_type=chain_type,
+            callbacks=callbacks,
             **combine_docs_chain_kwargs,
         )
-        condense_question_chain = LLMChain(llm=llm, prompt=condense_question_prompt)
+        condense_question_chain = LLMChain(
+            llm=llm, prompt=condense_question_prompt, callbacks=callbacks
+        )
         return cls(
             vectorstore=vectorstore,
             combine_docs_chain=doc_chain,
             question_generator=condense_question_chain,
+            callbacks=callbacks,
             **kwargs,
         )
```
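With `callbacks` accepted by `from_llm` and forwarded to both sub-chains, a handler now observes the question-condensing step and the document-combining step as well. A hedged sketch (the `llm` and `vectorstore` objects are assumed to exist, so the call is left commented out):

```python
from langchain.callbacks import StdOutCallbackHandler
from langchain.chains import ConversationalRetrievalChain

# qa = ConversationalRetrievalChain.from_llm(
#     llm=llm,
#     retriever=vectorstore.as_retriever(),
#     callbacks=[StdOutCallbackHandler()],  # now reaches the inner chains too
# )
# qa({"question": "What is LangChain?", "chat_history": []})
```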
```diff
@@ -14,6 +14,8 @@ from langchain.chains.llm import LLMChain
 from langchain.graphs.neo4j_graph import Neo4jGraph
 from langchain.prompts.base import BasePromptTemplate
 
+INTERMEDIATE_STEPS_KEY = "intermediate_steps"
+
 
 def extract_cypher(text: str) -> str:
     # The pattern to find Cypher code enclosed in triple backticks
@@ -33,6 +35,12 @@ class GraphCypherQAChain(Chain):
     qa_chain: LLMChain
     input_key: str = "query"  #: :meta private:
     output_key: str = "result"  #: :meta private:
+    top_k: int = 10
+    """Number of results to return from the query"""
+    return_intermediate_steps: bool = False
+    """Whether or not to return the intermediate steps along with the final answer."""
+    return_direct: bool = False
+    """Whether or not to return the result of querying the graph directly."""
 
     @property
     def input_keys(self) -> List[str]:
@@ -74,12 +82,14 @@ class GraphCypherQAChain(Chain):
         self,
         inputs: Dict[str, Any],
         run_manager: Optional[CallbackManagerForChainRun] = None,
-    ) -> Dict[str, str]:
+    ) -> Dict[str, Any]:
         """Generate Cypher statement, use it to look up in db and answer question."""
         _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
         callbacks = _run_manager.get_child()
         question = inputs[self.input_key]
 
+        intermediate_steps: List = []
+
         generated_cypher = self.cypher_generation_chain.run(
             {"question": question, "schema": self.graph.get_schema}, callbacks=callbacks
         )
@@ -91,14 +101,30 @@ class GraphCypherQAChain(Chain):
         _run_manager.on_text(
             generated_cypher, color="green", end="\n", verbose=self.verbose
         )
-        context = self.graph.query(generated_cypher)
 
-        _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
-        _run_manager.on_text(
-            str(context), color="green", end="\n", verbose=self.verbose
-        )
-        result = self.qa_chain(
-            {"question": question, "context": context},
-            callbacks=callbacks,
-        )
-        return {self.output_key: result[self.qa_chain.output_key]}
+        intermediate_steps.append({"query": generated_cypher})
+
+        # Retrieve and limit the number of results
+        context = self.graph.query(generated_cypher)[: self.top_k]
+
+        if self.return_direct:
+            final_result = context
+        else:
+            _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
+            _run_manager.on_text(
+                str(context), color="green", end="\n", verbose=self.verbose
+            )
+
+            intermediate_steps.append({"context": context})
+
+            result = self.qa_chain(
+                {"question": question, "context": context},
+                callbacks=callbacks,
+            )
+            final_result = result[self.qa_chain.output_key]
+
+        chain_result: Dict[str, Any] = {self.output_key: final_result}
+        if self.return_intermediate_steps:
+            chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps
+
+        return chain_result
```
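A hedged sketch of the new `GraphCypherQAChain` knobs (the Neo4j connection and `llm` are assumed to exist, so the calls are left commented out):

```python
# from langchain.chains import GraphCypherQAChain
# from langchain.graphs import Neo4jGraph
#
# chain = GraphCypherQAChain.from_llm(
#     llm,
#     graph=Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="..."),
#     top_k=5,                         # cap results fed to the QA chain
#     return_intermediate_steps=True,  # adds {"query": ...} and {"context": ...}
#     return_direct=False,             # True returns the raw query results instead
# )
# result = chain("Who directed The Godfather II?")
# print(result["intermediate_steps"])
```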
langchain/chains/graph_qa/nebulagraph.py (new file, +91)
```diff
@@ -0,0 +1,91 @@
+"""Question answering over a graph."""
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional
+
+from pydantic import Field
+
+from langchain.base_language import BaseLanguageModel
+from langchain.callbacks.manager import CallbackManagerForChainRun
+from langchain.chains.base import Chain
+from langchain.chains.graph_qa.prompts import CYPHER_QA_PROMPT, NGQL_GENERATION_PROMPT
+from langchain.chains.llm import LLMChain
+from langchain.graphs.nebula_graph import NebulaGraph
+from langchain.prompts.base import BasePromptTemplate
+
+
+class NebulaGraphQAChain(Chain):
+    """Chain for question-answering against a graph by generating nGQL statements."""
+
+    graph: NebulaGraph = Field(exclude=True)
+    ngql_generation_chain: LLMChain
+    qa_chain: LLMChain
+    input_key: str = "query"  #: :meta private:
+    output_key: str = "result"  #: :meta private:
+
+    @property
+    def input_keys(self) -> List[str]:
+        """Return the input keys.
+
+        :meta private:
+        """
+        return [self.input_key]
+
+    @property
+    def output_keys(self) -> List[str]:
+        """Return the output keys.
+
+        :meta private:
+        """
+        _output_keys = [self.output_key]
+        return _output_keys
+
+    @classmethod
+    def from_llm(
+        cls,
+        llm: BaseLanguageModel,
+        *,
+        qa_prompt: BasePromptTemplate = CYPHER_QA_PROMPT,
+        ngql_prompt: BasePromptTemplate = NGQL_GENERATION_PROMPT,
+        **kwargs: Any,
+    ) -> NebulaGraphQAChain:
+        """Initialize from LLM."""
+        qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
+        ngql_generation_chain = LLMChain(llm=llm, prompt=ngql_prompt)
+
+        return cls(
+            qa_chain=qa_chain,
+            ngql_generation_chain=ngql_generation_chain,
+            **kwargs,
+        )
+
+    def _call(
+        self,
+        inputs: Dict[str, Any],
+        run_manager: Optional[CallbackManagerForChainRun] = None,
+    ) -> Dict[str, str]:
+        """Generate nGQL statement, use it to look up in db and answer question."""
+        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
+        callbacks = _run_manager.get_child()
+        question = inputs[self.input_key]
+
+        generated_ngql = self.ngql_generation_chain.run(
+            {"question": question, "schema": self.graph.get_schema}, callbacks=callbacks
+        )
+
+        _run_manager.on_text("Generated nGQL:", end="\n", verbose=self.verbose)
+        _run_manager.on_text(
+            generated_ngql, color="green", end="\n", verbose=self.verbose
+        )
+        context = self.graph.query(generated_ngql)
+
+        _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose)
+        _run_manager.on_text(
+            str(context), color="green", end="\n", verbose=self.verbose
+        )
+
+        result = self.qa_chain(
+            {"question": question, "context": context},
+            callbacks=callbacks,
+        )
+        return {self.output_key: result[self.qa_chain.output_key]}
```
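A hedged usage sketch for the new chain (the NebulaGraph connection parameters and `llm` are assumptions, so the calls are left commented out; the chain imports `langchain.graphs.nebula_graph` as shown above):

```python
# from langchain.chains import NebulaGraphQAChain
# from langchain.graphs import NebulaGraph
#
# graph = NebulaGraph(space="basketballplayer")  # connection params are illustrative
# chain = NebulaGraphQAChain.from_llm(llm, graph=graph)
# print(chain.run("Who played for the Spurs?"))
```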
````diff
@@ -49,6 +49,29 @@ CYPHER_GENERATION_PROMPT = PromptTemplate(
     input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
 )
 
+NEBULAGRAPH_EXTRA_INSTRUCTIONS = """
+Instructions:
+
+First, generate cypher then convert it to NebulaGraph Cypher dialect(rather than standard):
+1. it requires explicit label specification when referring to node properties: v.`Foo`.name
+2. it uses double equals sign for comparison: `==` rather than `=`
+For instance:
+```diff
+< MATCH (p:person)-[:directed]->(m:movie) WHERE m.name = 'The Godfather II'
+< RETURN p.name;
+---
+> MATCH (p:`person`)-[:directed]->(m:`movie`) WHERE m.`movie`.`name` == 'The Godfather II'
+> RETURN p.`person`.`name`;
+```\n"""
+
+NGQL_GENERATION_TEMPLATE = CYPHER_GENERATION_TEMPLATE.replace(
+    "Generate Cypher", "Generate NebulaGraph Cypher"
+).replace("Instructions:", NEBULAGRAPH_EXTRA_INSTRUCTIONS)
+
+NGQL_GENERATION_PROMPT = PromptTemplate(
+    input_variables=["schema", "question"], template=NGQL_GENERATION_TEMPLATE
+)
+
 CYPHER_QA_TEMPLATE = """You are an assistant that helps to form nice and human understandable answers.
 The information part contains the provided information that you must use to construct an answer.
 The provided information is authorative, you must never doubt it or try to use your internal knowledge to correct it.
````
```diff
@@ -15,6 +15,7 @@ from langchain.callbacks.manager import (
 )
 from langchain.chains.base import Chain
 from langchain.input import get_colored_text
+from langchain.load.dump import dumpd
 from langchain.prompts.base import BasePromptTemplate
 from langchain.prompts.prompt import PromptTemplate
 from langchain.schema import LLMResult, PromptValue
@@ -34,6 +35,10 @@ class LLMChain(Chain):
             llm = LLMChain(llm=OpenAI(), prompt=prompt)
     """
 
+    @property
+    def lc_serializable(self) -> bool:
+        return True
+
     prompt: BasePromptTemplate
     """Prompt object to use."""
     llm: BaseLanguageModel
@@ -147,7 +152,7 @@ class LLMChain(Chain):
             callbacks, self.callbacks, self.verbose
         )
         run_manager = callback_manager.on_chain_start(
-            {"name": self.__class__.__name__},
+            dumpd(self),
             {"input_list": input_list},
         )
         try:
@@ -167,7 +172,7 @@ class LLMChain(Chain):
             callbacks, self.callbacks, self.verbose
         )
         run_manager = await callback_manager.on_chain_start(
-            {"name": self.__class__.__name__},
+            dumpd(self),
            {"input_list": input_list},
         )
         try:
@@ -278,7 +283,7 @@ class LLMChain(Chain):
         return "llm_chain"
 
     @classmethod
-    def from_string(cls, llm: BaseLanguageModel, template: str) -> Chain:
+    def from_string(cls, llm: BaseLanguageModel, template: str) -> LLMChain:
         """Create LLMChain from LLM and template."""
         prompt_template = PromptTemplate.from_template(template)
         return cls(llm=llm, prompt=prompt_template)
```
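With `lc_serializable` returning `True`, an `LLMChain` can be dumped to the JSON form that `dumpd(self)` now reports to `on_chain_start`. A hedged sketch (the chain construction is assumed, so it is left commented out):

```python
from langchain.load.dump import dumps

# chain = LLMChain(llm=OpenAI(), prompt=prompt)
# print(dumps(chain, pretty=True))
# -> JSON whose "id" path ends in ["langchain", "chains", "llm", "LLMChain"],
#    which is also what the Run.assign_name fallback above picks up.
```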
```diff
@@ -20,7 +20,7 @@ from langchain.chains.llm_requests import LLMRequestsChain
 from langchain.chains.pal.base import PALChain
 from langchain.chains.qa_with_sources.base import QAWithSourcesChain
 from langchain.chains.qa_with_sources.vector_db import VectorDBQAWithSourcesChain
-from langchain.chains.retrieval_qa.base import VectorDBQA
+from langchain.chains.retrieval_qa.base import RetrievalQA, VectorDBQA
 from langchain.chains.sql_database.base import SQLDatabaseChain
 from langchain.llms.loading import load_llm, load_llm_from_config
 from langchain.prompts.loading import load_prompt, load_prompt_from_config
@@ -372,6 +372,28 @@ def _load_vector_db_qa_with_sources_chain(
     )
 
 
+def _load_retrieval_qa(config: dict, **kwargs: Any) -> RetrievalQA:
+    if "retriever" in kwargs:
+        retriever = kwargs.pop("retriever")
+    else:
+        raise ValueError("`retriever` must be present.")
+    if "combine_documents_chain" in config:
+        combine_documents_chain_config = config.pop("combine_documents_chain")
+        combine_documents_chain = load_chain_from_config(combine_documents_chain_config)
+    elif "combine_documents_chain_path" in config:
+        combine_documents_chain = load_chain(config.pop("combine_documents_chain_path"))
+    else:
+        raise ValueError(
+            "One of `combine_documents_chain` or "
+            "`combine_documents_chain_path` must be present."
+        )
+    return RetrievalQA(
+        combine_documents_chain=combine_documents_chain,
+        retriever=retriever,
+        **config,
+    )
+
+
 def _load_vector_db_qa(config: dict, **kwargs: Any) -> VectorDBQA:
     if "vectorstore" in kwargs:
         vectorstore = kwargs.pop("vectorstore")
@@ -459,6 +481,7 @@ type_to_loader_dict = {
     "sql_database_chain": _load_sql_database_chain,
     "vector_db_qa_with_sources_chain": _load_vector_db_qa_with_sources_chain,
     "vector_db_qa": _load_vector_db_qa,
+    "retrieval_qa": _load_retrieval_qa,
 }
```
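The new `"retrieval_qa"` loader entry makes saved `RetrievalQA` chains round-trippable; since the retriever itself is not serialized, it must be re-supplied at load time via kwargs. A hedged sketch (paths and objects are illustrative):

```python
from langchain.chains import load_chain

# qa_chain.save("retrieval_qa_chain.yaml")
# restored = load_chain("retrieval_qa_chain.yaml", retriever=my_retriever)
```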
```diff
@@ -149,7 +149,7 @@ def load_qa_with_sources_chain(
     Args:
         llm: Language Model to use in the chain.
         chain_type: Type of document combining chain to use. Should be one of "stuff",
-            "map_reduce", and "refine".
+            "map_reduce", "refine" and "map_rerank".
         verbose: Whether chains should be run in verbose mode or not. Note that this
             applies to all chains that make up the final chain.
```
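The corrected docstring reflects an option that already exists; a hedged sketch of selecting it (the `llm` object is assumed, so the call is left commented out):

```python
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain

# chain = load_qa_with_sources_chain(llm, chain_type="map_rerank")
```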
Some files were not shown because too many files have changed in this diff.