mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-10 11:10:23 +00:00
Compare commits
227 Commits
eugene/upd
...
v0.1.3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ba326b98d0 | ||
|
|
54149292f8 | ||
|
|
ef6a335570 | ||
|
|
1f4ac62dee | ||
|
|
39d1cbfecf | ||
|
|
d0a8082188 | ||
|
|
5de59f9236 | ||
|
|
226fe645f1 | ||
|
|
4b7969efc5 | ||
|
|
fb41b68ea1 | ||
|
|
3b0226b2c6 | ||
|
|
c98994c3c9 | ||
|
|
c88750d54b | ||
|
|
e5672bc944 | ||
|
|
404abf139a | ||
|
|
a500527030 | ||
|
|
b9e7f6f38a | ||
|
|
d6275e47f2 | ||
|
|
5694728816 | ||
|
|
a950fa0487 | ||
|
|
1011b681dc | ||
|
|
b26a22f307 | ||
|
|
8da34118bc | ||
|
|
d1b4ead87c | ||
|
|
fbe592a5ce | ||
|
|
d511366dd3 | ||
|
|
774e543e1f | ||
|
|
b9f5104e6c | ||
|
|
35ec0bbd3b | ||
|
|
2ac3a82d85 | ||
|
|
cfe95ab085 | ||
|
|
dd5b8107b1 | ||
|
|
873de14cd8 | ||
|
|
6b2a57161a | ||
|
|
aad2aa7188 | ||
|
|
1b9001db47 | ||
|
|
01c2f27ffa | ||
|
|
369e90d427 | ||
|
|
a1c0cf21c9 | ||
|
|
7ecd2f22ac | ||
|
|
8569b8f680 | ||
|
|
fc196cab12 | ||
|
|
eac91b60c9 | ||
|
|
85e8423312 | ||
|
|
de209af533 | ||
|
|
54f90fc6bc | ||
|
|
1445ac95e8 | ||
|
|
af9f1738ca | ||
|
|
8779013847 | ||
|
|
9cf0f5eb78 | ||
|
|
1dc6c1ce06 | ||
|
|
05162928c0 | ||
|
|
acc14802d1 | ||
|
|
e1c59779ad | ||
|
|
971a68d04f | ||
|
|
f9be877ed7 | ||
|
|
076dbb1a8f | ||
|
|
c6bd7778b0 | ||
|
|
89372fca22 | ||
|
|
5396604ef4 | ||
|
|
c2a614eddc | ||
|
|
ef75bb63ce | ||
|
|
3d23a5eb36 | ||
|
|
ffae98d371 | ||
|
|
1e29b676d5 | ||
|
|
4ef0ed4ddc | ||
|
|
91230ef5d1 | ||
|
|
39b3c6d94c | ||
|
|
9b0a531aa2 | ||
|
|
63e2acc964 | ||
|
|
881d1c3ec5 | ||
|
|
e3828bee43 | ||
|
|
2454fefc53 | ||
|
|
84bf5787a7 | ||
|
|
6f7a414955 | ||
|
|
cc2e30fa13 | ||
|
|
3b649f4331 | ||
|
|
c0d453d8ac | ||
|
|
021b0484a8 | ||
|
|
f63906a9c2 | ||
|
|
3ccbe11363 | ||
|
|
fc84083ce5 | ||
|
|
9d32af72ce | ||
|
|
3613d8a2ad | ||
|
|
0f99646ca6 | ||
|
|
177af65dc4 | ||
|
|
f175bf7d7b | ||
|
|
e5878c467a | ||
|
|
2f348c695a | ||
|
|
50959abf0c | ||
|
|
b9495da92d | ||
|
|
eec3347939 | ||
|
|
92bc80483a | ||
|
|
0e76d84137 | ||
|
|
aa35b43bcd | ||
|
|
f2b2d59e82 | ||
|
|
f60f59d69f | ||
|
|
6bc6d64a12 | ||
|
|
65b231d40b | ||
|
|
ed118950fe | ||
|
|
aa2e642ce3 | ||
|
|
6b9e3ed9e9 | ||
|
|
ecd4f0a7ec | ||
|
|
27ad65cc68 | ||
|
|
7d444724d7 | ||
|
|
5d8c147332 | ||
|
|
3502a407d9 | ||
|
|
ca014d5b04 | ||
|
|
1e80113ac9 | ||
|
|
27ed2673da | ||
|
|
f238217cea | ||
|
|
2af813c7eb | ||
|
|
679a3ae933 | ||
|
|
7ad9eba8f4 | ||
|
|
58f0ba306b | ||
|
|
ec9642d667 | ||
|
|
5c73fd5bba | ||
|
|
fb940d11df | ||
|
|
1fa056c324 | ||
|
|
11327e6b64 | ||
|
|
2709d3e5f2 | ||
|
|
c5f6b828ad | ||
|
|
e7ddec1f2c | ||
|
|
49aff3ea5b | ||
|
|
60b1bd02d7 | ||
|
|
9e9ad9b0e9 | ||
|
|
d350be959d | ||
|
|
d0e101e4e0 | ||
|
|
bc0cb1148a | ||
|
|
8597484195 | ||
|
|
9c2f1f07a0 | ||
|
|
f406dc3872 | ||
|
|
da96c511d1 | ||
|
|
b0c3e3db2b | ||
|
|
d91126fc64 | ||
|
|
3606c5d5e9 | ||
|
|
a35e5f19a8 | ||
|
|
06fe2f4fb0 | ||
|
|
ce10fe0c2f | ||
|
|
e5cf1e2414 | ||
|
|
f3601b0aaf | ||
|
|
c323742f4f | ||
|
|
f974eb5b8b | ||
|
|
4df14a61fc | ||
|
|
8840a8cc95 | ||
|
|
3d34347a85 | ||
|
|
62a2e9ee19 | ||
|
|
6b6269441c | ||
|
|
5f057f24ac | ||
|
|
076593382a | ||
|
|
c5656a4905 | ||
|
|
770f57196e | ||
|
|
52114bdfac | ||
|
|
ca288d8f2c | ||
|
|
476fb328ee | ||
|
|
697a6f2c80 | ||
|
|
061e63eef2 | ||
|
|
d196646811 | ||
|
|
5cf06db3b3 | ||
|
|
d334efc848 | ||
|
|
251afda549 | ||
|
|
7220124368 | ||
|
|
ee378a0f40 | ||
|
|
ddf4e7c633 | ||
|
|
ce21392a21 | ||
|
|
9e779ca846 | ||
|
|
daa9ccae52 | ||
|
|
7c57cfd8f0 | ||
|
|
beec7259c8 | ||
|
|
b11fd3bedc | ||
|
|
7306032dcf | ||
|
|
21e0df937f | ||
|
|
15c2b4a47e | ||
|
|
fb676d8a9b | ||
|
|
6137c7608d | ||
|
|
e80aab2275 | ||
|
|
ce7723c1e5 | ||
|
|
8799b028a6 | ||
|
|
fb7e66b809 | ||
|
|
c0773ab329 | ||
|
|
14244bd7e5 | ||
|
|
768e5e33bc | ||
|
|
86321a949f | ||
|
|
60d6a416e6 | ||
|
|
f7706637a8 | ||
|
|
0fa06732b7 | ||
|
|
7b084b4cc7 | ||
|
|
bccb07f93e | ||
|
|
3f75fd41cc | ||
|
|
eb6e385dc5 | ||
|
|
74bac7bda1 | ||
|
|
845e407e08 | ||
|
|
a74f3a4979 | ||
|
|
efe6cfafe2 | ||
|
|
1afac77439 | ||
|
|
9fb09c1c30 | ||
|
|
eb76f9c9fe | ||
|
|
bc60203d0f | ||
|
|
c697c89ca4 | ||
|
|
69533c8628 | ||
|
|
6a48ea43ec | ||
|
|
6a2889a4ec | ||
|
|
95020637bc | ||
|
|
d5808f786c | ||
|
|
13b90232c1 | ||
|
|
9b3962fc25 | ||
|
|
e26e1f8b37 | ||
|
|
eb9b334a6b | ||
|
|
560bb49c99 | ||
|
|
81d1ba05dc | ||
|
|
74d9fc2f9e | ||
|
|
bdd90ae2ee | ||
|
|
5efec068c9 | ||
|
|
ec4dab0449 | ||
|
|
f454e95461 | ||
|
|
782dd44be9 | ||
|
|
112208baa5 | ||
|
|
129552e3d6 | ||
|
|
438beb6c94 | ||
|
|
ebb6ad4f7a | ||
|
|
437cebc955 | ||
|
|
80d41a8da3 | ||
|
|
623f87c888 | ||
|
|
44101b6b0e | ||
|
|
46b7a8d913 | ||
|
|
c11dbefedc | ||
|
|
c56060bb7d |
7
.github/ISSUE_TEMPLATE/config.yml
vendored
7
.github/ISSUE_TEMPLATE/config.yml
vendored
@@ -1,9 +1,12 @@
|
||||
blank_issues_enabled: true
|
||||
blank_issues_enabled: false
|
||||
version: 2.1
|
||||
contact_links:
|
||||
- name: 🤔 Question or Problem
|
||||
about: Ask a question or ask about a problem in GitHub Discussions.
|
||||
url: https://github.com/langchain-ai/langchain/discussions
|
||||
url: https://www.github.com/langchain-ai/langchain/discussions/categories/q-a
|
||||
- name: Discord
|
||||
url: https://discord.gg/6adMQxSpJS
|
||||
about: General community discussions
|
||||
- name: Show and tell
|
||||
about: Show what you built with LangChain
|
||||
url: https://www.github.com/langchain-ai/langchain/discussions/categories/show-and-tell
|
||||
|
||||
25
.github/ISSUE_TEMPLATE/privileged.yml
vendored
Normal file
25
.github/ISSUE_TEMPLATE/privileged.yml
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
name: 🔒 Privileged
|
||||
description: You are a LangChain maintainer, or were asked directly by a maintainer to create an issue here. If not, check the other options.
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for your interest in LangChain! 🚀
|
||||
|
||||
If you are not a LangChain maintainer or were not asked directly by a maintainer to create an issue, then please start the conversation in a [Question in GitHub Discussions](https://github.com/langchain-ai/langchain/discussions/categories/q-a) instead.
|
||||
|
||||
You are a LangChain maintainer if you maintain any of the packages inside of the LangChain repository
|
||||
or are a regular contributor to LangChain with previously merged pull requests.
|
||||
- type: checkboxes
|
||||
id: privileged
|
||||
attributes:
|
||||
label: Privileged issue
|
||||
description: Confirm that you are allowed to create an issue here.
|
||||
options:
|
||||
- label: I am a LangChain maintainer, or was asked directly by a LangChain maintainer to create an issue here.
|
||||
required: true
|
||||
- type: textarea
|
||||
id: content
|
||||
attributes:
|
||||
label: Issue Content
|
||||
description: Add the content of the issue here.
|
||||
2
.github/workflows/_all_ci.yml
vendored
2
.github/workflows/_all_ci.yml
vendored
@@ -32,7 +32,7 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.6.1"
|
||||
POETRY_VERSION: "1.7.1"
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
|
||||
@@ -9,7 +9,7 @@ on:
|
||||
description: "From which folder this pipeline executes"
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.6.1"
|
||||
POETRY_VERSION: "1.7.1"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
2
.github/workflows/_dependencies.yml
vendored
2
.github/workflows/_dependencies.yml
vendored
@@ -13,7 +13,7 @@ on:
|
||||
description: "Relative path to the langchain library folder"
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.6.1"
|
||||
POETRY_VERSION: "1.7.1"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
6
.github/workflows/_integration_test.yml
vendored
6
.github/workflows/_integration_test.yml
vendored
@@ -8,10 +8,11 @@ on:
|
||||
type: string
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.6.1"
|
||||
POETRY_VERSION: "1.7.1"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
environment: Scheduled testing
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
@@ -51,6 +52,9 @@ jobs:
|
||||
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
|
||||
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
|
||||
GOOGLE_SEARCH_API_KEY: ${{ secrets.GOOGLE_SEARCH_API_KEY }}
|
||||
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
|
||||
run: |
|
||||
make integration_tests
|
||||
|
||||
|
||||
2
.github/workflows/_lint.yml
vendored
2
.github/workflows/_lint.yml
vendored
@@ -13,7 +13,7 @@ on:
|
||||
description: "Relative path to the langchain library folder"
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.6.1"
|
||||
POETRY_VERSION: "1.7.1"
|
||||
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
|
||||
|
||||
# This env var allows us to get inline annotations when ruff has complaints.
|
||||
|
||||
17
.github/workflows/_release.yml
vendored
17
.github/workflows/_release.yml
vendored
@@ -16,11 +16,12 @@ on:
|
||||
|
||||
env:
|
||||
PYTHON_VERSION: "3.10"
|
||||
POETRY_VERSION: "1.6.1"
|
||||
POETRY_VERSION: "1.7.1"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
if: github.ref == 'refs/heads/master'
|
||||
environment: Scheduled testing
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
outputs:
|
||||
@@ -117,11 +118,18 @@ jobs:
|
||||
# are not found on test PyPI can be resolved and installed anyway.
|
||||
# (https://test.pypi.org/simple). This will include the PKG_NAME==VERSION
|
||||
# package because VERSION will not have been uploaded to regular PyPI yet.
|
||||
#
|
||||
# - attempt install again after 5 seconds if it fails because there is
|
||||
# sometimes a delay in availability on test pypi
|
||||
run: |
|
||||
poetry run pip install \
|
||||
--extra-index-url https://test.pypi.org/simple/ \
|
||||
"$PKG_NAME==$VERSION"
|
||||
"$PKG_NAME==$VERSION" || \
|
||||
( \
|
||||
sleep 5 && \
|
||||
poetry run pip install \
|
||||
--extra-index-url https://test.pypi.org/simple/ \
|
||||
"$PKG_NAME==$VERSION" \
|
||||
)
|
||||
|
||||
# Replace all dashes in the package name with underscores,
|
||||
# since that's how Python imports packages with dashes in the name.
|
||||
@@ -163,6 +171,9 @@ jobs:
|
||||
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
|
||||
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
|
||||
GOOGLE_SEARCH_API_KEY: ${{ secrets.GOOGLE_SEARCH_API_KEY }}
|
||||
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
|
||||
run: make integration_tests
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
||||
|
||||
2
.github/workflows/_test.yml
vendored
2
.github/workflows/_test.yml
vendored
@@ -13,7 +13,7 @@ on:
|
||||
description: "Relative path to the langchain library folder"
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.6.1"
|
||||
POETRY_VERSION: "1.7.1"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
2
.github/workflows/_test_release.yml
vendored
2
.github/workflows/_test_release.yml
vendored
@@ -9,7 +9,7 @@ on:
|
||||
description: "From which folder this pipeline executes"
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.6.1"
|
||||
POETRY_VERSION: "1.7.1"
|
||||
PYTHON_VERSION: "3.10"
|
||||
|
||||
jobs:
|
||||
|
||||
2
.github/workflows/scheduled_test.yml
vendored
2
.github/workflows/scheduled_test.yml
vendored
@@ -6,7 +6,7 @@ on:
|
||||
- cron: '0 13 * * *'
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.6.1"
|
||||
POETRY_VERSION: "1.7.1"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
2
.github/workflows/templates_ci.yml
vendored
2
.github/workflows/templates_ci.yml
vendored
@@ -24,7 +24,7 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.6.1"
|
||||
POETRY_VERSION: "1.7.1"
|
||||
WORKDIR: "templates"
|
||||
|
||||
jobs:
|
||||
|
||||
@@ -49,7 +49,7 @@ The LangChain libraries themselves are made up of several different packages.
|
||||
- **[`langchain-community`](libs/community)**: Third party integrations.
|
||||
- **[`langchain`](libs/langchain)**: Chains, agents, and retrieval strategies that make up an application's cognitive architecture.
|
||||
|
||||

|
||||

|
||||
|
||||
## 🧱 What can you build with LangChain?
|
||||
**❓ Retrieval augmented generation**
|
||||
|
||||
@@ -82,7 +82,7 @@
|
||||
"prompt = ChatPromptTemplate.from_template(template)\n",
|
||||
"\n",
|
||||
"# LLM\n",
|
||||
"from langchain_community.llms import Together\n",
|
||||
"from langchain_together import Together\n",
|
||||
"\n",
|
||||
"llm = Together(\n",
|
||||
" model=\"mistralai/Mixtral-8x7B-Instruct-v0.1\",\n",
|
||||
|
||||
@@ -6,7 +6,7 @@ pydantic<2
|
||||
autodoc_pydantic==1.8.0
|
||||
myst_parser
|
||||
nbsphinx==0.8.9
|
||||
sphinx==4.5.0
|
||||
sphinx>=5
|
||||
sphinx-autobuild==2021.3.14
|
||||
sphinx_rtd_theme==1.0.0
|
||||
sphinx-typlog-theme==0.8.0
|
||||
|
||||
@@ -32,7 +32,7 @@ For a [development container](https://containers.dev/), see the [.devcontainer f
|
||||
|
||||
### Dependency Management: Poetry and other env/dependency managers
|
||||
|
||||
This project utilizes [Poetry](https://python-poetry.org/) v1.6.1+ as a dependency manager.
|
||||
This project utilizes [Poetry](https://python-poetry.org/) v1.7.1+ as a dependency manager.
|
||||
|
||||
❗Note: *Before installing Poetry*, if you use `Conda`, create and activate a new Conda env (e.g. `conda create -n langchain python=3.9`)
|
||||
|
||||
@@ -75,7 +75,7 @@ make test
|
||||
|
||||
If during installation you receive a `WheelFileValidationError` for `debugpy`, please make sure you are running
|
||||
Poetry v1.6.1+. This bug was present in older versions of Poetry (e.g. 1.4.1) and has been resolved in newer releases.
|
||||
If you are still seeing this bug on v1.6.1, you may also try disabling "modern installation"
|
||||
If you are still seeing this bug on v1.6.1+, you may also try disabling "modern installation"
|
||||
(`poetry config installer.modern-installation false`) and re-installing requirements.
|
||||
See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details.
|
||||
|
||||
|
||||
@@ -302,7 +302,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -177,7 +177,7 @@
|
||||
"source": [
|
||||
"## Get the prompts\n",
|
||||
"\n",
|
||||
"An important part of every chain is the prompts that are used. You can get the graphs present in the chain:"
|
||||
"An important part of every chain is the prompts that are used. You can get the prompts present in the chain:"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -14,7 +14,7 @@ This framework consists of several parts.
|
||||
- **[LangServe](/docs/langserve)**: A library for deploying LangChain chains as a REST API.
|
||||
- **[LangSmith](/docs/langsmith)**: A developer platform that lets you debug, test, evaluate, and monitor chains built on any LLM framework and seamlessly integrates with LangChain.
|
||||
|
||||

|
||||

|
||||
|
||||
Together, these products simplify the entire application lifecycle:
|
||||
- **Develop**: Write your applications in LangChain/LangChain.js. Hit the ground running using Templates for reference.
|
||||
@@ -78,7 +78,7 @@ Let models choose which tools to use given high-level directives
|
||||
Walkthroughs and techniques for common end-to-end use cases, like:
|
||||
- [Document question answering](/docs/use_cases/question_answering/)
|
||||
- [Chatbots](/docs/use_cases/chatbots/)
|
||||
- [Analyzing structured data](/docs/use_cases/qa_structured/sql/)
|
||||
- [Analyzing structured data](/docs/use_cases/sql/)
|
||||
- and much more...
|
||||
|
||||
### [Integrations](/docs/integrations/providers/)
|
||||
|
||||
@@ -59,7 +59,7 @@ In this quickstart, we will walk through a few different ways of doing that.
|
||||
We will start with a simple LLM chain, which just relies on information in the prompt template to respond.
|
||||
Next, we will build a retrieval chain, which fetches data from a separate database and passes that into the prompt template.
|
||||
We will then add in chat history, to create a conversation retrieval chain. This allows you to interact in a chat manner with this LLM, so it remembers previous questions.
|
||||
Finally, we will build an agent - which utilizes and LLM to determine whether or not it needs to fetch data to answer questions.
|
||||
Finally, we will build an agent - which utilizes an LLM to determine whether or not it needs to fetch data to answer questions.
|
||||
We will cover these at a high level, but there are a lot of details to all of these!
|
||||
We will link to relevant docs.
|
||||
|
||||
@@ -597,6 +597,6 @@ To continue on your journey, we recommend you read the following (in order):
|
||||
- [Model IO](/docs/modules/model_io) covers more details of prompts, LLMs, and output parsers.
|
||||
- [Retrieval](/docs/modules/data_connection) covers more details of everything related to retrieval
|
||||
- [Agents](/docs/modules/agents) covers details of everything related to agents
|
||||
- Explore common [end-to-end use cases](/docs/use_cases/qa_structured/sql) and [template applications](/docs/templates)
|
||||
- Explore common [end-to-end use cases](/docs/use_cases/) and [template applications](/docs/templates)
|
||||
- [Read up on LangSmith](/docs/langsmith/), the platform for debugging, testing, monitoring and more
|
||||
- Learn more about serving your applications with [LangServe](/docs/langserve)
|
||||
|
||||
@@ -12,7 +12,7 @@ Platforms with tracing capabilities like [LangSmith](/docs/langsmith/) and [Wand
|
||||
|
||||
For anyone building production-grade LLM applications, we highly recommend using a platform like this.
|
||||
|
||||

|
||||

|
||||
|
||||
## `set_debug` and `set_verbose`
|
||||
|
||||
|
||||
138
docs/docs/integrations/callbacks/comet_tracing.ipynb
Normal file
138
docs/docs/integrations/callbacks/comet_tracing.ipynb
Normal file
@@ -0,0 +1,138 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5371a9bb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Comet Tracing\n",
|
||||
"\n",
|
||||
"There are two ways to trace your LangChains executions with Comet:\n",
|
||||
"\n",
|
||||
"1. Setting the `LANGCHAIN_COMET_TRACING` environment variable to \"true\". This is the recommended way.\n",
|
||||
"2. Import the `CometTracer` manually and pass it explicitely."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "17c04cc6-c93d-4b6c-a033-e897577f4ed1",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-18T12:47:46.580776Z",
|
||||
"start_time": "2023-05-18T12:47:46.577833Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"import comet_llm\n",
|
||||
"\n",
|
||||
"os.environ[\"LANGCHAIN_COMET_TRACING\"] = \"true\"\n",
|
||||
"\n",
|
||||
"# Connect to Comet if no API Key is set\n",
|
||||
"comet_llm.init()\n",
|
||||
"\n",
|
||||
"# comet documentation to configure comet using env variables\n",
|
||||
"# https://www.comet.com/docs/v2/api-and-sdk/llm-sdk/configuration/\n",
|
||||
"# here we are configuring the comet project\n",
|
||||
"os.environ[\"COMET_PROJECT_NAME\"] = \"comet-example-langchain-tracing\"\n",
|
||||
"\n",
|
||||
"from langchain.agents import AgentType, initialize_agent, load_tools\n",
|
||||
"from langchain.llms import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1b62cd48",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-18T12:47:47.445229Z",
|
||||
"start_time": "2023-05-18T12:47:47.436424Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Agent run with tracing. Ensure that OPENAI_API_KEY is set appropriately to run this example.\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"tools = load_tools([\"llm-math\"], llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bfa16b79-aa4b-4d41-a067-70d1f593f667",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-18T12:48:01.816137Z",
|
||||
"start_time": "2023-05-18T12:47:49.109574Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"agent.run(\"What is 2 raised to .123243 power?\") # this should be traced\n",
|
||||
"# An url for the chain like the following should print in your console:\n",
|
||||
"# https://www.comet.com/<workspace>/<project_name>\n",
|
||||
"# The url can be used to view the LLM chain in Comet."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5e212e7d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Now, we unset the environment variable and use a context manager.\n",
|
||||
"if \"LANGCHAIN_COMET_TRACING\" in os.environ:\n",
|
||||
" del os.environ[\"LANGCHAIN_COMET_TRACING\"]\n",
|
||||
"\n",
|
||||
"from langchain.callbacks.tracers.comet import CometTracer\n",
|
||||
"\n",
|
||||
"tracer = CometTracer()\n",
|
||||
"\n",
|
||||
"# Recreate the LLM, tools and agent and passing the callback to each of them\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"tools = load_tools([\"llm-math\"], llm=llm)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"agent.run(\n",
|
||||
" \"What is 2 raised to .123243 power?\", callbacks=[tracer]\n",
|
||||
") # this should be traced"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -46,7 +46,7 @@ thoughts and actions live in your app.
|
||||
```python
|
||||
from langchain_openai import OpenAI
|
||||
from langchain.agents import AgentType, initialize_agent, load_tools
|
||||
from langchain.callbacks import StreamlitCallbackHandler
|
||||
from langchain_community.callbacks import StreamlitCallbackHandler
|
||||
import streamlit as st
|
||||
|
||||
llm = OpenAI(temperature=0, streaming=True)
|
||||
|
||||
@@ -53,9 +53,16 @@
|
||||
"- AquilaChat-7B"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -65,83 +72,105 @@
|
||||
"from langchain_community.chat_models import QianfanChatEndpoint\n",
|
||||
"from langchain_core.language_models.chat_models import HumanMessage\n",
|
||||
"\n",
|
||||
"os.environ[\"QIANFAN_AK\"] = \"your_ak\"\n",
|
||||
"os.environ[\"QIANFAN_SK\"] = \"your_sk\"\n",
|
||||
"\n",
|
||||
"chat = QianfanChatEndpoint(\n",
|
||||
" streaming=True,\n",
|
||||
")\n",
|
||||
"res = chat([HumanMessage(content=\"write a funny joke\")])"
|
||||
"os.environ[\"QIANFAN_AK\"] = \"Your_api_key\"\n",
|
||||
"os.environ[\"QIANFAN_SK\"] = \"You_secret_Key\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[INFO] [09-15 20:00:36] logging.py:55 [t:139698882193216]: requesting llm api endpoint: /chat/eb-instant\n",
|
||||
"[INFO] [09-15 20:00:37] logging.py:55 [t:139698882193216]: async requesting llm api endpoint: /chat/eb-instant\n"
|
||||
]
|
||||
},
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='您好!请问您需要什么帮助?我将尽力回答您的问题。')"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat = QianfanChatEndpoint(streaming=True)\n",
|
||||
"messages = [HumanMessage(content=\"Hello\")]\n",
|
||||
"chat.invoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='您好!有什么我可以帮助您的吗?')"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"await chat.ainvoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[AIMessage(content='您好!有什么我可以帮助您的吗?')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat.batch([messages])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Streaming"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"chat resp: content='您好,您似乎输入' additional_kwargs={} example=False\n",
|
||||
"chat resp: content='了一个话题标签,请问需要我帮您找到什么资料或者帮助您解答什么问题吗?' additional_kwargs={} example=False\n",
|
||||
"chat resp: content='' additional_kwargs={} example=False\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[INFO] [09-15 20:00:39] logging.py:55 [t:139698882193216]: async requesting llm api endpoint: /chat/eb-instant\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"generations=[[ChatGeneration(text=\"The sea is a vast expanse of water that covers much of the Earth's surface. It is a source of travel, trade, and entertainment, and is also a place of scientific exploration and marine conservation. The sea is an important part of our world, and we should cherish and protect it.\", generation_info={'finish_reason': 'finished'}, message=AIMessage(content=\"The sea is a vast expanse of water that covers much of the Earth's surface. It is a source of travel, trade, and entertainment, and is also a place of scientific exploration and marine conservation. The sea is an important part of our world, and we should cherish and protect it.\", additional_kwargs={}, example=False))]] llm_output={} run=[RunInfo(run_id=UUID('d48160a6-5960-4c1d-8a0e-90e6b51a209b'))]\n",
|
||||
"astream content='The sea is a vast' additional_kwargs={} example=False\n",
|
||||
"astream content=' expanse of water, a place of mystery and adventure. It is the source of many cultures and civilizations, and a center of trade and exploration. The sea is also a source of life and beauty, with its unique marine life and diverse' additional_kwargs={} example=False\n",
|
||||
"astream content=' coral reefs. Whether you are swimming, diving, or just watching the sea, it is a place that captivates the imagination and transforms the spirit.' additional_kwargs={} example=False\n"
|
||||
"您好!有什么我可以帮助您的吗?\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"from langchain_community.chat_models import QianfanChatEndpoint\n",
|
||||
"\n",
|
||||
"chatLLM = QianfanChatEndpoint()\n",
|
||||
"res = chatLLM.stream([HumanMessage(content=\"hi\")], streaming=True)\n",
|
||||
"for r in res:\n",
|
||||
" print(\"chat resp:\", r)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def run_aio_generate():\n",
|
||||
" resp = await chatLLM.agenerate(\n",
|
||||
" messages=[[HumanMessage(content=\"write a 20 words sentence about sea.\")]]\n",
|
||||
" )\n",
|
||||
" print(resp)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"await run_aio_generate()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def run_aio_stream():\n",
|
||||
" async for res in chatLLM.astream(\n",
|
||||
" [HumanMessage(content=\"write a 20 words sentence about sea.\")]\n",
|
||||
" ):\n",
|
||||
" print(\"astream\", res)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"await run_aio_stream()"
|
||||
"try:\n",
|
||||
" for chunk in chat.stream(messages):\n",
|
||||
" print(chunk.content, end=\"\", flush=True)\n",
|
||||
"except TypeError as e:\n",
|
||||
" print(\"\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -151,39 +180,36 @@
|
||||
"source": [
|
||||
"## Use different models in Qianfan\n",
|
||||
"\n",
|
||||
"In the case you want to deploy your own model based on Ernie Bot or third-party open-source model, you could follow these steps:\n",
|
||||
"The default model is ERNIE-Bot-turbo, in the case you want to deploy your own model based on Ernie Bot or third-party open-source model, you could follow these steps:\n",
|
||||
"\n",
|
||||
"- 1. (Optional, if the model are included in the default models, skip it)Deploy your model in Qianfan Console, get your own customized deploy endpoint.\n",
|
||||
"- 2. Set up the field called `endpoint` in the initialization:"
|
||||
"1. (Optional, if the model are included in the default models, skip it) Deploy your model in Qianfan Console, get your own customized deploy endpoint.\n",
|
||||
"2. Set up the field called `endpoint` in the initialization:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[INFO] [09-15 20:00:50] logging.py:55 [t:139698882193216]: requesting llm api endpoint: /chat/bloomz_7b1\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"content='你好!很高兴见到你。' additional_kwargs={} example=False\n"
|
||||
]
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Hello,可以回答问题了,我会竭尽全力为您解答,请问有什么问题吗?')"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chatBloom = QianfanChatEndpoint(\n",
|
||||
"chatBot = QianfanChatEndpoint(\n",
|
||||
" streaming=True,\n",
|
||||
" model=\"BLOOMZ-7B\",\n",
|
||||
" model=\"ERNIE-Bot\",\n",
|
||||
")\n",
|
||||
"res = chatBloom([HumanMessage(content=\"hi\")])\n",
|
||||
"print(res)"
|
||||
"\n",
|
||||
"messages = [HumanMessage(content=\"Hello\")]\n",
|
||||
"chatBot.invoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -202,35 +228,25 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[INFO] [09-15 20:00:57] logging.py:55 [t:139698882193216]: requesting llm api endpoint: /chat/eb-instant\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"content='您好,您似乎输入' additional_kwargs={} example=False\n",
|
||||
"content='了一个文本字符串,但并没有给出具体的问题或场景。' additional_kwargs={} example=False\n",
|
||||
"content='如果您能提供更多信息,我可以更好地回答您的问题。' additional_kwargs={} example=False\n",
|
||||
"content='' additional_kwargs={} example=False\n"
|
||||
]
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='您好!有什么我可以帮助您的吗?')"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"res = chat.stream(\n",
|
||||
" [HumanMessage(content=\"hi\")],\n",
|
||||
"chat.invoke(\n",
|
||||
" [HumanMessage(content=\"Hello\")],\n",
|
||||
" **{\"top_p\": 0.4, \"temperature\": 0.1, \"penalty_score\": 1},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for r in res:\n",
|
||||
" print(r)"
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -250,7 +266,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.5"
|
||||
"version": "3.9.18"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
224
docs/docs/integrations/chat/deepinfra.ipynb
Normal file
224
docs/docs/integrations/chat/deepinfra.ipynb
Normal file
@@ -0,0 +1,224 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "bf733a38-db84-4363-89e2-de6735c37230",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DeepInfra\n",
|
||||
"\n",
|
||||
"[DeepInfra](https://deepinfra.com/?utm_source=langchain) is a serverless inference as a service that provides access to a [variety of LLMs](https://deepinfra.com/models?utm_source=langchain) and [embeddings models](https://deepinfra.com/models?type=embeddings&utm_source=langchain). This notebook goes over how to use LangChain with DeepInfra for chat models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set the Environment API Key\n",
|
||||
"Make sure to get your API key from DeepInfra. You have to [Login](https://deepinfra.com/login?from=%2Fdash) and get a new token.\n",
|
||||
"\n",
|
||||
"You are given 1 hour of free serverless GPU compute to test different models. (see [here](https://github.com/deepinfra/deepctl#deepctl))\n",
|
||||
"You can print your token with `deepctl auth token`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# get a new token: https://deepinfra.com/login?from=%2Fdash\n",
|
||||
"\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"DEEPINFRA_API_TOKEN = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# or pass deepinfra_api_token parameter to the ChatDeepInfra constructor\n",
|
||||
"os.environ[\"DEEPINFRA_API_TOKEN\"] = DEEPINFRA_API_TOKEN"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "d4a7c55d-b235-4ca4-a579-c90cc9570da9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatDeepInfra\n",
|
||||
"from langchain.schema import HumanMessage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "70cf04e8-423a-4ff6-8b09-f11fb711c817",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chat = ChatDeepInfra(model=\"meta-llama/Llama-2-7b-chat-hf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "8199ef8f-eb8b-4253-9ea0-6c24a013ca4c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"messages = [\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"Translate this sentence from English to French. I love programming.\"\n",
|
||||
" )\n",
|
||||
"]\n",
|
||||
"chat(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "c361ab1e-8c0c-4206-9e3c-9d1424a12b9c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## `ChatDeepInfra` also supports async and streaming functionality:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "93a21c5c-6ef9-4688-be60-b2e1f94842fb",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "c5fac0e9-05a4-4fc1-a3b3-e5bbb24b971b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"LLMResult(generations=[[ChatGeneration(text=\" J'aime programmer.\", generation_info=None, message=AIMessage(content=\" J'aime programmer.\", additional_kwargs={}, example=False))]], llm_output={}, run=[RunInfo(run_id=UUID('8cc8fb68-1c35-439c-96a0-695036a93652'))])"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"await chat.agenerate([messages])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "025be980-e50d-4a68-93dc-c9c7b500ce34",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" J'aime la programmation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat = ChatDeepInfra(\n",
|
||||
" streaming=True,\n",
|
||||
" verbose=True,\n",
|
||||
" callbacks=[StreamingStdOutCallbackHandler()],\n",
|
||||
")\n",
|
||||
"chat(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c253883f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -16,29 +16,58 @@
|
||||
"# ErnieBotChat\n",
|
||||
"\n",
|
||||
"[ERNIE-Bot](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/jlil56u11) is a large language model developed by Baidu, covering a huge amount of Chinese data.\n",
|
||||
"This notebook covers how to get started with ErnieBot chat models.\n",
|
||||
"This notebook covers how to get started with ErnieBot chat models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Deprecation Warning**\n",
|
||||
"\n",
|
||||
"We recommend that users of `langchain_community.chat_models.ErnieBotChat` \n",
|
||||
"switch to `langchain_community.chat_models.QianfanChatEndpoint` instead.\n",
|
||||
"\n",
|
||||
"Documentation for `QianfanChatEndpoint` is [here](./baidu_qianfan_endpoint).\n",
|
||||
"\n",
|
||||
"There are 4 reasons why we recommend `QianfanChatEndpoint`:\n",
|
||||
"\n",
|
||||
"**Note:** We recommend that users of this class switch to [Baidu Qianfan](./baidu_qianfan_endpoint). There are 3 reasons why we recommend `QianfanChatEndpoint`:\n",
|
||||
"1. `QianfanChatEndpoint` supports more LLMs in the Qianfan platform.\n",
|
||||
"2. `QianfanChatEndpoint` supports streaming mode.\n",
|
||||
"3. `QianfanChatEndpoint` supports function calling usage.\n",
|
||||
"\n",
|
||||
"4. `ErnieBotChat` lacks maintenance and is deprecated."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Some tips for migration:\n",
|
||||
"\n",
|
||||
"- change `ernie_client_id` to `qianfan_ak`, also change `ernie_client_secret` to `qianfan_sk`.\n",
|
||||
"- install `qianfan` package. \n",
|
||||
" ```\n",
|
||||
" pip install qianfan\n",
|
||||
" ```"
|
||||
"- install `qianfan` package. like `pip install qianfan`\n",
|
||||
"- change `ErnieBotChat` to `QianfanChatEndpoint`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"from langchain_community.chat_models import ErnieBotChat"
|
||||
"from langchain_community.chat_models.baidu_qianfan_endpoint import QianfanChatEndpoint\n",
|
||||
"\n",
|
||||
"chat = QianfanChatEndpoint(\n",
|
||||
" qianfan_ak=\"your qianfan ak\",\n",
|
||||
" qianfan_sk=\"your qianfan sk\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -47,6 +76,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"from langchain_community.chat_models import ErnieBotChat\n",
|
||||
"\n",
|
||||
"chat = ErnieBotChat(\n",
|
||||
" ernie_client_id=\"YOUR_CLIENT_ID\", ernie_client_secret=\"YOUR_CLIENT_SECRET\"\n",
|
||||
")"
|
||||
|
||||
@@ -320,11 +320,26 @@
|
||||
"4. Message may be blocked if they violate the safety checks of the LLM. In this case, the model will return an empty response."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "75fdfad6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "92b5aca5",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
"source": [
|
||||
"## Additional Configuration\n",
|
||||
"\n",
|
||||
"You can pass the following parameters to ChatGoogleGenerativeAI in order to customize the SDK's behavior:\n",
|
||||
"\n",
|
||||
"- `client_options`: [Client Options](https://googleapis.dev/python/google-api-core/latest/client_options.html#module-google.api_core.client_options) to pass to the Google API Client, such as a custom `client_options[\"api_endpoint\"]`\n",
|
||||
"- `transport`: The transport method to use, such as `rest`, `grpc`, or `grpc_asyncio`."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -35,29 +34,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-google-vertexai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -67,7 +55,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -76,7 +64,7 @@
|
||||
"AIMessage(content=\" J'aime la programmation.\")"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -92,6 +80,40 @@
|
||||
"chain.invoke({})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Gemini doesn't support SystemMessage at the moment, but it can be added to the first human message in the row. If you want such behavior, just set the `convert_system_message_to_human` to `True`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'aime la programmation.\")"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"system = \"You are a helpful assistant who translate English to French\"\n",
|
||||
"human = \"Translate this sentence from English to French. I love programming.\"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n",
|
||||
"\n",
|
||||
"chat = ChatVertexAI(model_name=\"gemini-pro\", convert_system_message_to_human=True)\n",
|
||||
"\n",
|
||||
"chain = prompt | chat\n",
|
||||
"chain.invoke({})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -101,7 +123,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -110,7 +132,7 @@
|
||||
"AIMessage(content=' プログラミングが大好きです')"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -122,6 +144,8 @@
|
||||
"human = \"{text}\"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n",
|
||||
"\n",
|
||||
"chat = ChatVertexAI()\n",
|
||||
"\n",
|
||||
"chain = prompt | chat\n",
|
||||
"\n",
|
||||
"chain.invoke(\n",
|
||||
@@ -134,7 +158,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
@@ -154,7 +177,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -165,27 +188,51 @@
|
||||
"text": [
|
||||
" ```python\n",
|
||||
"def is_prime(n):\n",
|
||||
" if n <= 1:\n",
|
||||
" return False\n",
|
||||
" for i in range(2, n):\n",
|
||||
" if n % i == 0:\n",
|
||||
" return False\n",
|
||||
" return True\n",
|
||||
" \"\"\"\n",
|
||||
" Check if a number is prime.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" n: The number to check.\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" True if n is prime, False otherwise.\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" # If n is 1, it is not prime.\n",
|
||||
" if n == 1:\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
" # Iterate over all numbers from 2 to the square root of n.\n",
|
||||
" for i in range(2, int(n ** 0.5) + 1):\n",
|
||||
" # If n is divisible by any number from 2 to its square root, it is not prime.\n",
|
||||
" if n % i == 0:\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
" # If n is divisible by no number from 2 to its square root, it is prime.\n",
|
||||
" return True\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def find_prime_numbers(n):\n",
|
||||
" prime_numbers = []\n",
|
||||
" for i in range(2, n + 1):\n",
|
||||
" if is_prime(i):\n",
|
||||
" prime_numbers.append(i)\n",
|
||||
" return prime_numbers\n",
|
||||
" \"\"\"\n",
|
||||
" Find all prime numbers up to a given number.\n",
|
||||
"\n",
|
||||
"print(find_prime_numbers(100))\n",
|
||||
"```\n",
|
||||
" Args:\n",
|
||||
" n: The upper bound for the prime numbers to find.\n",
|
||||
"\n",
|
||||
"Output:\n",
|
||||
" Returns:\n",
|
||||
" A list of all prime numbers up to n.\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]\n",
|
||||
" # Create a list of all numbers from 2 to n.\n",
|
||||
" numbers = list(range(2, n + 1))\n",
|
||||
"\n",
|
||||
" # Iterate over the list of numbers and remove any that are not prime.\n",
|
||||
" for number in numbers:\n",
|
||||
" if not is_prime(number):\n",
|
||||
" numbers.remove(number)\n",
|
||||
"\n",
|
||||
" # Return the list of prime numbers.\n",
|
||||
" return numbers\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
@@ -199,6 +246,143 @@
|
||||
"print(message.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Full generation info\n",
|
||||
"\n",
|
||||
"We can use the `generate` method to get back extra metadata like [safety attributes](https://cloud.google.com/vertex-ai/docs/generative-ai/learn/responsible-ai#safety_attribute_confidence_scoring) and not just chat completions\n",
|
||||
"\n",
|
||||
"Note that the `generation_info` will be different depending on whether you're using a Gemini model or not.\n",
|
||||
"\n",
|
||||
"### Gemini model\n",
|
||||
"\n",
|
||||
"`generation_info` will include:\n",
|
||||
"\n",
|
||||
"- `is_blocked`: whether generation was blocked or not\n",
|
||||
"- `safety_ratings`: safety ratings' categories and probability labels"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'is_blocked': False,\n",
|
||||
" 'safety_ratings': [{'category': 'HARM_CATEGORY_HARASSMENT',\n",
|
||||
" 'probability_label': 'NEGLIGIBLE'},\n",
|
||||
" {'category': 'HARM_CATEGORY_HATE_SPEECH',\n",
|
||||
" 'probability_label': 'NEGLIGIBLE'},\n",
|
||||
" {'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT',\n",
|
||||
" 'probability_label': 'NEGLIGIBLE'},\n",
|
||||
" {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT',\n",
|
||||
" 'probability_label': 'NEGLIGIBLE'}]}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from pprint import pprint\n",
|
||||
"\n",
|
||||
"from langchain_core.messages import HumanMessage\n",
|
||||
"from langchain_google_vertexai import ChatVertexAI, HarmBlockThreshold, HarmCategory\n",
|
||||
"\n",
|
||||
"human = \"Translate this sentence from English to French. I love programming.\"\n",
|
||||
"messages = [HumanMessage(content=human)]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chat = ChatVertexAI(\n",
|
||||
" model_name=\"gemini-pro\",\n",
|
||||
" safety_settings={\n",
|
||||
" HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE\n",
|
||||
" },\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"result = chat.generate([messages])\n",
|
||||
"pprint(result.generations[0][0].generation_info)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Non-gemini model\n",
|
||||
"\n",
|
||||
"`generation_info` will include:\n",
|
||||
"\n",
|
||||
"- `is_blocked`: whether generation was blocked or not\n",
|
||||
"- `safety_attributes`: a dictionary mapping safety attributes to their scores"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'is_blocked': False,\n",
|
||||
" 'safety_attributes': {'Derogatory': 0.1,\n",
|
||||
" 'Finance': 0.3,\n",
|
||||
" 'Insult': 0.1,\n",
|
||||
" 'Sexual': 0.1}}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat = ChatVertexAI() # default is `chat-bison`\n",
|
||||
"\n",
|
||||
"result = chat.generate([messages])\n",
|
||||
"pprint(result.generations[0][0].generation_info)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Function Calling with Gemini\n",
|
||||
"\n",
|
||||
"We can call Gemini models with tools."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"MyModel(name='Erick', age=27)"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.pydantic_v1 import BaseModel\n",
|
||||
"from langchain_google_vertexai import create_structured_runnable\n",
|
||||
"\n",
|
||||
"llm = ChatVertexAI(model_name=\"gemini-pro\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class MyModel(BaseModel):\n",
|
||||
" name: str\n",
|
||||
" age: int\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chain = create_structured_runnable(MyModel, llm)\n",
|
||||
"chain.invoke(\"My name is Erick and I'm 27 years old\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -210,7 +394,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -224,7 +408,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -268,7 +452,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -309,8 +493,14 @@
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"environment": {
|
||||
"kernel": "python3",
|
||||
"name": "common-cpu.m108",
|
||||
"type": "gcloud",
|
||||
"uri": "gcr.io/deeplearning-platform-release/base-cpu:m108"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -324,7 +514,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.10"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
135
docs/docs/integrations/chat/llama_edge.ipynb
Normal file
135
docs/docs/integrations/chat/llama_edge.ipynb
Normal file
@@ -0,0 +1,135 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# LlamaEdge\n",
|
||||
"\n",
|
||||
"[LlamaEdge](https://github.com/second-state/LlamaEdge) allows you to chat with LLMs of [GGUF](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/README.md) format both locally and via chat service.\n",
|
||||
"\n",
|
||||
"- `LlamaEdgeChatService` provides developers an OpenAI API compatible service to chat with LLMs via HTTP requests.\n",
|
||||
"\n",
|
||||
"- `LlamaEdgeChatLocal` enables developers to chat with LLMs locally (coming soon).\n",
|
||||
"\n",
|
||||
"Both `LlamaEdgeChatService` and `LlamaEdgeChatLocal` run on the infrastructure driven by [WasmEdge Runtime](https://wasmedge.org/), which provides a lightweight and portable WebAssembly container environment for LLM inference tasks.\n",
|
||||
"\n",
|
||||
"## Chat via API Service\n",
|
||||
"\n",
|
||||
"`LlamaEdgeChatService` works on the `llama-api-server`. Following the steps in [llama-api-server quick-start](https://github.com/second-state/llama-utils/tree/main/api-server#readme), you can host your own API service so that you can chat with any models you like on any device you have anywhere as long as the internet is available."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_models.llama_edge import LlamaEdgeChatService\n",
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Chat with LLMs in the non-streaming mode"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Bot] Hello! The capital of France is Paris.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# service url\n",
|
||||
"service_url = \"https://b008-54-186-154-209.ngrok-free.app\"\n",
|
||||
"\n",
|
||||
"# create wasm-chat service instance\n",
|
||||
"chat = LlamaEdgeChatService(service_url=service_url)\n",
|
||||
"\n",
|
||||
"# create message sequence\n",
|
||||
"system_message = SystemMessage(content=\"You are an AI assistant\")\n",
|
||||
"user_message = HumanMessage(content=\"What is the capital of France?\")\n",
|
||||
"messages = [system_message, user_message]\n",
|
||||
"\n",
|
||||
"# chat with wasm-chat service\n",
|
||||
"response = chat(messages)\n",
|
||||
"\n",
|
||||
"print(f\"[Bot] {response.content}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Chat with LLMs in the streaming mode"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Bot] Hello! I'm happy to help you with your question. The capital of Norway is Oslo.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# service url\n",
|
||||
"service_url = \"https://b008-54-186-154-209.ngrok-free.app\"\n",
|
||||
"\n",
|
||||
"# create wasm-chat service instance\n",
|
||||
"chat = LlamaEdgeChatService(service_url=service_url, streaming=True)\n",
|
||||
"\n",
|
||||
"# create message sequence\n",
|
||||
"system_message = SystemMessage(content=\"You are an AI assistant\")\n",
|
||||
"user_message = HumanMessage(content=\"What is the capital of Norway?\")\n",
|
||||
"messages = [\n",
|
||||
" system_message,\n",
|
||||
" user_message,\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"output = \"\"\n",
|
||||
"for chunk in chat.stream(messages):\n",
|
||||
" # print(chunk.content, end=\"\", flush=True)\n",
|
||||
" output += chunk.content\n",
|
||||
"\n",
|
||||
"print(f\"[Bot] {output}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -1,85 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Wasm Chat\n",
|
||||
"\n",
|
||||
"`Wasm-chat` allows you to chat with LLMs of [GGUF](https://github.com/ggerganov/llama.cpp/blob/master/gguf-py/README.md) format both locally and via chat service.\n",
|
||||
"\n",
|
||||
"- `WasmChatService` provides developers an OpenAI API compatible service to chat with LLMs via HTTP requests.\n",
|
||||
"\n",
|
||||
"- `WasmChatLocal` enables developers to chat with LLMs locally (coming soon).\n",
|
||||
"\n",
|
||||
"Both `WasmChatService` and `WasmChatLocal` run on the infrastructure driven by [WasmEdge Runtime](https://wasmedge.org/), which provides a lightweight and portable WebAssembly container environment for LLM inference tasks.\n",
|
||||
"\n",
|
||||
"## Chat via API Service\n",
|
||||
"\n",
|
||||
"`WasmChatService` provides chat services by the `llama-api-server`. Following the steps in [llama-api-server quick-start](https://github.com/second-state/llama-utils/tree/main/api-server#readme), you can host your own API service so that you can chat with any models you like on any device you have anywhere as long as the internet is available."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_models.wasm_chat import WasmChatService\n",
|
||||
"from langchain_core.messages import AIMessage, HumanMessage, SystemMessage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Bot] Paris\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# service url\n",
|
||||
"service_url = \"https://b008-54-186-154-209.ngrok-free.app\"\n",
|
||||
"\n",
|
||||
"# create wasm-chat service instance\n",
|
||||
"chat = WasmChatService(service_url=service_url)\n",
|
||||
"\n",
|
||||
"# create message sequence\n",
|
||||
"system_message = SystemMessage(content=\"You are an AI assistant\")\n",
|
||||
"user_message = HumanMessage(content=\"What is the capital of France?\")\n",
|
||||
"messages = [system_message, user_message]\n",
|
||||
"\n",
|
||||
"# chat with wasm-chat service\n",
|
||||
"response = chat(messages)\n",
|
||||
"\n",
|
||||
"print(f\"[Bot] {response.content}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
241
docs/docs/integrations/document_loaders/cassandra.ipynb
Normal file
241
docs/docs/integrations/document_loaders/cassandra.ipynb
Normal file
@@ -0,0 +1,241 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "vm8vn9t8DvC_"
|
||||
},
|
||||
"source": [
|
||||
"# Cassandra"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[Cassandra](https://cassandra.apache.org/) is a NoSQL, row-oriented, highly scalable and highly available database.Starting with version 5.0, the database ships with [vector search capabilities](https://cassandra.apache.org/doc/trunk/cassandra/vector-search/overview.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "5WjXERXzFEhg"
|
||||
},
|
||||
"source": [
|
||||
"## Overview"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "juAmbgoWD17u"
|
||||
},
|
||||
"source": [
|
||||
"The Cassandra Document Loader returns a list of Langchain Documents from a Cassandra database.\n",
|
||||
"\n",
|
||||
"You must either provide a CQL query or a table name to retrieve the documents.\n",
|
||||
"The Loader takes the following parameters:\n",
|
||||
"\n",
|
||||
"* table: (Optional) The table to load the data from.\n",
|
||||
"* session: (Optional) The cassandra driver session. If not provided, the cassio resolved session will be used.\n",
|
||||
"* keyspace: (Optional) The keyspace of the table. If not provided, the cassio resolved keyspace will be used.\n",
|
||||
"* query: (Optional) The query used to load the data.\n",
|
||||
"* page_content_mapper: (Optional) a function to convert a row to string page content. The default converts the row to JSON.\n",
|
||||
"* metadata_mapper: (Optional) a function to convert a row to metadata dict.\n",
|
||||
"* query_parameters: (Optional) The query parameters used when calling session.execute .\n",
|
||||
"* query_timeout: (Optional) The query timeout used when calling session.execute .\n",
|
||||
"* query_custom_payload: (Optional) The query custom_payload used when calling `session.execute`.\n",
|
||||
"* query_execution_profile: (Optional) The query execution_profile used when calling `session.execute`.\n",
|
||||
"* query_host: (Optional) The query host used when calling `session.execute`.\n",
|
||||
"* query_execute_as: (Optional) The query execute_as used when calling `session.execute`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load documents with the Document Loader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import CassandraLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Init from a cassandra driver Session\n",
|
||||
"\n",
|
||||
"You need to create a `cassandra.cluster.Session` object, as described in the [Cassandra driver documentation](https://docs.datastax.com/en/developer/python-driver/latest/api/cassandra/cluster/#module-cassandra.cluster). The details vary (e.g. with network settings and authentication), but this might be something like:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from cassandra.cluster import Cluster\n",
|
||||
"\n",
|
||||
"cluster = Cluster()\n",
|
||||
"session = cluster.connect()"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"You need to provide the name of an existing keyspace of the Cassandra instance:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"CASSANDRA_KEYSPACE = input(\"CASSANDRA_KEYSPACE = \")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"execution_count": null
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Creating the document loader:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-19T15:47:25.893037Z",
|
||||
"start_time": "2024-01-19T15:47:25.889398Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = CassandraLoader(\n",
|
||||
" table=\"movie_reviews\",\n",
|
||||
" session=session,\n",
|
||||
" keyspace=CASSANDRA_KEYSPACE,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-19T15:47:26.399472Z",
|
||||
"start_time": "2024-01-19T15:47:26.389145Z"
|
||||
}
|
||||
},
|
||||
"execution_count": 17
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-19T15:47:33.287783Z",
|
||||
"start_time": "2024-01-19T15:47:33.277862Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "Document(page_content='Row(_id=\\'659bdffa16cbc4586b11a423\\', title=\\'Dangerous Men\\', reviewtext=\\'\"Dangerous Men,\" the picture\\\\\\'s production notes inform, took 26 years to reach the big screen. After having seen it, I wonder: What was the rush?\\')', metadata={'table': 'movie_reviews', 'keyspace': 'default_keyspace'})"
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Init from cassio\n",
|
||||
"\n",
|
||||
"It's also possible to use cassio to configure the session and keyspace."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cassio\n",
|
||||
"\n",
|
||||
"cassio.init(contact_points=\"127.0.0.1\", keyspace=CASSANDRA_KEYSPACE)\n",
|
||||
"\n",
|
||||
"loader = CassandraLoader(\n",
|
||||
" table=\"movie_reviews\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = loader.load()"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"execution_count": null
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"collapsed_sections": [
|
||||
"5WjXERXzFEhg"
|
||||
],
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.18"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
BIN
docs/docs/integrations/document_loaders/example_data/fake.vsdx
Normal file
BIN
docs/docs/integrations/document_loaders/example_data/fake.vsdx
Normal file
Binary file not shown.
@@ -8,7 +8,7 @@
|
||||
"This notebook covers how to load documents from `Psychic`. See [here](/docs/integrations/providers/psychic) for more details.\n",
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
"1. Follow the Quick Start section in [this document](/docs/ecosystem/integrations/psychic)\n",
|
||||
"1. Follow the Quick Start section in [this document](/docs/integrations/providers/psychic)\n",
|
||||
"2. Log into the [Psychic dashboard](https://dashboard.psychic.dev/) and get your secret key\n",
|
||||
"3. Install the frontend react library into your web app and have a user authenticate a connection. The connection will be created using the connection id that you specify."
|
||||
]
|
||||
|
||||
236
docs/docs/integrations/document_loaders/surrealdb.ipynb
Normal file
236
docs/docs/integrations/document_loaders/surrealdb.ipynb
Normal file
@@ -0,0 +1,236 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5812b612-3e77-4be2-aefb-fbb16141ab79",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SurrealDB\n",
|
||||
"\n",
|
||||
">[SurrealDB](https://surrealdb.com/) is an end-to-end cloud-native database designed for modern applications, including web, mobile, serverless, Jamstack, backend, and traditional applications. With SurrealDB, you can simplify your database and API infrastructure, reduce development time, and build secure, performant apps quickly and cost-effectively.\n",
|
||||
">\n",
|
||||
">**Key features of SurrealDB include:**\n",
|
||||
">\n",
|
||||
">* **Reduces development time:** SurrealDB simplifies your database and API stack by removing the need for most server-side components, allowing you to build secure, performant apps faster and cheaper.\n",
|
||||
">* **Real-time collaborative API backend service:** SurrealDB functions as both a database and an API backend service, enabling real-time collaboration.\n",
|
||||
">* **Support for multiple querying languages:** SurrealDB supports SQL querying from client devices, GraphQL, ACID transactions, WebSocket connections, structured and unstructured data, graph querying, full-text indexing, and geospatial querying.\n",
|
||||
">* **Granular access control:** SurrealDB provides row-level permissions-based access control, giving you the ability to manage data access with precision.\n",
|
||||
">\n",
|
||||
">View the [features](https://surrealdb.com/features), the latest [releases](https://surrealdb.com/releases), and [documentation](https://surrealdb.com/docs).\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `SurrealDBLoader`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f56ccec5-24b3-4762-91a6-91385e041fee",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"The SurrealDB Document Loader returns a list of Langchain Documents from a SurrealDB database.\n",
|
||||
"\n",
|
||||
"The Document Loader takes the following optional parameters:\n",
|
||||
"\n",
|
||||
"* `dburl`: connection string to the websocket endpoint. default: `ws://localhost:8000/rpc`\n",
|
||||
"* `ns`: name of the namespace. default: `langchain`\n",
|
||||
"* `db`: name of the database. default: `database`\n",
|
||||
"* `table`: name of the table. default: `documents`\n",
|
||||
"* `db_user`: SurrealDB credentials if needed: db username.\n",
|
||||
"* `db_pass`: SurrealDB credentails if needed: db password.\n",
|
||||
"* `filter_criteria`: dictionary to construct the `WHERE` clause for filtering results from table.\n",
|
||||
"\n",
|
||||
"The output `Document` takes the following shape:\n",
|
||||
"```\n",
|
||||
"Document(\n",
|
||||
" page_content=<json encoded string containing the result document>,\n",
|
||||
" metadata={\n",
|
||||
" 'id': <document id>,\n",
|
||||
" 'ns': <namespace name>,\n",
|
||||
" 'db': <database_name>,\n",
|
||||
" 'table': <table name>,\n",
|
||||
" ... <additional fields from metadata property of the document>\n",
|
||||
" }\n",
|
||||
")\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "77b024e0-c804-4b19-9f5e-0099eb61ba79",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"Uncomment the below cells to install surrealdb and langchain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "508bc4f3-3aa2-45d3-8e59-cd7d0ffec379",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# %pip install --upgrade --quiet surrealdb langchain langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "3ee3d767-b9ba-4be4-9e80-8fa6376beaba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# add this import for running in jupyter notebook\n",
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "1ec629f4-b99a-44f1-a938-29de7439f121",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"from langchain_community.document_loaders.surrealdb import SurrealDBLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "8deb90ac-7d4e-422c-a87a-8e6e41390a6d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"42"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = SurrealDBLoader(\n",
|
||||
" dburl=\"ws://localhost:8000/rpc\",\n",
|
||||
" ns=\"langchain\",\n",
|
||||
" db=\"database\",\n",
|
||||
" table=\"documents\",\n",
|
||||
" db_user=\"root\",\n",
|
||||
" db_pass=\"root\",\n",
|
||||
" filter_criteria={},\n",
|
||||
")\n",
|
||||
"docs = loader.load()\n",
|
||||
"len(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "0aa9d3f7-56b3-464d-9d3d-1df7164122ba",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'id': 'documents:zzz434sa584xl3b4ohvk',\n",
|
||||
" 'source': '../../modules/state_of_the_union.txt',\n",
|
||||
" 'ns': 'langchain',\n",
|
||||
" 'db': 'database',\n",
|
||||
" 'table': 'documents'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"doc = docs[-1]\n",
|
||||
"doc.metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "0378dd34-c690-4b8e-8816-90a8acc2f227",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"18078"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(doc.page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "f30f1141-329b-4674-acb4-36d9d5a9ef0a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"page_content = json.loads(doc.page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "2a58496f-a831-40ec-be6b-92ce70f78133",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'When we use taxpayer dollars to rebuild America – we are going to Buy American: buy American products to support American jobs. \\n\\nThe federal government spends about $600 Billion a year to keep the country safe and secure. \\n\\nThere’s been a law on the books for almost a century \\nto make sure taxpayers’ dollars support American jobs and businesses. \\n\\nEvery Administration says they’ll do it, but we are actually doing it. \\n\\nWe will buy American to make sure everything from the deck of an aircraft carrier to the steel on highway guardrails are made in America. \\n\\nBut to compete for the best jobs of the future, we also need to level the playing field with China and other competitors. \\n\\nThat’s why it is so important to pass the Bipartisan Innovation Act sitting in Congress that will make record investments in emerging technologies and American manufacturing. \\n\\nLet me give you one example of why it’s so important to pass it.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"page_content[\"text\"]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
486
docs/docs/integrations/document_loaders/vsdx.ipynb
Normal file
486
docs/docs/integrations/document_loaders/vsdx.ipynb
Normal file
@@ -0,0 +1,486 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Vsdx"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
 A [visio file](https">
"> A [Visio file](https://en.wikipedia.org/wiki/Microsoft_Visio) (with extension .vsdx) is associated with Microsoft Visio, a diagram creation software. It stores information about the structure, layout, and graphical elements of a diagram. This format facilitates the creation and sharing of visualizations in areas such as business, engineering, and computer science."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"A Visio file can contain multiple pages. Some of them may serve as the background for others, and this can occur across multiple layers. This **loader** extracts the textual content from each page and its associated pages, enabling the extraction of all visible text from each page, similar to what an OCR algorithm would do."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**WARNING** : Only Visio files with the **.vsdx** extension are compatible with this loader. Files with extensions such as .vsd, ... are not compatible because they cannot be converted to compressed XML."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import VsdxLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = VsdxLoader(file_path=\"./example_data/fake.vsdx\")\n",
|
||||
"documents = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Display loaded documents**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"------ Page 0 ------\n",
|
||||
"Title page : Summary\n",
|
||||
"Source : ./example_data/fake.vsdx\n",
|
||||
"\n",
|
||||
"==> CONTENT <== \n",
|
||||
"Created by\n",
|
||||
"Created the\n",
|
||||
"Modified by\n",
|
||||
"Modified the\n",
|
||||
"Version\n",
|
||||
"Title\n",
|
||||
"Florian MOREL\n",
|
||||
"2024-01-14\n",
|
||||
"FLORIAN Morel\n",
|
||||
"Today\n",
|
||||
"0.0.0.0.0.1\n",
|
||||
"This is a title\n",
|
||||
"Best Caption of the worl\n",
|
||||
"This is an arrow\n",
|
||||
"This is Earth\n",
|
||||
"This is a bounded arrow\n",
|
||||
"\n",
|
||||
"------ Page 1 ------\n",
|
||||
"Title page : Glossary\n",
|
||||
"Source : ./example_data/fake.vsdx\n",
|
||||
"\n",
|
||||
"==> CONTENT <== \n",
|
||||
"Created by\n",
|
||||
"Created the\n",
|
||||
"Modified by\n",
|
||||
"Modified the\n",
|
||||
"Version\n",
|
||||
"Title\n",
|
||||
"Florian MOREL\n",
|
||||
"2024-01-14\n",
|
||||
"FLORIAN Morel\n",
|
||||
"Today\n",
|
||||
"0.0.0.0.0.1\n",
|
||||
"This is a title\n",
|
||||
"\n",
|
||||
"------ Page 2 ------\n",
|
||||
"Title page : blanket page\n",
|
||||
"Source : ./example_data/fake.vsdx\n",
|
||||
"\n",
|
||||
"==> CONTENT <== \n",
|
||||
"Created by\n",
|
||||
"Created the\n",
|
||||
"Modified by\n",
|
||||
"Modified the\n",
|
||||
"Version\n",
|
||||
"Title\n",
|
||||
"Florian MOREL\n",
|
||||
"2024-01-14\n",
|
||||
"FLORIAN Morel\n",
|
||||
"Today\n",
|
||||
"0.0.0.0.0.1\n",
|
||||
"This is a title\n",
|
||||
"This file is a vsdx file\n",
|
||||
"First text\n",
|
||||
"Second text\n",
|
||||
"Third text\n",
|
||||
"\n",
|
||||
"------ Page 3 ------\n",
|
||||
"Title page : BLABLABLA\n",
|
||||
"Source : ./example_data/fake.vsdx\n",
|
||||
"\n",
|
||||
"==> CONTENT <== \n",
|
||||
"Created by\n",
|
||||
"Created the\n",
|
||||
"Modified by\n",
|
||||
"Modified the\n",
|
||||
"Version\n",
|
||||
"Title\n",
|
||||
"Florian MOREL\n",
|
||||
"2024-01-14\n",
|
||||
"FLORIAN Morel\n",
|
||||
"Today\n",
|
||||
"0.0.0.0.0.1\n",
|
||||
"This is a title\n",
|
||||
"Another RED arrow wow\n",
|
||||
"Arrow with point but red\n",
|
||||
"Green line\n",
|
||||
"User\n",
|
||||
"Captions\n",
|
||||
"Red arrow magic !\n",
|
||||
"Something white\n",
|
||||
"Something Red\n",
|
||||
"This a a completly useless diagramm, cool !!\n",
|
||||
"\n",
|
||||
"But this is for example !\n",
|
||||
"This diagramm is a base of many pages in this file. But it is editable in file \\\"BG WITH CONTENT\\\"\n",
|
||||
"This is a page with something...\n",
|
||||
"\n",
|
||||
"WAW I have learned something !\n",
|
||||
"This is a page with something...\n",
|
||||
"\n",
|
||||
"WAW I have learned something !\n",
|
||||
"\n",
|
||||
"X2\n",
|
||||
"\n",
|
||||
"------ Page 4 ------\n",
|
||||
"Title page : What a page !!\n",
|
||||
"Source : ./example_data/fake.vsdx\n",
|
||||
"\n",
|
||||
"==> CONTENT <== \n",
|
||||
"Created by\n",
|
||||
"Created the\n",
|
||||
"Modified by\n",
|
||||
"Modified the\n",
|
||||
"Version\n",
|
||||
"Title\n",
|
||||
"Florian MOREL\n",
|
||||
"2024-01-14\n",
|
||||
"FLORIAN Morel\n",
|
||||
"Today\n",
|
||||
"0.0.0.0.0.1\n",
|
||||
"This is a title\n",
|
||||
"Something white\n",
|
||||
"Something Red\n",
|
||||
"This a a completly useless diagramm, cool !!\n",
|
||||
"\n",
|
||||
"But this is for example !\n",
|
||||
"This diagramm is a base of many pages in this file. But it is editable in file \\\"BG WITH CONTENT\\\"\n",
|
||||
"Another RED arrow wow\n",
|
||||
"Arrow with point but red\n",
|
||||
"Green line\n",
|
||||
"User\n",
|
||||
"Captions\n",
|
||||
"Red arrow magic !\n",
|
||||
"\n",
|
||||
"------ Page 5 ------\n",
|
||||
"Title page : next page after previous one\n",
|
||||
"Source : ./example_data/fake.vsdx\n",
|
||||
"\n",
|
||||
"==> CONTENT <== \n",
|
||||
"Created by\n",
|
||||
"Created the\n",
|
||||
"Modified by\n",
|
||||
"Modified the\n",
|
||||
"Version\n",
|
||||
"Title\n",
|
||||
"Florian MOREL\n",
|
||||
"2024-01-14\n",
|
||||
"FLORIAN Morel\n",
|
||||
"Today\n",
|
||||
"0.0.0.0.0.1\n",
|
||||
"This is a title\n",
|
||||
"Another RED arrow wow\n",
|
||||
"Arrow with point but red\n",
|
||||
"Green line\n",
|
||||
"User\n",
|
||||
"Captions\n",
|
||||
"Red arrow magic !\n",
|
||||
"Something white\n",
|
||||
"Something Red\n",
|
||||
"This a a completly useless diagramm, cool !!\n",
|
||||
"\n",
|
||||
"But this is for example !\n",
|
||||
"This diagramm is a base of many pages in this file. But it is editable in file \\\"BG WITH CONTENT\\\"\n",
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor\n",
|
||||
"\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0\\u00a0-\\u00a0incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa\n",
|
||||
"*\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"qui officia deserunt mollit anim id est laborum.\n",
|
||||
"\n",
|
||||
"------ Page 6 ------\n",
|
||||
"Title page : Connector Page\n",
|
||||
"Source : ./example_data/fake.vsdx\n",
|
||||
"\n",
|
||||
"==> CONTENT <== \n",
|
||||
"Created by\n",
|
||||
"Created the\n",
|
||||
"Modified by\n",
|
||||
"Modified the\n",
|
||||
"Version\n",
|
||||
"Title\n",
|
||||
"Florian MOREL\n",
|
||||
"2024-01-14\n",
|
||||
"FLORIAN Morel\n",
|
||||
"Today\n",
|
||||
"0.0.0.0.0.1\n",
|
||||
"This is a title\n",
|
||||
"Something white\n",
|
||||
"Something Red\n",
|
||||
"This a a completly useless diagramm, cool !!\n",
|
||||
"\n",
|
||||
"But this is for example !\n",
|
||||
"This diagramm is a base of many pages in this file. But it is editable in file \\\"BG WITH CONTENT\\\"\n",
|
||||
"\n",
|
||||
"------ Page 7 ------\n",
|
||||
"Title page : Useful ↔ Useless page\n",
|
||||
"Source : ./example_data/fake.vsdx\n",
|
||||
"\n",
|
||||
"==> CONTENT <== \n",
|
||||
"Created by\n",
|
||||
"Created the\n",
|
||||
"Modified by\n",
|
||||
"Modified the\n",
|
||||
"Version\n",
|
||||
"Title\n",
|
||||
"Florian MOREL\n",
|
||||
"2024-01-14\n",
|
||||
"FLORIAN Morel\n",
|
||||
"Today\n",
|
||||
"0.0.0.0.0.1\n",
|
||||
"This is a title\n",
|
||||
"Something white\n",
|
||||
"Something Red\n",
|
||||
"This a a completly useless diagramm, cool !!\n",
|
||||
"\n",
|
||||
"But this is for example !\n",
|
||||
"This diagramm is a base of many pages in this file. But it is editable in file \\\"BG WITH CONTENT\\\"\n",
|
||||
"Title of this document : BLABLABLA\n",
|
||||
"\n",
|
||||
"------ Page 8 ------\n",
|
||||
"Title page : Alone page\n",
|
||||
"Source : ./example_data/fake.vsdx\n",
|
||||
"\n",
|
||||
"==> CONTENT <== \n",
|
||||
"Black cloud\n",
|
||||
"Unidirectional traffic primary path\n",
|
||||
"Unidirectional traffic backup path\n",
|
||||
"Encapsulation\n",
|
||||
"User\n",
|
||||
"Captions\n",
|
||||
"Bidirectional traffic\n",
|
||||
"Alone, sad\n",
|
||||
"Test of another page\n",
|
||||
"This is a \\\"bannier\\\"\n",
|
||||
"Tests of some exotics characters :\\u00a0\\u00e3\\u00e4\\u00e5\\u0101\\u0103 \\u00fc\\u2554\\u00a0 \\u00a0\\u00bc \\u00c7 \\u25d8\\u25cb\\u2642\\u266b\\u2640\\u00ee\\u2665\n",
|
||||
"This is ethernet\n",
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.\n",
|
||||
"This is an empty case\n",
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.\n",
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor\n",
|
||||
"\\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0 \\u00a0-\\u00a0 incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa \n",
|
||||
"*\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"qui officia deserunt mollit anim id est laborum.\n",
|
||||
"\n",
|
||||
"------ Page 9 ------\n",
|
||||
"Title page : BG\n",
|
||||
"Source : ./example_data/fake.vsdx\n",
|
||||
"\n",
|
||||
"==> CONTENT <== \n",
|
||||
"Best Caption of the worl\n",
|
||||
"This is an arrow\n",
|
||||
"This is Earth\n",
|
||||
"This is a bounded arrow\n",
|
||||
"Created by\n",
|
||||
"Created the\n",
|
||||
"Modified by\n",
|
||||
"Modified the\n",
|
||||
"Version\n",
|
||||
"Title\n",
|
||||
"Florian MOREL\n",
|
||||
"2024-01-14\n",
|
||||
"FLORIAN Morel\n",
|
||||
"Today\n",
|
||||
"0.0.0.0.0.1\n",
|
||||
"This is a title\n",
|
||||
"\n",
|
||||
"------ Page 10 ------\n",
|
||||
"Title page : BG + caption1\n",
|
||||
"Source : ./example_data/fake.vsdx\n",
|
||||
"\n",
|
||||
"==> CONTENT <== \n",
|
||||
"Created by\n",
|
||||
"Created the\n",
|
||||
"Modified by\n",
|
||||
"Modified the\n",
|
||||
"Version\n",
|
||||
"Title\n",
|
||||
"Florian MOREL\n",
|
||||
"2024-01-14\n",
|
||||
"FLORIAN Morel\n",
|
||||
"Today\n",
|
||||
"0.0.0.0.0.1\n",
|
||||
"This is a title\n",
|
||||
"Another RED arrow wow\n",
|
||||
"Arrow with point but red\n",
|
||||
"Green line\n",
|
||||
"User\n",
|
||||
"Captions\n",
|
||||
"Red arrow magic !\n",
|
||||
"Something white\n",
|
||||
"Something Red\n",
|
||||
"This a a completly useless diagramm, cool !!\n",
|
||||
"\n",
|
||||
"But this is for example !\n",
|
||||
"This diagramm is a base of many pages in this file. But it is editable in file \\\"BG WITH CONTENT\\\"\n",
|
||||
"Useful\\u2194 Useless page\\u00a0\n",
|
||||
"\n",
|
||||
"Tests of some exotics characters :\\u00a0\\u00e3\\u00e4\\u00e5\\u0101\\u0103 \\u00fc\\u2554\\u00a0\\u00a0\\u00bc \\u00c7 \\u25d8\\u25cb\\u2642\\u266b\\u2640\\u00ee\\u2665\n",
|
||||
"\n",
|
||||
"------ Page 11 ------\n",
|
||||
"Title page : BG+\n",
|
||||
"Source : ./example_data/fake.vsdx\n",
|
||||
"\n",
|
||||
"==> CONTENT <== \n",
|
||||
"Created by\n",
|
||||
"Created the\n",
|
||||
"Modified by\n",
|
||||
"Modified the\n",
|
||||
"Version\n",
|
||||
"Title\n",
|
||||
"Florian MOREL\n",
|
||||
"2024-01-14\n",
|
||||
"FLORIAN Morel\n",
|
||||
"Today\n",
|
||||
"0.0.0.0.0.1\n",
|
||||
"This is a title\n",
|
||||
"\n",
|
||||
"------ Page 12 ------\n",
|
||||
"Title page : BG WITH CONTENT\n",
|
||||
"Source : ./example_data/fake.vsdx\n",
|
||||
"\n",
|
||||
"==> CONTENT <== \n",
|
||||
"Created by\n",
|
||||
"Created the\n",
|
||||
"Modified by\n",
|
||||
"Modified the\n",
|
||||
"Version\n",
|
||||
"Title\n",
|
||||
"Florian MOREL\n",
|
||||
"2024-01-14\n",
|
||||
"FLORIAN Morel\n",
|
||||
"Today\n",
|
||||
"0.0.0.0.0.1\n",
|
||||
"This is a title\n",
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.\n",
|
||||
"\n",
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. - Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.\n",
|
||||
"This is a page with a lot of text\n",
|
||||
"\n",
|
||||
"------ Page 13 ------\n",
|
||||
"Title page : 2nd caption with ____________________________________________________________________ content\n",
|
||||
"Source : ./example_data/fake.vsdx\n",
|
||||
"\n",
|
||||
"==> CONTENT <== \n",
|
||||
"Created by\n",
|
||||
"Created the\n",
|
||||
"Modified by\n",
|
||||
"Modified the\n",
|
||||
"Version\n",
|
||||
"Title\n",
|
||||
"Florian MOREL\n",
|
||||
"2024-01-14\n",
|
||||
"FLORIAN Morel\n",
|
||||
"Today\n",
|
||||
"0.0.0.0.0.1\n",
|
||||
"This is a title\n",
|
||||
"Another RED arrow wow\n",
|
||||
"Arrow with point but red\n",
|
||||
"Green line\n",
|
||||
"User\n",
|
||||
"Captions\n",
|
||||
"Red arrow magic !\n",
|
||||
"Something white\n",
|
||||
"Something Red\n",
|
||||
"This a a completly useless diagramm, cool !!\n",
|
||||
"\n",
|
||||
"But this is for example !\n",
|
||||
"This diagramm is a base of many pages in this file. But it is editable in file \\\"BG WITH CONTENT\\\"\n",
|
||||
"Only connectors on this page. This is the CoNNeCtor page\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for i, doc in enumerate(documents):\n",
|
||||
" print(f\"\\n------ Page {doc.metadata['page']} ------\")\n",
|
||||
" print(f\"Title page : {doc.metadata['page_name']}\")\n",
|
||||
" print(f\"Source : {doc.metadata['source']}\")\n",
|
||||
" print(\"\\n==> CONTENT <== \")\n",
|
||||
" print(doc.page_content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -14,12 +14,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "02be122d-04e8-4ec6-84d1-f1d8961d6828",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[33mWARNING: There was an error checking the latest version of pip.\u001b[0m\u001b[33m\n",
|
||||
"\u001b[0mNote: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# install the package:\n",
|
||||
"%pip install --upgrade --quiet ai21"
|
||||
@@ -27,20 +36,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 1,
|
||||
"id": "4229227e-6ca2-41ad-a3c3-5f29e3559091",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get AI21_API_KEY. Use https://studio.ai21.com/account/account\n",
|
||||
"\n",
|
||||
@@ -51,21 +52,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"id": "6fb585dd",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain_community.llms import AI21"
|
||||
"from langchain_community.llms import AI21\n",
|
||||
"from langchain_core.prompts import PromptTemplate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 12,
|
||||
"id": "035dea0f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -76,12 +76,12 @@
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
|
||||
"prompt = PromptTemplate.from_template(template)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 9,
|
||||
"id": "3f3458d9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -93,19 +93,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 10,
|
||||
"id": "a641dbd9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)"
|
||||
"llm_chain = prompt | llm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 13,
|
||||
"id": "9f0b1960",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -114,10 +114,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n1. What year was Justin Bieber born?\\nJustin Bieber was born in 1994.\\n2. What team won the Super Bowl in 1994?\\nThe Dallas Cowboys won the Super Bowl in 1994.'"
|
||||
"'\\nThe Super Bowl in the year Justin Beiber was born was in the year 1991.\\nThe Super Bowl in 1991 was won by the Washington Redskins.\\nFinal answer: Washington Redskins'"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -125,7 +125,7 @@
|
||||
"source": [
|
||||
"question = \"What NFL team won the Super Bowl in the year Justin Beiber was born?\"\n",
|
||||
"\n",
|
||||
"llm_chain.run(question)"
|
||||
"llm_chain.invoke({\"question\": question})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -153,7 +153,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -11,29 +11,30 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "xazoWTniN8Xa"
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Cloud Vertex AI\n",
|
||||
"\n",
|
||||
"**Note:** This is separate from the `Google Generative AI` integration, it exposes [Vertex AI Generative API](https://cloud.google.com/vertex-ai/docs/generative-ai/learn/overview) on `Google Cloud`.\n"
|
||||
"**Note:** This is separate from the `Google Generative AI` integration, it exposes [Vertex AI Generative API](https://cloud.google.com/vertex-ai/docs/generative-ai/learn/overview) on `Google Cloud`.\n",
|
||||
"\n",
|
||||
"VertexAI exposes all foundational models available in google cloud:\n",
|
||||
"- Gemini (`gemini-pro` and `gemini-pro-vision`)\n",
|
||||
"- Palm 2 for Text (`text-bison`)\n",
|
||||
"- Codey for Code Generation (`code-bison`)\n",
|
||||
"\n",
|
||||
"For a full and updated list of available models visit [VertexAI documentation](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/overview)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "Q_UoF2FKN8Xb"
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setting up"
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "8uImJzc4N8Xb"
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"By default, Google Cloud [does not use](https://cloud.google.com/vertex-ai/docs/generative-ai/data-governance#foundation_model_development) customer data to train its foundation models as part of Google Cloud's AI/ML Privacy Commitment. More details about how Google processes data can also be found in [Google's Customer Data Processing Addendum (CDPA)](https://cloud.google.com/terms/data-processing-addendum).\n",
|
||||
"\n",
|
||||
@@ -52,78 +53,29 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-core langchain-google-vertexai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" **Pros of Python:**\n",
|
||||
"\n",
|
||||
"* **Easy to learn and use:** Python is known for its simple syntax and readability, making it a great choice for beginners. It also has a large and supportive community, with many resources available online.\n",
|
||||
"* **Versatile:** Python can be used for a wide variety of tasks, including web development, data science, machine learning, and artificial intelligence.\n",
|
||||
"* **Powerful:** Python has a rich library of built-in functions and modules, making it easy to perform complex tasks without having to write a lot of code.\n",
|
||||
"* **Cross-platform:** Python can be run on a variety of operating systems\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_google_vertexai import VertexAI\n",
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"llm = VertexAI()\n",
|
||||
"print(llm(\"What are some of the pros and cons of Python as a programming language?\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "38S1FS3qN8Xc"
|
||||
},
|
||||
"source": [
|
||||
"You can also use Gemini model (in preview) with VertexAI:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"**Pros of Python:**\n",
|
||||
"\n",
|
||||
"* **Easy to learn and use:** Python is known for its simplicity and readability, making it a great choice for beginners and experienced programmers alike. Its syntax is straightforward and intuitive, allowing developers to quickly pick up the language and start writing code.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"* **Versatile:** Python is a general-purpose language that can be used for a wide range of applications, including web development, data science, machine learning, and scripting. Its extensive standard library and vast ecosystem of third-party modules make it suitable for a variety of tasks.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"* **Cross-platform:** Python is compatible with multiple operating systems, including\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm = VertexAI(model_name=\"gemini-pro\")\n",
|
||||
"print(llm(\"What are some of the pros and cons of Python as a programming language?\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "_-9MhhN8N8Xc"
|
||||
},
|
||||
"source": [
|
||||
"## Using in a chain"
|
||||
"VertexAI supports all [LLM](/docs/modules/model_io/llms/) functionality."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -131,204 +83,199 @@
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_vertexai import VertexAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = VertexAI(model_name=\"gemini-pro\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'**Pros:**\\n\\n* **Easy to learn and use:** Python is known for its simple syntax and readability, making it a great choice for beginners and experienced programmers alike.\\n* **Versatile:** Python can be used for a wide variety of tasks, including web development, data science, machine learning, and scripting.\\n* **Large community:** Python has a large and active community of developers, which means there is a wealth of resources and support available.\\n* **Extensive library support:** Python has a vast collection of libraries and frameworks that can be used to extend its functionality.\\n* **Cross-platform:** Python is available for a'"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"message = \"What are some of the pros and cons of Python as a programming language?\"\n",
|
||||
"model.invoke(message)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'**Pros:**\\n\\n* **Easy to learn and use:** Python is known for its simple syntax and readability, making it a great choice for beginners and experienced programmers alike.\\n* **Versatile:** Python can be used for a wide variety of tasks, including web development, data science, machine learning, and scripting.\\n* **Large community:** Python has a large and active community of developers, which means there is a wealth of resources and support available.\\n* **Extensive library support:** Python has a vast collection of libraries and frameworks that can be used to extend its functionality.\\n* **Cross-platform:** Python is available for a'"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"await model.ainvoke(message)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"**Pros:**\n",
|
||||
"\n",
|
||||
"* **Easy to learn and use:** Python is known for its simple syntax and readability, making it a great choice for beginners and experienced programmers alike.\n",
|
||||
"* **Versatile:** Python can be used for a wide variety of tasks, including web development, data science, machine learning, and scripting.\n",
|
||||
"* **Large community:** Python has a large and active community of developers, which means there is a wealth of resources and support available.\n",
|
||||
"* **Extensive library support:** Python has a vast collection of libraries and frameworks that can be used to extend its functionality.\n",
|
||||
"* **Cross-platform:** Python is available for a"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in model.stream(message):\n",
|
||||
" print(chunk, end=\"\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['**Pros:**\\n\\n* **Easy to learn and use:** Python is known for its simple syntax and readability, making it a great choice for beginners and experienced programmers alike.\\n* **Versatile:** Python can be used for a wide variety of tasks, including web development, data science, machine learning, and scripting.\\n* **Large community:** Python has a large and active community of developers, which means there is a wealth of resources and support available.\\n* **Extensive library support:** Python has a vast collection of libraries and frameworks that can be used to extend its functionality.\\n* **Cross-platform:** Python is available for a']"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model.batch([message])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can use the `generate` method to get back extra metadata like [safety attributes](https://cloud.google.com/vertex-ai/docs/generative-ai/learn/responsible-ai#safety_attribute_confidence_scoring) and not just text completions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[[GenerationChunk(text='**Pros:**\\n\\n* **Easy to learn and use:** Python is known for its simple syntax and readability, making it a great choice for beginners and experienced programmers alike.\\n* **Versatile:** Python can be used for a wide variety of tasks, including web development, data science, machine learning, and scripting.\\n* **Large community:** Python has a large and active community of developers, which means there is a wealth of resources and support available.\\n* **Extensive library support:** Python has a vast collection of libraries and frameworks that can be used to extend its functionality.\\n* **Cross-platform:** Python is available for a')]]"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = model.generate([message])\n",
|
||||
"result.generations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[[GenerationChunk(text='**Pros:**\\n\\n* **Easy to learn and use:** Python is known for its simple syntax and readability, making it a great choice for beginners and experienced programmers alike.\\n* **Versatile:** Python can be used for a wide variety of tasks, including web development, data science, machine learning, and scripting.\\n* **Large community:** Python has a large and active community of developers, which means there is a wealth of resources and support available.\\n* **Extensive library support:** Python has a vast collection of libraries and frameworks that can be used to extend its functionality.\\n* **Cross-platform:** Python is available for a')]]"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = await model.agenerate([message])\n",
|
||||
"result.generations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also easily combine with a prompt template for easy structuring of user input. We can do this using [LCEL](/docs/expression_language)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1. You start with 5 apples.\n",
|
||||
"2. You throw away 2 apples, so you have 5 - 2 = 3 apples left.\n",
|
||||
"3. You eat 1 apple, so you have 3 - 1 = 2 apples left.\n",
|
||||
"\n",
|
||||
"Therefore, you have 2 apples left.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"template = \"\"\"Question: {question}\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\"\"\"\n",
|
||||
"prompt = PromptTemplate.from_template(template)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = prompt | llm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Justin Bieber was born on March 1, 1994. Bill Clinton was the president of the United States from January 20, 1993, to January 20, 2001.\n",
|
||||
"The final answer is Bill Clinton\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"question = \"Who was the president in the year Justin Beiber was born?\"\n",
|
||||
"prompt = PromptTemplate.from_template(template)\n",
|
||||
"\n",
|
||||
"chain = prompt | model\n",
|
||||
"\n",
|
||||
"question = \"\"\"\n",
|
||||
"I have five apples. I throw two away. I eat one. How many apples do I have left?\n",
|
||||
"\"\"\"\n",
|
||||
"print(chain.invoke({\"question\": question}))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "AV7oXXuHN8Xd"
|
||||
},
|
||||
"source": [
|
||||
"## Code generation example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "3ZzVtF6tN8Xd"
|
||||
},
|
||||
"source": [
|
||||
"You can now leverage the `Codey API` for code generation within `Vertex AI`.\n",
|
||||
"\n",
|
||||
"The model names are:\n",
|
||||
"- `code-bison`: for code suggestion\n",
|
||||
"- `code-gecko`: for code completion"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = VertexAI(model_name=\"code-bison\", max_output_tokens=1000, temperature=0.3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"question = \"Write a python function that checks if a string is a valid email address\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"```python\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"def is_valid_email(email):\n",
|
||||
" pattern = re.compile(r\"[^@]+@[^@]+\\.[^@]+\")\n",
|
||||
" return pattern.match(email)\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(llm(question))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "0WqyaSC2N8Xd"
|
||||
},
|
||||
"source": [
|
||||
"## Full generation info\n",
|
||||
"\n",
|
||||
"We can use the `generate` method to get back extra metadata like [safety attributes](https://cloud.google.com/vertex-ai/docs/generative-ai/learn/responsible-ai#safety_attribute_confidence_scoring) and not just text completions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[[GenerationChunk(text='```python\\nimport re\\n\\ndef is_valid_email(email):\\n pattern = re.compile(r\"[^@]+@[^@]+\\\\.[^@]+\")\\n return pattern.match(email)\\n```', generation_info={'is_blocked': False, 'safety_attributes': {'Health': 0.1}})]]"
|
||||
]
|
||||
},
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = llm.generate([question])\n",
|
||||
"result.generations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "Wd5M4BBUN8Xd"
|
||||
},
|
||||
"source": [
|
||||
"## Asynchronous calls\n",
|
||||
"\n",
|
||||
"With `agenerate` we can make asynchronous calls"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# If running in a Jupyter notebook you'll need to install nest_asyncio\n",
|
||||
"\n",
|
||||
"%pip install --upgrade --quiet nest_asyncio\n",
|
||||
"\n",
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"LLMResult(generations=[[GenerationChunk(text='```python\\nimport re\\n\\ndef is_valid_email(email):\\n pattern = re.compile(r\"[^@]+@[^@]+\\\\.[^@]+\")\\n return pattern.match(email)\\n```', generation_info={'is_blocked': False, 'safety_attributes': {'Health': 0.1}})]], llm_output=None, run=[RunInfo(run_id=UUID('caf74e91-aefb-48ac-8031-0c505fcbbcc6'))])"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"asyncio.run(llm.agenerate([question]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "VLsy_4bZN8Xd"
|
||||
},
|
||||
"source": [
|
||||
"## Streaming calls\n",
|
||||
"\n",
|
||||
"With `stream` we can stream results from the model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys"
|
||||
"You can use different foundational models for specialized in different tasks. \n",
|
||||
"For an updated list of available models visit [VertexAI documentation](https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/overview)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -354,49 +301,38 @@
|
||||
" True if the string is a valid email address, False otherwise.\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" # Check for a valid email address format.\n",
|
||||
" if not re.match(r\"^[A-Za-z0-9\\.\\+_-]+@[A-Za-z0-9\\._-]+\\.[a-zA-Z]*$\", email):\n",
|
||||
" return False\n",
|
||||
" # Compile the regular expression for an email address.\n",
|
||||
" regex = re.compile(r\"[^@]+@[^@]+\\.[^@]+\")\n",
|
||||
"\n",
|
||||
" # Check if the domain name exists.\n",
|
||||
" try:\n",
|
||||
" domain = email.split(\"@\")[1]\n",
|
||||
" socket.gethostbyname(domain)\n",
|
||||
" except socket.gaierror:\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
" return True\n",
|
||||
"```"
|
||||
" # Check if the string matches the regular expression.\n",
|
||||
" return regex.match(email) is not None\n",
|
||||
"```\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in llm.stream(question):\n",
|
||||
" sys.stdout.write(chunk)\n",
|
||||
" sys.stdout.flush()"
|
||||
"llm = VertexAI(model_name=\"code-bison\", max_output_tokens=1000, temperature=0.3)\n",
|
||||
"question = \"Write a python function that checks if a string is a valid email address\"\n",
|
||||
"print(model.invoke(question))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "4VJ8GwhaN8Xd"
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Multimodality"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "L7BovARaN8Xe"
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"With Gemini, you can use LLM in a multimodal mode:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -429,16 +365,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "3Vk3gQrrOaL9"
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's double-check it's a cat :)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -448,7 +382,7 @@
|
||||
"<vertexai.generative_models._generative_models.Image at 0x791ded5f1ed0>"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -462,16 +396,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "1uEACSSm8AL2"
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also pass images as bytes:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -506,18 +438,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "AuhF5WQuN8Xe"
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Please, note that you can also use the image stored in GCS (just point the `url` to the full GCS path, starting with `gs://` instead of a local one)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "qaC2UmxS9WtB"
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And you can also pass a history of a previous chat to the LLM:"
|
||||
]
|
||||
@@ -564,18 +492,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "VEYAfdBpN8Xe"
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Vertex Model Garden"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "N3ptjr_LN8Xe"
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Vertex Model Garden [exposes](https://cloud.google.com/vertex-ai/docs/start/explore-models) open-sourced models that can be deployed and served on Vertex AI. If you have successfully deployed a model from Vertex Model Garden, you can find a corresponding Vertex AI [endpoint](https://cloud.google.com/vertex-ai/docs/general/deployment#what_happens_when_you_deploy_a_model) in the console or via API."
|
||||
]
|
||||
@@ -604,14 +528,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(llm(\"What is the meaning of life?\"))"
|
||||
"llm.invoke(\"What is the meaning of life?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "TDXoFZ6YN8Xe"
|
||||
},
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Like all LLMs, we can then compose it with other components:"
|
||||
]
|
||||
@@ -643,8 +565,16 @@
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"version": "3.11.4"
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -59,7 +59,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Optional: Validate your Enviroment variables ```GRADIENT_ACCESS_TOKEN``` and ```GRADIENT_WORKSPACE_ID``` to get currently deployed models. Using the `gradientai` Python package."
|
||||
"Optional: Validate your Environment variables ```GRADIENT_ACCESS_TOKEN``` and ```GRADIENT_WORKSPACE_ID``` to get currently deployed models. Using the `gradientai` Python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -316,7 +316,7 @@
|
||||
"prompt = \"\"\"\n",
|
||||
"Question: A rap battle between Stephen Colbert and John Oliver\n",
|
||||
"\"\"\"\n",
|
||||
"llm(prompt)"
|
||||
"llm.invoke(prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -618,7 +618,7 @@
|
||||
],
|
||||
"source": [
|
||||
"%%capture captured --no-stdout\n",
|
||||
"result = llm(\"Describe a person in JSON format:\")"
|
||||
"result = llm.invoke(\"Describe a person in JSON format:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -674,7 +674,7 @@
|
||||
],
|
||||
"source": [
|
||||
"%%capture captured --no-stdout\n",
|
||||
"result = llm(\"List of top-3 my favourite books:\")"
|
||||
"result = llm.invoke(\"List of top-3 my favourite books:\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -318,7 +318,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Standard Cache\n",
|
||||
"Use [Redis](/docs/integrations/partners/redis) to cache prompts and responses."
|
||||
"Use [Redis](/docs/integrations/providers/redis) to cache prompts and responses."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -404,7 +404,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Semantic Cache\n",
|
||||
"Use [Redis](/docs/integrations/partners/redis) to cache prompts and responses and evaluate hits based on semantic similarity."
|
||||
"Use [Redis](/docs/integrations/providers/redis) to cache prompts and responses and evaluate hits based on semantic similarity."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -728,7 +728,7 @@
|
||||
},
|
||||
"source": [
|
||||
"## `Momento` Cache\n",
|
||||
"Use [Momento](/docs/integrations/partners/momento) to cache prompts and responses.\n",
|
||||
"Use [Momento](/docs/integrations/providers/momento) to cache prompts and responses.\n",
|
||||
"\n",
|
||||
"Requires momento to use, uncomment below to install:"
|
||||
]
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -176,7 +176,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"id": "c7d80c05",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -197,17 +197,18 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"id": "dc076c56",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'How many breeds of dog are there?'"
|
||||
"{'topic': 'dog',\n",
|
||||
" 'text': 'What is the name of the dog that is the most popular in the world?'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -216,7 +217,7 @@
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"\n",
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=watsonx_llm)\n",
|
||||
"llm_chain.run(\"dog\")"
|
||||
"llm_chain.invoke(\"dog\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -248,7 +249,7 @@
|
||||
"source": [
|
||||
"# Calling a single prompt\n",
|
||||
"\n",
|
||||
"watsonx_llm(\"Who is man's best friend?\")"
|
||||
"watsonx_llm.invoke(\"Who is man's best friend?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -327,7 +328,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.18"
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -0,0 +1,147 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "91c6a7ef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Cloud Firestore\n",
|
||||
"\n",
|
||||
"> [`Cloud Firestore`](https://cloud.google.com/firestore) is a NoSQL document database built for automatic scaling, high performance, and ease of application development.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use Firestore to store chat message history."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2d6ed3c8-b70a-498c-bc9e-41b91797d3b7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setting up"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b8eca282",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To run this notebook, you will need a Google Cloud Project, a Firestore database instance in Native Mode, and Google credentials, see [Firestore Quickstarts](https://cloud.google.com/firestore/docs/quickstarts)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5a7f3b3f-d9b8-4577-a7ef-bdd8ecaedb70",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install firebase-admin"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a8e63850-3e14-46fe-a59e-be6d6bf8fe61",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Basic Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d15e3302",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_message_histories.firestore import (\n",
|
||||
" FirestoreChatMessageHistory,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"message_history = FirestoreChatMessageHistory(\n",
|
||||
" collection_name=\"langchain-chat-history\",\n",
|
||||
" session_id=\"user-session-id\",\n",
|
||||
" user_id=\"user-id\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"message_history.add_user_message(\"hi!\")\n",
|
||||
"message_history.add_ai_message(\"whats up?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "64fc465e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[HumanMessage(content='hi!'),\n",
|
||||
" HumanMessage(content='hi!'),\n",
|
||||
" AIMessage(content='whats up?')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"message_history.messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4be8576e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Custom Firestore Client"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "12999273",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import firebase_admin\n",
|
||||
"from firebase_admin import credentials, firestore\n",
|
||||
"\n",
|
||||
"# Use a service account.\n",
|
||||
"cred = credentials.Certificate(\"path/to/serviceAccount.json\")\n",
|
||||
"\n",
|
||||
"app = firebase_admin.initialize_app(cred)\n",
|
||||
"client = firestore.client(app=app)\n",
|
||||
"\n",
|
||||
"message_history = FirestoreChatMessageHistory(\n",
|
||||
" collection_name=\"langchain-chat-history\",\n",
|
||||
" session_id=\"user-session-id\",\n",
|
||||
" user_id=\"user-id\",\n",
|
||||
" firestore_client=client,\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -11,7 +11,7 @@
|
||||
">\n",
|
||||
">`MongoDB` is developed by MongoDB Inc. and licensed under the Server Side Public License (SSPL). - [Wikipedia](https://en.wikipedia.org/wiki/MongoDB)\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use Mongodb to store chat message history.\n"
|
||||
"This notebook goes over how to use the `MongoDBChatMessageHistory` class to store chat message history in a Mongodb database.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -19,76 +19,230 @@
|
||||
"id": "2d6ed3c8-b70a-498c-bc9e-41b91797d3b7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setting up"
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"The integration lives in the `langchain-community` package, so we need to install that. We also need to install the `pymongo` package.\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install -U --quiet langchain-community pymongo\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09c33ad3-9ab1-48b5-bead-9a44f3d86eeb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5a7f3b3f-d9b8-4577-a7ef-bdd8ecaedb70",
|
||||
"id": "0976204d-c681-4288-bfe5-a550e0340f35",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet pymongo"
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "71a0a5aa-8f12-462a-bcd0-c611d76566f8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"To use the storage you need to provide only 2 things:\n",
|
||||
"\n",
|
||||
"1. Session Id - a unique identifier of the session, like user name, email, chat id etc.\n",
|
||||
"2. Connection string - a string that specifies the database connection. It will be passed to MongoDB create_engine function.\n",
|
||||
"\n",
|
||||
"If you want to customize where the chat histories go, you can also pass:\n",
|
||||
"1. *database_name* - name of the database to use\n",
|
||||
"1. *collection_name* - collection to use within that database"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "47a601d2",
|
||||
"metadata": {},
|
||||
"id": "0179847d-76b6-43bc-b15c-7fecfcb27ac7",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-28T10:04:38.077748Z",
|
||||
"start_time": "2023-08-28T10:04:36.105894Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Provide the connection string to connect to the MongoDB database\n",
|
||||
"connection_string = \"mongodb://mongo_user:password123@mongo:27017\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a8e63850-3e14-46fe-a59e-be6d6bf8fe61",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example"
|
||||
"from langchain_community.chat_message_histories import MongoDBChatMessageHistory\n",
|
||||
"\n",
|
||||
"chat_message_history = MongoDBChatMessageHistory(\n",
|
||||
" session_id=\"test_session\",\n",
|
||||
" connection_string=\"mongodb://mongo_user:password123@mongo:27017\",\n",
|
||||
" database_name=\"my_db\",\n",
|
||||
" collection_name=\"chat_histories\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chat_message_history.add_user_message(\"Hello\")\n",
|
||||
"chat_message_history.add_ai_message(\"Hi\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "d15e3302",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import MongoDBChatMessageHistory\n",
|
||||
"\n",
|
||||
"message_history = MongoDBChatMessageHistory(\n",
|
||||
" connection_string=connection_string, session_id=\"test-session\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"message_history.add_user_message(\"hi!\")\n",
|
||||
"\n",
|
||||
"message_history.add_ai_message(\"whats up?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "64fc465e",
|
||||
"id": "6e7b8653-a8d2-49a7-97ba-4296f7e717e9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[HumanMessage(content='hi!', additional_kwargs={}, example=False),\n",
|
||||
" AIMessage(content='whats up?', additional_kwargs={}, example=False)]"
|
||||
"[HumanMessage(content='Hello'), AIMessage(content='Hi')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"message_history.messages"
|
||||
"chat_message_history.messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e352d786-0811-48ec-832a-9f1c0b70690e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"We can easily combine this message history class with [LCEL Runnables](/docs/expression_language/how_to/message_history)\n",
|
||||
"\n",
|
||||
"To do this we will want to use OpenAI, so we need to install that. You will also need to set the OPENAI_API_KEY environment variable to your OpenAI key.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "6558418b-0ece-4d01-9661-56d562d78f7a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Optional\n",
|
||||
"\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
|
||||
"from langchain_core.runnables.history import RunnableWithMessageHistory\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "86ddfd3f-e8cf-477a-a7fd-91be3b8aa928",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"assert os.environ[\n",
|
||||
" \"OPENAI_API_KEY\"\n",
|
||||
"], \"Set the OPENAI_API_KEY environment variable with your OpenAI API key.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "82149122-61d3-490d-9bdb-bb98606e8ba1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You are a helpful assistant.\"),\n",
|
||||
" MessagesPlaceholder(variable_name=\"history\"),\n",
|
||||
" (\"human\", \"{question}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | ChatOpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "2df90853-b67c-490f-b7f8-b69d69270b9c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain_with_history = RunnableWithMessageHistory(\n",
|
||||
" chain,\n",
|
||||
" lambda session_id: MongoDBChatMessageHistory(\n",
|
||||
" session_id=\"test_session\",\n",
|
||||
" connection_string=\"mongodb://mongo_user:password123@mongo:27017\",\n",
|
||||
" database_name=\"my_db\",\n",
|
||||
" collection_name=\"chat_histories\",\n",
|
||||
" ),\n",
|
||||
" input_messages_key=\"question\",\n",
|
||||
" history_messages_key=\"history\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "0ce596b8-3b78-48fd-9f92-46dccbbfd58b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# This is where we configure the session id\n",
|
||||
"config = {\"configurable\": {\"session_id\": \"<SESSION_ID>\"}}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "38e1423b-ba86-4496-9151-25932fab1a8b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Hi Bob! How can I assist you today?')"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain_with_history.invoke({\"question\": \"Hi! I'm bob\"}, config=config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "2ee4ee62-a216-4fb1-bf33-57476a84cf16",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Your name is Bob. Is there anything else I can help you with, Bob?')"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain_with_history.invoke({\"question\": \"Whats my name\"}, config=config)"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -12,16 +12,43 @@
|
||||
"This notebook goes over how to use `Redis` to store chat message history."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "897a4682-f9fc-488b-98f3-ae2acad84600",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"First we need to install dependencies, and start a redis instance using commands like: `redis-server`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": null,
|
||||
"id": "cda8b56d-baf7-49a2-91a2-4d424a8519cb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pip install -U langchain-community redis"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "20b99474-75ea-422e-9809-fbdb9d103afc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Store and Retrieve Messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "d15e3302",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import RedisChatMessageHistory\n",
|
||||
"from langchain_community.chat_message_histories import RedisChatMessageHistory\n",
|
||||
"\n",
|
||||
"history = RedisChatMessageHistory(\"foo\")\n",
|
||||
"history = RedisChatMessageHistory(\"foo\", url=\"redis://localhost:6379\")\n",
|
||||
"\n",
|
||||
"history.add_user_message(\"hi!\")\n",
|
||||
"\n",
|
||||
@@ -30,18 +57,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 4,
|
||||
"id": "64fc465e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[AIMessage(content='whats up?', additional_kwargs={}),\n",
|
||||
" HumanMessage(content='hi!', additional_kwargs={})]"
|
||||
"[HumanMessage(content='hi!'), AIMessage(content='whats up?')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -50,10 +76,87 @@
|
||||
"history.messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "465fdd8c-b093-4d19-a55a-30f3b646432b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using in the Chains"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8af285f8",
|
||||
"id": "94d65d2f-e9bb-4b47-a86d-dd6b1b5e8247",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pip install -U langchain-openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "ace3e7b2-5e3e-4966-b549-04952a6a9a09",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Optional\n",
|
||||
"\n",
|
||||
"from langchain_community.chat_message_histories import RedisChatMessageHistory\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
|
||||
"from langchain_core.runnables.history import RunnableWithMessageHistory\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "5c1fba0d-d06a-4695-ba14-c42a3461ada1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Your name is Bob, as you mentioned earlier. Is there anything specific you would like assistance with, Bob?')"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You're an assistant。\"),\n",
|
||||
" MessagesPlaceholder(variable_name=\"history\"),\n",
|
||||
" (\"human\", \"{question}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | ChatOpenAI()\n",
|
||||
"\n",
|
||||
"chain_with_history = RunnableWithMessageHistory(\n",
|
||||
" chain,\n",
|
||||
" lambda session_id: RedisChatMessageHistory(\n",
|
||||
" session_id, url=\"redis://localhost:6379\"\n",
|
||||
" ),\n",
|
||||
" input_messages_key=\"question\",\n",
|
||||
" history_messages_key=\"history\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"config = {\"configurable\": {\"session_id\": \"foo\"}}\n",
|
||||
"\n",
|
||||
"chain_with_history.invoke({\"question\": \"Hi! I'm bob\"}, config=config)\n",
|
||||
"\n",
|
||||
"chain_with_history.invoke({\"question\": \"Whats my name\"}, config=config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "76ce3f6b-f4c7-4d27-8031-60f7dd756695",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -75,7 +178,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.9.18"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -6,7 +6,7 @@ This page covers how to use the [Remembrall](https://remembrall.dev) ecosystem w
|
||||
|
||||
Remembrall gives your language model long-term memory, retrieval augmented generation, and complete observability with just a few lines of code.
|
||||
|
||||

|
||||

|
||||
|
||||
It works as a light-weight proxy on top of your OpenAI calls and simply augments the context of the chat calls at runtime with relevant facts that have been collected.
|
||||
|
||||
|
||||
@@ -16,172 +16,203 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "d0a07a30-028f-4e16-8b11-45b2416f7b0f",
|
||||
"execution_count": null,
|
||||
"id": "5c923f56-24a9-4f8f-9b91-138cc025c47e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet sqlite3"
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "61fda020-23a2-4605-afad-58260535ec8c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"To use the storage you need to provide only 2 things:\n",
|
||||
"\n",
|
||||
"1. Session Id - a unique identifier of the session, like user name, email, chat id etc.\n",
|
||||
"2. Connection string - a string that specifies the database connection. For SQLite, that string is `slqlite:///` followed by the name of the database file. If that file doesn't exist, it will be created."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "db59b901",
|
||||
"id": "4576e914a866fb40",
|
||||
"metadata": {
|
||||
"id": "2wUMSUoF8ffn"
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-28T10:04:38.077748Z",
|
||||
"start_time": "2023-08-28T10:04:36.105894Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import ConversationChain\n",
|
||||
"from langchain.memory import ConversationEntityMemory\n",
|
||||
"from langchain.memory.entity import SQLiteEntityStore\n",
|
||||
"from langchain.memory.prompt import ENTITY_MEMORY_CONVERSATION_TEMPLATE\n",
|
||||
"from langchain_openai import OpenAI"
|
||||
"from langchain_community.chat_message_histories import SQLChatMessageHistory\n",
|
||||
"\n",
|
||||
"chat_message_history = SQLChatMessageHistory(\n",
|
||||
" session_id=\"test_session_id\", connection_string=\"sqlite:///sqlite.db\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chat_message_history.add_user_message(\"Hello\")\n",
|
||||
"chat_message_history.add_ai_message(\"Hi\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "ca6dee29",
|
||||
"id": "b476688cbb32ba90",
|
||||
"metadata": {
|
||||
"id": "8TpJZti99gxV"
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-08-28T10:04:38.929396Z",
|
||||
"start_time": "2023-08-28T10:04:38.915727Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[HumanMessage(content='Hello'), AIMessage(content='Hi')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"entity_store = SQLiteEntityStore()\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"memory = ConversationEntityMemory(llm=llm, entity_store=entity_store)\n",
|
||||
"conversation = ConversationChain(\n",
|
||||
" llm=llm,\n",
|
||||
" prompt=ENTITY_MEMORY_CONVERSATION_TEMPLATE,\n",
|
||||
" memory=memory,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
"chat_message_history.messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f9b4c3a0",
|
||||
"metadata": {
|
||||
"id": "HEAHG1L79ca1"
|
||||
},
|
||||
"id": "e400509a-1957-4d1d-bbd6-01e8dc3dccb3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Notice the usage of `EntitySqliteStore` as parameter to `entity_store` on the `memory` property."
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"We can easily combine this message history class with [LCEL Runnables](/docs/expression_language/how_to/message_history)\n",
|
||||
"\n",
|
||||
"To do this we will want to use OpenAI, so we need to install that. We will also need to set the OPENAI_API_KEY environment variable to your OpenAI key.\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install -U langchain-openai\n",
|
||||
"\n",
|
||||
"export OPENAI_API_KEY='sk-xxxxxxx'\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "297e78a6",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 437
|
||||
},
|
||||
"id": "BzXphJWf_TAZ",
|
||||
"outputId": "de7fc966-e0fd-4daf-a9bd-4743455ea774"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new ConversationChain chain...\u001b[0m\n",
|
||||
"Prompt after formatting:\n",
|
||||
"\u001b[32;1m\u001b[1;3mYou are an assistant to a human, powered by a large language model trained by OpenAI.\n",
|
||||
"\n",
|
||||
"You are designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, you are able to generate human-like text based on the input you receive, allowing you to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.\n",
|
||||
"\n",
|
||||
"You are constantly learning and improving, and your capabilities are constantly evolving. You are able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. You have access to some personalized information provided by the human in the Context section below. Additionally, you are able to generate your own text based on the input you receive, allowing you to engage in discussions and provide explanations and descriptions on a wide range of topics.\n",
|
||||
"\n",
|
||||
"Overall, you are a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether the human needs help with a specific question or just wants to have a conversation about a particular topic, you are here to assist.\n",
|
||||
"\n",
|
||||
"Context:\n",
|
||||
"{'Deven': 'Deven is working on a hackathon project with Sam.', 'Sam': 'Sam is working on a hackathon project with Deven.'}\n",
|
||||
"\n",
|
||||
"Current conversation:\n",
|
||||
"\n",
|
||||
"Last line:\n",
|
||||
"Human: Deven & Sam are working on a hackathon project\n",
|
||||
"You:\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' That sounds like a great project! What kind of project are they working on?'"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"id": "6558418b-0ece-4d01-9661-56d562d78f7a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"conversation.run(\"Deven & Sam are working on a hackathon project\")"
|
||||
"from typing import Optional\n",
|
||||
"\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
|
||||
"from langchain_core.runnables.history import RunnableWithMessageHistory\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "7e71f1dc",
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 35
|
||||
},
|
||||
"id": "YsFE3hBjC6gl",
|
||||
"outputId": "56ab5ca9-e343-41b5-e69d-47541718a9b4"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Deven is working on a hackathon project with Sam.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"id": "82149122-61d3-490d-9bdb-bb98606e8ba1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"conversation.memory.entity_store.get(\"Deven\")"
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You are a helpful assistant.\"),\n",
|
||||
" MessagesPlaceholder(variable_name=\"history\"),\n",
|
||||
" (\"human\", \"{question}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | ChatOpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "316f2e8d",
|
||||
"id": "2df90853-b67c-490f-b7f8-b69d69270b9c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain_with_history = RunnableWithMessageHistory(\n",
|
||||
" chain,\n",
|
||||
" lambda session_id: SQLChatMessageHistory(\n",
|
||||
" session_id=session_id, connection_string=\"sqlite:///sqlite.db\"\n",
|
||||
" ),\n",
|
||||
" input_messages_key=\"question\",\n",
|
||||
" history_messages_key=\"history\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "0ce596b8-3b78-48fd-9f92-46dccbbfd58b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# This is where we configure the session id\n",
|
||||
"config = {\"configurable\": {\"session_id\": \"<SQL_SESSION_ID>\"}}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "38e1423b-ba86-4496-9151-25932fab1a8b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Sam is working on a hackathon project with Deven.'"
|
||||
"AIMessage(content='Hello Bob! How can I assist you today?')"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"conversation.memory.entity_store.get(\"Sam\")"
|
||||
"chain_with_history.invoke({\"question\": \"Hi! I'm bob\"}, config=config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b85f8427",
|
||||
"execution_count": 10,
|
||||
"id": "2ee4ee62-a216-4fb1-bf33-57476a84cf16",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Your name is Bob! Is there anything specific you would like assistance with, Bob?')"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain_with_history.invoke({\"question\": \"Whats my name\"}, config=config)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
">[Streamlit](https://docs.streamlit.io/) is an open-source Python library that makes it easy to create and share beautiful, \n",
|
||||
"custom web apps for machine learning and data science.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This notebook goes over how to store and use chat message history in a `Streamlit` app. `StreamlitChatMessageHistory` will store messages in\n",
|
||||
"[Streamlit session state](https://docs.streamlit.io/library/api-reference/session-state)\n",
|
||||
"at the specified `key=`. The default key is `\"langchain_messages\"`.\n",
|
||||
@@ -20,6 +19,12 @@
|
||||
"- For more on Streamlit check out their\n",
|
||||
"[getting started documentation](https://docs.streamlit.io/library/get-started).\n",
|
||||
"\n",
|
||||
"The integration lives in the `langchain-community` package, so we need to install that. We also need to install `streamlit`.\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"pip install -U langchain-community streamlit\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You can see the [full app example running here](https://langchain-st-memory.streamlit.app/), and more examples in\n",
|
||||
"[github.com/langchain-ai/streamlit-agent](https://github.com/langchain-ai/streamlit-agent)."
|
||||
]
|
||||
@@ -31,7 +36,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import StreamlitChatMessageHistory\n",
|
||||
"from langchain_community.chat_message_histories import StreamlitChatMessageHistory\n",
|
||||
"\n",
|
||||
"history = StreamlitChatMessageHistory(key=\"chat_messages\")\n",
|
||||
"\n",
|
||||
@@ -54,7 +59,9 @@
|
||||
"id": "b60dc735",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can integrate `StreamlitChatMessageHistory` into `ConversationBufferMemory` and chains or agents as usual. The history will be persisted across re-runs of the Streamlit app within a given user session. A given `StreamlitChatMessageHistory` will NOT be persisted or shared across user sessions."
|
||||
"We can easily combine this message history class with [LCEL Runnables](https://python.langchain.com/docs/expression_language/how_to/message_history).\n",
|
||||
"\n",
|
||||
"The history will be persisted across re-runs of the Streamlit app within a given user session. A given `StreamlitChatMessageHistory` will NOT be persisted or shared across user sessions."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -64,13 +71,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain_community.chat_message_histories import StreamlitChatMessageHistory\n",
|
||||
"\n",
|
||||
"# Optionally, specify your own session_state key for storing messages\n",
|
||||
"msgs = StreamlitChatMessageHistory(key=\"special_app_key\")\n",
|
||||
"\n",
|
||||
"memory = ConversationBufferMemory(memory_key=\"history\", chat_memory=msgs)\n",
|
||||
"if len(msgs.messages) == 0:\n",
|
||||
" msgs.add_ai_message(\"How can I help you?\")"
|
||||
]
|
||||
@@ -82,19 +87,34 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
|
||||
"from langchain_core.runnables.history import RunnableWithMessageHistory\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"template = \"\"\"You are an AI chatbot having a conversation with a human.\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You are an AI chatbot having a conversation with a human.\"),\n",
|
||||
" MessagesPlaceholder(variable_name=\"history\"),\n",
|
||||
" (\"human\", \"{question}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"{history}\n",
|
||||
"Human: {human_input}\n",
|
||||
"AI: \"\"\"\n",
|
||||
"prompt = PromptTemplate(input_variables=[\"history\", \"human_input\"], template=template)\n",
|
||||
"\n",
|
||||
"# Add the memory to an LLMChain as usual\n",
|
||||
"llm_chain = LLMChain(llm=OpenAI(), prompt=prompt, memory=memory)"
|
||||
"chain = prompt | ChatOpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dac3d94f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain_with_history = RunnableWithMessageHistory(\n",
|
||||
" chain,\n",
|
||||
" lambda session_id: msgs, # Always return the instance created earlier\n",
|
||||
" input_messages_key=\"question\",\n",
|
||||
" history_messages_key=\"history\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -121,8 +141,9 @@
|
||||
" st.chat_message(\"human\").write(prompt)\n",
|
||||
"\n",
|
||||
" # As usual, new messages are added to StreamlitChatMessageHistory when the Chain is called.\n",
|
||||
" response = llm_chain.run(prompt)\n",
|
||||
" st.chat_message(\"ai\").write(response)"
|
||||
" config = {\"configurable\": {\"session_id\": \"any\"}}\n",
|
||||
" response = chain_with_history.invoke({\"question\": prompt}, config)\n",
|
||||
" st.chat_message(\"ai\").write(response.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
266
docs/docs/integrations/memory/tidb_chat_message_history.ipynb
Normal file
266
docs/docs/integrations/memory/tidb_chat_message_history.ipynb
Normal file
@@ -0,0 +1,266 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# TiDB\n",
|
||||
"\n",
|
||||
"> [TiDB](https://github.com/pingcap/tidb) is an open-source, cloud-native, distributed, MySQL-Compatible database for elastic scale and real-time analytics.\n",
|
||||
"\n",
|
||||
"This notebook introduces how to use TiDB to store chat message history. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"Firstly, we will install the following dependencies:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain langchain_openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Configuring your OpenAI Key"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Input your OpenAI API key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, we will configure the connection to TiDB. In this notebook, we will follow the standard connection method provided by TiDB Cloud to establish a secure and efficient database connection."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# copy from tidb cloud console\n",
|
||||
"tidb_connection_string_template = \"mysql+pymysql://<USER>:<PASSWORD>@<HOST>:4000/<DB>?ssl_ca=/etc/ssl/cert.pem&ssl_verify_cert=true&ssl_verify_identity=true\"\n",
|
||||
"tidb_password = getpass.getpass(\"Input your TiDB password:\")\n",
|
||||
"tidb_connection_string = tidb_connection_string_template.replace(\n",
|
||||
" \"<PASSWORD>\", tidb_password\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Generating historical data\n",
|
||||
"\n",
|
||||
"Creating a set of historical data, which will serve as the foundation for our upcoming demonstrations."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
"\n",
|
||||
"from langchain_community.chat_message_histories import TiDBChatMessageHistory\n",
|
||||
"\n",
|
||||
"history = TiDBChatMessageHistory(\n",
|
||||
" connection_string=tidb_connection_string,\n",
|
||||
" session_id=\"code_gen\",\n",
|
||||
" earliest_time=datetime.utcnow(), # Optional to set earliest_time to load messages after this time point.\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"history.add_user_message(\"How's our feature going?\")\n",
|
||||
"history.add_ai_message(\n",
|
||||
" \"It's going well. We are working on testing now. It will be released in Feb.\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[HumanMessage(content=\"How's our feature going?\"),\n",
|
||||
" AIMessage(content=\"It's going well. We are working on testing now. It will be released in Feb.\")]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"history.messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chatting with historical data\n",
|
||||
"\n",
|
||||
"Let’s build upon the historical data generated earlier to create a dynamic chat interaction. \n",
|
||||
"\n",
|
||||
"First, create a chat chain with LangChain:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You're an assistant who's good at coding. You're helping a startup build\",\n",
|
||||
" ),\n",
|
||||
" MessagesPlaceholder(variable_name=\"history\"),\n",
|
||||
" (\"human\", \"{question}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"chain = prompt | ChatOpenAI()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Building a Runnable on History:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.runnables.history import RunnableWithMessageHistory\n",
|
||||
"\n",
|
||||
"chain_with_history = RunnableWithMessageHistory(\n",
|
||||
" chain,\n",
|
||||
" lambda session_id: TiDBChatMessageHistory(\n",
|
||||
" session_id=session_id, connection_string=tidb_connection_string\n",
|
||||
" ),\n",
|
||||
" input_messages_key=\"question\",\n",
|
||||
" history_messages_key=\"history\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Initiating the Chat:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='There are 31 days in January, so there are 30 days until our feature is released in February.')"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = chain_with_history.invoke(\n",
|
||||
" {\"question\": \"Today is Jan 1st. How many days until our feature is released?\"},\n",
|
||||
" config={\"configurable\": {\"session_id\": \"code_gen\"}},\n",
|
||||
")\n",
|
||||
"response"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Checking the history data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[HumanMessage(content=\"How's our feature going?\"),\n",
|
||||
" AIMessage(content=\"It's going well. We are working on testing now. It will be released in Feb.\"),\n",
|
||||
" HumanMessage(content='Today is Jan 1st. How many days until our feature is released?'),\n",
|
||||
" AIMessage(content='There are 31 days in January, so there are 30 days until our feature is released in February.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"history.reload_cache()\n",
|
||||
"history.messages"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "langchain",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -186,7 +186,7 @@ from langchain_community.document_loaders import GoogleSpeechToTextLoader
|
||||
### Google Vertex AI Vector Search
|
||||
|
||||
> [Google Vertex AI Vector Search](https://cloud.google.com/vertex-ai/docs/matching-engine/overview),
|
||||
> formerly known as `Vertex AI Matching Engine`, provides the industry's leading high-scale
|
||||
> formerly known as `Vertex AI Matching Engine`, provides the industry's leading high-scale
|
||||
> low latency vector database. These vector databases are commonly
|
||||
> referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.
|
||||
|
||||
@@ -207,10 +207,14 @@ from langchain_community.vectorstores import MatchingEngine
|
||||
> [Google BigQuery](https://cloud.google.com/bigquery),
|
||||
> BigQuery is a serverless and cost-effective enterprise data warehouse in Google Cloud.
|
||||
>
|
||||
> Google BigQuery Vector Search
|
||||
> Google BigQuery Vector Search
|
||||
> BigQuery vector search lets you use GoogleSQL to do semantic search, using vector indexes for fast but approximate results, or using brute force for exact results.
|
||||
|
||||
> It can calculate Euclidean or Cosine distance. With LangChain, we default to use Euclidean distance.
|
||||
> It can calculate Euclidean or Cosine distance. With LangChain, we default to use Euclidean distance.
|
||||
|
||||
> This is a private preview (experimental) feature. Please submit this
|
||||
> [enrollment form](https://docs.google.com/forms/d/18yndSb4dTf2H0orqA9N7NAchQEDQekwWiD5jYfEkGWk/viewform?edit_requested=true)
|
||||
> if you want to enroll BigQuery Vector Search Experimental.
|
||||
|
||||
We need to install several python packages.
|
||||
|
||||
@@ -228,7 +232,7 @@ from langchain.vectorstores import BigQueryVectorSearch
|
||||
|
||||
>[Google ScaNN](https://github.com/google-research/google-research/tree/master/scann)
|
||||
> (Scalable Nearest Neighbors) is a python package.
|
||||
>
|
||||
>
|
||||
>`ScaNN` is a method for efficient vector similarity search at scale.
|
||||
|
||||
>`ScaNN` includes search space pruning and quantization for Maximum Inner
|
||||
@@ -285,9 +289,9 @@ from langchain.retrievers import GoogleVertexAISearchRetriever
|
||||
|
||||
### Document AI Warehouse
|
||||
> [Google Cloud Document AI Warehouse](https://cloud.google.com/document-ai-warehouse)
|
||||
> allows enterprises to search, store, govern, and manage documents and their AI-extracted
|
||||
> allows enterprises to search, store, govern, and manage documents and their AI-extracted
|
||||
> data and metadata in a single platform.
|
||||
>
|
||||
>
|
||||
|
||||
```python
|
||||
from langchain.retrievers import GoogleDocumentAIWarehouseRetriever
|
||||
@@ -304,9 +308,9 @@ documents = docai_wh_retriever.get_relevant_documents(
|
||||
|
||||
### Google Cloud Text-to-Speech
|
||||
|
||||
>[Google Cloud Text-to-Speech](https://cloud.google.com/text-to-speech) enables developers to
|
||||
> synthesize natural-sounding speech with 100+ voices, available in multiple languages and variants.
|
||||
> It applies DeepMind’s groundbreaking research in WaveNet and Google’s powerful neural networks
|
||||
>[Google Cloud Text-to-Speech](https://cloud.google.com/text-to-speech) enables developers to
|
||||
> synthesize natural-sounding speech with 100+ voices, available in multiple languages and variants.
|
||||
> It applies DeepMind’s groundbreaking research in WaveNet and Google’s powerful neural networks
|
||||
> to deliver the highest fidelity possible.
|
||||
|
||||
We need to install a python package.
|
||||
@@ -354,7 +358,7 @@ from langchain.tools import GooglePlacesTool
|
||||
### Google Search
|
||||
|
||||
- Set up a Custom Search Engine, following [these instructions](https://stackoverflow.com/questions/37083058/programmatically-searching-google-in-python-using-custom-search)
|
||||
- Get an API Key and Custom Search Engine ID from the previous step, and set them as environment variables
|
||||
- Get an API Key and Custom Search Engine ID from the previous step, and set them as environment variables
|
||||
`GOOGLE_API_KEY` and `GOOGLE_CSE_ID` respectively.
|
||||
|
||||
```python
|
||||
@@ -444,12 +448,12 @@ from langchain_community.utilities.google_trends import GoogleTrendsAPIWrapper
|
||||
|
||||
### Google Document AI
|
||||
|
||||
>[Document AI](https://cloud.google.com/document-ai/docs/overview) is a `Google Cloud Platform`
|
||||
> service that transforms unstructured data from documents into structured data, making it easier
|
||||
>[Document AI](https://cloud.google.com/document-ai/docs/overview) is a `Google Cloud Platform`
|
||||
> service that transforms unstructured data from documents into structured data, making it easier
|
||||
> to understand, analyze, and consume.
|
||||
|
||||
We need to set up a [`GCS` bucket and create your own OCR processor](https://cloud.google.com/document-ai/docs/create-processor)
|
||||
The `GCS_OUTPUT_PATH` should be a path to a folder on GCS (starting with `gs://`)
|
||||
We need to set up a [`GCS` bucket and create your own OCR processor](https://cloud.google.com/document-ai/docs/create-processor)
|
||||
The `GCS_OUTPUT_PATH` should be a path to a folder on GCS (starting with `gs://`)
|
||||
and a processor name should look like `projects/PROJECT_NUMBER/locations/LOCATION/processors/PROCESSOR_ID`.
|
||||
We can get it either programmatically or copy from the `Prediction endpoint` section of the `Processor details`
|
||||
tab in the Google Cloud Console.
|
||||
@@ -507,6 +511,23 @@ See a [usage example and authorization instructions](/docs/integrations/toolkits
|
||||
from langchain_community.agent_toolkits import GmailToolkit
|
||||
```
|
||||
|
||||
## Memory
|
||||
|
||||
### Cloud Firestore
|
||||
|
||||
> [`Cloud Firestore`](https://cloud.google.com/firestore) is a NoSQL document database built for automatic scaling, high performance, and ease of application development.
|
||||
|
||||
First, we need to install the python package.
|
||||
|
||||
```bash
|
||||
pip install firebase-admin
|
||||
```
|
||||
|
||||
See a [usage example and authorization instructions](/docs/integrations/memory/firestore_chat_message_history).
|
||||
|
||||
```python
|
||||
from langchain_community.chat_message_histories.firestore import FirestoreChatMessageHistory
|
||||
```
|
||||
|
||||
## Chat Loaders
|
||||
|
||||
@@ -560,7 +581,7 @@ from langchain_community.utilities import GoogleSerperAPIWrapper
|
||||
### YouTube
|
||||
|
||||
>[YouTube Search](https://github.com/joetats/youtube_search) package searches `YouTube` videos avoiding using their heavily rate-limited API.
|
||||
>
|
||||
>
|
||||
>It uses the form on the YouTube homepage and scrapes the resulting page.
|
||||
|
||||
We need to install a python package.
|
||||
|
||||
@@ -10,7 +10,7 @@ All functionality related to `Microsoft Azure` and other `Microsoft` products.
|
||||
>[Azure OpenAI](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/) is an `Azure` service with powerful language models from `OpenAI` including the `GPT-3`, `Codex` and `Embeddings model` series for content generation, summarization, semantic search, and natural language to code translation.
|
||||
|
||||
```bash
|
||||
pip install openai tiktoken
|
||||
pip install langchain-openai
|
||||
```
|
||||
|
||||
Set the environment variables to get access to the `Azure OpenAI` service.
|
||||
|
||||
@@ -14,11 +14,12 @@ All functionality related to OpenAI
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Install the LangChain partner package
|
||||
Install the integration package with
|
||||
```bash
|
||||
pip install langchain-openai
|
||||
```
|
||||
- Get an OpenAI api key and set it as an environment variable (`OPENAI_API_KEY`)
|
||||
|
||||
Get an OpenAI API key and set it as an environment variable (`OPENAI_API_KEY`).
|
||||
|
||||
|
||||
## LLM
|
||||
|
||||
@@ -13,7 +13,7 @@ Activeloop Deep Lake supports SelfQuery Retrieval:
|
||||
|
||||
## More Resources
|
||||
1. [Ultimate Guide to LangChain & Deep Lake: Build ChatGPT to Answer Questions on Your Financial Data](https://www.activeloop.ai/resources/ultimate-guide-to-lang-chain-deep-lake-build-chat-gpt-to-answer-questions-on-your-financial-data/)
|
||||
2. [Twitter the-algorithm codebase analysis with Deep Lake](/docs/use_cases/question_answering/code/twitter-the-algorithm-analysis-deeplake)
|
||||
2. [Twitter the-algorithm codebase analysis with Deep Lake](https://github.com/langchain-ai/langchain/blob/master/cookbook/twitter-the-algorithm-analysis-deeplake.ipynb)
|
||||
3. Here are the [whitepaper](https://www.deeplake.ai/whitepaper) and the [academic paper](https://arxiv.org/pdf/2209.10785.pdf) for Deep Lake
|
||||
4. Here is a set of additional resources available for review: [Deep Lake](https://github.com/activeloopai/deeplake), [Get started](https://docs.activeloop.ai/getting-started) and [Tutorials](https://docs.activeloop.ai/hub-tutorials)
|
||||
|
||||
|
||||
@@ -1,17 +1,34 @@
|
||||
# Anyscale
|
||||
|
||||
This page covers how to use the Anyscale ecosystem within LangChain.
|
||||
It is broken into two parts: installation and setup, and then references to specific Anyscale wrappers.
|
||||
>[Anyscale](https://www.anyscale.com) is a platform to run, fine tune and scale LLMs via production-ready APIs.
|
||||
> [Anyscale Endpoints](https://docs.anyscale.com/endpoints/overview) serve many open-source models in a cost-effective way.
|
||||
|
||||
`Anyscale` also provides [an example](https://docs.anyscale.com/endpoints/model-serving/examples/langchain-integration)
|
||||
showing how to set up `LangChain` with `Anyscale` for advanced chat agents.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Get an Anyscale Service URL, route and API key and set them as environment variables (`ANYSCALE_SERVICE_URL`,`ANYSCALE_SERVICE_ROUTE`, `ANYSCALE_SERVICE_TOKEN`).
|
||||
- Please see [the Anyscale docs](https://docs.anyscale.com/productionize/services-v2/get-started) for more details.
|
||||
- Please see [the Anyscale docs](https://www.anyscale.com/get-started) for more details.
|
||||
|
||||
## Wrappers
|
||||
We have to install the `openai` package:
|
||||
|
||||
### LLM
|
||||
|
||||
There exists an Anyscale LLM wrapper, which you can access with
|
||||
```python
|
||||
from langchain_community.llms import Anyscale
|
||||
```bash
|
||||
pip install openai
|
||||
```
|
||||
|
||||
## LLM
|
||||
|
||||
See a [usage example](/docs/integrations/llms/anyscale).
|
||||
|
||||
```python
|
||||
from langchain_community.llms.anyscale import Anyscale
|
||||
```
|
||||
|
||||
## Chat Models
|
||||
|
||||
See a [usage example](/docs/integrations/chat/anyscale).
|
||||
|
||||
```python
|
||||
from langchain_community.chat_models.anyscale import ChatAnyscale
|
||||
```
|
||||
|
||||
@@ -20,10 +20,10 @@ pip install "astrapy>=0.5.3"
|
||||
```python
|
||||
from langchain_community.vectorstores import AstraDB
|
||||
vector_store = AstraDB(
|
||||
embedding=my_embedding,
|
||||
collection_name="my_store",
|
||||
api_endpoint="...",
|
||||
token="...",
|
||||
embedding=my_embedding,
|
||||
collection_name="my_store",
|
||||
api_endpoint="...",
|
||||
token="...",
|
||||
)
|
||||
```
|
||||
|
||||
@@ -40,7 +40,7 @@ set_llm_cache(AstraDBCache(
|
||||
))
|
||||
```
|
||||
|
||||
Learn more in the [example notebook](/docs/integrations/llms/llm_caching) (scroll to the Astra DB section).
|
||||
Learn more in the [example notebook](/docs/integrations/llms/llm_caching#astra-db-caches) (scroll to the Astra DB section).
|
||||
|
||||
|
||||
### Semantic LLM Cache
|
||||
@@ -55,14 +55,14 @@ set_llm_cache(AstraDBSemanticCache(
|
||||
))
|
||||
```
|
||||
|
||||
Learn more in the [example notebook](/docs/integrations/llms/llm_caching) (scroll to the appropriate section).
|
||||
Learn more in the [example notebook](/docs/integrations/llms/llm_caching#astra-db-caches) (scroll to the appropriate section).
|
||||
|
||||
### Chat message history
|
||||
|
||||
```python
|
||||
from langchain.memory import AstraDBChatMessageHistory
|
||||
message_history = AstraDBChatMessageHistory(
|
||||
session_id="test-session"
|
||||
session_id="test-session",
|
||||
api_endpoint="...",
|
||||
token="...",
|
||||
)
|
||||
@@ -70,6 +70,67 @@ message_history = AstraDBChatMessageHistory(
|
||||
|
||||
Learn more in the [example notebook](/docs/integrations/memory/astradb_chat_message_history).
|
||||
|
||||
### Document loader
|
||||
|
||||
```python
|
||||
from langchain_community.document_loaders import AstraDBLoader
|
||||
loader = AstraDBLoader(
|
||||
collection_name="my_collection",
|
||||
api_endpoint="...",
|
||||
token="..."
|
||||
)
|
||||
```
|
||||
|
||||
Learn more in the [example notebook](/docs/integrations/document_loaders/astradb).
|
||||
|
||||
### Self-querying retriever
|
||||
|
||||
```python
|
||||
from langchain_community.vectorstores import AstraDB
|
||||
from langchain.retrievers.self_query.base import SelfQueryRetriever
|
||||
|
||||
vector_store = AstraDB(
|
||||
embedding=my_embedding,
|
||||
collection_name="my_store",
|
||||
api_endpoint="...",
|
||||
token="...",
|
||||
)
|
||||
|
||||
retriever = SelfQueryRetriever.from_llm(
|
||||
my_llm,
|
||||
vector_store,
|
||||
document_content_description,
|
||||
metadata_field_info
|
||||
)
|
||||
```
|
||||
|
||||
Learn more in the [example notebook](/docs/integrations/retrievers/self_query/astradb).
|
||||
|
||||
### Store
|
||||
|
||||
```python
|
||||
from langchain_community.storage import AstraDBStore
|
||||
store = AstraDBStore(
|
||||
collection_name="my_kv_store",
|
||||
api_endpoint="...",
|
||||
token="..."
|
||||
)
|
||||
```
|
||||
|
||||
Learn more in the [example notebook](/docs/integrations/stores/astradb#astradbstore).
|
||||
|
||||
### Byte Store
|
||||
|
||||
```python
|
||||
from langchain_community.storage import AstraDBByteStore
|
||||
store = AstraDBByteStore(
|
||||
collection_name="my_kv_store",
|
||||
api_endpoint="...",
|
||||
token="..."
|
||||
)
|
||||
```
|
||||
|
||||
Learn more in the [example notebook](/docs/integrations/stores/astradb#astradbbytestore).
|
||||
|
||||
## Apache Cassandra and Astra DB through CQL
|
||||
|
||||
@@ -85,12 +146,12 @@ Hence, a different set of connectors, outlined below, shall be used.
|
||||
```python
|
||||
from langchain_community.vectorstores import Cassandra
|
||||
vector_store = Cassandra(
|
||||
embedding=my_embedding,
|
||||
table_name="my_store",
|
||||
embedding=my_embedding,
|
||||
table_name="my_store",
|
||||
)
|
||||
```
|
||||
|
||||
Learn more in the [example notebook](/docs/integrations/vectorstores/astradb) (scroll down to the CQL-specific section).
|
||||
Learn more in the [example notebook](/docs/integrations/vectorstores/astradb#apache-cassandra-and-astra-db-through-cql) (scroll down to the CQL-specific section).
|
||||
|
||||
|
||||
### Memory
|
||||
@@ -110,7 +171,7 @@ from langchain.cache import CassandraCache
|
||||
langchain.llm_cache = CassandraCache()
|
||||
```
|
||||
|
||||
Learn more in the [example notebook](/docs/integrations/llms/llm_caching) (scroll to the Cassandra section).
|
||||
Learn more in the [example notebook](/docs/integrations/llms/llm_caching#cassandra-caches) (scroll to the Cassandra section).
|
||||
|
||||
|
||||
### Semantic LLM Cache
|
||||
@@ -118,9 +179,9 @@ Learn more in the [example notebook](/docs/integrations/llms/llm_caching) (scrol
|
||||
```python
|
||||
from langchain.cache import CassandraSemanticCache
|
||||
cassSemanticCache = CassandraSemanticCache(
|
||||
embedding=my_embedding,
|
||||
table_name="my_store",
|
||||
embedding=my_embedding,
|
||||
table_name="my_store",
|
||||
)
|
||||
```
|
||||
|
||||
Learn more in the [example notebook](/docs/integrations/llms/llm_caching) (scroll to the appropriate section).
|
||||
Learn more in the [example notebook](/docs/integrations/llms/llm_caching#cassandra-caches) (scroll to the appropriate section).
|
||||
|
||||
@@ -18,11 +18,11 @@ whether for semantic search or example selection.
|
||||
from langchain_community.vectorstores import Chroma
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the Chroma wrapper, see [this notebook](/docs/integrations/vectorstores/chroma_self_query)
|
||||
For a more detailed walkthrough of the Chroma wrapper, see [this notebook](/docs/integrations/vectorstores/chroma)
|
||||
|
||||
## Retriever
|
||||
|
||||
See a [usage example](/docs/integrations/retrievers/self_query/chroma).
|
||||
See a [usage example](/docs/integrations/retrievers/self_query/chroma_self_query).
|
||||
|
||||
```python
|
||||
from langchain.retrievers import SelfQueryRetriever
|
||||
|
||||
@@ -17,6 +17,8 @@ google/flan\* models can be viewed [here](https://deepinfra.com/models?type=text
|
||||
|
||||
You can view a [list of request and response parameters](https://deepinfra.com/meta-llama/Llama-2-70b-chat-hf/api).
|
||||
|
||||
Chat models [follow the OpenAI API](https://deepinfra.com/meta-llama/Llama-2-70b-chat-hf/api?example=openai-http).
|
||||
|
||||
## Wrappers
|
||||
|
||||
### LLM
|
||||
@@ -34,3 +36,11 @@ There is also an DeepInfra Embeddings wrapper, you can access with
|
||||
```python
|
||||
from langchain_community.embeddings import DeepInfraEmbeddings
|
||||
```
|
||||
|
||||
### Chat Models
|
||||
|
||||
There is a chat-oriented wrapper as well, accessible with
|
||||
|
||||
```python
|
||||
from langchain_community.chat_models import ChatDeepInfra
|
||||
```
|
||||
|
||||
@@ -150,4 +150,4 @@ This command will initiate the execution of the `langchain_llm` task on the Flyt
|
||||
|
||||
The metrics will be displayed on the Flyte UI as follows:
|
||||
|
||||

|
||||

|
||||
|
||||
@@ -6,7 +6,7 @@ This page covers how to use the [Helicone](https://helicone.ai) ecosystem within
|
||||
|
||||
Helicone is an [open-source](https://github.com/Helicone/helicone) observability platform that proxies your OpenAI traffic and provides you key insights into your spend, latency and usage.
|
||||
|
||||

|
||||

|
||||
|
||||
## Quick start
|
||||
|
||||
@@ -18,7 +18,7 @@ export OPENAI_API_BASE="https://oai.hconeai.com/v1"
|
||||
|
||||
Now head over to [helicone.ai](https://helicone.ai/onboarding?step=2) to create your account, and add your OpenAI API key within our dashboard to view your logs.
|
||||
|
||||

|
||||

|
||||
|
||||
## How to enable Helicone caching
|
||||
|
||||
|
||||
25
docs/docs/integrations/providers/lantern.mdx
Normal file
25
docs/docs/integrations/providers/lantern.mdx
Normal file
@@ -0,0 +1,25 @@
|
||||
# Lantern
|
||||
|
||||
This page covers how to use [Lantern](https://github.com/lanterndata/lantern) within LangChain.
|
||||
It is broken into two parts: setup, and then references to specific Lantern wrappers.
|
||||
|
||||
## Setup
|
||||
1. The first step is to create a database with the `lantern` extension installed.
|
||||
|
||||
Follow the steps at [Lantern Installation Guide](https://github.com/lanterndata/lantern#-quick-install) to install the database and the extension. The docker image is the easiest way to get started.
|
||||
|
||||
## Wrappers
|
||||
|
||||
### VectorStore
|
||||
|
||||
There exists a wrapper around Postgres vector databases, allowing you to use it as a vectorstore,
|
||||
whether for semantic search or example selection.
|
||||
|
||||
To import this vectorstore:
|
||||
```python
|
||||
from langchain_community.vectorstores import Lantern
|
||||
```
|
||||
|
||||
### Usage
|
||||
|
||||
For a more detailed walkthrough of the Lantern Wrapper, see [this notebook](/docs/integrations/vectorstores/lantern)
|
||||
@@ -6,7 +6,7 @@ This page covers how to use [Metal](https://getmetal.io) within LangChain.
|
||||
|
||||
Metal is a managed retrieval & memory platform built for production. Easily index your data into `Metal` and run semantic search and retrieval on it.
|
||||
|
||||

|
||||

|
||||
|
||||
## Quick start
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ db = SQLDatabase.from_uri(conn_str)
|
||||
db_chain = SQLDatabaseChain.from_llm(OpenAI(temperature=0), db, verbose=True)
|
||||
```
|
||||
|
||||
From here, see the [SQL Chain](/docs/use_cases/tabular/sqlite) documentation on how to use.
|
||||
From here, see the [SQL Chain](/docs/use_cases/sql/) documentation on how to use.
|
||||
|
||||
|
||||
## LLMCache
|
||||
|
||||
@@ -9,9 +9,7 @@
|
||||
We need to install several python packages.
|
||||
|
||||
```bash
|
||||
pip install openai
|
||||
pip install psycopg2-binary
|
||||
pip install tiktoken
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
@@ -66,7 +66,7 @@
|
||||
"source": [
|
||||
"## Document Compressor\n",
|
||||
"\n",
|
||||
"We can also use RAGatouille off-the-shelf as a reranker. This will allow us to use ColBERT to rerank retrieved results from any generic retriever. The benefits of this are that we can do this on top of any existing index, so that we don't need to create a new idex. We can do this by using the [document compressor](/docs/modules/data_connections/retrievers/contextual_compression) abstraction in LangChain."
|
||||
"We can also use RAGatouille off-the-shelf as a reranker. This will allow us to use ColBERT to rerank retrieved results from any generic retriever. The benefit is that we can do this on top of any existing index, so we don't need to create a new index. We can do this by using the [document compressor](/docs/modules/data_connection/retrievers/contextual_compression) abstraction in LangChain."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -5,13 +5,15 @@
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
You need to install `langchain-robocorp` python package, as well as the `robocorp-action-server` package to run the action server locally.
|
||||
You need to install the `langchain-robocorp` Python package:
|
||||
|
||||
```bash
|
||||
pip install langchain-robocorp robocorp-action-server
|
||||
pip install langchain-robocorp
|
||||
```
|
||||
|
||||
You will need a running instance of Action Server to communicate with from your agent application. You can bootstrap a new project using Action Server `new` command.
|
||||
You will need a running instance of Action Server to communicate with from your agent application. See the [Robocorp Quickstart](https://github.com/robocorp/robocorp#quickstart) on how to set up Action Server and create your Actions.
|
||||
|
||||
You can bootstrap a new project using Action Server `new` command.
|
||||
|
||||
```bash
|
||||
action-server new
|
||||
|
||||
34
docs/docs/integrations/providers/tigergraph.mdx
Normal file
34
docs/docs/integrations/providers/tigergraph.mdx
Normal file
@@ -0,0 +1,34 @@
|
||||
# TigerGraph
|
||||
|
||||
This page covers how to use the TigerGraph ecosystem within LangChain.
|
||||
|
||||
What is TigerGraph?
|
||||
|
||||
**TigerGraph in a nutshell:**
|
||||
|
||||
- TigerGraph is a natively distributed and high-performance graph database.
|
||||
- The storage of data in a graph format of vertices and edges leads to rich relationships, ideal for grounding LLM responses.
|
||||
- Get started quickly with TigerGraph by visiting [their website](https://tigergraph.com/).
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Install the Python SDK with `pip install pyTigerGraph`
|
||||
|
||||
## Wrappers
|
||||
|
||||
### TigerGraph Store
|
||||
To utilize the TigerGraph InquiryAI functionality, you can import `TigerGraph` from `langchain_community.graphs`.
|
||||
|
||||
```python
|
||||
import pyTigerGraph as tg
|
||||
conn = tg.TigerGraphConnection(host="DATABASE_HOST_HERE", graphname="GRAPH_NAME_HERE", username="USERNAME_HERE", password="PASSWORD_HERE")
|
||||
|
||||
### ==== CONFIGURE INQUIRYAI HOST ====
|
||||
conn.ai.configureInquiryAIHost("INQUIRYAI_HOST_HERE")
|
||||
|
||||
from langchain_community.graphs import TigerGraph
|
||||
graph = TigerGraph(conn)
|
||||
result = graph.query("How many servers are there?")
|
||||
print(result)
|
||||
```
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
|
||||
```bash
|
||||
pip install tigrisdb openapi-schema-pydantic openai tiktoken
|
||||
pip install tigrisdb openapi-schema-pydantic
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
|
||||
|
||||
```bash
|
||||
pip install typesense openapi-schema-pydantic openai tiktoken
|
||||
pip install typesense openapi-schema-pydantic
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Also you'll need to create a [Activeloop]((https://activeloop.ai/)) account."
|
||||
"Also you'll need to create an [Activeloop](https://activeloop.ai) account."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -18,6 +18,15 @@
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain langchain-openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
322
docs/docs/integrations/retrievers/self_query/astradb.ipynb
Normal file
322
docs/docs/integrations/retrievers/self_query/astradb.ipynb
Normal file
@@ -0,0 +1,322 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Astra DB\n",
|
||||
"\n",
|
||||
"DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Cassandra and made conveniently available through an easy-to-use JSON API.\n",
|
||||
"\n",
|
||||
"In the walkthrough, we'll demo the `SelfQueryRetriever` with an `Astra DB` vector store."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating an Astra DB vector store\n",
|
||||
"First we'll want to create an Astra DB VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n",
|
||||
"\n",
|
||||
"NOTE: The self-query retriever requires you to have `lark` installed (`pip install lark`). We also need the `astrapy` package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet lark astrapy langchain-openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"from langchain_openai.embeddings import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass(\"OpenAI API Key:\")\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"Create the Astra DB VectorStore:\n",
|
||||
"\n",
|
||||
"- the API Endpoint looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n",
|
||||
"- the Token looks like `AstraCS:6gBhNmsk135....`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \")\n",
|
||||
"ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema import Document\n",
|
||||
"from langchain.vectorstores import AstraDB\n",
|
||||
"\n",
|
||||
"docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n",
|
||||
" metadata={\"year\": 1993, \"rating\": 7.7, \"genre\": \"science fiction\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n",
|
||||
" metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"rating\": 8.2},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n",
|
||||
" metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n",
|
||||
" metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Toys come alive and have a blast doing so\",\n",
|
||||
" metadata={\"year\": 1995, \"genre\": \"animated\"},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n",
|
||||
" metadata={\n",
|
||||
" \"year\": 1979,\n",
|
||||
" \"director\": \"Andrei Tarkovsky\",\n",
|
||||
" \"genre\": \"science fiction\",\n",
|
||||
" \"rating\": 9.9,\n",
|
||||
" },\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"vectorstore = AstraDB.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" collection_name=\"astra_self_query_demo\",\n",
|
||||
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
|
||||
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating our self-querying retriever\n",
|
||||
"Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.query_constructor.base import AttributeInfo\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
|
||||
"\n",
|
||||
"metadata_field_info = [\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"genre\",\n",
|
||||
" description=\"The genre of the movie\",\n",
|
||||
" type=\"string or list[string]\",\n",
|
||||
" ),\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"year\",\n",
|
||||
" description=\"The year the movie was released\",\n",
|
||||
" type=\"integer\",\n",
|
||||
" ),\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"director\",\n",
|
||||
" description=\"The name of the movie director\",\n",
|
||||
" type=\"string\",\n",
|
||||
" ),\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"document_content_description = \"Brief summary of a movie\"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"\n",
|
||||
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||
" llm, vectorstore, document_content_description, metadata_field_info, verbose=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Testing it out\n",
|
||||
"And now we can try actually using our retriever!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.get_relevant_documents(\"What are some movies about dinosaurs?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# This example specifies a filter\n",
|
||||
"retriever.get_relevant_documents(\"I want to watch a movie rated higher than 8.5\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# This example specifies a query and a filter\n",
|
||||
"retriever.get_relevant_documents(\"Has Greta Gerwig directed any movies about women\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# This example specifies a composite filter\n",
|
||||
"retriever.get_relevant_documents(\n",
|
||||
" \"What's a highly rated (above 8.5), science fiction movie ?\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# This example specifies a query and composite filter\n",
|
||||
"retriever.get_relevant_documents(\n",
|
||||
" \"What's a movie about toys after 1990 but before 2005, and is animated\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Filter k\n",
|
||||
"\n",
|
||||
"We can also use the self query retriever to specify `k`: the number of documents to fetch.\n",
|
||||
"\n",
|
||||
"We can do this by passing `enable_limit=True` to the constructor."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = SelfQueryRetriever.from_llm(\n",
|
||||
" llm,\n",
|
||||
" vectorstore,\n",
|
||||
" document_content_description,\n",
|
||||
" metadata_field_info,\n",
|
||||
" verbose=True,\n",
|
||||
" enable_limit=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.get_relevant_documents(\"What are two movies about dinosaurs?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"## Cleanup\n",
|
||||
"\n",
|
||||
"If you want to completely delete the collection from your Astra DB instance, run this.\n",
|
||||
"\n",
|
||||
"_(You will lose the data you stored in it.)_"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vectorstore.delete_collection()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
240
docs/docs/integrations/stores/astradb.ipynb
Normal file
240
docs/docs/integrations/stores/astradb.ipynb
Normal file
@@ -0,0 +1,240 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Astra DB\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Astra DB\n",
|
||||
"\n",
|
||||
"DataStax [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) is a serverless vector-capable database built on Cassandra and made conveniently available through an easy-to-use JSON API.\n",
|
||||
"\n",
|
||||
"`AstraDBStore` and `AstraDBByteStore` need the `astrapy` package to be installed:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"vscode": {
|
||||
"languageId": "plaintext"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet astrapy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The Store takes the following parameters:\n",
|
||||
"\n",
|
||||
"* `api_endpoint`: Astra DB API endpoint. Looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n",
|
||||
"* `token`: Astra DB token. Looks like `AstraCS:6gBhNmsk135....`\n",
|
||||
"* `collection_name` : Astra DB collection name\n",
|
||||
"* `namespace`: (Optional) Astra DB namespace"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## AstraDBStore\n",
|
||||
"\n",
|
||||
"The `AstraDBStore` is an implementation of `BaseStore` that stores everything in your DataStax Astra DB instance.\n",
|
||||
"The store keys must be strings and will be mapped to the `_id` field of the Astra DB document.\n",
|
||||
"The store values can be any object that can be serialized by `json.dumps`.\n",
|
||||
"In the database, entries will have the form:\n",
|
||||
"\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"_id\": \"<key>\",\n",
|
||||
" \"value\": <value>\n",
|
||||
"}\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.storage import AstraDBStore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \")\n",
|
||||
"ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"store = AstraDBStore(\n",
|
||||
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
|
||||
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
|
||||
" collection_name=\"my_store\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['v1', [0.1, 0.2, 0.3]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"store.mset([(\"k1\", \"v1\"), (\"k2\", [0.1, 0.2, 0.3])])\n",
|
||||
"print(store.mget([\"k1\", \"k2\"]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Usage with CacheBackedEmbeddings\n",
|
||||
"\n",
|
||||
"You may use the `AstraDBStore` in conjunction with a [`CacheBackedEmbeddings`](/docs/modules/data_connection/text_embedding/caching_embeddings) to cache the result of embeddings computations.\n",
|
||||
"Note that `AstraDBStore` stores the embeddings as a list of floats without converting them first to bytes, so we don't use `from_bytes_store` there."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings import CacheBackedEmbeddings, OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = CacheBackedEmbeddings(\n",
|
||||
" underlying_embeddings=OpenAIEmbeddings(), document_embedding_store=store\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## AstraDBByteStore\n",
|
||||
"\n",
|
||||
"The `AstraDBByteStore` is an implementation of `ByteStore` that stores everything in your DataStax Astra DB instance.\n",
|
||||
"The store keys must be strings and will be mapped to the `_id` field of the Astra DB document.\n",
|
||||
"The store `bytes` values are converted to base64 strings for storage into Astra DB.\n",
|
||||
"In the database, entries will have the form:\n",
|
||||
"\n",
|
||||
"```json\n",
|
||||
"{\n",
|
||||
" \"_id\": \"<key>\",\n",
|
||||
" \"value\": \"bytes encoded in base 64\"\n",
|
||||
"}\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.storage import AstraDBByteStore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \")\n",
|
||||
"ASTRA_DB_APPLICATION_TOKEN = getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"store = AstraDBByteStore(\n",
|
||||
" api_endpoint=ASTRA_DB_API_ENDPOINT,\n",
|
||||
" token=ASTRA_DB_APPLICATION_TOKEN,\n",
|
||||
" collection_name=\"my_store\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[b'v1', b'v2']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n",
|
||||
"print(store.mget([\"k1\", \"k2\"]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -10,6 +10,51 @@
|
||||
"which converts text into a vector form represented by numerical values, and is used in text retrieval, information recommendation, knowledge mining and other scenarios."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Deprecation Warning**\n",
|
||||
"\n",
|
||||
"We recommend that users of `langchain_community.embeddings.ErnieEmbeddings` \n",
|
||||
"switch to `langchain_community.embeddings.QianfanEmbeddingsEndpoint` instead.\n",
|
||||
"\n",
|
||||
"Documentation for `QianfanEmbeddingsEndpoint` is [here](./baidu_qianfan_endpoint).\n",
|
||||
"\n",
|
||||
"There are 2 reasons why we recommend users to use `QianfanEmbeddingsEndpoint`:\n",
|
||||
"\n",
|
||||
"1. `QianfanEmbeddingsEndpoint` supports more embedding models in the Qianfan platform.\n",
|
||||
"2. `ErnieEmbeddings` lacks maintenance and is deprecated."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Some tips for migration:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.embeddings import QianfanEmbeddingsEndpoint\n",
|
||||
"\n",
|
||||
"embeddings = QianfanEmbeddingsEndpoint(\n",
|
||||
" qianfan_ak=\"your qianfan ak\",\n",
|
||||
" qianfan_sk=\"your qianfan sk\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
||||
@@ -194,6 +194,19 @@
|
||||
"source": [
|
||||
"In retrieval, relative distance matters. In the image above, you can see the difference in similarity scores between the \"relevant doc\" and \"similar doc\" cases, with a stronger delta between the similar query and relevant doc on the latter case."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2e7857e5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Additional Configuration\n",
|
||||
"\n",
|
||||
"You can pass the following parameters to ChatGoogleGenerativeAI in order to customize the SDK's behavior:\n",
|
||||
"\n",
|
||||
"- `client_options`: [Client Options](https://googleapis.dev/python/google-api-core/latest/client_options.html#module-google.api_core.client_options) to pass to the Google API Client, such as a custom `client_options[\"api_endpoint\"]`\n",
|
||||
"- `transport`: The transport method to use, such as `rest`, `grpc`, or `grpc_asyncio`."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -106,7 +106,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Enter your HF Inference API Key:\n",
|
||||
@@ -148,6 +148,75 @@
|
||||
"query_result = embeddings.embed_query(text)\n",
|
||||
"query_result[:3]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "19ef2d31",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Hugging Face Hub\n",
|
||||
"We can also generate embeddings locally via the Hugging Face Hub package, which requires us to install `huggingface_hub`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "39e85945",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install huggingface_hub"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c78a2779",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.embeddings import HuggingFaceHubEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "116f3ce7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = HuggingFaceHubEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d6f97ee9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text = \"This is a test document.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fb6adc67",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_result = embeddings.embed_query(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1f42c311",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query_result[:3]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
103
docs/docs/integrations/text_embedding/mistralai.ipynb
Normal file
103
docs/docs/integrations/text_embedding/mistralai.ipynb
Normal file
@@ -0,0 +1,103 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b14a24db",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MistralAI\n",
|
||||
"\n",
|
||||
"This notebook explains how to use MistralAIEmbeddings, which is included in the langchain_mistralai package, to embed texts in langchain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "0ab948fc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# pip install -U langchain-mistralai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "67c637ca",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Import the library"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "5709b030",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_mistralai import MistralAIEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "1756b1ba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embedding = MistralAIEmbeddings(mistral_api_key=\"your-api-key\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4a2a098d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using the Embedding Model\n",
|
||||
"With `MistralAIEmbeddings`, you can directly use the default model 'mistral-embed', or set a different one if available."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "584b9af5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embedding.model = \"mistral-embed\" # or your preferred model if available"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "be18b873",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"res_query = embedding.embed_query(\"The test information\")\n",
|
||||
"res_document = embedding.embed_documents([\"test1\", \"another test\"])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -17,7 +17,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -30,13 +30,18 @@
|
||||
"source": [
|
||||
"## Assign Environmental Variables\n",
|
||||
"\n",
|
||||
"The toolkit will read the AMADEUS_CLIENT_ID and AMADEUS_CLIENT_SECRET environmental variables to authenticate the user so you need to set them here. You will also need to set your OPENAI_API_KEY to use the agent later."
|
||||
"The toolkit will read the AMADEUS_CLIENT_ID and AMADEUS_CLIENT_SECRET environmental variables to authenticate the user, so you need to set them here. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-13T17:45:56.531388579Z",
|
||||
"start_time": "2024-01-13T17:45:56.523533018Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set environmental variables here\n",
|
||||
@@ -44,7 +49,6 @@
|
||||
"\n",
|
||||
"os.environ[\"AMADEUS_CLIENT_ID\"] = \"CLIENT_ID\"\n",
|
||||
"os.environ[\"AMADEUS_CLIENT_SECRET\"] = \"CLIENT_SECRET\"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"API_KEY\"\n",
|
||||
"# os.environ[\"AMADEUS_HOSTNAME\"] = \"production\" or \"test\""
|
||||
]
|
||||
},
|
||||
@@ -57,11 +61,39 @@
|
||||
"To start, you need to create the toolkit, so you can access its tools later."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"By default, `AmadeusToolkit` uses `ChatOpenAI` to identify airports closest to a given location. To use it, just set `OPENAI_API_KEY`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-13T17:45:56.557041160Z",
|
||||
"start_time": "2024-01-13T17:45:56.530682481Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"YOUR_OPENAI_KEY\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"tags": [],
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-13T17:45:58.431168124Z",
|
||||
"start_time": "2024-01-13T17:45:56.536269739Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -71,6 +103,35 @@
|
||||
"tools = toolkit.get_tools()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"Alternatively, you can use any LLM supported by langchain, e.g. `HuggingFaceHub`. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.llms import HuggingFaceHub\n",
|
||||
"\n",
|
||||
"os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = \"YOUR_HF_API_TOKEN\"\n",
|
||||
"\n",
|
||||
"llm = HuggingFaceHub(\n",
|
||||
" repo_id=\"tiiuae/falcon-7b-instruct\",\n",
|
||||
" model_kwargs={\"temperature\": 0.5, \"max_length\": 64},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"toolkit_hf = AmadeusToolkit(llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -78,91 +139,76 @@
|
||||
"## Use Amadeus Toolkit within an Agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import AgentType, initialize_agent\n",
|
||||
"from langchain_openai import OpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools=tools,\n",
|
||||
" llm=llm,\n",
|
||||
" verbose=False,\n",
|
||||
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The closest airport to Cali, Colombia is Alfonso Bonilla Aragón International Airport (CLO).'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"tags": [],
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-13T17:46:00.148691365Z",
|
||||
"start_time": "2024-01-13T17:45:59.317173243Z"
|
||||
}
|
||||
],
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.run(\"What is the name of the airport in Cali, Colombia?\")"
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor, create_react_agent\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The cheapest flight on August 23, 2023 leaving Dallas, Texas before noon to Lincoln, Nebraska has a departure time of 16:42 and a total price of 276.08 EURO.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"tags": [],
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-13T17:46:01.270044101Z",
|
||||
"start_time": "2024-01-13T17:46:00.148988945Z"
|
||||
}
|
||||
],
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"What is the departure time of the cheapest flight on August 23, 2023 leaving Dallas, Texas before noon to Lincoln, Nebraska?\"\n",
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"\n",
|
||||
"prompt = hub.pull(\"hwchase17/react\")\n",
|
||||
"agent = create_react_agent(llm, tools, prompt)\n",
|
||||
"\n",
|
||||
"agent_executor = AgentExecutor(\n",
|
||||
" agent=agent,\n",
|
||||
" tools=tools,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-13T17:46:06.176227412Z",
|
||||
"start_time": "2024-01-13T17:46:01.272468682Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
|
||||
"\u001B[32;1m\u001B[1;3mI should use the closest_airport tool to find the airport in Cali, Colombia.\n",
|
||||
"Action: closest_airport\n",
|
||||
"Action Input: location= \"Cali, Colombia\"\u001B[0m\u001B[36;1m\u001B[1;3mcontent='{\\n \"iataCode\": \"CLO\"\\n}'\u001B[0m\u001B[32;1m\u001B[1;3mThe airport in Cali, Colombia is called CLO.\n",
|
||||
"Final Answer: CLO\u001B[0m\n",
|
||||
"\n",
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The earliest flight on August 23, 2023 leaving Dallas, Texas to Lincoln, Nebraska lands in Lincoln, Nebraska at 16:07.'"
|
||||
]
|
||||
"text/plain": "{'input': 'What is the name of the airport in Cali, Colombia?',\n 'output': 'CLO'}"
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
@@ -170,52 +216,67 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"At what time does earliest flight on August 23, 2023 leaving Dallas, Texas to Lincoln, Nebraska land in Nebraska?\"\n",
|
||||
"agent_executor.invoke({\"input\": \"What is the name of the airport in Cali, Colombia?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"What is the departure time of the cheapest flight on August 23, 2023 leaving Dallas, Texas before noon to Lincoln, Nebraska?\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The cheapest flight between Portland, Oregon to Dallas, TX on October 3, 2023 is a Spirit Airlines flight with a total price of 84.02 EURO and a total travel time of 8 hours and 43 minutes.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"What is the full travel time for the cheapest flight between Portland, Oregon to Dallas, TX on October 3, 2023?\"\n",
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"At what time does earliest flight on August 23, 2023 leaving Dallas, Texas to Lincoln, Nebraska land in Nebraska?\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Dear Paul,\\n\\nI am writing to request that you book the earliest flight from DFW to DCA on Aug 28, 2023. The flight details are as follows:\\n\\nFlight 1: DFW to ATL, departing at 7:15 AM, arriving at 10:25 AM, flight number 983, carrier Delta Air Lines\\nFlight 2: ATL to DCA, departing at 12:15 PM, arriving at 2:02 PM, flight number 759, carrier Delta Air Lines\\n\\nThank you for your help.\\n\\nSincerely,\\nSantiago'"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"Please draft a concise email from Santiago to Paul, Santiago's travel agent, asking him to book the earliest flight from DFW to DCA on Aug 28, 2023. Include all flight details in the email.\"\n",
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"What is the full travel time for the cheapest flight between Portland, Oregon to Dallas, TX on October 3, 2023?\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"Please draft a concise email from Santiago to Paul, Santiago's travel agent, asking him to book the earliest flight from DFW to DCA on Aug 28, 2023. Include all flight details in the email.\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -294,7 +294,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.1"
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -5,16 +5,25 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MultiOn\n",
|
||||
" \n",
|
||||
"[MultiON](https://www.multion.ai/blog/multion-building-a-brighter-future-for-humanity-with-ai-agents) has built an AI Agent that can interact with a broad array of web services and applications. \n",
|
||||
"\n",
|
||||
"This notebook walks you through connecting LangChain to the `MultiOn` Client in your browser\n",
|
||||
"This notebook walks you through connecting LangChain to the `MultiOn` Client in your browser. \n",
|
||||
"\n",
|
||||
"To use this toolkit, you will need to add `MultiOn Extension` to your browser as explained in the [MultiOn for Chrome](https://multion.notion.site/Download-MultiOn-ddddcfe719f94ab182107ca2612c07a5)."
|
||||
"This enables custom agentic workflows that utilize the power of MultiON agents.\n",
|
||||
" \n",
|
||||
"To use this toolkit, you will need to add `MultiOn Extension` to your browser: \n",
|
||||
"\n",
|
||||
"* Create a [MultiON account](https://app.multion.ai/login?callbackUrl=%2Fprofile). \n",
|
||||
"* Add [MultiOn extension for Chrome](https://multion.notion.site/Download-MultiOn-ddddcfe719f94ab182107ca2612c07a5)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet multion langchain -q"
|
||||
@@ -22,22 +31,43 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"MultionToolkit()"
|
||||
]
|
||||
},
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.agent_toolkits import MultionToolkit\n",
|
||||
"\n",
|
||||
"toolkit = MultionToolkit()\n",
|
||||
"\n",
|
||||
"toolkit"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[MultionCreateSession(), MultionUpdateSession(), MultionCloseSession()]"
|
||||
]
|
||||
},
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"tools"
|
||||
@@ -49,14 +79,24 @@
|
||||
"source": [
|
||||
"## MultiOn Setup\n",
|
||||
"\n",
|
||||
"Once you have created an account, create an API key at https://app.multion.ai/. \n",
|
||||
"\n",
|
||||
"Login to establish connection with your extension."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 39,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Logged in.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Authorize connection to your Browser extension\n",
|
||||
"import multion\n",
|
||||
@@ -68,42 +108,98 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use Multion Toolkit within an Agent"
|
||||
"## Use Multion Toolkit within an Agent\n",
|
||||
"\n",
|
||||
"This will use the MultiON Chrome extension to perform the desired actions.\n",
|
||||
"\n",
|
||||
"We can run the below, and view the [trace](https://smith.langchain.com/public/34aaf36d-204a-4ce3-a54e-4a0976f09670/r) to see:\n",
|
||||
"\n",
|
||||
"* The agent uses the `create_multion_session` tool\n",
|
||||
"* It then uses MultiON to execute the query"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"execution_count": 40,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import AgentType, initialize_agent\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"from langchain_community.agent_toolkits import MultionToolkit\n",
|
||||
"\n",
|
||||
"toolkit = MultionToolkit()\n",
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"agent = initialize_agent(\n",
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor, create_openai_functions_agent\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Prompt\n",
|
||||
"instructions = \"\"\"You are an assistant.\"\"\"\n",
|
||||
"base_prompt = hub.pull(\"langchain-ai/openai-functions-template\")\n",
|
||||
"prompt = base_prompt.partial(instructions=instructions)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# LLM\n",
|
||||
"llm = ChatOpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Agent\n",
|
||||
"agent = create_openai_functions_agent(llm, toolkit.get_tools(), prompt)\n",
|
||||
"agent_executor = AgentExecutor(\n",
|
||||
" agent=agent,\n",
|
||||
" tools=toolkit.get_tools(),\n",
|
||||
" llm=llm,\n",
|
||||
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
" verbose=True,\n",
|
||||
" verbose=False,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"execution_count": 46,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING: 'new_session' is deprecated and will be removed in a future version. Use 'create_session' instead.\n",
|
||||
"WARNING: 'update_session' is deprecated and will be removed in a future version. Use 'step_session' instead.\n",
|
||||
"WARNING: 'update_session' is deprecated and will be removed in a future version. Use 'step_session' instead.\n",
|
||||
"WARNING: 'update_session' is deprecated and will be removed in a future version. Use 'step_session' instead.\n",
|
||||
"WARNING: 'update_session' is deprecated and will be removed in a future version. Use 'step_session' instead.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'Use multion to how AlphaCodium works, a recently released code language model.',\n",
|
||||
" 'output': 'AlphaCodium is a recently released code language model that is designed to assist developers in writing code more efficiently. It is based on advanced machine learning techniques and natural language processing. AlphaCodium can understand and generate code in multiple programming languages, making it a versatile tool for developers.\\n\\nThe model is trained on a large dataset of code snippets and programming examples, allowing it to learn patterns and best practices in coding. It can provide suggestions and auto-complete code based on the context and the desired outcome.\\n\\nAlphaCodium also has the ability to analyze code and identify potential errors or bugs. It can offer recommendations for improving code quality and performance.\\n\\nOverall, AlphaCodium aims to enhance the coding experience by providing intelligent assistance and reducing the time and effort required to write high-quality code.\\n\\nFor more detailed information, you can visit the official AlphaCodium website or refer to the documentation and resources available online.\\n\\nI hope this helps! Let me know if you have any other questions.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 46,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Tweet 'Hi from MultiOn'\")"
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"Use multion to explain how AlphaCodium works, a recently released code language model.\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -123,7 +219,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
187
docs/docs/integrations/toolkits/polygon.ipynb
Normal file
187
docs/docs/integrations/toolkits/polygon.ipynb
Normal file
@@ -0,0 +1,187 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e6fd05db-21c2-4227-9900-0840bc62cb31",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Polygon IO Toolkit\n",
|
||||
"\n",
|
||||
"This notebook shows how to use agents to interact with the [Polygon IO](https://polygon.io/) toolkit. The toolkit provides access to Polygon's Stock Market Data API."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a4da342d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example Use\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c17b33e0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-community > /dev/null"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3cd00ad2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Get your Polygon IO API key [here](https://polygon.io/), and then set it below.\n",
|
||||
"Note that the tool used in this example requires a \"Stocks Advanced\" subscription"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "a180a2b8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"POLYGON_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ed6f89fa",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "56670cf6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7d93e6bd-03d7-4d3c-b915-8b73164e2ad8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Initializing the agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "648a2cb2-308e-4b2e-9b73-37109be4e258",
|
||||
"metadata": {
|
||||
"is_executing": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor, create_openai_functions_agent\n",
|
||||
"from langchain_community.agent_toolkits.polygon.toolkit import PolygonToolkit\n",
|
||||
"from langchain_community.utilities.polygon import PolygonAPIWrapper\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"\n",
|
||||
"instructions = \"\"\"You are an assistant.\"\"\"\n",
|
||||
"base_prompt = hub.pull(\"langchain-ai/openai-functions-template\")\n",
|
||||
"prompt = base_prompt.partial(instructions=instructions)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "18650040-0ff8-4c0f-a4f2-be6aad7fe63e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"polygon = PolygonAPIWrapper()\n",
|
||||
"toolkit = PolygonToolkit.from_polygon_api_wrapper(polygon)\n",
|
||||
"agent = create_openai_functions_agent(llm, toolkit.get_tools(), prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "fd7463e4-8716-4d1d-860a-770533eaa742",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor = AgentExecutor(\n",
|
||||
" agent=agent,\n",
|
||||
" tools=toolkit.get_tools(),\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "71f05fc9-d80d-4614-b9a3-e0a5e43cbbbb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Get the last price quote for a stock"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b97409f3-dc87-425d-b555-406cf8466a28",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.invoke({\"input\": \"What is the latest stock price for AAPL?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9e666ee1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -7,36 +7,74 @@
|
||||
"source": [
|
||||
"# Robocorp\n",
|
||||
"\n",
|
||||
"This notebook covers how to get started with [Robocorp Action Server](https://github.com/robocorp/robo/tree/master/action_server/docs) action toolkit and LangChain.\n",
|
||||
"This notebook covers how to get started with [Robocorp Action Server](https://github.com/robocorp/robocorp) action toolkit and LangChain.\n",
|
||||
"\n",
|
||||
"## Installation"
|
||||
"Robocorp is the easiest way to extend the capabilities of AI agents, assistants and copilots with custom actions.\n",
|
||||
"\n",
|
||||
"## Installation\n",
|
||||
"\n",
|
||||
"First, see the [Robocorp Quickstart](https://github.com/robocorp/robocorp#quickstart) on how to set up `Action Server` and create your Actions.\n",
|
||||
"\n",
|
||||
"In your LangChain application, install the `langchain-robocorp` package: "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4c3bef91",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install package and Action Server\n",
|
||||
"%pip install --upgrade --quiet langchain-robocorp robocorp-action-server"
|
||||
"# Install package\n",
|
||||
"%pip install --upgrade --quiet langchain-robocorp"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8e2ca5c5",
|
||||
"id": "dd53ad19-4a62-46d1-a2f7-151cfd282590",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Action Server setup\n",
|
||||
"Create the new `Action Server` by following the above quickstart.\n",
|
||||
"\n",
|
||||
"You will need a running instance of Action Server to communicate with from your agent application. You can bootstrap a new project using Action Server `new` command.\n",
|
||||
"It will create a directory with files, including `action.py`.\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"!action-server new\n",
|
||||
"cd ./your-project-name\n",
|
||||
"We can add Python functions as actions, as shown [here](https://github.com/robocorp/robocorp/tree/master/actions#describe-your-action).\n",
|
||||
"\n",
|
||||
"Let's add a dummy function to `action.py`.\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"@action\n",
|
||||
"def get_weather_forecast(city: str, days: int, scale: str = \"celsius\") -> str:\n",
|
||||
" \"\"\"\n",
|
||||
" Returns weather conditions forecast for a given city.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" city (str): Target city to get the weather conditions for\n",
|
||||
" days: How many day forecast to return\n",
|
||||
" scale (str): Temperature scale to use, should be one of \"celsius\" or \"fahrenheit\"\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" str: The requested weather conditions forecast\n",
|
||||
" \"\"\"\n",
|
||||
" return \"75F and sunny :)\"\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"We then start the server:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"action-server start\n",
|
||||
"```\n"
|
||||
"```\n",
|
||||
"\n",
|
||||
"And we can see: \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"Found new action: get_weather_forecast\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Test locally by going to the server running at `http://localhost:8080` and use the UI to run the function."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -50,17 +88,47 @@
|
||||
"\n",
|
||||
"- `LANGCHAIN_TRACING_V2=true`: To enable LangSmith log run tracing that can also be bound to the respective Action Server action run logs. See [LangSmith documentation](https://docs.smith.langchain.com/tracing#log-runs) for more.\n",
|
||||
"\n",
|
||||
"## Usage"
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"We started the local action server, above, running on `http://localhost:8080`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 7,
|
||||
"id": "62e0dbc3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `robocorp_action_server_get_weather_forecast` with `{'city': 'San Francisco', 'days': 1, 'scale': 'fahrenheit'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3m\"75F and sunny :)\"\u001b[0m\u001b[32;1m\u001b[1;3mThe current weather today in San Francisco is 75F and sunny.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'What is the current weather today in San Francisco in fahrenheit?',\n",
|
||||
" 'output': 'The current weather today in San Francisco is 75F and sunny.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.agents import AgentExecutor, OpenAIFunctionsAgent\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
@@ -81,8 +149,7 @@
|
||||
"\n",
|
||||
"executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"executor.invoke(\"What is the current date?\")"
|
||||
"executor.invoke(\"What is the current weather today in San Francisco in fahrenheit?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -92,12 +159,14 @@
|
||||
"source": [
|
||||
"### Single input tools\n",
|
||||
"\n",
|
||||
"By default `toolkit.get_tools()` will return the actions as Structured Tools. To return single input tools, pass a Chat model to be used for processing the inputs."
|
||||
"By default `toolkit.get_tools()` will return the actions as Structured Tools. \n",
|
||||
"\n",
|
||||
"To return single input tools, pass a Chat model to be used for processing the inputs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 9,
|
||||
"id": "1dc7db86",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -124,7 +193,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"\n",
|
||||
"This notebook goes over how to use the `arxiv` tool with an agent. \n",
|
||||
"\n",
|
||||
"First, you need to install `arxiv` python package."
|
||||
"First, you need to install the `arxiv` python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -36,20 +36,18 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import AgentType, initialize_agent, load_tools\n",
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor, create_react_agent, load_tools\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0.0)\n",
|
||||
"tools = load_tools(\n",
|
||||
" [\"arxiv\"],\n",
|
||||
")\n",
|
||||
"prompt = hub.pull(\"hwchase17/react\")\n",
|
||||
"\n",
|
||||
"agent_chain = initialize_agent(\n",
|
||||
" tools,\n",
|
||||
" llm,\n",
|
||||
" agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
"agent = create_react_agent(llm, tools, prompt)\n",
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -67,10 +65,9 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mI need to use Arxiv to search for the paper.\n",
|
||||
"Action: Arxiv\n",
|
||||
"Action Input: \"1605.08386\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mPublished: 2016-05-26\n",
|
||||
"\u001b[32;1m\u001b[1;3mI should use the arxiv tool to search for the paper with the given identifier.\n",
|
||||
"Action: arxiv\n",
|
||||
"Action Input: 1605.08386\u001b[0m\u001b[36;1m\u001b[1;3mPublished: 2016-05-26\n",
|
||||
"Title: Heat-bath random walks with Markov bases\n",
|
||||
"Authors: Caprice Stanley, Tobias Windisch\n",
|
||||
"Summary: Graphs on lattice points are studied whose edges come from a finite set of\n",
|
||||
@@ -79,18 +76,15 @@
|
||||
"then study the mixing behaviour of heat-bath random walks on these graphs. We\n",
|
||||
"also state explicit conditions on the set of moves so that the heat-bath random\n",
|
||||
"walk, a generalization of the Glauber dynamics, is an expander in fixed\n",
|
||||
"dimension.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe paper is about heat-bath random walks with Markov bases on graphs of lattice points.\n",
|
||||
"Final Answer: The paper 1605.08386 is about heat-bath random walks with Markov bases on graphs of lattice points.\u001b[0m\n",
|
||||
"dimension.\u001b[0m\u001b[32;1m\u001b[1;3mThe paper \"1605.08386\" is titled \"Heat-bath random walks with Markov bases\" and is authored by Caprice Stanley and Tobias Windisch. It was published on May 26, 2016. The paper discusses the study of graphs on lattice points with edges coming from a finite set of allowed moves. It explores the diameter of these graphs and the mixing behavior of heat-bath random walks on them. The paper also discusses conditions for the heat-bath random walk to be an expander in fixed dimension.\n",
|
||||
"Final Answer: The paper \"1605.08386\" is about heat-bath random walks with Markov bases.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The paper 1605.08386 is about heat-bath random walks with Markov bases on graphs of lattice points.'"
|
||||
]
|
||||
"text/plain": "{'input': \"What's the paper 1605.08386 about?\",\n 'output': 'The paper \"1605.08386\" is about heat-bath random walks with Markov bases.'}"
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
@@ -98,8 +92,10 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_chain.run(\n",
|
||||
" \"What's the paper 1605.08386 about?\",\n",
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"What's the paper 1605.08386 about?\",\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -130,15 +126,15 @@
|
||||
"id": "c89c110c-96ac-4fe1-ba3e-6056543d1a59",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run a query to get information about some `scientific article`/articles. The query text is limited to 300 characters.\n",
|
||||
"You can use the ArxivAPIWrapper to get information about a scientific article or articles. The query text is limited to 300 characters.\n",
|
||||
"\n",
|
||||
"It returns these article fields:\n",
|
||||
"The ArxivAPIWrapper returns these article fields:\n",
|
||||
"- Publishing date\n",
|
||||
"- Title\n",
|
||||
"- Authors\n",
|
||||
"- Summary\n",
|
||||
"\n",
|
||||
"Next query returns information about one article with arxiv Id equal \"1605.08386\". "
|
||||
"The following query returns information about one article with the arxiv ID \"1605.08386\". "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -151,9 +147,7 @@
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Published: 2016-05-26\\nTitle: Heat-bath random walks with Markov bases\\nAuthors: Caprice Stanley, Tobias Windisch\\nSummary: Graphs on lattice points are studied whose edges come from a finite set of\\nallowed moves of arbitrary length. We show that the diameter of these graphs on\\nfibers of a fixed integer matrix can be bounded from above by a constant. We\\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\\nalso state explicit conditions on the set of moves so that the heat-bath random\\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\\ndimension.'"
|
||||
]
|
||||
"text/plain": "'Published: 2016-05-26\\nTitle: Heat-bath random walks with Markov bases\\nAuthors: Caprice Stanley, Tobias Windisch\\nSummary: Graphs on lattice points are studied whose edges come from a finite set of\\nallowed moves of arbitrary length. We show that the diameter of these graphs on\\nfibers of a fixed integer matrix can be bounded from above by a constant. We\\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\\nalso state explicit conditions on the set of moves so that the heat-bath random\\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\\ndimension.'"
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
@@ -186,9 +180,7 @@
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Published: 2017-10-10\\nTitle: On Mixing Behavior of a Family of Random Walks Determined by a Linear Recurrence\\nAuthors: Caprice Stanley, Seth Sullivant\\nSummary: We study random walks on the integers mod $G_n$ that are determined by an\\ninteger sequence $\\\\{ G_n \\\\}_{n \\\\geq 1}$ generated by a linear recurrence\\nrelation. Fourier analysis provides explicit formulas to compute the\\neigenvalues of the transition matrices and we use this to bound the mixing time\\nof the random walks.\\n\\nPublished: 2016-05-26\\nTitle: Heat-bath random walks with Markov bases\\nAuthors: Caprice Stanley, Tobias Windisch\\nSummary: Graphs on lattice points are studied whose edges come from a finite set of\\nallowed moves of arbitrary length. We show that the diameter of these graphs on\\nfibers of a fixed integer matrix can be bounded from above by a constant. We\\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\\nalso state explicit conditions on the set of moves so that the heat-bath random\\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\\ndimension.\\n\\nPublished: 2003-03-18\\nTitle: Calculation of fluxes of charged particles and neutrinos from atmospheric showers\\nAuthors: V. Plyaskin\\nSummary: The results on the fluxes of charged particles and neutrinos from a\\n3-dimensional (3D) simulation of atmospheric showers are presented. An\\nagreement of calculated fluxes with data on charged particles from the AMS and\\nCAPRICE detectors is demonstrated. Predictions on neutrino fluxes at different\\nexperimental sites are compared with results from other calculations.'"
|
||||
]
|
||||
"text/plain": "'Published: 2017-10-10\\nTitle: On Mixing Behavior of a Family of Random Walks Determined by a Linear Recurrence\\nAuthors: Caprice Stanley, Seth Sullivant\\nSummary: We study random walks on the integers mod $G_n$ that are determined by an\\ninteger sequence $\\\\{ G_n \\\\}_{n \\\\geq 1}$ generated by a linear recurrence\\nrelation. Fourier analysis provides explicit formulas to compute the\\neigenvalues of the transition matrices and we use this to bound the mixing time\\nof the random walks.\\n\\nPublished: 2016-05-26\\nTitle: Heat-bath random walks with Markov bases\\nAuthors: Caprice Stanley, Tobias Windisch\\nSummary: Graphs on lattice points are studied whose edges come from a finite set of\\nallowed moves of arbitrary length. We show that the diameter of these graphs on\\nfibers of a fixed integer matrix can be bounded from above by a constant. We\\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\\nalso state explicit conditions on the set of moves so that the heat-bath random\\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\\ndimension.\\n\\nPublished: 2003-03-18\\nTitle: Calculation of fluxes of charged particles and neutrinos from atmospheric showers\\nAuthors: V. Plyaskin\\nSummary: The results on the fluxes of charged particles and neutrinos from a\\n3-dimensional (3D) simulation of atmospheric showers are presented. An\\nagreement of calculated fluxes with data on charged particles from the AMS and\\nCAPRICE detectors is demonstrated. Predictions on neutrino fluxes at different\\nexperimental sites are compared with results from other calculations.'"
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
@@ -218,9 +210,7 @@
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'No good Arxiv Result was found'"
|
||||
]
|
||||
"text/plain": "'No good Arxiv Result was found'"
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
|
||||
113
docs/docs/integrations/tools/polygon.ipynb
Normal file
113
docs/docs/integrations/tools/polygon.ipynb
Normal file
@@ -0,0 +1,113 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "245a954a",
|
||||
"metadata": {
|
||||
"id": "245a954a"
|
||||
},
|
||||
"source": [
|
||||
"# Polygon Stock Market API\n",
|
||||
"\n",
|
||||
[Polygon](https">
">The [Polygon.io](https://polygon.io/) Stocks API provides REST endpoints that let you query the latest market data from all US stock exchanges.\n",
|
||||
"\n",
|
||||
"Use the ``PolygonAPIWrapper`` to get stock market data like the latest quote for a ticker."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "34bb5968",
|
||||
"metadata": {
|
||||
"id": "34bb5968",
|
||||
"is_executing": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"POLYGON_API_KEY\"] = getpass.getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ac4910f8",
|
||||
"metadata": {
|
||||
"id": "ac4910f8",
|
||||
"is_executing": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.tools.polygon.last_quote import PolygonLastQuote\n",
|
||||
"from langchain_community.utilities.polygon import PolygonAPIWrapper"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "84b8f773",
|
||||
"metadata": {
|
||||
"id": "84b8f773"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tool = PolygonLastQuote(api_wrapper=PolygonAPIWrapper())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "068991a6",
|
||||
"metadata": {
|
||||
"id": "068991a6",
|
||||
"outputId": "c5cdc6ec-03cf-4084-cc6f-6ae792d91d39"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'results': {'P': 185.86, 'S': 1, 'T': 'AAPL', 'X': 11, 'i': [604], 'p': 185.81, 'q': 106551669, 's': 2, 't': 1705098436014023700, 'x': 12, 'y': 1705098436014009300, 'z': 3}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tool.run(\"AAPL\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "venv",
|
||||
"language": "python",
|
||||
"display_name": "venv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "53f3bc57609c7a84333bb558594977aa5b4026b1d6070b93987956689e367341"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -113,10 +113,63 @@
|
||||
"requests.get(\"https://www.google.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4b0bf1d0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you need the output to be decoded from JSON, you can use the ``JsonRequestsWrapper``."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "3f27ee3d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"Type - <class 'dict'>\n",
|
||||
"\n",
|
||||
"Content: \n",
|
||||
"```\n",
|
||||
"{'count': 5707, 'name': 'jackson', 'age': 38}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_community.utilities.requests import JsonRequestsWrapper\n",
|
||||
"\n",
|
||||
"requests = JsonRequestsWrapper()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"rval = requests.get(\"https://api.agify.io/?name=jackson\")\n",
|
||||
"\n",
|
||||
"print(\n",
|
||||
" f\"\"\"\n",
|
||||
"\n",
|
||||
"Type - {type(rval)}\n",
|
||||
"\n",
|
||||
"Content: \n",
|
||||
"```\n",
|
||||
"{rval}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\"\"\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3f27ee3d",
|
||||
"id": "52a1aa15",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
@@ -138,7 +191,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -132,7 +132,7 @@
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_community.vectorstores.azure_cosmos_db_vector_search import (\n",
|
||||
"from langchain_community.vectorstores.azure_cosmos_db import (\n",
|
||||
" AzureCosmosDBVectorSearch,\n",
|
||||
" CosmosDBSimilarityType,\n",
|
||||
")\n",
|
||||
|
||||
@@ -14,6 +14,15 @@
|
||||
"This tutorial illustrates how to work with an end-to-end data and embedding management system in LangChain, and provide scalable semantic search in BigQuery."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This is a **private preview (experimental)** feature. Please submit this\n",
|
||||
"[enrollment form](https://docs.google.com/forms/d/18yndSb4dTf2H0orqA9N7NAchQEDQekwWiD5jYfEkGWk/viewform?edit_requested=true)\n",
|
||||
"if you want to enroll BigQuery Vector Search Experimental."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
@@ -324,6 +333,24 @@
|
||||
"docs = store.similarity_search_by_vector(query_vector, filter={\"len\": 6})\n",
|
||||
"print(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Explore job satistics with BigQuery Job Id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"job_id = \"\" # @param {type:\"string\"}\n",
|
||||
"# Debug and explore the job statistics with a BigQuery Job id.\n",
|
||||
"store.explore_job_stats(job_id)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -75,7 +75,7 @@
|
||||
" )\n",
|
||||
"```\n",
|
||||
"### Authentication\n",
|
||||
"For production, we recommend you run with security enabled. To connect with login credentials, you can use the parameters `api_key` or `es_user` and `es_password`.\n",
|
||||
"For production, we recommend you run with security enabled. To connect with login credentials, you can use the parameters `es_api_key` or `es_user` and `es_password`.\n",
|
||||
"\n",
|
||||
"Example:\n",
|
||||
"```python\n",
|
||||
|
||||
@@ -28,6 +28,9 @@
|
||||
"1. You must install and set up the JaguarDB server and its HTTP gateway server.\n",
|
||||
" Please refer to the instructions in:\n",
|
||||
" [www.jaguardb.com](http://www.jaguardb.com)\n",
|
||||
" For quick setup in docker environment:\n",
|
||||
" docker pull jaguardb/jaguardb_with_http\n",
|
||||
" docker run -d -p 8888:8888 -p 8080:8080 --name jaguardb_with_http jaguardb/jaguardb_with_http\n",
|
||||
"\n",
|
||||
"2. You must install the http client package for JaguarDB:\n",
|
||||
" ```\n",
|
||||
@@ -126,6 +129,8 @@
|
||||
"Add the texts from the text splitter to our vectorstore\n",
|
||||
"\"\"\"\n",
|
||||
"vectorstore.add_documents(docs)\n",
|
||||
"# or tag the documents:\n",
|
||||
"# vectorstore.add_documents(more_docs, text_tag=\"tags to these documents\")\n",
|
||||
"\n",
|
||||
"\"\"\" Get the retriever object \"\"\"\n",
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
|
||||
659
docs/docs/integrations/vectorstores/lantern.ipynb
Normal file
659
docs/docs/integrations/vectorstores/lantern.ipynb
Normal file
@@ -0,0 +1,659 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Lantern\n",
|
||||
"\n",
|
||||
">[Lantern](https://github.com/lanterndata/lantern) is an open-source vector similarity search for `Postgres`\n",
|
||||
"\n",
|
||||
"It supports:\n",
|
||||
"- Exact and approximate nearest neighbor search\n",
|
||||
"- L2 squared distance, hamming distance, and cosine distance\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the Postgres vector database (`Lantern`)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"See the [installation instruction](https://github.com/lanterndata/lantern#-quick-install)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Pip install necessary package\n",
|
||||
"!pip install openai\n",
|
||||
"!pip install psycopg2-binary\n",
|
||||
"!pip install tiktoken"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:02:16.802456Z",
|
||||
"start_time": "2023-09-09T08:02:07.065604Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"OpenAI API Key: ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:02:19.742896Z",
|
||||
"start_time": "2023-09-09T08:02:19.732527Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"False"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"## Loading Environment Variables\n",
|
||||
"from typing import List, Tuple\n",
|
||||
"\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
"load_dotenv()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:02:23.144824Z",
|
||||
"start_time": "2023-09-09T08:02:22.047801Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_community.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain_community.vectorstores import Lantern\n",
|
||||
"from langchain_core.documents import Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:02:25.452472Z",
|
||||
"start_time": "2023-09-09T08:02:25.441563Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:02:28.174088Z",
|
||||
"start_time": "2023-09-09T08:02:28.162698Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"DB Connection String: ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Lantern needs the connection string to the database.\n",
|
||||
"# Example postgresql://postgres:postgres@localhost:5432/postgres\n",
|
||||
"CONNECTION_STRING = getpass.getpass(\"DB Connection String:\")\n",
|
||||
"\n",
|
||||
"# # Alternatively, you can create it from environment variables.\n",
|
||||
"# import os\n",
|
||||
"\n",
|
||||
"# CONNECTION_STRING = Lantern.connection_string_from_db_params(\n",
|
||||
"# driver=os.environ.get(\"LANTERN_DRIVER\", \"psycopg2\"),\n",
|
||||
"# host=os.environ.get(\"LANTERN_HOST\", \"localhost\"),\n",
|
||||
"# port=int(os.environ.get(\"LANTERN_PORT\", \"5432\")),\n",
|
||||
"# database=os.environ.get(\"LANTERN_DATABASE\", \"postgres\"),\n",
|
||||
"# user=os.environ.get(\"LANTERN_USER\", \"postgres\"),\n",
|
||||
"# password=os.environ.get(\"LANTERN_PASSWORD\", \"postgres\"),\n",
|
||||
"# )\n",
|
||||
"\n",
|
||||
"# or you can pass it via `LANTERN_CONNECTION_STRING` env variable"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Similarity Search with Cosine Distance (Default)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:04:16.696625Z",
|
||||
"start_time": "2023-09-09T08:02:31.817790Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The Lantern Module will try to create a table with the name of the collection.\n",
|
||||
"# So, make sure that the collection name is unique and the user has the permission to create a table.\n",
|
||||
"\n",
|
||||
"COLLECTION_NAME = \"state_of_the_union_test\"\n",
|
||||
"\n",
|
||||
"db = Lantern.from_documents(\n",
|
||||
" embedding=embeddings,\n",
|
||||
" documents=docs,\n",
|
||||
" collection_name=COLLECTION_NAME,\n",
|
||||
" connection_string=CONNECTION_STRING,\n",
|
||||
" pre_delete_collection=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:05:11.104135Z",
|
||||
"start_time": "2023-09-09T08:05:10.548998Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs_with_score = db.similarity_search_with_score(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:05:13.532334Z",
|
||||
"start_time": "2023-09-09T08:05:13.523191Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Score: 0.18440479\n",
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Score: 0.21727282\n",
|
||||
"A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n",
|
||||
"\n",
|
||||
"And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n",
|
||||
"\n",
|
||||
"We can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \n",
|
||||
"\n",
|
||||
"We’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n",
|
||||
"\n",
|
||||
"We’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n",
|
||||
"\n",
|
||||
"We’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Score: 0.22621095\n",
|
||||
"And for our LGBTQ+ Americans, let’s finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \n",
|
||||
"\n",
|
||||
"As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n",
|
||||
"\n",
|
||||
"While it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \n",
|
||||
"\n",
|
||||
"And soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \n",
|
||||
"\n",
|
||||
"So tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \n",
|
||||
"\n",
|
||||
"First, beat the opioid epidemic.\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Score: 0.22654456\n",
|
||||
"Tonight, I’m announcing a crackdown on these companies overcharging American businesses and consumers. \n",
|
||||
"\n",
|
||||
"And as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \n",
|
||||
"\n",
|
||||
"That ends on my watch. \n",
|
||||
"\n",
|
||||
"Medicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \n",
|
||||
"\n",
|
||||
"We’ll also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \n",
|
||||
"\n",
|
||||
"Let’s pass the Paycheck Fairness Act and paid leave. \n",
|
||||
"\n",
|
||||
"Raise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \n",
|
||||
"\n",
|
||||
"Let’s increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls America’s best-kept secret: community colleges.\n",
|
||||
"--------------------------------------------------------------------------------\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for doc, score in docs_with_score:\n",
|
||||
" print(\"-\" * 80)\n",
|
||||
" print(\"Score: \", score)\n",
|
||||
" print(doc.page_content)\n",
|
||||
" print(\"-\" * 80)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"## Maximal Marginal Relevance Search (MMR)\n",
|
||||
"Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:05:23.276819Z",
|
||||
"start_time": "2023-09-09T08:05:21.972256Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs_with_score = db.max_marginal_relevance_search_with_score(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-09-09T08:05:27.478580Z",
|
||||
"start_time": "2023-09-09T08:05:27.470138Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Score: 0.18440479\n",
|
||||
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
|
||||
"\n",
|
||||
"Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
|
||||
"\n",
|
||||
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
|
||||
"\n",
|
||||
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Score: 0.23515457\n",
|
||||
"We can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \n",
|
||||
"\n",
|
||||
"I recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \n",
|
||||
"\n",
|
||||
"They were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \n",
|
||||
"\n",
|
||||
"Officer Mora was 27 years old. \n",
|
||||
"\n",
|
||||
"Officer Rivera was 22. \n",
|
||||
"\n",
|
||||
"Both Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \n",
|
||||
"\n",
|
||||
"I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \n",
|
||||
"\n",
|
||||
"I’ve worked on these issues a long time. \n",
|
||||
"\n",
|
||||
"I know what works: Investing in crime prevention and community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety.\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Score: 0.24478757\n",
|
||||
"One was stationed at bases and breathing in toxic smoke from “burn pits” that incinerated wastes of war—medical and hazard material, jet fuel, and more. \n",
|
||||
"\n",
|
||||
"When they came home, many of the world’s fittest and best trained warriors were never the same. \n",
|
||||
"\n",
|
||||
"Headaches. Numbness. Dizziness. \n",
|
||||
"\n",
|
||||
"A cancer that would put them in a flag-draped coffin. \n",
|
||||
"\n",
|
||||
"I know. \n",
|
||||
"\n",
|
||||
"One of those soldiers was my son Major Beau Biden. \n",
|
||||
"\n",
|
||||
"We don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. \n",
|
||||
"\n",
|
||||
"But I’m committed to finding out everything we can. \n",
|
||||
"\n",
|
||||
"Committed to military families like Danielle Robinson from Ohio. \n",
|
||||
"\n",
|
||||
"The widow of Sergeant First Class Heath Robinson. \n",
|
||||
"\n",
|
||||
"He was born a soldier. Army National Guard. Combat medic in Kosovo and Iraq. \n",
|
||||
"\n",
|
||||
"Stationed near Baghdad, just yards from burn pits the size of football fields. \n",
|
||||
"\n",
|
||||
"Heath’s widow Danielle is here with us tonight. They loved going to Ohio State football games. He loved building Legos with their daughter.\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"--------------------------------------------------------------------------------\n",
|
||||
"Score: 0.25137997\n",
|
||||
"And I’m taking robust action to make sure the pain of our sanctions is targeted at Russia’s economy. And I will use every tool at our disposal to protect American businesses and consumers. \n",
|
||||
"\n",
|
||||
"Tonight, I can announce that the United States has worked with 30 other countries to release 60 Million barrels of oil from reserves around the world. \n",
|
||||
"\n",
|
||||
"America will lead that effort, releasing 30 Million barrels from our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, unified with our allies. \n",
|
||||
"\n",
|
||||
"These steps will help blunt gas prices here at home. And I know the news about what’s happening can seem alarming. \n",
|
||||
"\n",
|
||||
"But I want you to know that we are going to be okay. \n",
|
||||
"\n",
|
||||
"When the history of this era is written Putin’s war on Ukraine will have left Russia weaker and the rest of the world stronger. \n",
|
||||
"\n",
|
||||
"While it shouldn’t have taken something so terrible for people around the world to see what’s at stake now everyone sees it clearly.\n",
|
||||
"--------------------------------------------------------------------------------\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for doc, score in docs_with_score:\n",
|
||||
" print(\"-\" * 80)\n",
|
||||
" print(\"Score: \", score)\n",
|
||||
" print(doc.page_content)\n",
|
||||
" print(\"-\" * 80)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Working with vectorstore\n",
|
||||
"\n",
|
||||
"Above, we created a vectorstore from scratch. However, we often want to work with an existing vectorstore.\n",
|
||||
"In order to do that, we can initialize it directly."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"store = Lantern(\n",
|
||||
" collection_name=COLLECTION_NAME,\n",
|
||||
" connection_string=CONNECTION_STRING,\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Add documents\n",
|
||||
"We can add documents to the existing vectorstore."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['f8164598-aa28-11ee-a037-acde48001122']"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"store.add_documents([Document(page_content=\"foo\")])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs_with_score = db.similarity_search_with_score(\"foo\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(Document(page_content='foo'), -1.1920929e-07)"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs_with_score[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(Document(page_content='And let’s pass the PRO Act when a majority of workers want to form a union—they shouldn’t be stopped. \\n\\nWhen we invest in our workers, when we build the economy from the bottom up and the middle out together, we can do something we haven’t done in a long time: build a better America. \\n\\nFor more than two years, COVID-19 has impacted every decision in our lives and the life of the nation. \\n\\nAnd I know you’re tired, frustrated, and exhausted. \\n\\nBut I also know this. \\n\\nBecause of the progress we’ve made, because of your resilience and the tools we have, tonight I can say \\nwe are moving forward safely, back to more normal routines. \\n\\nWe’ve reached a new moment in the fight against COVID-19, with severe cases down to a level not seen since last July. \\n\\nJust a few days ago, the Centers for Disease Control and Prevention—the CDC—issued new mask guidelines. \\n\\nUnder these new guidelines, most Americans in most of the country can now be mask free.', metadata={'source': '../../modules/state_of_the_union.txt'}),\n",
|
||||
" 0.24038416)"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs_with_score[1]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Overriding a vectorstore\n",
|
||||
"\n",
|
||||
"If you have an existing collection, you can override it by calling `from_documents` with `pre_delete_collection=True`. \n",
|
||||
"This will delete the collection before re-populating it"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = Lantern.from_documents(\n",
|
||||
" documents=docs,\n",
|
||||
" embedding=embeddings,\n",
|
||||
" collection_name=COLLECTION_NAME,\n",
|
||||
" connection_string=CONNECTION_STRING,\n",
|
||||
" pre_delete_collection=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs_with_score = db.similarity_search_with_score(\"foo\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(Document(page_content='And let’s pass the PRO Act when a majority of workers want to form a union—they shouldn’t be stopped. \\n\\nWhen we invest in our workers, when we build the economy from the bottom up and the middle out together, we can do something we haven’t done in a long time: build a better America. \\n\\nFor more than two years, COVID-19 has impacted every decision in our lives and the life of the nation. \\n\\nAnd I know you’re tired, frustrated, and exhausted. \\n\\nBut I also know this. \\n\\nBecause of the progress we’ve made, because of your resilience and the tools we have, tonight I can say \\nwe are moving forward safely, back to more normal routines. \\n\\nWe’ve reached a new moment in the fight against COVID-19, with severe cases down to a level not seen since last July. \\n\\nJust a few days ago, the Centers for Disease Control and Prevention—the CDC—issued new mask guidelines. \\n\\nUnder these new guidelines, most Americans in most of the country can now be mask free.', metadata={'source': '../../modules/state_of_the_union.txt'}),\n",
|
||||
" 0.2403456)"
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs_with_score[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using a VectorStore as a Retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = store.as_retriever()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"tags=['Lantern', 'OpenAIEmbeddings'] vectorstore=<langchain_community.vectorstores.lantern.Lantern object at 0x11d02f9d0>\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(retriever)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -68,7 +68,44 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 19,
|
||||
"id": "0fda552b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Collecting tika\n",
|
||||
" Downloading tika-2.6.0.tar.gz (27 kB)\n",
|
||||
" Preparing metadata (setup.py) ... \u001b[?25ldone\n",
|
||||
"\u001b[?25hRequirement already satisfied: setuptools in /Users/omaraly/anaconda3/lib/python3.11/site-packages (from tika) (68.2.2)\n",
|
||||
"Requirement already satisfied: requests in /Users/omaraly/anaconda3/lib/python3.11/site-packages (from tika) (2.31.0)\n",
|
||||
"Requirement already satisfied: charset-normalizer<4,>=2 in /Users/omaraly/anaconda3/lib/python3.11/site-packages (from requests->tika) (2.1.1)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /Users/omaraly/anaconda3/lib/python3.11/site-packages (from requests->tika) (3.4)\n",
|
||||
"Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/omaraly/anaconda3/lib/python3.11/site-packages (from requests->tika) (1.26.16)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /Users/omaraly/anaconda3/lib/python3.11/site-packages (from requests->tika) (2022.12.7)\n",
|
||||
"Building wheels for collected packages: tika\n",
|
||||
" Building wheel for tika (setup.py) ... \u001b[?25ldone\n",
|
||||
"\u001b[?25h Created wheel for tika: filename=tika-2.6.0-py3-none-any.whl size=32621 sha256=b3f03c9dbd7f347d712c49027704d48f1a368f31560be9b4ee131f79a52e176f\n",
|
||||
" Stored in directory: /Users/omaraly/Library/Caches/pip/wheels/27/ba/2f/37420d1191bdae5e855d69b8e913673045bfd395cbd78ad697\n",
|
||||
"Successfully built tika\n",
|
||||
"Installing collected packages: tika\n",
|
||||
"Successfully installed tika-2.6.0\n",
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install tika"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"id": "920f4644",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -100,7 +137,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 39,
|
||||
"id": "a8c513ab",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -117,7 +154,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 40,
|
||||
"id": "fc516993",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -131,29 +168,37 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Others may not be democratic but nevertheless depend upon a rules-based international system.\n",
|
||||
"6 N A T I O N A L S E C U R I T Y S T R A T E G Y Page 7 \n",
|
||||
"\n",
|
||||
"Yet what we share in common, and the prospect of a freer and more open world, makes such a broad coalition necessary and worthwhile.\n",
|
||||
"This National Security Strategy lays out our plan to achieve a better future of a free, open, secure, and prosperous world.\n",
|
||||
"\n",
|
||||
"We will listen to and consider ideas that our partners suggest about how to do this.\n",
|
||||
"Our strategy is rooted in our national interests: to protect the security of the American people; to expand economic prosperity and opportunity; and to realize and defend the democratic values at the heart of the American way of life.\n",
|
||||
"\n",
|
||||
"Building this inclusive coalition requires reinforcing the multilateral system to uphold the founding principles of the United Nations, including respect for international law.\n",
|
||||
"We can do none of this alone and we do not have to.\n",
|
||||
"\n",
|
||||
"141 countries expressed support at the United Nations General Assembly for a resolution condemning Russia’s unprovoked aggression against Ukraine.\n",
|
||||
"Most nations around the world define their interests in ways that are compatible with ours.\n",
|
||||
"\n",
|
||||
"We continue to demonstrate this approach by engaging all regions across all issues, not in terms of what we are against but what we are for.\n",
|
||||
"We will build the strongest and broadest possible coalition of nations that seek to cooperate with each other, while competing with those powers that offer a darker vision and thwarting their efforts to threaten our interests.\n",
|
||||
"\n",
|
||||
"This year, we partnered with ASEAN to advance clean energy infrastructure and maritime security in the region.\n",
|
||||
"Our Enduring Role The need for a strong and purposeful American role in the world has never been greater.\n",
|
||||
"\n",
|
||||
"We kickstarted the Prosper Africa Build Together Campaign to fuel economic growth across the continent and bolster trade and investment in the clean energy, health, and digital technology sectors.\n",
|
||||
"The world is becoming more divided and unstable.\n",
|
||||
"\n",
|
||||
"We are working to develop a partnership with countries on the Atlantic Ocean to establish and carry out a shared approach to advancing our joint development, economic, environmental, scientific, and maritime governance goals.\n",
|
||||
"Global increases in inflation since the COVID-19 pandemic began have made life more difficult for many.\n",
|
||||
"\n",
|
||||
"We galvanized regional action to address the core challenges facing the Western Hemisphere by spearheading the Americas Partnership for Economic Prosperity to drive economic recovery and by mobilizing the region behind a bold and unprecedented approach to migration through the Los Angeles Declaration on Migration and Protection.\n",
|
||||
"The basic laws and principles governing relations among nations, including the United Nations Charter and the protection it affords all states from being invaded by their neighbors or having their borders redrawn by force, are under attack.\n",
|
||||
"\n",
|
||||
"In the Middle East, we have worked to enhance deterrence toward Iran, de-escalate regional conflicts, deepen integration among a diverse set of partners in the region, and bolster energy stability.\n",
|
||||
"The risk of conflict between major powers is increasing.\n",
|
||||
"\n",
|
||||
"A prime example of an inclusive coalition is IPEF, which we launched alongside a dozen regional partners that represent 40 percent of the world’s GDP.\n"
|
||||
"Democracies and autocracies are engaged in a contest to show which system of governance can best deliver for their people and the world.\n",
|
||||
"\n",
|
||||
"Competition to develop and deploy foundational technologies that will transform our security and economy is intensifying.\n",
|
||||
"\n",
|
||||
"Global cooperation on shared interests has frayed, even as the need for that cooperation takes on existential importance.\n",
|
||||
"\n",
|
||||
"The scale of these changes grows with each passing year, as do the risks of inaction.\n",
|
||||
"\n",
|
||||
"Although the international environment has become more contested, the United States remains the world’s leading power.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -173,7 +218,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 41,
|
||||
"id": "8804a21d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -192,7 +237,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 42,
|
||||
"id": "756a6887",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -251,7 +296,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 43,
|
||||
"id": "9427195f",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -263,10 +308,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"LLMRailsRetriever(tags=None, metadata=None, vectorstore=<langchain_community.vectorstores.llm_rails.LLMRails object at 0x107b9c040>, search_type='similarity', search_kwargs={'k': 5})"
|
||||
"LLMRailsRetriever(vectorstore=<langchain_community.vectorstores.llm_rails.LLMRails object at 0x1235b0e50>)"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 43,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -278,7 +323,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 44,
|
||||
"id": "f3c70c31",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
@@ -290,17 +335,21 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='But we will do so as the last resort and only when the objectives and mission are clear and achievable, consistent with our values and laws, alongside non-military tools, and the mission is undertaken with the informed consent of the American people.\\n\\nOur approach to national defense is described in detail in the 2022 National Defense Strategy.\\n\\nOur starting premise is that a powerful U.S. military helps advance and safeguard vital U.S. national interests by backstopping diplomacy, confronting aggression, deterring conflict, projecting strength, and protecting the American people and their economic interests.\\n\\nAmid intensifying competition, the military’s role is to maintain and gain warfighting advantages while limiting those of our competitors.\\n\\nThe military will act urgently to sustain and strengthen deterrence, with the PRC as its pacing challenge.\\n\\nWe will make disciplined choices regarding our national defense and focus our attention on the military’s primary responsibilities: to defend the homeland, and deter attacks and aggression against the United States, our allies and partners, while being prepared to fight and win the Nation’s wars should diplomacy and deterrence fail.\\n\\nTo do so, we will combine our strengths to achieve maximum effect in deterring acts of aggression—an approach we refer to as integrated deterrence (see text box on page 22).\\n\\nWe will operate our military using a campaigning mindset—sequencing logically linked military activities to advance strategy-aligned priorities.\\n\\nAnd, we will build a resilient force and defense ecosystem to ensure we can perform these functions for decades to come.\\n\\nWe ended America’s longest war in Afghanistan, and with it an era of major military operations to remake other societies, even as we have maintained the capacity to address terrorist threats to the American people as they emerge.\\n\\n20 NATIONAL SECURITY STRATEGY Page 21 
\\x90\\x90\\x90\\x90\\x90\\x90\\n\\nA combat-credible military is the foundation of deterrence and America’s ability to prevail in conflict.', metadata={'type': 'file', 'url': 'https://cdn.llmrails.com/dst_d94b490c-4638-4247-ad5e-9aa0e7ef53c1/c2d63a2ea3cd406cb522f8312bc1535d', 'name': 'Biden-Harris-Administrations-National-Security-Strategy-10.2022.pdf'})"
|
||||
"[Document(page_content='But we will do so as the last resort and only when the objectives and mission are clear and achievable, consistent with our values and laws, alongside non-military tools, and the mission is undertaken with the informed consent of the American people.\\n\\nOur approach to national defense is described in detail in the 2022 National Defense Strategy.\\n\\nOur starting premise is that a powerful U.S. military helps advance and safeguard vital U.S. national interests by backstopping diplomacy, confronting aggression, deterring conflict, projecting strength, and protecting the American people and their economic interests.\\n\\nAmid intensifying competition, the military’s role is to maintain and gain warfighting advantages while limiting those of our competitors.\\n\\nThe military will act urgently to sustain and strengthen deterrence, with the PRC as its pacing challenge.\\n\\nWe will make disciplined choices regarding our national defense and focus our attention on the military’s primary responsibilities: to defend the homeland, and deter attacks and aggression against the United States, our allies and partners, while being prepared to fight and win the Nation’s wars should diplomacy and deterrence fail.\\n\\nTo do so, we will combine our strengths to achieve maximum effect in deterring acts of aggression—an approach we refer to as integrated deterrence (see text box on page 22).\\n\\nWe will operate our military using a campaigning mindset—sequencing logically linked military activities to advance strategy-aligned priorities.\\n\\nAnd, we will build a resilient force and defense ecosystem to ensure we can perform these functions for decades to come.\\n\\nWe ended America’s longest war in Afghanistan, and with it an era of major military operations to remake other societies, even as we have maintained the capacity to address terrorist threats to the American people as they emerge.\\n\\n20 NATIONAL SECURITY STRATEGY Page 21 
\\x90\\x90\\x90\\x90\\x90\\x90\\n\\nA combat-credible military is the foundation of deterrence and America’s ability to prevail in conflict.', metadata={'type': 'file', 'url': 'https://cdn.llmrails.com/dst_466092be-e79a-49f3-b3e6-50e51ddae186/a63892afdee3469d863520351bd5af9f', 'name': 'Biden-Harris-Administrations-National-Security-Strategy-10.2022.pdf', 'filters': {}}),\n",
|
||||
" Document(page_content='Your text here', metadata={'type': 'text', 'url': 'https://cdn.llmrails.com/dst_466092be-e79a-49f3-b3e6-50e51ddae186/63c17ac6395e4be1967c63a16356818e', 'name': '71370a91-7f58-4cc7-b2e7-546325960330', 'filters': {}}),\n",
|
||||
" Document(page_content='Page 1 NATIONAL SECURITY STRATEGY OCTOBER 2022 Page 2 October 12, 2022 From the earliest days of my Presidency, I have argued that our world is at an inflection point.\\n\\nHow we respond to the tremendous challenges and the unprecedented opportunities we face today will determine the direction of our world and impact the security and prosperity of the American people for generations to come.\\n\\nThe 2022 National Security Strategy outlines how my Administration will seize this decisive decade to advance America’s vital interests, position the United States to outmaneuver our geopolitical competitors, tackle shared challenges, and set our world firmly on a path toward a brighter and more hopeful tomorrow.\\n\\nAround the world, the need for American leadership is as great as it has ever been.\\n\\nWe are in the midst of a strategic competition to shape the future of the international order.\\n\\nMeanwhile, shared challenges that impact people everywhere demand increased global cooperation and nations stepping up to their responsibilities at a moment when this has become more difficult.\\n\\nIn response, the United States will lead with our values, and we will work in lockstep with our allies and partners and with all those who share our interests.\\n\\nWe will not leave our future vulnerable to the whims of those who do not share our vision for a world that is free, open, prosperous, and secure.\\n\\nAs the world continues to navigate the lingering impacts of the pandemic and global economic uncertainty, there is no nation better positioned to lead with strength and purpose than the United States of America.\\n\\nFrom the moment I took the oath of office, my Administration has focused on investing in America’s core strategic advantages.\\n\\nOur economy has added 10 million jobs and unemployment rates have reached near record lows.\\n\\nManufacturing jobs have come racing back to the United States.\\n\\nWe’re rebuilding our economy from 
the bottom up and the middle out.', metadata={'type': 'file', 'url': 'https://cdn.llmrails.com/dst_466092be-e79a-49f3-b3e6-50e51ddae186/a63892afdee3469d863520351bd5af9f', 'name': 'Biden-Harris-Administrations-National-Security-Strategy-10.2022.pdf', 'filters': {}}),\n",
|
||||
" Document(page_content='Your text here', metadata={'type': 'text', 'url': 'https://cdn.llmrails.com/dst_466092be-e79a-49f3-b3e6-50e51ddae186/8c414a9306e04d47a300f0289ba6e9cf', 'name': 'dacc29f5-8c63-46e0-b5aa-cab2d3c99fb7', 'filters': {}}),\n",
|
||||
" Document(page_content='To ensure our nuclear deterrent remains responsive to the threats we face, we are modernizing the nuclear Triad, nuclear command, control, and communications, and our nuclear weapons infrastructure, as well as strengthening our extended deterrence commitments to our Allies.\\n\\nWe remain equally committed to reducing the risks of nuclear war.\\n\\nThis includes taking further steps to reduce the role of nuclear weapons in our strategy and pursuing realistic goals for mutual, verifiable arms control, which contribute to our deterrence strategy and strengthen the global non-proliferation regime.\\n\\nThe most important investments are those made in the extraordinary All-Volunteer Force of the Army, Marine Corps, Navy, Air Force, Space Force, Coast Guard—together with our Department of Defense civilian workforce.\\n\\nOur service members are the backbone of America’s national defense and we are committed to their wellbeing and their families while in service and beyond.\\n\\nWe will maintain our foundational principle of civilian control of the military, recognizing that healthy civil-military relations rooted in mutual respect are essential to military effectiveness.\\n\\nWe will strengthen the effectiveness of the force by promoting diversity and inclusion; intensifying our suicide prevention efforts; eliminating the scourges of sexual assault, harassment, and other forms of violence, abuse, and discrimination; and rooting out violent extremism.\\n\\nWe will also uphold our Nation’s sacred obligation to care for veterans and their families when our troops return home.\\n\\nNATIONAL SECURITY STRATEGY 21 Page 22 \\x90\\x90\\x90\\x90\\x90\\x90\\n\\nIntegrated Deterrence The United States has a vital interest in deterring aggression by the PRC, Russia, and other states.\\n\\nMore capable competitors and new strategies of threatening behavior below and above the traditional threshold of conflict mean we cannot afford to rely solely on 
conventional forces and nuclear deterrence.\\n\\nOur defense strategy must sustain and strengthen deterrence, with the PRC as our pacing challenge.', metadata={'type': 'file', 'url': 'https://cdn.llmrails.com/dst_466092be-e79a-49f3-b3e6-50e51ddae186/a63892afdee3469d863520351bd5af9f', 'name': 'Biden-Harris-Administrations-National-Security-Strategy-10.2022.pdf', 'filters': {}})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 44,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What is your approach to national defense\"\n",
|
||||
"retriever.get_relevant_documents(query)[0]"
|
||||
"retriever.invoke(query)"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -320,7 +369,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -201,6 +201,120 @@
|
||||
"source": [
|
||||
"After retrieval you can go on querying it as usual."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Per-User Retrieval\n",
|
||||
"\n",
|
||||
"When building a retrieval app, you often have to build it with multiple users in mind. This means that you may be storing data not just for one user, but for many different users, and they should not be able to see each other’s data.\n",
|
||||
"\n",
|
||||
"Milvus recommends using [partition_key](https://milvus.io/docs/multi_tenancy.md#Partition-key-based-multi-tenancy) to implement multi-tenancy. Here is an example."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"docs = [\n",
|
||||
" Document(page_content=\"i worked at kensho\", metadata={\"namespace\": \"harrison\"}),\n",
|
||||
" Document(page_content=\"i worked at facebook\", metadata={\"namespace\": \"ankush\"}),\n",
|
||||
"]\n",
|
||||
"vectorstore = Milvus.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" connection_args={\"host\": \"127.0.0.1\", \"port\": \"19530\"},\n",
|
||||
" drop_old=True,\n",
|
||||
" partition_key_field=\"namespace\", # Use the \"namespace\" field as the partition key\n",
|
||||
")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"To conduct a search using the partition key, you should include either of the following in the boolean expression of the search request:\n",
|
||||
"\n",
|
||||
"`search_kwargs={\"expr\": '<partition_key> == \"xxxx\"'}`\n",
|
||||
"\n",
|
||||
"`search_kwargs={\"expr\": '<partition_key> in [\"xxx\", \"xxx\"]'}`\n",
|
||||
"\n",
|
||||
"Do replace `<partition_key>` with the name of the field that is designated as the partition key.\n",
|
||||
"\n",
|
||||
"Milvus changes to a partition based on the specified partition key, filters entities according to the partition key, and searches among the filtered entities.\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%% md\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "[Document(page_content='i worked at facebook', metadata={'namespace': 'ankush'})]"
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This will only get documents for Ankush\n",
|
||||
"vectorstore.as_retriever(\n",
|
||||
" search_kwargs={\"expr\": 'namespace == \"ankush\"'}\n",
|
||||
").get_relevant_documents(\"where did i work?\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "[Document(page_content='i worked at kensho', metadata={'namespace': 'harrison'})]"
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This will only get documents for Harrison\n",
|
||||
"vectorstore.as_retriever(\n",
|
||||
" search_kwargs={\"expr\": 'namespace == \"harrison\"'}\n",
|
||||
").get_relevant_documents(\"where did i work?\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -224,4 +338,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
@@ -6,7 +6,7 @@
|
||||
"source": [
|
||||
"# PGVecto.rs\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the Postgres vector database ([pgvecto.rs](https://github.com/tensorchord/pgvecto.rs)). You need to install SQLAlchemy >= 2 manually."
|
||||
"This notebook shows how to use functionality related to the Postgres vector database ([pgvecto.rs](https://github.com/tensorchord/pgvecto.rs))."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -15,10 +15,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Loading Environment Variables\n",
|
||||
"from dotenv import load_dotenv\n",
|
||||
"\n",
|
||||
"load_dotenv()"
|
||||
"%pip install \"pgvecto_rs[sdk]\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -32,8 +29,8 @@
|
||||
"from langchain.docstore.document import Document\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_community.vectorstores.pgvecto_rs import PGVecto_rs\n",
|
||||
"from langchain_openai import OpenAIEmbeddings"
|
||||
"from langchain_community.embeddings.fake import FakeEmbeddings\n",
|
||||
"from langchain_community.vectorstores.pgvecto_rs import PGVecto_rs"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -42,12 +39,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
"embeddings = FakeEmbeddings(size=3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -176,7 +173,17 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs: List[Document] = db1.similarity_search(query, k=4)"
|
||||
"docs: List[Document] = db1.similarity_search(query, k=4)\n",
|
||||
"for doc in docs:\n",
|
||||
" print(doc.page_content)\n",
|
||||
" print(\"======================\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Similarity Search with Filter"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -185,6 +192,36 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pgvecto_rs.sdk.filters import meta_contains\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs: List[Document] = db1.similarity_search(\n",
|
||||
" query, k=4, filter=meta_contains({\"source\": \"../../modules/state_of_the_union.txt\"})\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for doc in docs:\n",
|
||||
" print(doc.page_content)\n",
|
||||
" print(\"======================\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Or:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs: List[Document] = db1.similarity_search(\n",
|
||||
" query, k=4, filter={\"source\": \"../../modules/state_of_the_union.txt\"}\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for doc in docs:\n",
|
||||
" print(doc.page_content)\n",
|
||||
" print(\"======================\")"
|
||||
@@ -207,7 +244,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
"id": "b823d64a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setting Up Your Environment[](https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/rockset#setting-up-environment)\n",
|
||||
"## Setting Up Your Environment\n",
|
||||
"\n",
|
||||
"1. Leverage the `Rockset` console to create a [collection](https://rockset.com/docs/collections/) with the Write API as your source. In this walkthrough, we create a collection named `langchain_demo`. \n",
|
||||
" \n",
|
||||
@@ -249,14 +249,6 @@
|
||||
"\n",
|
||||
"Keep an eye on https://rockset.com/ for future updates in this space."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "054de494-e6c0-453a-becd-ebfb2fdf541a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet surrealdb langchain langchain-community"
|
||||
"# %pip install --upgrade --quiet surrealdb langchain langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -54,6 +54,19 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "1c2d942d-5d90-4f9f-af96-dff976e4510f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# add this import for running in jupyter notebook\n",
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "e49be085-ddf1-4028-8c0c-97836ce4a873",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -68,7 +81,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "38222aee-adc5-44c2-913c-97977b394cf5",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -92,28 +105,28 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "ff9d0304-1e11-4db2-9454-1350db7907e6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['documents:th7j29cjsx6495wluo7e',\n",
|
||||
" 'documents:qkqhhjnl7ahbhr07euky',\n",
|
||||
" 'documents:8kd6xw8o7y0l171iqry0',\n",
|
||||
" 'documents:33ejf42dlkmavol9si74',\n",
|
||||
" 'documents:f7y4dbs7eitqz58xt1p5']"
|
||||
"['documents:38hz49bv1p58f5lrvrdc',\n",
|
||||
" 'documents:niayw63vzwm2vcbh6w2s',\n",
|
||||
" 'documents:it1fa3ktplbuye43n0ch',\n",
|
||||
" 'documents:il8f7vgbbp9tywmsn98c',\n",
|
||||
" 'documents:vza4c6cqje0avqd58gal']"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db = SurrealDBStore(\n",
|
||||
" dburl=\"http://localhost:8000/rpc\", # url for the hosted SurrealDB database\n",
|
||||
" dburl=\"ws://localhost:8000/rpc\", # url for the hosted SurrealDB database\n",
|
||||
" embedding_function=embeddings,\n",
|
||||
" db_user=\"root\", # SurrealDB credentials if needed: db username\n",
|
||||
" db_pass=\"root\", # SurrealDB credentials if needed: db password\n",
|
||||
@@ -145,7 +158,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "73d66563-4e1f-4edf-9e95-5fc9adcfa2cb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -153,7 +166,7 @@
|
||||
"await db.adelete()\n",
|
||||
"\n",
|
||||
"db = await SurrealDBStore.afrom_documents(\n",
|
||||
" dburl=\"http://localhost:8000/rpc\", # url for the hosted SurrealDB database\n",
|
||||
" dburl=\"ws://localhost:8000/rpc\", # url for the hosted SurrealDB database\n",
|
||||
" embedding=embeddings,\n",
|
||||
" documents=docs,\n",
|
||||
" db_user=\"root\", # SurrealDB credentials if needed: db username\n",
|
||||
@@ -174,7 +187,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "aa28a7f8-41d0-4299-84eb-91d1576e8a63",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -187,7 +200,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "1eb16d2a-b466-456a-b412-5e74bb8523dd",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -229,7 +242,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"id": "8e9eef05-1516-469a-ad36-880c69aef7a9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -241,7 +254,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 9,
|
||||
"id": "bd5fb0e4-2a94-4bb4-af8a-27327ecb1a7f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -250,11 +263,11 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'id': 'documents:639m99rzwqlm9imcwg13'}),\n",
|
||||
" 0.39839545290036454)"
|
||||
"(Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'id': 'documents:slgdlhjkfknhqo15xz0w', 'source': '../../modules/state_of_the_union.txt'}),\n",
|
||||
" 0.39839531721941895)"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -280,7 +293,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
248
docs/docs/integrations/vectorstores/vikingdb.ipynb
Normal file
248
docs/docs/integrations/vectorstores/vikingdb.ipynb
Normal file
@@ -0,0 +1,248 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "96ff9e912bfe9d8",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"# viking DB\n",
|
||||
"\n",
|
||||
">[viking DB](https://www.volcengine.com/docs/6459/1163946) is a database that stores, indexes, and manages massive embedding vectors generated by deep neural networks and other machine learning (ML) models.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the VikingDB vector database.\n",
|
||||
"\n",
|
||||
"To run, you should have a [viking DB instance up and running](https://www.volcengine.com/docs/6459/1165058).\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dd771e02d8a93a0",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install --upgrade volcengine"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "12719205caed0d18",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"We want to use VikingDBEmbeddings so we have to get the VikingDB API Key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "fbfb32665b4a3640",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-12-21T09:53:24.186916Z",
|
||||
"start_time": "2023-12-21T09:53:24.179524Z"
|
||||
},
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d8c983d329237fa4",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
||||
"from langchain.vectorstores.vikingdb import VikingDB, VikingDBConfig\n",
|
||||
"from langchain_openai import OpenAIEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1a4aea2eaeb2261",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TextLoader(\"./test.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=10, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bfd593f3deabfaf8",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = VikingDB.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" connection_args=VikingDBConfig(\n",
|
||||
" host=\"host\", region=\"region\", ak=\"ak\", sk=\"sk\", scheme=\"http\"\n",
|
||||
" ),\n",
|
||||
" drop_old=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "50e6ee12ca7eec39",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-12-21T10:01:47.355894Z",
|
||||
"start_time": "2023-12-21T10:01:47.334789Z"
|
||||
},
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"docs = db.similarity_search(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "b6b81f5995c79ef0",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-12-21T10:01:47.771478Z",
|
||||
"start_time": "2023-12-21T10:01:47.731485Z"
|
||||
},
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a2d932c1290478ee",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"### Compartmentalize the data with viking DB Collections\n",
|
||||
"\n",
|
||||
"You can store different unrelated documents in different collections within same viking DB instance to maintain the context"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "907de4eb10626d2a",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"Here's how you can create a new collection"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4f5a59ba40f7985f",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = VikingDB.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" connection_args=VikingDBConfig(\n",
|
||||
" host=\"host\", region=\"region\", ak=\"ak\", sk=\"sk\", scheme=\"http\"\n",
|
||||
" ),\n",
|
||||
" collection_name=\"collection_1\",\n",
|
||||
" drop_old=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7c8eada37b17d992",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"And here is how you retrieve that stored collection"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "883ec678d47c9adc",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db = VikingDB.from_documents(\n",
|
||||
" embeddings,\n",
|
||||
" connection_args=VikingDBConfig(\n",
|
||||
" host=\"host\", region=\"region\", ak=\"ak\", sk=\"sk\", scheme=\"http\"\n",
|
||||
" ),\n",
|
||||
" collection_name=\"collection_1\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2f0be30cfe70083d",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"After retreival you can go on querying it as usual."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -30,13 +30,12 @@ Whether this agent requires the model to support any additional parameters. Some
|
||||
|
||||
Our commentary on when you should consider using this agent type.
|
||||
|
||||
| Agent Type | Intended Model Type | Supports Chat History | Supports Multi-Input Tools | Supports Parallel Function Calling | Required Model Params | When to Use |
|
||||
|--------------------------------------------|---------------------|-----------------------|----------------------------|-------------------------------------|----------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| [OpenAI Tools](./openai_tools) | Chat | ✅ | ✅ | ✅ | `tools` | If you are using a recent OpenAI model (`1106` onwards) |
|
||||
| [OpenAI Functions](./openai_functions_agent)| Chat | ✅ | ✅ | | `functions` | If you are using an OpenAI model, or an open-source model that has been finetuned for function calling and exposes the same `functions` parameters as OpenAI |
|
||||
| [OpenAI Assistants](./openai_assistants) | Chat | ✅ | ✅ | ✅ | `tools` | If you want to use the beta OpenAI assistants API. |
|
||||
| [XML](./xml_agent) | LLM | ✅ | | | | If you are using Anthropic models, or other models good at XML |
|
||||
| [Structured Chat](./structured_chat) | Chat | ✅ | ✅ | | | If you need to support tools with multiple inputs |
|
||||
| [JSON Chat](./json_agent) | Chat | ✅ | | | | If you are using a model good at JSON |
|
||||
| [ReAct](./react) | LLM | ✅ | | | | If you are using a simple model |
|
||||
| [Self Ask With Search](./self_ask_with_search)| LLM | | | | | If you are using a simple model and only have one search tool |
|
||||
| Agent Type | Intended Model Type | Supports Chat History | Supports Multi-Input Tools | Supports Parallel Function Calling | Required Model Params | When to Use | API |
|
||||
|--------------------------------------------|---------------------|-----------------------|----------------------------|-------------------------------------|----------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------|
|
||||
| [OpenAI Tools](./openai_tools) | Chat | ✅ | ✅ | ✅ | `tools` | If you are using a recent OpenAI model (`1106` onwards) | [Ref](https://api.python.langchain.com/en/latest/agents/langchain.agents.openai_tools.base.create_openai_tools_agent.html) |
|
||||
| [OpenAI Functions](./openai_functions_agent)| Chat | ✅ | ✅ | | `functions` | If you are using an OpenAI model, or an open-source model that has been finetuned for function calling and exposes the same `functions` parameters as OpenAI | [Ref](https://api.python.langchain.com/en/latest/agents/langchain.agents.openai_functions_agent.base.create_openai_functions_agent.html) |
|
||||
| [XML](./xml_agent) | LLM | ✅ | | | | If you are using Anthropic models, or other models good at XML | [Ref](https://api.python.langchain.com/en/latest/agents/langchain.agents.xml.base.create_xml_agent.html) |
|
||||
| [Structured Chat](./structured_chat) | Chat | ✅ | ✅ | | | If you need to support tools with multiple inputs | [Ref](https://api.python.langchain.com/en/latest/agents/langchain.agents.structured_chat.base.create_structured_chat_agent.html) |
|
||||
| [JSON Chat](./json_agent) | Chat | ✅ | | | | If you are using a model good at JSON | [Ref](https://api.python.langchain.com/en/latest/agents/langchain.agents.json_chat.base.create_json_chat_agent.html) |
|
||||
| [ReAct](./react) | LLM | ✅ | | | | If you are using a simple model | [Ref](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html) |
|
||||
| [Self Ask With Search](./self_ask_with_search)| LLM | | | | | If you are using a simple model and only have one search tool | [Ref](https://api.python.langchain.com/en/latest/agents/langchain.agents.self_ask_with_search.base.create_self_ask_with_search_agent.html) |
|
||||
@@ -19,9 +19,27 @@
|
||||
"\n",
|
||||
"Certain OpenAI models (like gpt-3.5-turbo-0613 and gpt-4-0613) have been fine-tuned to detect when a function should be called and respond with the inputs that should be passed to the function. In an API call, you can describe functions and have the model intelligently choose to output a JSON object containing arguments to call those functions. The goal of the OpenAI Function APIs is to more reliably return valid and useful function calls than a generic text completion or chat API.\n",
|
||||
"\n",
|
||||
"A number of open source models have adopted the same format for function calls and have also fine-tuned the model to detect when a function should be called.\n",
|
||||
"\n",
|
||||
"The OpenAI Functions Agent is designed to work with these models.\n",
|
||||
"\n",
|
||||
"Install `openai`, `tavily-python` packages which are required as the LangChain packages call them internally."
|
||||
"Install `openai`, `tavily-python` packages which are required as the LangChain packages call them internally.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
":::info\n",
|
||||
"\n",
|
||||
"OpenAI API has deprecated `functions` in favor of `tools`. The difference between the two is that the `tools` API allows the model to request that multiple functions be invoked at once, which can reduce response times in some architectures. It's recommended to use the tools agent for OpenAI models.\n",
|
||||
"\n",
|
||||
"See the following links for more information:\n",
|
||||
"\n",
|
||||
"[OpenAI chat create](https://platform.openai.com/docs/api-reference/chat/create)\n",
|
||||
"\n",
|
||||
"[OpenAI function calling](https://platform.openai.com/docs/guides/function-calling)\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
":::tip\n",
|
||||
"The `functions` format remains relevant for open source models and providers that have adopted it, and this agent is expected to work for such models.\n",
|
||||
":::\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -260,7 +278,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -17,9 +17,26 @@
|
||||
"source": [
|
||||
"# OpenAI tools\n",
|
||||
"\n",
|
||||
"Certain OpenAI models have been finetuned to work with with **tool calling**. This is very similar but different from **function calling**, and thus requires a separate agent type.\n",
|
||||
"Newer OpenAI models have been fine-tuned to detect when **one or more** function(s) should be called and respond with the inputs that should be passed to the function(s). In an API call, you can describe functions and have the model intelligently choose to output a JSON object containing arguments to call these functions. The goal of the OpenAI tools APIs is to more reliably return valid and useful function calls than what can be done using a generic text completion or chat API.\n",
|
||||
"\n",
|
||||
"The key difference between tools and functions lies in their availability and application within the chat environment. **Tools** are specialized capabilities directly accessible in the chat interface, designed for interactive and immediate use, such as generating images or executing code. In contrast, **functions** are more about invoking specific computational tasks or algorithms, typically as part of a larger process or application. This distinction underscores the tailored nature of each tool and function, ensuring they are optimally suited for their respective roles within the chat environment."
|
||||
"OpenAI termed the capability to invoke a **single** function as **functions**, and the capability to invoke **one or more** funcitons as **tools**.\n",
|
||||
"\n",
|
||||
":::tip\n",
|
||||
"\n",
|
||||
"In the OpenAI Chat API, **functions** are now considered a legacy options that is deprecated in favor of **tools**.\n",
|
||||
"\n",
|
||||
"If you're creating agents using OpenAI models, you should be using this OpenAI Tools agent rather than the OpenAI functions agent.\n",
|
||||
"\n",
|
||||
"Using **tools** allows the model to request that more than one function will be called upon when appropriate. \n",
|
||||
"\n",
|
||||
"In some situations, this can help signficantly reduce the time that it takes an agent to achieve its goal.\n",
|
||||
"\n",
|
||||
"See \n",
|
||||
" \n",
|
||||
"* [OpenAI chat create](https://platform.openai.com/docs/api-reference/chat/create) \n",
|
||||
"* [OpenAI function calling](https://platform.openai.com/docs/guides/function-calling)\n",
|
||||
"\n",
|
||||
":::"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -130,14 +147,14 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
|
||||
"\u001B[32;1m\u001B[1;3m\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `tavily_search_results_json` with `{'query': 'LangChain'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001B[0m\u001B[36;1m\u001B[1;3m[{'url': 'https://www.ibm.com/topics/langchain', 'content': 'LangChain is essentially a library of abstractions for Python and Javascript, representing common steps and concepts LangChain is an open source orchestration framework for the development of applications using large language models other LangChain features, like the eponymous chains. LangChain provides integrations for over 25 different embedding methods, as well as for over 50 different vector storesLangChain is a tool for building applications using large language models (LLMs) like chatbots and virtual agents. It simplifies the process of programming and integration with external data sources and software workflows. It supports Python and Javascript languages and supports various LLM providers, including OpenAI, Google, and IBM.'}]\u001B[0m\u001B[32;1m\u001B[1;3mLangChain is an open source orchestration framework for the development of applications using large language models. It is essentially a library of abstractions for Python and Javascript, representing common steps and concepts. LangChain simplifies the process of programming and integration with external data sources and software workflows. It supports various large language model providers, including OpenAI, Google, and IBM. You can find more information about LangChain on the IBM website: [LangChain - IBM](https://www.ibm.com/topics/langchain)\u001B[0m\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m[{'url': 'https://www.ibm.com/topics/langchain', 'content': 'LangChain is essentially a library of abstractions for Python and Javascript, representing common steps and concepts LangChain is an open source orchestration framework for the development of applications using large language models other LangChain features, like the eponymous chains. LangChain provides integrations for over 25 different embedding methods, as well as for over 50 different vector storesLangChain is a tool for building applications using large language models (LLMs) like chatbots and virtual agents. It simplifies the process of programming and integration with external data sources and software workflows. It supports Python and Javascript languages and supports various LLM providers, including OpenAI, Google, and IBM.'}]\u001b[0m\u001b[32;1m\u001b[1;3mLangChain is an open source orchestration framework for the development of applications using large language models. It is essentially a library of abstractions for Python and Javascript, representing common steps and concepts. LangChain simplifies the process of programming and integration with external data sources and software workflows. It supports various large language model providers, including OpenAI, Google, and IBM. You can find more information about LangChain on the IBM website: [LangChain - IBM](https://www.ibm.com/topics/langchain)\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n"
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -176,10 +193,10 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
|
||||
"\u001B[32;1m\u001B[1;3mYour name is Bob.\u001B[0m\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mYour name is Bob.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n"
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -235,7 +252,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user