Mirror of https://github.com/hwchase17/langchain.git (synced 2026-02-05 16:50:03 +00:00)
Compare commits: erick/test ... wfh/log_er (282 commits)
10 .github/DISCUSSION_TEMPLATE/q-a.yml (vendored)
@@ -3,18 +3,18 @@ body:
   - type: markdown
     attributes:
       value: |
-        Thanks for your interest in 🦜️🔗 LangChain!
+        Thanks for your interest in LangChain 🦜️🔗!

         Please follow these instructions, fill every question, and do every step. 🙏

         We're asking for this because answering questions and solving problems in GitHub takes a lot of time --
-        this is time that we cannot spend on adding new features, fixing bugs, write documentation or reviewing pull requests.
+        this is time that we cannot spend on adding new features, fixing bugs, writing documentation or reviewing pull requests.

-        By asking questions in a structured way (following this) it will be much easier to help you.
+        By asking questions in a structured way (following this) it will be much easier for us to help you.

-        And there's a high chance that you will find the solution along the way and you won't even have to submit it and wait for an answer. 😎
+        There's a high chance that by following this process, you'll find the solution on your own, eliminating the need to submit a question and wait for an answer. 😎

-        As there are too many questions, we will **DISCARD** and close the incomplete ones.
+        As there are many questions submitted every day, we will **DISCARD** and close the incomplete ones.

         That will allow us (and others) to focus on helping people like you that follow the whole process. 🤓
2 .github/ISSUE_TEMPLATE/bug-report.yml (vendored)
@@ -35,6 +35,8 @@ body:
           required: true
         - label: I am sure that this is a bug in LangChain rather than my code.
           required: true
+        - label: The bug is not resolved by updating to the latest stable version of LangChain (or the specific integration package).
+          required: true
   - type: textarea
     id: reproduction
     validations:
2 .github/ISSUE_TEMPLATE/privileged.yml (vendored)
@@ -9,7 +9,7 @@ body:
         If you are not a LangChain maintainer or were not asked directly by a maintainer to create an issue, then please start the conversation in a [Question in GitHub Discussions](https://github.com/langchain-ai/langchain/discussions/categories/q-a) instead.

         You are a LangChain maintainer if you maintain any of the packages inside of the LangChain repository
-        or are a regular contributor to LangChain with previous merged merged pull requests.
+        or are a regular contributor to LangChain with previous merged pull requests.
   - type: checkboxes
     id: privileged
     attributes:
78 .github/scripts/check_diff.py (vendored)
@@ -1,17 +1,24 @@
 import json
 import sys
 import os
+from typing import Dict

-LANGCHAIN_DIRS = {
+LANGCHAIN_DIRS = [
     "libs/core",
+    "libs/text-splitters",
+    "libs/community",
     "libs/langchain",
     "libs/experimental",
-    "libs/community",
-}
+]

 if __name__ == "__main__":
     files = sys.argv[1:]
-    dirs_to_run = set()
+
+    dirs_to_run: Dict[str, set] = {
+        "lint": set(),
+        "test": set(),
+        "extended-test": set(),
+    }

     if len(files) == 300:
         # max diff length is 300 files - there are likely files missing
@@ -24,31 +31,46 @@ if __name__ == "__main__":
                 ".github/workflows",
                 ".github/tools",
                 ".github/actions",
-                "libs/core",
+                ".github/scripts/check_diff.py",
             )
         ):
-            dirs_to_run.update(LANGCHAIN_DIRS)
-        elif "libs/community" in file:
-            dirs_to_run.update(
-                ("libs/community", "libs/langchain", "libs/experimental")
-            )
-        elif "libs/partners" in file:
-            partner_dir = file.split("/")[2]
-            if os.path.isdir(f"libs/partners/{partner_dir}"):
-                dirs_to_run.add(f"libs/partners/{partner_dir}")
-            # Skip if the directory was deleted
-        elif "libs/langchain" in file:
-            dirs_to_run.update(("libs/langchain", "libs/experimental"))
-        elif "libs/experimental" in file:
-            dirs_to_run.add("libs/experimental")
-        elif file.startswith("libs/"):
-            dirs_to_run.update(LANGCHAIN_DIRS)
-        else:
-            pass
-    json_output = json.dumps(list(dirs_to_run))
-    print(f"dirs-to-run={json_output}")  # noqa: T201
+            # add all LANGCHAIN_DIRS for infra changes
+            dirs_to_run["extended-test"].update(LANGCHAIN_DIRS)
+            dirs_to_run["lint"].add(".")

-    extended_test_dirs = [d for d in dirs_to_run if not d.startswith("libs/partners")]
-    json_output_extended = json.dumps(extended_test_dirs)
-    print(f"dirs-to-run-extended={json_output_extended}")  # noqa: T201
+        if any(file.startswith(dir_) for dir_ in LANGCHAIN_DIRS):
+            # add that dir and all dirs after in LANGCHAIN_DIRS
+            # for extended testing
+            found = False
+            for dir_ in LANGCHAIN_DIRS:
+                if file.startswith(dir_):
+                    found = True
+                if found:
+                    dirs_to_run["extended-test"].add(dir_)
+        elif file.startswith("libs/partners"):
+            partner_dir = file.split("/")[2]
+            if os.path.isdir(f"libs/partners/{partner_dir}") and [
+                filename
+                for filename in os.listdir(f"libs/partners/{partner_dir}")
+                if not filename.startswith(".")
+            ] != ["README.md"]:
+                dirs_to_run["test"].add(f"libs/partners/{partner_dir}")
+            # Skip if the directory was deleted or is just a tombstone readme
+        elif file.startswith("libs/"):
+            raise ValueError(
+                f"Unknown lib: {file}. check_diff.py likely needs "
+                "an update for this new library!"
+            )
+        elif any(file.startswith(p) for p in ["docs/", "templates/", "cookbook/"]):
+            dirs_to_run["lint"].add(".")
+
+    outputs = {
+        "dirs-to-lint": list(
+            dirs_to_run["lint"] | dirs_to_run["test"] | dirs_to_run["extended-test"]
+        ),
+        "dirs-to-test": list(dirs_to_run["test"] | dirs_to_run["extended-test"]),
+        "dirs-to-extended-test": list(dirs_to_run["extended-test"]),
+    }
+    for key, value in outputs.items():
+        json_output = json.dumps(value)
+        print(f"{key}={json_output}")  # noqa: T201
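Each `key=json` line the script prints is appended to `$GITHUB_OUTPUT` by the calling workflow, which is how the three lists become job outputs (see the check_diffs.yml diff below). The subtle part is the "this dir and every dir after it" cascade that feeds `extended-test`; here is a minimal, self-contained sketch of that logic (not the repo script itself, and the file paths are illustrative):

```python
# Minimal sketch of check_diff.py's extended-test cascade. The list
# order encodes the dependency chain: a change in an upstream lib
# re-tests everything listed after it.
LANGCHAIN_DIRS = [
    "libs/core",
    "libs/text-splitters",
    "libs/community",
    "libs/langchain",
    "libs/experimental",
]


def extended_dirs_for(changed_file: str) -> list:
    """Return the dir a file belongs to plus all dirs listed after it."""
    found = False
    dirs = []
    for dir_ in LANGCHAIN_DIRS:
        if changed_file.startswith(dir_):
            found = True
        if found:
            dirs.append(dir_)
    return dirs


# libs/core sits first, so touching it fans out to every lib:
assert extended_dirs_for("libs/core/pyproject.toml") == LANGCHAIN_DIRS
# libs/langchain only drags libs/experimental along:
assert extended_dirs_for("libs/langchain/Makefile") == [
    "libs/langchain",
    "libs/experimental",
]
```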
2 .github/scripts/get_min_versions.py (vendored)
@@ -4,7 +4,7 @@ import tomllib
 from packaging.version import parse as parse_version
 import re

-MIN_VERSION_LIBS = ["langchain-core", "langchain-community", "langchain"]
+MIN_VERSION_LIBS = ["langchain-core", "langchain-community", "langchain", "langchain-text-splitters"]


 def get_min_version(version: str) -> str:
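The hunk only shows `get_min_version`'s signature; its body is not visible on this page. As a rough, hypothetical illustration of what resolving a Poetry-style constraint to its lowest allowed version can look like (an assumption, not the repo's implementation):

```python
import re


def get_min_version(version: str) -> str:
    # Hypothetical sketch: "^0.1.7" and "~0.1.7" both bottom out at
    # 0.1.7, and a ">=0.1,<0.2" range bottoms out at 0.1.
    if version.startswith(("^", "~")):
        return version[1:]
    match = re.match(r">=\s*([0-9][0-9.]*)", version)
    if match:
        return match.group(1)
    return version  # exact pin


assert get_min_version("^0.1.7") == "0.1.7"
assert get_min_version(">=0.1,<0.2") == "0.1"
```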
4 .github/workflows/_dependencies.yml (vendored)
@@ -63,6 +63,8 @@ jobs:
       - name: Install the opposite major version of pydantic
         # If normal tests use pydantic v1, here we'll use v2, and vice versa.
         shell: bash
+        # airbyte currently doesn't support pydantic v2
+        if: ${{ !startsWith(inputs.working-directory, 'libs/partners/airbyte') }}
         run: |
           # Determine the major part of pydantic version
           REGULAR_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
@@ -97,6 +99,8 @@ jobs:
           fi
           echo "Found pydantic version ${CURRENT_VERSION}, as expected"
       - name: Run pydantic compatibility tests
+        # airbyte currently doesn't support pydantic v2
+        if: ${{ !startsWith(inputs.working-directory, 'libs/partners/airbyte') }}
         shell: bash
         run: make test
5 .github/workflows/_integration_test.yml (vendored)
@@ -70,6 +70,11 @@ jobs:
           ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
           ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
           ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }}
+          ES_URL: ${{ secrets.ES_URL }}
+          ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
+          ES_API_KEY: ${{ secrets.ES_API_KEY }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # for airbyte
+          MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
         run: |
           make integration_tests
5 .github/workflows/_release.yml (vendored)
@@ -191,6 +191,11 @@ jobs:
           ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
           ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
           ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }}
+          ES_URL: ${{ secrets.ES_URL }}
+          ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
+          ES_API_KEY: ${{ secrets.ES_API_KEY }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # for airbyte
+          MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
         run: make integration_tests
         working-directory: ${{ inputs.working-directory }}
30 .github/workflows/api_doc_build.yml (vendored)
@@ -15,32 +15,57 @@ jobs:
       - uses: actions/checkout@v4
         with:
           ref: bagatur/api_docs_build
+          path: langchain
+      - uses: actions/checkout@v4
+        with:
+          repository: langchain-ai/langchain-google
+          path: langchain-google
+      - uses: actions/checkout@v4
+        with:
+          repository: langchain-ai/langchain-datastax
+          path: langchain-datastax
+
+      - name: Set Git config
+        working-directory: langchain
+        run: |
+          git config --local user.email "actions@github.com"
+          git config --local user.name "Github Actions"
+
+      - name: Merge master
+        working-directory: langchain
+        run: |
+          git fetch origin master
+          git merge origin/master -m "Merge master" --allow-unrelated-histories -X theirs
+
+      - name: Move google libs
+        run: |
+          rm -rf \
+            langchain/libs/partners/google-genai \
+            langchain/libs/partners/google-vertexai \
+            langchain/libs/partners/astradb
+          mv langchain-google/libs/genai langchain/libs/partners/google-genai
+          mv langchain-google/libs/vertexai langchain/libs/partners/google-vertexai
+          mv langchain-datastax/libs/astradb langchain/libs/partners/astradb

       - name: Set up Python ${{ env.PYTHON_VERSION }} + Poetry ${{ env.POETRY_VERSION }}
-        uses: "./.github/actions/poetry_setup"
+        uses: "./langchain/.github/actions/poetry_setup"
         with:
           python-version: ${{ env.PYTHON_VERSION }}
           poetry-version: ${{ env.POETRY_VERSION }}
           cache-key: api-docs
+          working-directory: langchain

       - name: Install dependencies
+        working-directory: langchain
         run: |
           poetry run python -m pip install --upgrade --no-cache-dir pip setuptools
           poetry run python -m pip install --upgrade --no-cache-dir sphinx readthedocs-sphinx-ext
-          poetry run python -m pip install ./libs/partners/*
+          # skip airbyte and ibm due to pandas dependency issue
+          poetry run python -m pip install $(ls ./libs/partners | grep -vE "airbyte|ibm" | xargs -I {} echo "./libs/partners/{}")
           poetry run python -m pip install --exists-action=w --no-cache-dir -r docs/api_reference/requirements.txt

       - name: Build docs
+        working-directory: langchain
         run: |
           poetry run python -m pip install --upgrade --no-cache-dir pip setuptools
           poetry run python docs/api_reference/create_api_rst.py
@@ -49,4 +74,5 @@ jobs:
         # https://github.com/marketplace/actions/add-commit
       - uses: EndBug/add-and-commit@v9
         with:
+          cwd: langchain
           message: 'Update API docs build'
20 .github/workflows/check_diffs.yml (vendored)
@@ -33,14 +33,16 @@ jobs:
       run: |
         python .github/scripts/check_diff.py ${{ steps.files.outputs.all }} >> $GITHUB_OUTPUT
     outputs:
-      dirs-to-run: ${{ steps.set-matrix.outputs.dirs-to-run }}
-      dirs-to-run-extended: ${{ steps.set-matrix.outputs.dirs-to-run-extended }}
+      dirs-to-lint: ${{ steps.set-matrix.outputs.dirs-to-lint }}
+      dirs-to-test: ${{ steps.set-matrix.outputs.dirs-to-test }}
+      dirs-to-extended-test: ${{ steps.set-matrix.outputs.dirs-to-extended-test }}
   lint:
     name: cd ${{ matrix.working-directory }}
     needs: [ build ]
+    if: ${{ needs.build.outputs.dirs-to-lint != '[]' }}
     strategy:
       matrix:
-        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-run) }}
+        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-lint) }}
     uses: ./.github/workflows/_lint.yml
     with:
       working-directory: ${{ matrix.working-directory }}
@@ -49,9 +51,10 @@ jobs:
   test:
     name: cd ${{ matrix.working-directory }}
     needs: [ build ]
+    if: ${{ needs.build.outputs.dirs-to-test != '[]' }}
     strategy:
       matrix:
-        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-run) }}
+        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-test) }}
     uses: ./.github/workflows/_test.yml
     with:
       working-directory: ${{ matrix.working-directory }}
@@ -60,9 +63,10 @@ jobs:
   compile-integration-tests:
     name: cd ${{ matrix.working-directory }}
     needs: [ build ]
+    if: ${{ needs.build.outputs.dirs-to-test != '[]' }}
     strategy:
       matrix:
-        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-run) }}
+        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-test) }}
     uses: ./.github/workflows/_compile_integration_test.yml
     with:
       working-directory: ${{ matrix.working-directory }}
@@ -71,9 +75,10 @@ jobs:
   dependencies:
     name: cd ${{ matrix.working-directory }}
     needs: [ build ]
+    if: ${{ needs.build.outputs.dirs-to-test != '[]' }}
     strategy:
       matrix:
-        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-run) }}
+        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-test) }}
     uses: ./.github/workflows/_dependencies.yml
     with:
       working-directory: ${{ matrix.working-directory }}
@@ -82,10 +87,11 @@ jobs:
   extended-tests:
     name: "cd ${{ matrix.working-directory }} / make extended_tests #${{ matrix.python-version }}"
     needs: [ build ]
+    if: ${{ needs.build.outputs.dirs-to-extended-test != '[]' }}
     strategy:
       matrix:
         # note different variable for extended test dirs
-        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-run-extended) }}
+        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-extended-test) }}
         python-version:
           - "3.8"
           - "3.9"
2 .github/workflows/codespell.yml (vendored)
@@ -32,6 +32,6 @@ jobs:
       - name: Codespell
        uses: codespell-project/actions-codespell@v2
        with:
-          skip: guide_imports.json,*.ambr
+          skip: guide_imports.json,*.ambr,./cookbook/data/imdb_top_1000.csv,*.lock
          ignore_words_list: ${{ steps.extract_ignore_words.outputs.ignore_words_list }}
          exclude_file: libs/community/langchain_community/llms/yuan2.py
37 .github/workflows/doc_lint.yml (vendored, deleted)
@@ -1,37 +0,0 @@
----
-name: CI / cd .
-
-on:
-  push:
-    branches: [ master ]
-  pull_request:
-    paths:
-      - 'docs/**'
-      - 'templates/**'
-      - 'cookbook/**'
-      - '.github/workflows/_lint.yml'
-      - '.github/workflows/doc_lint.yml'
-  workflow_dispatch:
-
-jobs:
-  check:
-    name: Check for "from langchain import x" imports
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Run import check
-        run: |
-          # We should not encourage imports directly from main init file
-          # Expect for hub
-          git grep 'from langchain import' {docs/docs,templates,cookbook} | grep -vE 'from langchain import (hub)' && exit 1 || exit 0
-
-  lint:
-    name: "-"
-    uses:
-      ./.github/workflows/_lint.yml
-    with:
-      working-directory: "."
-    secrets: inherit
5 .gitignore (vendored)
@@ -115,13 +115,10 @@ celerybeat.pid
 # Environments
 .env
 .envrc
-.venv
-.venvs
+.venv*
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/

 # Spyder project settings
 .spyderproject
2 Makefile
@@ -50,11 +50,13 @@ lint lint_package lint_tests:
 	poetry run ruff docs templates cookbook
 	poetry run ruff format docs templates cookbook --diff
 	poetry run ruff --select I docs templates cookbook
+	git grep 'from langchain import' docs/docs templates cookbook | grep -vE 'from langchain import (hub)' && exit 1 || exit 0

 format format_diff:
 	poetry run ruff format docs templates cookbook
 	poetry run ruff --select I --fix docs templates cookbook

+
 ######################
 # HELP
 ######################
932 cookbook/Gemma_LangChain.ipynb (new file)

## Getting started with LangChain and Gemma, running locally or in the Cloud

### Installing dependencies

```python
!pip install --upgrade langchain langchain-google-vertexai
```

### Running the model

Go to the VertexAI Model Garden on Google Cloud [console](https://pantheon.corp.google.com/vertex-ai/publishers/google/model-garden/335), and deploy the desired version of Gemma to VertexAI. It will take a few minutes, and after the endpoint is ready, you need to copy its number.

```python
# @title Basic parameters
project: str = "PUT_YOUR_PROJECT_ID_HERE"  # @param {type:"string"}
endpoint_id: str = "PUT_YOUR_ENDPOINT_ID_HERE"  # @param {type:"string"}
location: str = "PUT_YOUR_ENDPOINT_LOCATION_HERE"  # @param {type:"string"}
```

```python
from langchain_google_vertexai import (
    GemmaChatVertexAIModelGarden,
    GemmaVertexAIModelGarden,
)
```

```python
llm = GemmaVertexAIModelGarden(
    endpoint_id=endpoint_id,
    project=project,
    location=location,
)
```

```python
output = llm.invoke("What is the meaning of life?")
print(output)
```

```
Prompt:
What is the meaning of life?
Output:
 Who am I? Why do I exist? These are questions I have struggled with
```

We can also use Gemma as a multi-turn chat model:

```python
from langchain_core.messages import HumanMessage

llm = GemmaChatVertexAIModelGarden(
    endpoint_id=endpoint_id,
    project=project,
    location=location,
)

message1 = HumanMessage(content="How much is 2+2?")
answer1 = llm.invoke([message1])
print(answer1)

message2 = HumanMessage(content="How much is 3+3?")
answer2 = llm.invoke([message1, answer1, message2])

print(answer2)
```

```
content='Prompt:\n<start_of_turn>user\nHow much is 2+2?<end_of_turn>\n<start_of_turn>model\nOutput:\n8-years old.<end_of_turn>\n\n<start_of'
content='Prompt:\n<start_of_turn>user\nHow much is 2+2?<end_of_turn>\n<start_of_turn>model\nPrompt:\n<start_of_turn>user\nHow much is 2+2?<end_of_turn>\n<start_of_turn>model\nOutput:\n8-years old.<end_of_turn>\n\n<start_of<end_of_turn>\n<start_of_turn>user\nHow much is 3+3?<end_of_turn>\n<start_of_turn>model\nOutput:\nOutput:\n3-years old.<end_of_turn>\n\n<'
```

You can post-process the response to avoid repetitions:

```python
answer1 = llm.invoke([message1], parse_response=True)
print(answer1)

answer2 = llm.invoke([message1, answer1, message2], parse_response=True)

print(answer2)
```

```
content='Output:\n<<humming>>: 2+2 = 4.\n<end'
content='Output:\nOutput:\n<<humming>>: 3+3 = 6.'
```

## Running Gemma locally from Kaggle

In order to run Gemma locally, you can download it from Kaggle first. To do this, you'll need to log in to the Kaggle platform, create an API key, and download a `kaggle.json` file. Read more about Kaggle auth [here](https://www.kaggle.com/docs/api).

### Installation

```python
!mkdir -p ~/.kaggle && cp kaggle.json ~/.kaggle/kaggle.json
```

```python
!pip install keras>=3 keras_nlp
```

```
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorstore 0.1.54 requires ml-dtypes>=0.3.1, but you have ml-dtypes 0.2.0 which is incompatible.
```

### Usage

```python
from langchain_google_vertexai import GemmaLocalKaggle
```

You can specify the keras backend (by default it's `tensorflow`, but you can change it to `jax` or `torch`).

```python
# @title Basic parameters
keras_backend: str = "jax"  # @param {type:"string"}
model_name: str = "gemma_2b_en"  # @param {type:"string"}
```

```python
llm = GemmaLocalKaggle(model_name=model_name, keras_backend=keras_backend)
```

```python
output = llm.invoke("What is the meaning of life?", max_tokens=30)
print(output)
```

```
What is the meaning of life?

The question is one of the most important questions in the world.

It’s the question that has
```

### ChatModel

Same as above, using Gemma locally as a multi-turn chat model. You might need to restart the notebook and clear your GPU memory in order to avoid OOM errors:

```python
from langchain_google_vertexai import GemmaChatLocalKaggle
```

```python
# @title Basic parameters
keras_backend: str = "jax"  # @param {type:"string"}
model_name: str = "gemma_2b_en"  # @param {type:"string"}
```

```python
llm = GemmaChatLocalKaggle(model_name=model_name, keras_backend=keras_backend)
```

```python
from langchain_core.messages import HumanMessage

message1 = HumanMessage(content="Hi! Who are you?")
answer1 = llm.invoke([message1], max_tokens=30)
print(answer1)
```

```
content="<start_of_turn>user\nHi! Who are you?<end_of_turn>\n<start_of_turn>model\nI'm a model.\n Tampoco\nI'm a model."
```

```python
message2 = HumanMessage(content="What can you help me with?")
answer2 = llm.invoke([message1, answer1, message2], max_tokens=60)

print(answer2)
```

```
content="<start_of_turn>user\nHi! Who are you?<end_of_turn>\n<start_of_turn>model\n<start_of_turn>user\nHi! Who are you?<end_of_turn>\n<start_of_turn>model\nI'm a model.\n Tampoco\nI'm a model.<end_of_turn>\n<start_of_turn>user\nWhat can you help me with?<end_of_turn>\n<start_of_turn>model"
```

You can post-process the response if you want to avoid multi-turn statements:

```python
answer1 = llm.invoke([message1], max_tokens=30, parse_response=True)
print(answer1)

answer2 = llm.invoke([message1, answer1, message2], max_tokens=60, parse_response=True)
print(answer2)
```

```
content="I'm a model.\n Tampoco\nI'm a model."
content='I can help you with your modeling.\n Tampoco\nI can'
```

## Running Gemma locally from HuggingFace

```python
from langchain_google_vertexai import GemmaChatLocalHF, GemmaLocalHF
```

```python
# @title Basic parameters
hf_access_token: str = "PUT_YOUR_TOKEN_HERE"  # @param {type:"string"}
model_name: str = "google/gemma-2b"  # @param {type:"string"}
```

```python
llm = GemmaLocalHF(model_name="google/gemma-2b", hf_access_token=hf_access_token)
```

```python
output = llm.invoke("What is the meaning of life?", max_tokens=50)
print(output)
```

```
What is the meaning of life?

The question is one of the most important questions in the world.

It’s the question that has been asked by philosophers, theologians, and scientists for centuries.

And it’s the question that
```

Same as above, using Gemma locally as a multi-turn chat model. You might need to restart the notebook and clear your GPU memory in order to avoid OOM errors:

```python
llm = GemmaChatLocalHF(model_name=model_name, hf_access_token=hf_access_token)
```

```python
from langchain_core.messages import HumanMessage

message1 = HumanMessage(content="Hi! Who are you?")
answer1 = llm.invoke([message1], max_tokens=60)
print(answer1)
```

```
content="<start_of_turn>user\nHi! Who are you?<end_of_turn>\n<start_of_turn>model\nI'm a model.\n<end_of_turn>\n<start_of_turn>user\nWhat do you mean"
```

```python
message2 = HumanMessage(content="What can you help me with?")
answer2 = llm.invoke([message1, answer1, message2], max_tokens=140)

print(answer2)
```

```
content="<start_of_turn>user\nHi! Who are you?<end_of_turn>\n<start_of_turn>model\n<start_of_turn>user\nHi! Who are you?<end_of_turn>\n<start_of_turn>model\nI'm a model.\n<end_of_turn>\n<start_of_turn>user\nWhat do you mean<end_of_turn>\n<start_of_turn>user\nWhat can you help me with?<end_of_turn>\n<start_of_turn>model\nI can help you with anything.\n<"
```

And the same with post-processing:

```python
answer1 = llm.invoke([message1], max_tokens=60, parse_response=True)
print(answer1)

answer2 = llm.invoke([message1, answer1, message2], max_tokens=120, parse_response=True)
print(answer2)
```

```
content="I'm a model.\n<end_of_turn>\n"
content='I can help you with anything.\n<end_of_turn>\n<end_of_turn>\n'
```
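The `parse_response=True` outputs above suggest the wrapper trims the chat-turn scaffolding from the raw completion. A hypothetical post-processor in that spirit (a sketch, not `langchain_google_vertexai`'s actual code):

```python
def parse_gemma_response(text: str) -> str:
    """Keep only the text of the last model turn (illustrative only)."""
    # Take whatever follows the final model-turn marker, then cut at
    # the next end-of-turn tag.
    tail = text.rsplit("<start_of_turn>model", 1)[-1]
    return tail.split("<end_of_turn>")[0].strip()


raw = (
    "<start_of_turn>user\nHi! Who are you?<end_of_turn>\n"
    "<start_of_turn>model\nI'm a model.<end_of_turn>"
)
assert parse_gemma_response(raw) == "I'm a model."
```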
@@ -116,7 +116,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-    "from langchain.text_splitter import CharacterTextSplitter\n",
+    "from langchain_text_splitters import CharacterTextSplitter\n",
     "from unstructured.partition.pdf import partition_pdf\n",
     "\n",
     "\n",
747 cookbook/RAPTOR.ipynb (new file)
File diff suppressed because one or more lines are too long
@@ -68,7 +68,7 @@
     "pdf_pages = loader.load()\n",
     "\n",
     "# Split\n",
-    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
     "\n",
     "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
     "all_splits_pypdf = text_splitter.split_documents(pdf_pages)\n",
@@ -28,9 +28,9 @@
     "outputs": [],
     "source": [
     "from langchain.chains import RetrievalQA\n",
-    "from langchain.text_splitter import CharacterTextSplitter\n",
     "from langchain_community.vectorstores import Chroma\n",
     "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+    "from langchain_text_splitters import CharacterTextSplitter\n",
     "\n",
     "llm = OpenAI(temperature=0)"
     ]
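These cookbook hunks all make the same mechanical swap: the splitters move from `langchain.text_splitter` to the standalone `langchain-text-splitters` package. A minimal sketch of the new import in use:

```python
# New import path introduced by these commits (the old one was
# langchain.text_splitter); requires `pip install langchain-text-splitters`.
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
chunks = text_splitter.split_text("word " * 400)
print(len(chunks), "chunks of at most 500 characters")
```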
200
cookbook/airbyte_github.ipynb
Normal file
200
cookbook/airbyte_github.ipynb
Normal file
@@ -0,0 +1,200 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install -qU langchain-airbyte"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"\n",
"GITHUB_TOKEN = getpass.getpass()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"from langchain_airbyte import AirbyteLoader\n",
"from langchain_core.prompts import PromptTemplate\n",
"\n",
"loader = AirbyteLoader(\n",
"    source=\"source-github\",\n",
"    stream=\"pull_requests\",\n",
"    config={\n",
"        \"credentials\": {\"personal_access_token\": GITHUB_TOKEN},\n",
"        \"repositories\": [\"langchain-ai/langchain\"],\n",
"    },\n",
"    template=PromptTemplate.from_template(\n",
"        \"\"\"# {title}\n",
"by {user[login]}\n",
"\n",
"{body}\"\"\"\n",
"    ),\n",
"    include_metadata=False,\n",
")\n",
"docs = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"# Updated partners/ibm README\n",
"by williamdevena\n",
"\n",
"## PR title\n",
"partners: changed the README file for the IBM Watson AI integration in the libs/partners/ibm folder.\n",
"\n",
"## PR message\n",
"Description: Changed the README file of partners/ibm following the docs on https://python.langchain.com/docs/integrations/llms/ibm_watsonx\n",
"\n",
"The README includes:\n",
"\n",
"- Brief description\n",
"- Installation\n",
"- Setting-up instructions (API key, project id, ...)\n",
"- Basic usage:\n",
"  - Loading the model\n",
"  - Direct inference\n",
"  - Chain invoking\n",
"  - Streaming the model output\n",
" \n",
"Issue: https://github.com/langchain-ai/langchain/issues/17545\n",
"\n",
"Dependencies: None\n",
"\n",
"Twitter handle: None\n"
]
}
],
"source": [
"print(docs[-2].page_content)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10283"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(docs)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"import tiktoken\n",
"from langchain_community.vectorstores import Chroma\n",
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"enc = tiktoken.get_encoding(\"cl100k_base\")\n",
"\n",
"vectorstore = Chroma.from_documents(\n",
"    docs,\n",
"    embedding=OpenAIEmbeddings(\n",
"        disallowed_special=(enc.special_tokens_set - {\"<|endofprompt|>\"})\n",
"    ),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"retriever = vectorstore.as_retriever()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='# Updated partners/ibm README\\nby williamdevena\\n\\n## PR title\\r\\npartners: changed the README file for the IBM Watson AI integration in the libs/partners/ibm folder.\\r\\n\\r\\n## PR message\\r\\nDescription: Changed the README file of partners/ibm following the docs on https://python.langchain.com/docs/integrations/llms/ibm_watsonx\\r\\n\\r\\nThe README includes:\\r\\n\\r\\n- Brief description\\r\\n- Installation\\r\\n- Setting-up instructions (API key, project id, ...)\\r\\n- Basic usage:\\r\\n  - Loading the model\\r\\n  - Direct inference\\r\\n  - Chain invoking\\r\\n  - Streaming the model output\\r\\n \\r\\nIssue: https://github.com/langchain-ai/langchain/issues/17545\\r\\n\\r\\nDependencies: None\\r\\n\\r\\nTwitter handle: None'),\n",
" Document(page_content='# Updated partners/ibm README\\nby williamdevena\\n\\n## PR title\\r\\npartners: changed the README file for the IBM Watson AI integration in the `libs/partners/ibm` folder. \\r\\n\\r\\n\\r\\n\\r\\n## PR message\\r\\n- **Description:** Changed the README file of partners/ibm following the docs on https://python.langchain.com/docs/integrations/llms/ibm_watsonx\\r\\n\\r\\n  The README includes:\\r\\n  - Brief description\\r\\n  - Installation\\r\\n  - Setting-up instructions (API key, project id, ...)\\r\\n  - Basic usage:\\r\\n    - Loading the model\\r\\n    - Direct inference\\r\\n    - Chain invoking\\r\\n    - Streaming the model output\\r\\n\\r\\n\\r\\n- **Issue:** #17545\\r\\n- **Dependencies:** None\\r\\n- **Twitter handle:** None'),\n",
" Document(page_content='# IBM: added partners package `langchain_ibm`, added llm\\nby MateuszOssGit\\n\\n  - **Description:** Added `langchain_ibm` as an langchain partners package of IBM [watsonx.ai](https://www.ibm.com/products/watsonx-ai) LLM provider (`WatsonxLLM`)\\r\\n  - **Dependencies:** [ibm-watsonx-ai](https://pypi.org/project/ibm-watsonx-ai/),\\r\\n  - **Tag maintainer:** : \\r\\n\\r\\nPlease make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally. ✅'),\n",
" Document(page_content='# Add WatsonX support\\nby baptistebignaud\\n\\nIt is a connector to use a LLM from WatsonX.\\r\\nIt requires python SDK \"ibm-generative-ai\"\\r\\n\\r\\n(It might not be perfect since it is my first PR on a public repository 😄)')]"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever.invoke(\"pull requests related to IBM\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
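Aside: the notebook above materializes all 10,283 pull-request documents in memory with `loader.load()`. A minimal sketch (not part of the diff, and assuming the same loader configuration) of streaming the documents lazily instead, which keeps memory flat for large streams; `handle` is a hypothetical downstream function:

from langchain_airbyte import AirbyteLoader

loader = AirbyteLoader(
    source="source-github",
    stream="pull_requests",
    config={
        "credentials": {"personal_access_token": GITHUB_TOKEN},  # collected via getpass above
        "repositories": ["langchain-ai/langchain"],
    },
)

# lazy_load() yields one Document at a time instead of building the full list.
for doc in loader.lazy_load():
    handle(doc)  # hypothetical consumer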
@@ -14,9 +14,9 @@
 "\n",
 "This notebook shows you how to use LangChain's standard chat features while passing the chat messages back and forth via Apache Kafka.\n",
 "\n",
-"This goal is to simulate an architecture where the chat front end and the LLM are running as separate services that need to communicate with one another over an internal nework.\n",
+"This goal is to simulate an architecture where the chat front end and the LLM are running as separate services that need to communicate with one another over an internal network.\n",
 "\n",
-"It's an alternative to typical pattern of requesting a reponse from the model via a REST API (there's more info on why you would want to do this at the end of the notebook)."
+"It's an alternative to typical pattern of requesting a response from the model via a REST API (there's more info on why you would want to do this at the end of the notebook)."
 ]
 },
 {
@@ -261,7 +261,7 @@
 "\n",
 "Load Llama 2 and set the conversation buffer to 300 tokens using `ConversationTokenBufferMemory`. This value was used for running Llama in a CPU only container, so you can raise it if running in Google Colab. It prevents the container that is hosting the model from running out of memory.\n",
 "\n",
-"Here, we're overiding the default system persona so that the chatbot has the personality of Marvin The Paranoid Android from the Hitchhiker's Guide to the Galaxy."
+"Here, we're overriding the default system persona so that the chatbot has the personality of Marvin The Paranoid Android from the Hitchhiker's Guide to the Galaxy."
 ]
 },
 {
@@ -272,7 +272,7 @@
 },
 "outputs": [],
 "source": [
-"# Load the model with the apporiate parameters:\n",
+"# Load the model with the appropriate parameters:\n",
 "llm = LlamaCpp(\n",
 "    model_path=model_path,\n",
 "    max_tokens=250,\n",
@@ -551,7 +551,7 @@
 "\n",
 " * **Scalability**: Apache Kafka is designed with parallel processing in mind, so many teams prefer to use it to more effectively distribute work to available workers (in this case the \"worker\" is a container running an LLM).\n",
 "\n",
-" * **Durability**: Kafka is designed to allow services to pick up where another service left off in the case where that service experienced a memory issue or went offline. This prevents data loss in highly complex, distribuited architectures where multiple systems are communicating with one another (LLMs being just one of many interdependent systems that also include vector databases and traditional databases).\n",
+" * **Durability**: Kafka is designed to allow services to pick up where another service left off in the case where that service experienced a memory issue or went offline. This prevents data loss in highly complex, distributed architectures where multiple systems are communicating with one another (LLMs being just one of many interdependent systems that also include vector databases and traditional databases).\n",
 "\n",
 "For more background on why event streaming is a good fit for Gen AI application architecture, see Kai Waehner's article [\"Apache Kafka + Vector Database + LLM = Real-Time GenAI\"](https://www.kai-waehner.de/blog/2023/11/08/apache-kafka-flink-vector-database-llm-real-time-genai/)."
 ]
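The hunks above touch the Apache Kafka chat notebook, whose premise is that the chat front end and the LLM run as separate services exchanging messages over Kafka topics. A minimal sketch of that message flow using kafka-python (the notebook itself may use a different client; the topic names and JSON envelope are illustrative assumptions):

import json

from kafka import KafkaConsumer, KafkaProducer

producer = KafkaProducer(
    bootstrap_servers="localhost:9092",
    value_serializer=lambda m: json.dumps(m).encode("utf-8"),
)
# The chat front end publishes user turns to a request topic...
producer.send("chat-requests", {"session": "abc", "text": "Hello, Marvin"})
producer.flush()

# ...and the LLM service consumes them, generates a reply, and publishes it back.
consumer = KafkaConsumer(
    "chat-requests",
    bootstrap_servers="localhost:9092",
    value_deserializer=lambda m: json.loads(m.decode("utf-8")),
)
for message in consumer:
    reply = llm.invoke(message.value["text"])  # llm as built in the notebook
    producer.send("chat-replies", {"session": message.value["session"], "text": reply})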
@@ -227,8 +227,8 @@
 "    BaseCombineDocumentsChain,\n",
 "    load_qa_with_sources_chain,\n",
 ")\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain.tools import BaseTool, DuckDuckGoSearchRun\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "from pydantic import Field\n",
 "\n",
 "\n",
@@ -24,7 +24,7 @@
 "source": [
 "1. Prepare data:\n",
 "   1. Upload all python project files using the `langchain_community.document_loaders.TextLoader`. We will call these files the **documents**.\n",
-"   2. Split all documents to chunks using the `langchain.text_splitter.CharacterTextSplitter`.\n",
+"   2. Split all documents to chunks using the `langchain_text_splitters.CharacterTextSplitter`.\n",
 "   3. Embed chunks and upload them into the DeepLake using `langchain.embeddings.openai.OpenAIEmbeddings` and `langchain_community.vectorstores.DeepLake`\n",
 "2. Question-Answering:\n",
 "   1. Build a chain from `langchain.chat_models.ChatOpenAI` and `langchain.chains.ConversationalRetrievalChain`\n",
@@ -621,7 +621,7 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
 "texts = text_splitter.split_documents(docs)\n",
1001 cookbook/data/imdb_top_1000.csv Normal file
File diff suppressed because it is too large
@@ -52,12 +52,12 @@
 "import os\n",
 "\n",
 "from langchain.chains import RetrievalQA\n",
-"from langchain.text_splitter import (\n",
+"from langchain_community.vectorstores import DeepLake\n",
+"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import (\n",
 "    CharacterTextSplitter,\n",
 "    RecursiveCharacterTextSplitter,\n",
 ")\n",
-"from langchain_community.vectorstores import DeepLake\n",
-"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
 "\n",
 "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
 "activeloop_token = getpass.getpass(\"Activeloop Token:\")\n",
245 cookbook/fireworks_rag.ipynb Normal file
@@ -0,0 +1,245 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "0fc0309d-4d49-4bb5-bec0-bd92c6fddb28",
"metadata": {},
"source": [
"## Fireworks.AI + LangChain + RAG\n",
" \n",
"[Fireworks AI](https://python.langchain.com/docs/integrations/llms/fireworks) wants to provide the best experience when working with LangChain, and here is an example of Fireworks + LangChain doing RAG.\n",
"\n",
"See [our models page](https://fireworks.ai/models) for the full list of models. We use `accounts/fireworks/models/mixtral-8x7b-instruct` for RAG in this tutorial.\n",
"\n",
"For the RAG target, we will use the Gemma technical report https://storage.googleapis.com/deepmind-media/gemma/gemma-report.pdf "
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "d12fb75a-f707-48d5-82a5-efe2d041813c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Found existing installation: langchain-fireworks 0.0.1\n",
"Uninstalling langchain-fireworks-0.0.1:\n",
"  Successfully uninstalled langchain-fireworks-0.0.1\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Obtaining file:///mnt/disks/data/langchain/libs/partners/fireworks\n",
"  Installing build dependencies ... \u001b[?25ldone\n",
"\u001b[?25h  Checking if build backend supports build_editable ... \u001b[?25ldone\n",
"\u001b[?25h  Getting requirements to build editable ... \u001b[?25ldone\n",
"\u001b[?25h  Preparing editable metadata (pyproject.toml) ... \u001b[?25ldone\n",
"\u001b[?25hRequirement already satisfied: aiohttp<4.0.0,>=3.9.1 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-fireworks==0.0.1) (3.9.3)\n",
"Requirement already satisfied: fireworks-ai<0.13.0,>=0.12.0 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-fireworks==0.0.1) (0.12.0)\n",
"Requirement already satisfied: langchain-core<0.2,>=0.1 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-fireworks==0.0.1) (0.1.23)\n",
"Requirement already satisfied: requests<3,>=2 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-fireworks==0.0.1) (2.31.0)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.9.1->langchain-fireworks==0.0.1) (1.3.1)\n",
"Requirement already satisfied: attrs>=17.3.0 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.9.1->langchain-fireworks==0.0.1) (23.1.0)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.9.1->langchain-fireworks==0.0.1) (1.4.0)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.9.1->langchain-fireworks==0.0.1) (6.0.4)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.9.1->langchain-fireworks==0.0.1) (1.9.2)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.9.1->langchain-fireworks==0.0.1) (4.0.3)\n",
"Requirement already satisfied: httpx in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (0.26.0)\n",
"Requirement already satisfied: httpx-sse in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (0.4.0)\n",
"Requirement already satisfied: pydantic in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (2.4.2)\n",
"Requirement already satisfied: Pillow in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (10.2.0)\n",
"Requirement already satisfied: PyYAML>=5.3 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (6.0.1)\n",
"Requirement already satisfied: anyio<5,>=3 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (3.7.1)\n",
"Requirement already satisfied: jsonpatch<2.0,>=1.33 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (1.33)\n",
"Requirement already satisfied: langsmith<0.2.0,>=0.1.0 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (0.1.5)\n",
"Requirement already satisfied: packaging<24.0,>=23.2 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (23.2)\n",
"Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (8.2.3)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from requests<3,>=2->langchain-fireworks==0.0.1) (3.3.0)\n",
"Requirement already satisfied: idna<4,>=2.5 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from requests<3,>=2->langchain-fireworks==0.0.1) (3.4)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from requests<3,>=2->langchain-fireworks==0.0.1) (2.0.6)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from requests<3,>=2->langchain-fireworks==0.0.1) (2023.7.22)\n",
"Requirement already satisfied: sniffio>=1.1 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from anyio<5,>=3->langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (1.3.0)\n",
"Requirement already satisfied: exceptiongroup in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from anyio<5,>=3->langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (1.1.3)\n",
"Requirement already satisfied: jsonpointer>=1.9 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (2.4)\n",
"Requirement already satisfied: annotated-types>=0.4.0 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from pydantic->fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (0.5.0)\n",
"Requirement already satisfied: pydantic-core==2.10.1 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from pydantic->fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (2.10.1)\n",
"Requirement already satisfied: typing-extensions>=4.6.1 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from pydantic->fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (4.8.0)\n",
"Requirement already satisfied: httpcore==1.* in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from httpx->fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (1.0.2)\n",
"Requirement already satisfied: h11<0.15,>=0.13 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from httpcore==1.*->httpx->fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (0.14.0)\n",
"Building wheels for collected packages: langchain-fireworks\n",
"  Building editable for langchain-fireworks (pyproject.toml) ... \u001b[?25ldone\n",
"\u001b[?25h  Created wheel for langchain-fireworks: filename=langchain_fireworks-0.0.1-py3-none-any.whl size=2228 sha256=564071b120b09ec31f2dc737733448a33bbb26e40b49fcde0c129ad26045259d\n",
"  Stored in directory: /tmp/pip-ephem-wheel-cache-oz368vdk/wheels/e0/ad/31/d7e76dd73d61905ff7f369f5b0d21a4b5e7af4d3cb7487aece\n",
"Successfully built langchain-fireworks\n",
"Installing collected packages: langchain-fireworks\n",
"Successfully installed langchain-fireworks-0.0.1\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install --quiet pypdf chromadb tiktoken openai \n",
"%pip uninstall -y langchain-fireworks\n",
"%pip install --editable /mnt/disks/data/langchain/libs/partners/fireworks"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "cf719376",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<module 'fireworks' from '/mnt/disks/data/langchain/.venv/lib/python3.9/site-packages/fireworks/__init__.py'>\n"
]
}
],
"source": [
"import fireworks\n",
"\n",
"print(fireworks)\n",
"import fireworks.client"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9ab49327-0532-4480-804c-d066c302a322",
"metadata": {},
"outputs": [],
"source": [
"# Load\n",
"import requests\n",
"from langchain_community.document_loaders import PyPDFLoader\n",
"\n",
"# Download the PDF from a URL and save it to a temporary location\n",
"url = \"https://storage.googleapis.com/deepmind-media/gemma/gemma-report.pdf\"\n",
"response = requests.get(url, stream=True)\n",
"file_name = \"temp_file.pdf\"\n",
"with open(file_name, \"wb\") as pdf:\n",
"    pdf.write(response.content)\n",
"\n",
"loader = PyPDFLoader(file_name)\n",
"data = loader.load()\n",
"\n",
"# Split\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)\n",
"all_splits = text_splitter.split_documents(data)\n",
"\n",
"# Add to vectorDB\n",
"from langchain_community.vectorstores import Chroma\n",
"from langchain_fireworks.embeddings import FireworksEmbeddings\n",
"\n",
"vectorstore = Chroma.from_documents(\n",
"    documents=all_splits,\n",
"    collection_name=\"rag-chroma\",\n",
"    embedding=FireworksEmbeddings(),\n",
")\n",
"\n",
"retriever = vectorstore.as_retriever()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4efaddd9-3dbb-455c-ba54-0ad7f2d2ce0f",
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.pydantic_v1 import BaseModel\n",
"from langchain_core.runnables import RunnableParallel, RunnablePassthrough\n",
"\n",
"# RAG prompt\n",
"template = \"\"\"Answer the question based only on the following context:\n",
"{context}\n",
"\n",
"Question: {question}\n",
"\"\"\"\n",
"prompt = ChatPromptTemplate.from_template(template)\n",
"\n",
"# LLM\n",
"from langchain_together import Together\n",
"\n",
"llm = Together(\n",
"    model=\"mistralai/Mixtral-8x7B-Instruct-v0.1\",\n",
"    temperature=0.0,\n",
"    max_tokens=2000,\n",
"    top_k=1,\n",
")\n",
"\n",
"# RAG chain\n",
"chain = (\n",
"    RunnableParallel({\"context\": retriever, \"question\": RunnablePassthrough()})\n",
"    | prompt\n",
"    | llm\n",
"    | StrOutputParser()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "88b1ee51-1b0f-4ebf-bb32-e50e843f0eeb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\nAnswer: The architectural details of Mixtral are as follows:\\n- Dimension (dim): 4096\\n- Number of layers (n\\\\_layers): 32\\n- Dimension of each head (head\\\\_dim): 128\\n- Hidden dimension (hidden\\\\_dim): 14336\\n- Number of heads (n\\\\_heads): 32\\n- Number of kv heads (n\\\\_kv\\\\_heads): 8\\n- Context length (context\\\\_len): 32768\\n- Vocabulary size (vocab\\\\_size): 32000\\n- Number of experts (num\\\\_experts): 8\\n- Number of top k experts (top\\\\_k\\\\_experts): 2\\n\\nMixtral is based on a transformer architecture and uses the same modifications as described in [18], with the notable exceptions that Mixtral supports a fully dense context length of 32k tokens, and the feedforward block picks from a set of 8 distinct groups of parameters. At every layer, for every token, a router network chooses two of these groups (the “experts”) to process the token and combine their output additively. This technique increases the number of parameters of a model while controlling cost and latency, as the model only uses a fraction of the total set of parameters per token. Mixtral is pretrained with multilingual data using a context size of 32k tokens. It either matches or exceeds the performance of Llama 2 70B and GPT-3.5, over several benchmarks. In particular, Mixtral vastly outperforms Llama 2 70B on mathematics, code generation, and multilingual benchmarks.'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.invoke(\"What are the Architectural details of Mixtral?\")"
]
},
{
"cell_type": "markdown",
"id": "755cf871-26b7-4e30-8b91-9ffd698470f4",
"metadata": {},
"source": [
"Trace: \n",
"\n",
"https://smith.langchain.com/public/935fd642-06a6-4b42-98e3-6074f93115cd/r"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
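One inconsistency worth flagging in the notebook above: the intro cell says the tutorial uses `accounts/fireworks/models/mixtral-8x7b-instruct`, but the LLM cell imports `Together` and instantiates `mistralai/Mixtral-8x7B-Instruct-v0.1`. A sketch of a Fireworks-native equivalent, assuming the `langchain-fireworks` package installed earlier in the notebook exposes the `Fireworks` LLM with these parameters:

from langchain_fireworks import Fireworks

# Same Mixtral model as named in the intro, served by Fireworks rather than Together.
llm = Fireworks(
    model="accounts/fireworks/models/mixtral-8x7b-instruct",
    temperature=0.0,
    max_tokens=2000,
)
# The rest of the chain (RunnableParallel | prompt | llm | StrOutputParser) is unchanged.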
@@ -170,8 +170,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.vectorstores import Chroma\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "with open(\"../../state_of_the_union.txt\") as f:\n",
 "    state_of_the_union = f.read()\n",
@@ -50,7 +50,7 @@
 "\n",
 "checker_chain = LLMCheckerChain.from_llm(llm, verbose=True)\n",
 "\n",
-"checker_chain.run(text)"
+"checker_chain.invoke(text)"
 ]
 },
 {
@@ -51,7 +51,7 @@
 "llm = OpenAI(temperature=0)\n",
 "llm_math = LLMMathChain.from_llm(llm, verbose=True)\n",
 "\n",
-"llm_math.run(\"What is 13 raised to the .3432 power?\")"
+"llm_math.invoke(\"What is 13 raised to the .3432 power?\")"
 ]
 },
 {
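The two hunks above are part of a broader migration from the deprecated `Chain.run` to `Chain.invoke`. One behavioral difference to keep in mind, sketched below: `run` returned the bare output string, while `invoke` returns a dict of the chain's outputs, so callers typically index the result (the `answer` key is specific to `LLMMathChain`; other chains use their own output keys):

result = llm_math.invoke("What is 13 raised to the .3432 power?")
# result is a dict such as {'question': '...', 'answer': 'Answer: 2.41...'}
answer = result["answer"]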
@@ -124,7 +124,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "text_splitter = CharacterTextSplitter.from_tiktoken_encoder(\n",
 "    chunk_size=7500, chunk_overlap=100\n",
@@ -20,10 +20,10 @@
 "outputs": [],
 "source": [
 "from langchain.chains import RetrievalQA\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.document_loaders import TextLoader\n",
 "from langchain_community.vectorstores import Chroma\n",
-"from langchain_openai import OpenAIEmbeddings"
+"from langchain_openai import OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter"
 ]
 },
 {
648 cookbook/optimization.ipynb Normal file
@@ -0,0 +1,648 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c7fe38bc",
"metadata": {},
"source": [
"# Optimization\n",
"\n",
"This notebook goes over how to optimize chains using LangChain and [LangSmith](https://smith.langchain.com)."
]
},
{
"cell_type": "markdown",
"id": "2f87ccd5",
"metadata": {},
"source": [
"## Set up\n",
"\n",
"We will set an environment variable for LangSmith, and load the relevant data"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "236bedc5",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"LANGCHAIN_PROJECT\"] = \"movie-qa\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a3fed0dd",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "7cfff337",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(\"data/imdb_top_1000.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "2d20fb9c",
"metadata": {},
"outputs": [],
"source": [
"df[\"Released_Year\"] = df[\"Released_Year\"].astype(int, errors=\"ignore\")"
]
},
{
"cell_type": "markdown",
"id": "09fc8fe2",
"metadata": {},
"source": [
"## Create the initial retrieval chain\n",
"\n",
"We will use a self-query retriever"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f71e24e2",
"metadata": {},
"outputs": [],
"source": [
"from langchain.schema import Document\n",
"from langchain_community.vectorstores import Chroma\n",
"from langchain_openai import OpenAIEmbeddings\n",
"\n",
"embeddings = OpenAIEmbeddings()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "8881ea8e",
"metadata": {},
"outputs": [],
"source": [
"records = df.to_dict(\"records\")\n",
"documents = [Document(page_content=d[\"Overview\"], metadata=d) for d in records]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "8f495423",
"metadata": {},
"outputs": [],
"source": [
"vectorstore = Chroma.from_documents(documents, embeddings)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "31d33d62",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"metadata_field_info = [\n",
"    AttributeInfo(\n",
"        name=\"Released_Year\",\n",
"        description=\"The year the movie was released\",\n",
"        type=\"int\",\n",
"    ),\n",
"    AttributeInfo(\n",
"        name=\"Series_Title\",\n",
"        description=\"The title of the movie\",\n",
"        type=\"str\",\n",
"    ),\n",
"    AttributeInfo(\n",
"        name=\"Genre\",\n",
"        description=\"The genre of the movie\",\n",
"        type=\"string\",\n",
"    ),\n",
"    AttributeInfo(\n",
"        name=\"IMDB_Rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n",
"    ),\n",
"]\n",
"document_content_description = \"Brief summary of a movie\"\n",
"llm = ChatOpenAI(temperature=0)\n",
"retriever = SelfQueryRetriever.from_llm(\n",
"    llm, vectorstore, document_content_description, metadata_field_info, verbose=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "a731533b",
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.runnables import RunnablePassthrough"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "05181849",
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "feed4be6",
"metadata": {},
"outputs": [],
"source": [
"prompt = ChatPromptTemplate.from_template(\n",
"    \"\"\"Answer the user's question based on the below information:\n",
"\n",
"Information:\n",
"\n",
"{info}\n",
"\n",
"Question: {question}\"\"\"\n",
")\n",
"generator = (prompt | ChatOpenAI() | StrOutputParser()).with_config(\n",
"    run_name=\"generator\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "eb16cc9a",
"metadata": {},
"outputs": [],
"source": [
"chain = (\n",
"    RunnablePassthrough.assign(info=(lambda x: x[\"question\"]) | retriever) | generator\n",
")"
]
},
{
"cell_type": "markdown",
"id": "c70911cc",
"metadata": {},
"source": [
"## Run examples\n",
"\n",
"Run examples through the chain. This can be done manually, with a list of examples, or with production traffic"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "19a88d13",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'One of the horror movies released in the early 2000s is \"The Ring\" (2002), directed by Gore Verbinski.'"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.invoke({\"question\": \"what is a horror movie released in early 2000s\"})"
]
},
{
"cell_type": "markdown",
"id": "17f9cdae",
"metadata": {},
"source": [
"## Annotate\n",
"\n",
"Now, go to LangSmith and annotate those examples as correct or incorrect"
]
},
{
"cell_type": "markdown",
"id": "5e211da6",
"metadata": {},
"source": [
"## Create Dataset\n",
"\n",
"We can now create a dataset from those runs.\n",
"\n",
"What we will do is find the runs marked as correct, then grab the sub-chains from them. Specifically, the query generator sub-chain and the final generation step"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "e4024267",
"metadata": {},
"outputs": [],
"source": [
"from langsmith import Client\n",
"\n",
"client = Client()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "3814efc5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"14"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"runs = list(\n",
"    client.list_runs(\n",
"        project_name=\"movie-qa\",\n",
"        execution_order=1,\n",
"        filter=\"and(eq(feedback_key, 'correctness'), eq(feedback_score, 1))\",\n",
"    )\n",
")\n",
"\n",
"len(runs)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "3eb123e0",
"metadata": {},
"outputs": [],
"source": [
"gen_runs = []\n",
"query_runs = []\n",
"for r in runs:\n",
"    gen_runs.extend(\n",
"        list(\n",
"            client.list_runs(\n",
"                project_name=\"movie-qa\",\n",
"                filter=\"eq(name, 'generator')\",\n",
"                trace_id=r.trace_id,\n",
"            )\n",
"        )\n",
"    )\n",
"    query_runs.extend(\n",
"        list(\n",
"            client.list_runs(\n",
"                project_name=\"movie-qa\",\n",
"                filter=\"eq(name, 'query_constructor')\",\n",
"                trace_id=r.trace_id,\n",
"            )\n",
"        )\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "a4397026",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'question': 'what is a high school comedy released in early 2000s'}"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"runs[0].inputs"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "3fa6ad2a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'output': 'One high school comedy released in the early 2000s is \"Mean Girls\" starring Lindsay Lohan, Rachel McAdams, and Tina Fey.'}"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"runs[0].outputs"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "1fda5b4b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'query': 'what is a high school comedy released in early 2000s'}"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_runs[0].inputs"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "1a1a51e6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'output': {'query': 'high school comedy',\n",
"  'filter': {'operator': 'and',\n",
"   'arguments': [{'comparator': 'eq', 'attribute': 'Genre', 'value': 'comedy'},\n",
"    {'operator': 'and',\n",
"     'arguments': [{'comparator': 'gte',\n",
"       'attribute': 'Released_Year',\n",
"       'value': 2000},\n",
"      {'comparator': 'lt', 'attribute': 'Released_Year', 'value': 2010}]}]}}}"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_runs[0].outputs"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "e9d9966b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'question': 'what is a high school comedy released in early 2000s',\n",
" 'info': []}"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gen_runs[0].inputs"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "bc113f3d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'output': 'One high school comedy released in the early 2000s is \"Mean Girls\" starring Lindsay Lohan, Rachel McAdams, and Tina Fey.'}"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gen_runs[0].outputs"
]
},
{
"cell_type": "markdown",
"id": "6cca74e5",
"metadata": {},
"source": [
"## Create datasets\n",
"\n",
"We can now create datasets for the query generation and final generation step.\n",
"We do this so that (1) we can inspect the datapoints, (2) we can edit them if needed, (3) we can add to them over time"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "69966f0e",
"metadata": {},
"outputs": [],
"source": [
"client.create_dataset(\"movie-query_constructor\")\n",
"\n",
"inputs = [r.inputs for r in query_runs]\n",
"outputs = [r.outputs for r in query_runs]\n",
"\n",
"client.create_examples(\n",
"    inputs=inputs, outputs=outputs, dataset_name=\"movie-query_constructor\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "7e15770e",
"metadata": {},
"outputs": [],
"source": [
"client.create_dataset(\"movie-generator\")\n",
"\n",
"inputs = [r.inputs for r in gen_runs]\n",
"outputs = [r.outputs for r in gen_runs]\n",
"\n",
"client.create_examples(inputs=inputs, outputs=outputs, dataset_name=\"movie-generator\")"
]
},
{
"cell_type": "markdown",
"id": "61cf9bcd",
"metadata": {},
"source": [
"## Use as few-shot examples\n",
"\n",
"We can now pull down a dataset and use its entries as few-shot examples in a future chain"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "d9c79173",
"metadata": {},
"outputs": [],
"source": [
"examples = list(client.list_examples(dataset_name=\"movie-query_constructor\"))"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "a1771dd0",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"\n",
"def filter_to_string(_filter):\n",
"    if \"operator\" in _filter:\n",
"        args = [filter_to_string(f) for f in _filter[\"arguments\"]]\n",
"        return f\"{_filter['operator']}({','.join(args)})\"\n",
"    else:\n",
"        comparator = _filter[\"comparator\"]\n",
"        attribute = json.dumps(_filter[\"attribute\"])\n",
"        value = json.dumps(_filter[\"value\"])\n",
"        return f\"{comparator}({attribute}, {value})\""
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "e67a3530",
"metadata": {},
"outputs": [],
"source": [
"model_examples = []\n",
"\n",
"for e in examples:\n",
"    if \"filter\" in e.outputs[\"output\"]:\n",
"        string_filter = filter_to_string(e.outputs[\"output\"][\"filter\"])\n",
"    else:\n",
"        string_filter = \"NO_FILTER\"\n",
"    model_examples.append(\n",
"        (\n",
"            e.inputs[\"query\"],\n",
"            {\"query\": e.outputs[\"output\"][\"query\"], \"filter\": string_filter},\n",
"        )\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "84593135",
"metadata": {},
"outputs": [],
"source": [
"retriever1 = SelfQueryRetriever.from_llm(\n",
"    llm,\n",
"    vectorstore,\n",
"    document_content_description,\n",
"    metadata_field_info,\n",
"    verbose=True,\n",
"    chain_kwargs={\"examples\": model_examples},\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "4ec9bb92",
"metadata": {},
"outputs": [],
"source": [
"chain1 = (\n",
"    RunnablePassthrough.assign(info=(lambda x: x[\"question\"]) | retriever1) | generator\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "64eb88e2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'1. \"Saving Private Ryan\" (1998) - Directed by Steven Spielberg, this war film follows a group of soldiers during World War II as they search for a missing paratrooper.\\n\\n2. \"The Matrix\" (1999) - Directed by the Wachowskis, this science fiction action film follows a computer hacker who discovers the truth about the reality he lives in.\\n\\n3. \"Lethal Weapon 4\" (1998) - Directed by Richard Donner, this action-comedy film follows two mismatched detectives as they investigate a Chinese immigrant smuggling ring.\\n\\n4. \"The Fifth Element\" (1997) - Directed by Luc Besson, this science fiction action film follows a cab driver who must protect a mysterious woman who holds the key to saving the world.\\n\\n5. \"The Rock\" (1996) - Directed by Michael Bay, this action thriller follows a group of rogue military men who take over Alcatraz and threaten to launch missiles at San Francisco.'"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain1.invoke(\n",
"    {\"question\": \"what are good action movies made before 2000 but after 1997?\"}\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e1ee8b55",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
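As a self-contained check of the `filter_to_string` helper defined in the notebook above, here is the string it produces for the query-constructor output captured earlier; the expected value is derived by hand, not taken from the diff:

import json


def filter_to_string(_filter):
    # Same logic as the notebook cell: operators recurse, comparators format leaves.
    if "operator" in _filter:
        args = [filter_to_string(f) for f in _filter["arguments"]]
        return f"{_filter['operator']}({','.join(args)})"
    comparator = _filter["comparator"]
    attribute = json.dumps(_filter["attribute"])
    value = json.dumps(_filter["value"])
    return f"{comparator}({attribute}, {value})"


# The filter captured from query_runs[0].outputs above.
captured = {
    "operator": "and",
    "arguments": [
        {"comparator": "eq", "attribute": "Genre", "value": "comedy"},
        {
            "operator": "and",
            "arguments": [
                {"comparator": "gte", "attribute": "Released_Year", "value": 2000},
                {"comparator": "lt", "attribute": "Released_Year", "value": 2010},
            ],
        },
    ],
}

assert filter_to_string(captured) == (
    'and(eq("Genre", "comedy"),'
    'and(gte("Released_Year", 2000),lt("Released_Year", 2010)))'
)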
@@ -59,13 +59,13 @@
 "from baidubce.auth.bce_credentials import BceCredentials\n",
 "from baidubce.bce_client_configuration import BceClientConfiguration\n",
 "from langchain.chains.retrieval_qa import RetrievalQA\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
 "from langchain_community.document_loaders.baiducloud_bos_directory import (\n",
 "    BaiduBOSDirectoryLoader,\n",
 ")\n",
 "from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings\n",
 "from langchain_community.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint\n",
-"from langchain_community.vectorstores import BESVectorStore"
+"from langchain_community.vectorstores import BESVectorStore\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter"
 ]
 },
 {
@@ -36,9 +36,6 @@
 "from bs4 import BeautifulSoup as Soup\n",
 "from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
 "from langchain.storage import InMemoryByteStore, LocalFileStore\n",
-"\n",
-"# For our example, we'll load docs from the web\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter # noqa\n",
 "from langchain_community.document_loaders.recursive_url_loader import (\n",
 "    RecursiveUrlLoader,\n",
 ")\n",
@@ -46,6 +43,9 @@
 "# noqa\n",
 "from langchain_community.vectorstores import Chroma\n",
 "\n",
+"# For our example, we'll load docs from the web\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter # noqa\n",
+"\n",
 "DOCSTORE_DIR = \".\"\n",
 "DOCSTORE_ID_KEY = \"doc_id\""
 ]
@@ -51,11 +51,11 @@
 "from langchain.chains.base import Chain\n",
 "from langchain.prompts import PromptTemplate\n",
 "from langchain.prompts.base import StringPromptTemplate\n",
-"from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain_community.llms import BaseLLM\n",
 "from langchain_community.vectorstores import Chroma\n",
 "from langchain_core.agents import AgentAction, AgentFinish\n",
 "from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "from pydantic import BaseModel, Field"
 ]
 },
@@ -1083,7 +1083,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain_community.vectorstores import ElasticsearchStore\n",
+"from langchain_elasticsearch import ElasticsearchStore\n",
 "from langchain_openai import OpenAIEmbeddings\n",
 "\n",
 "embeddings = OpenAIEmbeddings()"
@@ -209,7 +209,7 @@
 }
 ],
 "source": [
-"chain.run({})"
+"chain.invoke({})"
 ]
 },
 {
@@ -39,7 +39,7 @@
 "data = loader.load()\n",
 "\n",
 "# Split\n",
-"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
 "\n",
 "text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)\n",
 "all_splits = text_splitter.split_documents(data)\n",
@@ -2610,7 +2610,7 @@
 }
 ],
 "source": [
-"from langchain.text_splitter import CharacterTextSplitter\n",
+"from langchain_text_splitters import CharacterTextSplitter\n",
 "\n",
 "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
 "texts = text_splitter.split_documents(docs)"
@@ -1,5 +1,10 @@
 # docker-compose to make it easier to spin up integration tests.
 # Services should use NON standard ports to avoid collision with
 # any existing services that might be used for development.
+# ATTENTION: When adding a service below use a non-standard port
+# increment by one from the preceding port.
+# For credentials always use `langchain` and `langchain` for the
+# username and password.
 version: "3"
 name: langchain-tests
@@ -19,3 +24,34 @@ services:
     image: graphdb
     ports:
       - "6021:7200"
+  mongo:
+    image: mongo:latest
+    container_name: mongo_container
+    ports:
+      - "6022:27017"
+    environment:
+      MONGO_INITDB_ROOT_USERNAME: langchain
+      MONGO_INITDB_ROOT_PASSWORD: langchain
+  postgres:
+    image: postgres:16
+    environment:
+      POSTGRES_DB: langchain
+      POSTGRES_USER: langchain
+      POSTGRES_PASSWORD: langchain
+    ports:
+      - "6023:5432"
+    command: |
+      postgres -c log_statement=all
+    healthcheck:
+      test:
+        [
+          "CMD-SHELL",
+          "psql postgresql://langchain:langchain@localhost/langchain --command 'SELECT 1;' || exit 1",
+        ]
+      interval: 5s
+      retries: 60
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+
+volumes:
+  postgres_data:
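A quick way to confirm the new postgres service is reachable, mirroring the healthcheck's `SELECT 1;` probe; the psycopg2 driver choice is an assumption, while the port and the langchain/langchain credentials come straight from the compose file above:

import psycopg2

# Port 6023 maps to the container's 5432 per the compose file.
conn = psycopg2.connect(
    host="localhost",
    port=6023,
    dbname="langchain",
    user="langchain",
    password="langchain",
)
with conn, conn.cursor() as cur:
    cur.execute("SELECT 1;")  # same probe the compose healthcheck runs via psql
    assert cur.fetchone() == (1,)
conn.close()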
1 docs/.gitignore vendored Normal file
@@ -0,0 +1 @@
/.quarto/
1 docs/.yarnrc.yml Normal file
@@ -0,0 +1 @@
nodeLinker: node-modules
@@ -1,4 +1,5 @@
+"""Configuration file for the Sphinx documentation builder."""
 # Configuration file for the Sphinx documentation builder.
 #
 # This file only contains a selection of the most common options. For a full
@@ -174,3 +175,6 @@ myst_enable_extensions = ["colon_fence"]

 # generate autosummary even if no references
 autosummary_generate = True
+
+html_copy_source = False
+html_show_sourcelink = False
@@ -3,6 +3,7 @@
 import importlib
 import inspect
 import os
+import sys
 import typing
 from enum import Enum
 from pathlib import Path
@@ -306,7 +307,14 @@

 def _package_dir(package_name: str = "langchain") -> Path:
     """Return the path to the directory containing the documentation."""
-    if package_name in ("langchain", "experimental", "community", "core", "cli"):
+    if package_name in (
+        "langchain",
+        "experimental",
+        "community",
+        "core",
+        "cli",
+        "text-splitters",
+    ):
         return ROOT_DIR / "libs" / package_name / _package_namespace(package_name)
     else:
         return (
@@ -344,28 +352,29 @@
     return f".. {package_name.replace('-', '_')}_api_reference:\n\n"


-def main() -> None:
+def main(dirs: Optional[list] = None) -> None:
     """Generate the api_reference.rst file for each package."""
     print("Starting to build API reference files.")
-    for dir in os.listdir(ROOT_DIR / "libs"):
+    if not dirs:
+        dirs = [
+            dir_
+            for dir_ in os.listdir(ROOT_DIR / "libs")
+            if dir_ not in ("cli", "partners")
+        ]
+        dirs += os.listdir(ROOT_DIR / "libs" / "partners")
+    for dir_ in dirs:
         # Skip any hidden directories
         # Some of these could be present by mistake in the code base
         # e.g., .pytest_cache from running tests from the wrong location.
-        if dir.startswith("."):
-            print("Skipping dir:", dir)
-            continue
-
-        if dir in ("cli", "partners"):
+        if dir_.startswith("."):
+            print("Skipping dir:", dir_)
             continue
-        else:
-            print("Building package:", dir)
-            _build_rst_file(package_name=dir)
-    partner_packages = os.listdir(ROOT_DIR / "libs" / "partners")
-    print("Building partner packages:", partner_packages)
-    for dir in partner_packages:
-        _build_rst_file(package_name=dir)
+        print("Building package:", dir_)
+        _build_rst_file(package_name=dir_)
     print("API reference files built.")


 if __name__ == "__main__":
-    main()
+    dirs = sys.argv[1:] or None
+    main(dirs=dirs)
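Illustrative usage of the refactored entry point above: the script can now build the API reference for a subset of packages, either from the command line (`python create_api_rst.py core community`, with the script path assumed) or by calling `main` directly:

# Build only the core and community package docs; with no arguments,
# main() falls back to every directory under libs/ plus the partner packages.
main(dirs=["core", "community"])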
File diff suppressed because one or more lines are too long
@@ -43,6 +43,9 @@
 <li class="nav-item">
   <a class="sk-nav-link nav-link" href="{{ pathto('experimental_api_reference') }}">Experimental</a>
 </li>
+<li class="nav-item">
+  <a class="sk-nav-link nav-link" href="{{ pathto('text_splitters_api_reference') }}">Text splitters</a>
+</li>
 {%- for title, pathname in partners %}
 <li class="nav-item">
   <a class="sk-nav-link nav-link nav-more-item-mobile-items" href="{{ pathto(pathname) }}">{{ title }}</a>
File diff suppressed because it is too large
@@ -1,155 +1,50 @@
# Tutorials

Below are links to tutorials and courses on LangChain. For written guides on common use cases for LangChain, check out the [use cases guides](/docs/use_cases).
## Books and Handbooks

⛓ icon marks a new addition [last update 2024-02-06]
- [Generative AI with LangChain](https://www.amazon.com/Generative-AI-LangChain-language-ChatGPT/dp/1835083463/ref=sr_1_1?crid=1GMOMH0G7GLR&keywords=generative+ai+with+langchain&qid=1703247181&sprefix=%2Caps%2C298&sr=8-1) by [Ben Auffarth](https://www.amazon.com/stores/Ben-Auffarth/author/B08JQKSZ7D?ref=ap_rdr&store_ref=ap_rdr&isDramIntegrated=true&shoppingPortalEnabled=true), ©️ 2023 Packt Publishing
- [LangChain AI Handbook](https://www.pinecone.io/learn/langchain/) By **James Briggs** and **Francisco Ingham**
- [LangChain Cheatsheet](https://pub.towardsai.net/langchain-cheatsheet-all-secrets-on-a-single-page-8be26b721cde) by **Ivan Reznikov**

---------------------

### [LangChain](https://en.wikipedia.org/wiki/LangChain) on Wikipedia

### Books

#### [Generative AI with LangChain](https://www.amazon.com/Generative-AI-LangChain-language-ChatGPT/dp/1835083463/ref=sr_1_1?crid=1GMOMH0G7GLR&keywords=generative+ai+with+langchain&qid=1703247181&sprefix=%2Caps%2C298&sr=8-1) by [Ben Auffarth](https://www.amazon.com/stores/Ben-Auffarth/author/B08JQKSZ7D?ref=ap_rdr&store_ref=ap_rdr&isDramIntegrated=true&shoppingPortalEnabled=true), ©️ 2023 Packt Publishing


### DeepLearning.AI courses
by [Harrison Chase](https://en.wikipedia.org/wiki/LangChain) and [Andrew Ng](https://en.wikipedia.org/wiki/Andrew_Ng)
- [LangChain for LLM Application Development](https://learn.deeplearning.ai/langchain)
- [LangChain Chat with Your Data](https://learn.deeplearning.ai/langchain-chat-with-your-data)
- [Functions, Tools and Agents with LangChain](https://learn.deeplearning.ai/functions-tools-agents-langchain)

### Handbook
[LangChain AI Handbook](https://www.pinecone.io/learn/langchain/) By **James Briggs** and **Francisco Ingham**

⛓ [LangChain Cheatsheet](https://pub.towardsai.net/langchain-cheatsheet-all-secrets-on-a-single-page-8be26b721cde) by **Ivan Reznikov**

### Short Tutorials
[LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners](https://youtu.be/aywZrzNaKjs) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)

[LangChain Crash Course: Build an AutoGPT app in 25 minutes](https://youtu.be/MlK6SIjcjE8) by [Nicholas Renotte](https://www.youtube.com/@NicholasRenotte)

[LangChain Crash Course - Build apps with language models](https://youtu.be/LbT1yp6quS8) by [Patrick Loeber](https://www.youtube.com/@patloeber)

⛓ [LangChain 101 Course](https://medium.com/@ivanreznikov/langchain-101-course-updated-668f7b41d6cb) by **Ivan Reznikov**

## Tutorials

### [LangChain for Gen AI and LLMs](https://www.youtube.com/playlist?list=PLIUOU7oqGTLieV9uTIFMm6_4PXg-hlN6F) by [James Briggs](https://www.youtube.com/@jamesbriggs)
- #1 [Getting Started with `GPT-3` vs. Open Source LLMs](https://youtu.be/nE2skSRWTTs)
- #2 [Prompt Templates for `GPT 3.5` and other LLMs](https://youtu.be/RflBcK0oDH0)
- #3 [LLM Chains using `GPT 3.5` and other LLMs](https://youtu.be/S8j9Tk0lZHU)
- [LangChain Data Loaders, Tokenizers, Chunking, and Datasets - Data Prep 101](https://youtu.be/eqOfr4AGLk8)
- #4 [Chatbot Memory for `Chat-GPT`, `Davinci` + other LLMs](https://youtu.be/X05uK0TZozM)
- #5 [Chat with OpenAI in LangChain](https://youtu.be/CnAgB3A5OlU)
- #6 [Fixing LLM Hallucinations with Retrieval Augmentation in LangChain](https://youtu.be/kvdVduIJsc8)
- #7 [LangChain Agents Deep Dive with `GPT 3.5`](https://youtu.be/jSP-gSEyVeI)
- #8 [Create Custom Tools for Chatbots in LangChain](https://youtu.be/q-HNphrWsDE)
- #9 [Build Conversational Agents with Vector DBs](https://youtu.be/H6bCqqw9xyI)
- [Using NEW `MPT-7B` in Hugging Face and LangChain](https://youtu.be/DXpk9K7DgMo)
- [`MPT-30B` Chatbot with LangChain](https://youtu.be/pnem-EhT6VI)
- [Fine-tuning OpenAI's `GPT 3.5` for LangChain Agents](https://youtu.be/boHXgQ5eQic?si=OOOfK-GhsgZGBqSr)
- [Chatbots with `RAG`: LangChain Full Walkthrough](https://youtu.be/LhnCsygAvzY?si=N7k6xy4RQksbWwsQ)
### [by Greg Kamradt](https://www.youtube.com/playlist?list=PLqZXAkvF1bPNQER9mLmDbntNfSpzdDIU5)
### [by Sam Witteveen](https://www.youtube.com/playlist?list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ)
### [by James Briggs](https://www.youtube.com/playlist?list=PLIUOU7oqGTLieV9uTIFMm6_4PXg-hlN6F)
### [by Prompt Engineering](https://www.youtube.com/playlist?list=PLVEEucA9MYhOu89CX8H3MBZqayTbcCTMr)
### [by Mayo Oshin](https://www.youtube.com/@chatwithdata/search?query=langchain)
### [by 1 little Coder](https://www.youtube.com/playlist?list=PLpdmBGJ6ELUK-v0MK-t4wZmVEbxM5xk6L)


### [LangChain 101](https://www.youtube.com/playlist?list=PLqZXAkvF1bPNQER9mLmDbntNfSpzdDIU5) by [Greg Kamradt (Data Indy)](https://www.youtube.com/@DataIndependent)
- [What Is LangChain? - LangChain + `ChatGPT` Overview](https://youtu.be/_v_fgW2SkkQ)
- [Quickstart Guide](https://youtu.be/kYRB-vJFy38)
- [Beginner's Guide To 7 Essential Concepts](https://youtu.be/2xxziIWmaSA)
- [Beginner's Guide To 9 Use Cases](https://youtu.be/vGP4pQdCocw)
- [Agents Overview + Google Searches](https://youtu.be/Jq9Sf68ozk0)
- [`OpenAI` + `Wolfram Alpha`](https://youtu.be/UijbzCIJ99g)
- [Ask Questions On Your Custom (or Private) Files](https://youtu.be/EnT-ZTrcPrg)
- [Connect `Google Drive Files` To `OpenAI`](https://youtu.be/IqqHqDcXLww)
- [`YouTube Transcripts` + `OpenAI`](https://youtu.be/pNcQ5XXMgH4)
- [Question A 300 Page Book (w/ `OpenAI` + `Pinecone`)](https://youtu.be/h0DHDp1FbmQ)
- [Workaround `OpenAI's` Token Limit With Chain Types](https://youtu.be/f9_BWhCI4Zo)
- [Build Your Own OpenAI + LangChain Web App in 23 Minutes](https://youtu.be/U_eV8wfMkXU)
- [Working With The New `ChatGPT API`](https://youtu.be/e9P7FLi5Zy8)
- [OpenAI + LangChain Wrote Me 100 Custom Sales Emails](https://youtu.be/y1pyAQM-3Bo)
- [Structured Output From `OpenAI` (Clean Dirty Data)](https://youtu.be/KwAXfey-xQk)
- [Connect `OpenAI` To +5,000 Tools (LangChain + `Zapier`)](https://youtu.be/7tNm0yiDigU)
- [Use LLMs To Extract Data From Text (Expert Mode)](https://youtu.be/xZzvwR9jdPA)
- [Extract Insights From Interview Transcripts Using LLMs](https://youtu.be/shkMOHwJ4SM)
- [5 Levels Of LLM Summarizing: Novice to Expert](https://youtu.be/qaPMdcCqtWk)
- [Control Tone & Writing Style Of Your LLM Output](https://youtu.be/miBG-a3FuhU)
- [Build Your Own `AI Twitter Bot` Using LLMs](https://youtu.be/yLWLDjT01q8)
- [ChatGPT made my interview questions for me (`Streamlit` + LangChain)](https://youtu.be/zvoAMx0WKkw)
- [Function Calling via ChatGPT API - First Look With LangChain](https://youtu.be/0-zlUy7VUjg)
- [Extract Topics From Video/Audio With LLMs (Topic Modeling w/ LangChain)](https://youtu.be/pEkxRQFNAs4)
## Courses

### Featured courses on Deeplearning.AI

### [LangChain How to and guides](https://www.youtube.com/playlist?list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ) by [Sam Witteveen](https://www.youtube.com/@samwitteveenai)
- [LangChain Basics - LLMs & PromptTemplates with Colab](https://youtu.be/J_0qvRt4LNk)
- [LangChain Basics - Tools and Chains](https://youtu.be/hI2BY7yl_Ac)
- [`ChatGPT API` Announcement & Code Walkthrough with LangChain](https://youtu.be/phHqvLHCwH4)
- [Conversations with Memory (explanation & code walkthrough)](https://youtu.be/X550Zbz_ROE)
- [Chat with `Flan20B`](https://youtu.be/VW5LBavIfY4)
- [Using `Hugging Face Models` locally (code walkthrough)](https://youtu.be/Kn7SX2Mx_Jk)
- [`PAL`: Program-aided Language Models with LangChain code](https://youtu.be/dy7-LvDu-3s)
- [Building a Summarization System with LangChain and `GPT-3` - Part 1](https://youtu.be/LNq_2s_H01Y)
- [Building a Summarization System with LangChain and `GPT-3` - Part 2](https://youtu.be/d-yeHDLgKHw)
- [Microsoft's `Visual ChatGPT` using LangChain](https://youtu.be/7YEiEyfPF5U)
- [LangChain Agents - Joining Tools and Chains with Decisions](https://youtu.be/ziu87EXZVUE)
- [Comparing LLMs with LangChain](https://youtu.be/rFNG0MIEuW0)
- [Using `Constitutional AI` in LangChain](https://youtu.be/uoVqNFDwpX4)
- [Talking to `Alpaca` with LangChain - Creating an Alpaca Chatbot](https://youtu.be/v6sF8Ed3nTE)
- [Talk to your `CSV` & `Excel` with LangChain](https://youtu.be/xQ3mZhw69bc)
- [`BabyAGI`: Discover the Power of Task-Driven Autonomous Agents!](https://youtu.be/QBcDLSE2ERA)
- [Improve your `BabyAGI` with LangChain](https://youtu.be/DRgPyOXZ-oE)
- [Master `PDF` Chat with LangChain - Your essential guide to queries on documents](https://youtu.be/ZzgUqFtxgXI)
- [Using LangChain with `DuckDuckGO`, `Wikipedia` & `PythonREPL` Tools](https://youtu.be/KerHlb8nuVc)
- [Building Custom Tools and Agents with LangChain (gpt-3.5-turbo)](https://youtu.be/biS8G8x8DdA)
- [LangChain Retrieval QA Over Multiple Files with `ChromaDB`](https://youtu.be/3yPBVii7Ct0)
- [LangChain Retrieval QA with Instructor Embeddings & `ChromaDB` for PDFs](https://youtu.be/cFCGUjc33aU)
- [LangChain + Retrieval Local LLMs for Retrieval QA - No OpenAI!!!](https://youtu.be/9ISVjh8mdlA)
- [`Camel` + LangChain for Synthetic Data & Market Research](https://youtu.be/GldMMK6-_-g)
- [Information Extraction with LangChain & `Kor`](https://youtu.be/SW1ZdqH0rRQ)
- [Converting a LangChain App from OpenAI to OpenSource](https://youtu.be/KUDn7bVyIfc)
- [Using LangChain `Output Parsers` to get what you want out of LLMs](https://youtu.be/UVn2NroKQCw)
- [Building a LangChain Custom Medical Agent with Memory](https://youtu.be/6UFtRwWnHws)
- [Understanding `ReACT` with LangChain](https://youtu.be/Eug2clsLtFs)
- [`OpenAI Functions` + LangChain : Building a Multi Tool Agent](https://youtu.be/4KXK6c6TVXQ)
- [What can you do with 16K tokens in LangChain?](https://youtu.be/z2aCZBAtWXs)
- [Tagging and Extraction - Classification using `OpenAI Functions`](https://youtu.be/a8hMgIcUEnE)
- [HOW to Make Conversational Form with LangChain](https://youtu.be/IT93On2LB5k)
- [`Claude-2` meets LangChain!](https://youtu.be/Hb_D3p0bK2U?si=j96Kc7oJoeRI5-iC)
- [`PaLM 2` Meets LangChain](https://youtu.be/orPwLibLqm4?si=KgJjpEbAD9YBPqT4)
- [`LLaMA2` with LangChain - Basics | LangChain TUTORIAL](https://youtu.be/cIRzwSXB4Rc?si=v3Hwxk1m3fksBIHN)
- [Serving `LLaMA2` with `Replicate`](https://youtu.be/JIF4nNi26DE?si=dSazFyC4UQmaR-rJ)
- [NEW LangChain Expression Language](https://youtu.be/ud7HJ2p3gp0?si=8pJ9O6hGbXrCX5G9)
- [Building a RCI Chain for Agents with LangChain Expression Language](https://youtu.be/QaKM5s0TnsY?si=0miEj-o17AHcGfLG)
- [How to Run `LLaMA-2-70B` on the `Together AI`](https://youtu.be/Tc2DHfzHeYE?si=Xku3S9dlBxWQukpe)
- [`RetrievalQA` with `LLaMA 2 70b` & `Chroma` DB](https://youtu.be/93yueQQnqpM?si=ZMwj-eS_CGLnNMXZ)
- [How to use `BGE Embeddings` for LangChain](https://youtu.be/sWRvSG7vL4g?si=85jnvnmTCF9YIWXI)
- [How to use Custom Prompts for `RetrievalQA` on `LLaMA-2 7B`](https://youtu.be/PDwUKves9GY?si=sMF99TWU0p4eiK80)
- [LangChain for LLM Application Development](https://learn.deeplearning.ai/langchain)
- [LangChain Chat with Your Data](https://learn.deeplearning.ai/langchain-chat-with-your-data)
- [Functions, Tools and Agents with LangChain](https://learn.deeplearning.ai/functions-tools-agents-langchain)
- [Build LLM Apps with LangChain.js](https://learn.deeplearning.ai/courses/build-llm-apps-with-langchain-js)

### Online courses

### [LangChain](https://www.youtube.com/playlist?list=PLVEEucA9MYhOu89CX8H3MBZqayTbcCTMr) by [Prompt Engineering](https://www.youtube.com/@engineerprompt)
- [LangChain Crash Course — All You Need to Know to Build Powerful Apps with LLMs](https://youtu.be/5-fc4Tlgmro)
- [Working with MULTIPLE `PDF` Files in LangChain: `ChatGPT` for your Data](https://youtu.be/s5LhRdh5fu4)
- [`ChatGPT` for YOUR OWN `PDF` files with LangChain](https://youtu.be/TLf90ipMzfE)
- [Talk to YOUR DATA without OpenAI APIs: LangChain](https://youtu.be/wrD-fZvT6UI)
- [LangChain: `PDF` Chat App (GUI) | `ChatGPT` for Your `PDF` FILES](https://youtu.be/RIWbalZ7sTo)
- [`LangFlow`: Build Chatbots without Writing Code](https://youtu.be/KJ-ux3hre4s)
- [LangChain: Giving Memory to LLMs](https://youtu.be/dxO6pzlgJiY)
- [BEST OPEN Alternative to `OPENAI's EMBEDDINGs` for Retrieval QA: LangChain](https://youtu.be/ogEalPMUCSY)
- [LangChain: Run Language Models Locally - `Hugging Face Models`](https://youtu.be/Xxxuw4_iCzw)
- [Slash API Costs: Mastering Caching for LLM Applications](https://youtu.be/EQOznhaJWR0?si=AXoI7f3-SVFRvQUl)
- [Avoid PROMPT INJECTION with `Constitutional AI` - LangChain](https://youtu.be/tyKSkPFHVX8?si=9mgcB5Y1kkotkBGB)
- [Udemy](https://www.udemy.com/courses/search/?q=langchain)
- [Pluralsight](https://www.pluralsight.com/search?q=langchain)
- [Coursera](https://www.coursera.org/search?query=langchain)
- [Maven](https://maven.com/courses?query=langchain)
- [Udacity](https://www.udacity.com/catalog/all/any-price/any-school/any-skill/any-difficulty/any-duration/any-type/relevance/page-1?searchValue=langchain)
- [LinkedIn Learning](https://www.linkedin.com/search/results/learning/?keywords=langchain)
- [edX](https://www.edx.org/search?q=langchain)

## Short Tutorials

### LangChain by [Chat with data](https://www.youtube.com/@chatwithdata)
- [LangChain Beginner's Tutorial for `Typescript`/`Javascript`](https://youtu.be/bH722QgRlhQ)
- [`GPT-4` Tutorial: How to Chat With Multiple `PDF` Files (~1000 pages of Tesla's 10-K Annual Reports)](https://youtu.be/Ix9WIZpArm0)
- [`GPT-4` & LangChain Tutorial: How to Chat With A 56-Page `PDF` Document (w/`Pinecone`)](https://youtu.be/ih9PBGVVOO4)
- [LangChain & `Supabase` Tutorial: How to Build a ChatGPT Chatbot For Your Website](https://youtu.be/R2FMzcsmQY8)
- [LangChain Agents: Build Personal Assistants For Your Data (Q&A with Harrison Chase and Mayo Oshin)](https://youtu.be/gVkF8cwfBLI)


### Codebase Analysis
- [Codebase Analysis: Langchain Agents](https://carbonated-yacht-2c5.notion.site/Codebase-Analysis-Langchain-Agents-0b0587acd50647ca88aaae7cff5df1f2)
- [by Nicholas Renotte](https://youtu.be/MlK6SIjcjE8)
- [by Patrick Loeber](https://youtu.be/LbT1yp6quS8)
- [by Rabbitmetrics](https://youtu.be/aywZrzNaKjs)
- [by Ivan Reznikov](https://medium.com/@ivanreznikov/langchain-101-course-updated-668f7b41d6cb)

## [Documentation: Use cases](/docs/use_cases)

---------------------
⛓ icon marks a new addition [last update 2024-02-06]


@@ -1,7 +1,7 @@
{
"cells": [
{
"cell_type": "markdown",
"cell_type": "raw",
"id": "9e45e81c-e16e-4c6c-b6a3-2362e5193827",
"metadata": {},
"source": [
@@ -25,53 +25,42 @@
"\n",
"There are two ways to perform routing:\n",
"\n",
"1. Using a `RunnableBranch`.\n",
"2. Writing custom factory function that takes the input of a previous step and returns a **runnable**. Importantly, this should return a **runnable** and NOT actually execute.\n",
"1. Conditionally return runnables from a [`RunnableLambda`](./functions) (recommended)\n",
"2. Using a `RunnableBranch`.\n",
"\n",
"We'll illustrate both methods using a two step sequence where the first step classifies an input question as being about `LangChain`, `Anthropic`, or `Other`, then routes to a corresponding prompt chain."
]
},
{
"cell_type": "markdown",
"id": "f885113d",
"metadata": {},
"source": [
"## Using a RunnableBranch\n",
"\n",
"A `RunnableBranch` is initialized with a list of (condition, runnable) pairs and a default runnable. It selects which branch by passing each condition the input it's invoked with. It selects the first condition to evaluate to True, and runs the corresponding runnable to that condition with the input. \n",
"\n",
"If no provided conditions match, it runs the default runnable.\n",
"\n",
"Here's an example of what it looks like in action:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "1aa13c1d",
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts import PromptTemplate\n",
"from langchain_community.chat_models import ChatAnthropic\n",
"from langchain_core.output_parsers import StrOutputParser"
]
},
{
"cell_type": "markdown",
"id": "ed84c59a",
"id": "c1c6edac",
"metadata": {},
"source": [
"## Example Setup\n",
"First, let's create a chain that will identify incoming questions as being about `LangChain`, `Anthropic`, or `Other`:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3ec03886",
"execution_count": null,
"id": "8a8a1967",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"' Anthropic'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from langchain_community.chat_models import ChatAnthropic\n",
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import PromptTemplate\n",
"\n",
"chain = (\n",
" PromptTemplate.from_template(\n",
" \"\"\"Given the user question below, classify it as either being about `LangChain`, `Anthropic`, or `Other`.\n",
@@ -86,33 +75,14 @@
" )\n",
" | ChatAnthropic()\n",
" | StrOutputParser()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "87ae7c1c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' Anthropic'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
")\n",
"\n",
"chain.invoke({\"question\": \"how do I call Anthropic?\"})"
]
},
{
"cell_type": "markdown",
"id": "8aa0a365",
"id": "7655555f",
"metadata": {},
"source": [
"Now, let's create three sub chains:"
@@ -120,8 +90,8 @@
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d479962a",
"execution_count": null,
"id": "89d7722d",
"metadata": {},
"outputs": [],
"source": [
@@ -158,101 +128,12 @@
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "593eab06",
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.runnables import RunnableBranch\n",
"\n",
"branch = RunnableBranch(\n",
" (lambda x: \"anthropic\" in x[\"topic\"].lower(), anthropic_chain),\n",
" (lambda x: \"langchain\" in x[\"topic\"].lower(), langchain_chain),\n",
" general_chain,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "752c732e",
"metadata": {},
"outputs": [],
"source": [
"full_chain = {\"topic\": chain, \"question\": lambda x: x[\"question\"]} | branch"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "29231bb8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" As Dario Amodei told me, here are some ways to use Anthropic:\\n\\n- Sign up for an account on Anthropic's website to access tools like Claude, Constitutional AI, and Writer. \\n\\n- Use Claude for tasks like email generation, customer service chat, and QA. Claude can understand natural language prompts and provide helpful responses.\\n\\n- Use Constitutional AI if you need an AI assistant that is harmless, honest, and helpful. It is designed to be safe and aligned with human values.\\n\\n- Use Writer to generate natural language content for things like marketing copy, stories, reports, and more. Give it a topic and prompt and it will create high-quality written content.\\n\\n- Check out Anthropic's documentation and blog for tips, tutorials, examples, and announcements about new capabilities as they continue to develop their AI technology.\\n\\n- Follow Anthropic on social media or subscribe to their newsletter to stay up to date on new features and releases.\\n\\n- For most people, the easiest way to leverage Anthropic's technology is through their website - just create an account to get started!\", additional_kwargs={}, example=False)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"full_chain.invoke({\"question\": \"how do I use Anthropic?\"})"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "c67d8733",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' As Harrison Chase told me, here is how you use LangChain:\\n\\nLangChain is an AI assistant that can have conversations, answer questions, and generate text. To use LangChain, you simply type or speak your input and LangChain will respond. \\n\\nYou can ask LangChain questions, have discussions, get summaries or explanations about topics, and request it to generate text on a subject. Some examples of interactions:\\n\\n- Ask general knowledge questions and LangChain will try to answer factually. For example \"What is the capital of France?\"\\n\\n- Have conversations on topics by taking turns speaking. You can prompt the start of a conversation by saying something like \"Let\\'s discuss machine learning\"\\n\\n- Ask for summaries or high-level explanations on subjects. For example \"Can you summarize the main themes in Shakespeare\\'s Hamlet?\" \\n\\n- Give creative writing prompts or requests to have LangChain generate text in different styles. For example \"Write a short children\\'s story about a mouse\" or \"Generate a poem in the style of Robert Frost about nature\"\\n\\n- Correct LangChain if it makes an inaccurate statement and provide the right information. This helps train it.\\n\\nThe key is interacting naturally and giving it clear prompts and requests', additional_kwargs={}, example=False)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"full_chain.invoke({\"question\": \"how do I use LangChain?\"})"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "935ad949",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' 2 + 2 = 4', additional_kwargs={}, example=False)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"full_chain.invoke({\"question\": \"whats 2 + 2\"})"
]
},
{
"cell_type": "markdown",
"id": "6d8d042c",
"metadata": {},
"source": [
"## Using a custom function\n",
"## Using a custom function (Recommended)\n",
"\n",
"You can also use a custom function to route between different outputs. Here's an example:"
]
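The hunk above elides the body of this example; the recommended pattern looks roughly like the sketch below, reusing the `chain`, `anthropic_chain`, `langchain_chain`, and `general_chain` defined earlier in the notebook:

```python
from langchain_core.runnables import RunnableLambda


def route(info):
    # Return (don't invoke) the runnable for the classified topic.
    if "anthropic" in info["topic"].lower():
        return anthropic_chain
    elif "langchain" in info["topic"].lower():
        return langchain_chain
    else:
        return general_chain


full_chain = {
    "topic": chain,
    "question": lambda x: x["question"],
} | RunnableLambda(route)
```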
@@ -350,13 +231,89 @@
"full_chain.invoke({\"question\": \"whats 2 + 2\"})"
]
},
{
"cell_type": "markdown",
"id": "5147b827",
"metadata": {},
"source": [
"## Using a RunnableBranch\n",
"\n",
"A `RunnableBranch` is a special type of runnable that allows you to define a set of conditions and runnables to execute based on the input. It does **not** offer anything that you can't achieve in a custom function as described above, so we recommend using a custom function instead.\n",
"\n",
"A `RunnableBranch` is initialized with a list of (condition, runnable) pairs and a default runnable. It selects which branch by passing each condition the input it's invoked with. It selects the first condition to evaluate to True, and runs the corresponding runnable to that condition with the input. \n",
"\n",
"If no provided conditions match, it runs the default runnable.\n",
"\n",
"Here's an example of what it looks like in action:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "46802d04",
"id": "2a101418",
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" As Dario Amodei told me, here are some ways to use Anthropic:\\n\\n- Sign up for an account on Anthropic's website to access tools like Claude, Constitutional AI, and Writer. \\n\\n- Use Claude for tasks like email generation, customer service chat, and QA. Claude can understand natural language prompts and provide helpful responses.\\n\\n- Use Constitutional AI if you need an AI assistant that is harmless, honest, and helpful. It is designed to be safe and aligned with human values.\\n\\n- Use Writer to generate natural language content for things like marketing copy, stories, reports, and more. Give it a topic and prompt and it will create high-quality written content.\\n\\n- Check out Anthropic's documentation and blog for tips, tutorials, examples, and announcements about new capabilities as they continue to develop their AI technology.\\n\\n- Follow Anthropic on social media or subscribe to their newsletter to stay up to date on new features and releases.\\n\\n- For most people, the easiest way to leverage Anthropic's technology is through their website - just create an account to get started!\", additional_kwargs={}, example=False)"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from langchain_core.runnables import RunnableBranch\n",
"\n",
"branch = RunnableBranch(\n",
" (lambda x: \"anthropic\" in x[\"topic\"].lower(), anthropic_chain),\n",
" (lambda x: \"langchain\" in x[\"topic\"].lower(), langchain_chain),\n",
" general_chain,\n",
")\n",
"full_chain = {\"topic\": chain, \"question\": lambda x: x[\"question\"]} | branch\n",
"full_chain.invoke({\"question\": \"how do I use Anthropic?\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d8caf9b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' As Harrison Chase told me, here is how you use LangChain:\\n\\nLangChain is an AI assistant that can have conversations, answer questions, and generate text. To use LangChain, you simply type or speak your input and LangChain will respond. \\n\\nYou can ask LangChain questions, have discussions, get summaries or explanations about topics, and request it to generate text on a subject. Some examples of interactions:\\n\\n- Ask general knowledge questions and LangChain will try to answer factually. For example \"What is the capital of France?\"\\n\\n- Have conversations on topics by taking turns speaking. You can prompt the start of a conversation by saying something like \"Let\\'s discuss machine learning\"\\n\\n- Ask for summaries or high-level explanations on subjects. For example \"Can you summarize the main themes in Shakespeare\\'s Hamlet?\" \\n\\n- Give creative writing prompts or requests to have LangChain generate text in different styles. For example \"Write a short children\\'s story about a mouse\" or \"Generate a poem in the style of Robert Frost about nature\"\\n\\n- Correct LangChain if it makes an inaccurate statement and provide the right information. This helps train it.\\n\\nThe key is interacting naturally and giving it clear prompts and requests', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"full_chain.invoke({\"question\": \"how do I use LangChain?\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26159af7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' 2 + 2 = 4', additional_kwargs={}, example=False)"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"full_chain.invoke({\"question\": \"whats 2 + 2\"})"
]
}
],
"metadata": {

@@ -12,7 +12,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "bb7d49db-04d3-4399-bfe1-09f82bbe6015",
"metadata": {},
@@ -28,7 +27,7 @@
"1. sync `stream` and async `astream`: a **default implementation** of streaming that streams the **final output** from the chain.\n",
"2. async `astream_events` and async `astream_log`: these provide a way to stream both **intermediate steps** and **final output** from the chain.\n",
"\n",
"Let's take a look at both approaches, and try to understand a how to use them. 🥷\n",
"Let's take a look at both approaches, and try to understand how to use them. 🥷\n",
"\n",
"## Using Stream\n",
"\n",
@@ -48,7 +47,25 @@
"\n",
"Large language models can take **several seconds** to generate a complete response to a query. This is far slower than the **~200-300 ms** threshold at which an application feels responsive to an end user.\n",
"\n",
"The key strategy to make the application feel more responsive is to show intermediate progress; e.g., to stream the output from the model **token by token**."
"The key strategy to make the application feel more responsive is to show intermediate progress; viz., to stream the output from the model **token by token**."
]
},
{
"cell_type": "markdown",
"id": "9eb73e8b",
"metadata": {},
"source": [
"We will show examples of streaming using the chat model from [Anthropic](https://python.langchain.com/docs/integrations/platforms/anthropic). To use the model, you will need to install the `langchain-anthropic` package. You can do this with the following command:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cd351cf4",
"metadata": {},
"outputs": [],
"source": [
"pip install -qU langchain-anthropic"
]
},
{
@@ -68,7 +85,7 @@
"source": [
"# Showing the example using anthropic, but you can use\n",
"# your favorite chat model!\n",
"from langchain_community.chat_models import ChatAnthropic\n",
"from langchain_anthropic import ChatAnthropic\n",
"\n",
"model = ChatAnthropic()\n",
"\n",
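The rest of this cell is elided by the hunk; the synchronous `stream` usage it leads into looks roughly like this sketch, assuming the `model` defined above:

```python
# Iterate over AIMessageChunk objects as the model emits them.
for chunk in model.stream("what color is the sky?"):
    print(chunk.content, end="", flush=True)
```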
@@ -152,7 +169,7 @@
"We will use `StrOutputParser` to parse the output from the model. This is a simple parser that extracts the `content` field from an `AIMessageChunk`, giving us the `token` returned by the model.\n",
"\n",
":::{.callout-tip}\n",
"LCEL is a *declarative* way to specify a \"program\" by chaining together different LangChain primitives. Chains created using LCEL benefit from an automatic implementation of `stream`, and `astream` allowing streaming of the final output. In fact, chains created with LCEL implement the entire standard Runnable interface.\n",
"LCEL is a *declarative* way to specify a \"program\" by chaining together different LangChain primitives. Chains created using LCEL benefit from an automatic implementation of `stream` and `astream` allowing streaming of the final output. In fact, chains created with LCEL implement the entire standard Runnable interface.\n",
":::"
]
},
@@ -330,7 +347,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "cab6dca2-2027-414d-a196-2db6e3ebb8a5",
"metadata": {},
@@ -464,12 +480,12 @@
"id": "6fd3e71b-439e-418f-8a8a-5232fba3d9fd",
"metadata": {},
"source": [
"Stream just yielded the final result from that component. \n",
"Stream just yielded the final result from that component.\n",
"\n",
"This is OK 🥹! Not all components have to implement streaming -- in some cases streaming is either unnecessary, difficult or just doesn't make sense.\n",
"\n",
":::{.callout-tip}\n",
"An LCEL chain constructed using using non-streaming components, will still be able to stream in a lot of cases, with streaming of partial output starting after the last non-streaming step in the chain.\n",
"An LCEL chain constructed using non-streaming components, will still be able to stream in a lot of cases, with streaming of partial output starting after the last non-streaming step in the chain.\n",
":::"
]
},
@@ -1397,7 +1413,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.9.6"
}
},
"nbformat": 4,

@@ -40,26 +40,7 @@
"id": "b99b47ec",
"metadata": {},
"source": [
"%pip install --upgrade --quiet langchain-core langchain-openai"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dbeac2b8-c441-4d8d-b313-1de0ab9c7e51",
"metadata": {},
"outputs": [],
"source": [
"from langchain_openai import ChatOpenAI\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.output_parsers import StrOutputParser\n",
"\n",
"\n",
"prompt = ChatPromptTemplate.from_template(\"Tell me a short joke about {topic}\")\n",
"model = ChatOpenAI(model=\"gpt-3.5-turbo\")\n",
"output_parser = StrOutputParser()\n",
"\n",
"chain = prompt | model | output_parser"
"%pip install --upgrade --quiet langchain-core langchain-openai langchain-anthropic"
]
},
{
@@ -127,6 +108,9 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain_openai import ChatOpenAI\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.runnables import RunnablePassthrough\n",
"\n",
"\n",
@@ -476,7 +460,7 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.chat_models import ChatAnthropic\n",
"from langchain_anthropic import ChatAnthropic\n",
"\n",
"anthropic = ChatAnthropic(model=\"claude-2\")\n",
"anthropic_chain = (\n",
@@ -1010,7 +994,7 @@
"source": [
"import os\n",
"\n",
"from langchain_community.chat_models import ChatAnthropic\n",
"from langchain_anthropic import ChatAnthropic\n",
"from langchain_openai import ChatOpenAI\n",
"from langchain_openai import OpenAI\n",
"from langchain_core.output_parsers import StrOutputParser\n",
@@ -1070,9 +1054,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "poetry-venv",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "poetry-venv"
"name": "python3"
},
"language_info": {
"codemirror_mode": {

@@ -14,7 +14,16 @@ This framework consists of several parts.
- **[LangServe](/docs/langserve)**: A library for deploying LangChain chains as a REST API.
- **[LangSmith](/docs/langsmith)**: A developer platform that lets you debug, test, evaluate, and monitor chains built on any LLM framework and seamlessly integrates with LangChain.


import ThemedImage from '@theme/ThemedImage';

<ThemedImage
  alt="Diagram outlining the hierarchical organization of the LangChain framework, displaying the interconnected parts across multiple layers."
  sources={{
    light: '/svg/langchain_stack.svg',
    dark: '/svg/langchain_stack_dark.svg',
  }}
  title="LangChain Framework Overview"
/>

Together, these products simplify the entire application lifecycle:
- **Develop**: Write your applications in LangChain/LangChain.js. Hit the ground running using Templates for reference.

@@ -65,7 +65,7 @@ We will link to relevant docs.

## LLM Chain

For this getting started guide, we will provide two options: using OpenAI (a popular model available via API) or using a local open source model.
We'll show how to use models available via API, like OpenAI, and local open source models, using integrations like Ollama.

<Tabs>
<TabItem value="openai" label="OpenAI" default>
@@ -99,7 +99,7 @@ llm = ChatOpenAI(openai_api_key="...")
```

</TabItem>
<TabItem value="local" label="Local">
<TabItem value="local" label="Local (using Ollama)">

[Ollama](https://ollama.ai/) allows you to run open-source large language models, such as Llama 2, locally.

@@ -112,6 +112,66 @@ Then, make sure the Ollama server is running. After that, you can do:
```python
from langchain_community.llms import Ollama
llm = Ollama(model="llama2")
```

</TabItem>
<TabItem value="anthropic" label="Anthropic">

First we'll need to import the LangChain x Anthropic package.

```shell
pip install langchain-anthropic
```

Accessing the API requires an API key, which you can get by creating an account [here](https://claude.ai/login). Once we have a key we'll want to set it as an environment variable by running:

```shell
export ANTHROPIC_API_KEY="..."
```

We can then initialize the model:

```python
from langchain_anthropic import ChatAnthropic

llm = ChatAnthropic(model="claude-3-sonnet-20240229", temperature=0.2, max_tokens=1024)
```

If you'd prefer not to set an environment variable you can pass the key in directly via the `anthropic_api_key` named parameter when initiating the Anthropic Chat Model class:

```python
llm = ChatAnthropic(anthropic_api_key="...")
```

</TabItem>
<TabItem value="cohere" label="Cohere">

First we'll need to import the Cohere SDK package.

```shell
pip install cohere
```

Accessing the API requires an API key, which you can get by creating an account and heading [here](https://dashboard.cohere.com/api-keys). Once we have a key we'll want to set it as an environment variable by running:

```shell
export COHERE_API_KEY="..."
```

We can then initialize the model:

```python
from langchain_community.chat_models import ChatCohere

llm = ChatCohere()
```

If you'd prefer not to set an environment variable you can pass the key in directly via the `cohere_api_key` named parameter when initiating the Cohere LLM class:

```python
from langchain_community.chat_models import ChatCohere

llm = ChatCohere(cohere_api_key="...")
```

</TabItem>
@@ -193,17 +253,17 @@ After that, we can import and use WebBaseLoader.

```python
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader("https://docs.smith.langchain.com")
loader = WebBaseLoader("https://docs.smith.langchain.com/user_guide")

docs = loader.load()
```

Next, we need to index it into a vectorstore. This requires a few components, namely an [embedding model](/docs/modules/data_connection/text_embedding) and a [vectorstore](/docs/modules/data_connection/vectorstores).

For embedding models, we once again provide examples for accessing via OpenAI or via local models.
For embedding models, we once again provide examples for accessing via API or by running local models.

<Tabs>
<TabItem value="openai" label="OpenAI" default>
<TabItem value="openai" label="OpenAI (API)" default>

Make sure you have the `langchain_openai` package installed and the appropriate environment variables set (these are the same as needed for the LLM).

@@ -214,7 +274,7 @@ embeddings = OpenAIEmbeddings()
```

</TabItem>
<TabItem value="local" label="Local">
<TabItem value="local" label="Local (using Ollama)">

Make sure you have Ollama running (same set up as with the LLM).

@@ -224,6 +284,17 @@ from langchain_community.embeddings import OllamaEmbeddings
embeddings = OllamaEmbeddings()
```
</TabItem>
<TabItem value="cohere" label="Cohere (API)" default>

Make sure you have the `cohere` package installed and the appropriate environment variables set (these are the same as needed for the LLM).

```python
from langchain_community.embeddings import CohereEmbeddings

embeddings = CohereEmbeddings()
```

</TabItem>
</Tabs>

Now, we can use this embedding model to ingest documents into a vectorstore.
@@ -239,7 +310,7 @@ Then we can build our index:

```python
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter


text_splitter = RecursiveCharacterTextSplitter()
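The hunk ends mid-snippet; building the index continues roughly as below (a sketch reusing `docs`, `embeddings`, and `text_splitter` from the surrounding code):

```python
documents = text_splitter.split_documents(docs)
vector = FAISS.from_documents(documents, embeddings)
```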
@@ -489,7 +560,7 @@ from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.tools.retriever import create_retriever_tool
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_openai import ChatOpenAI
@@ -501,7 +572,7 @@ from langchain_core.messages import BaseMessage
from langserve import add_routes

# 1. Load Retriever
loader = WebBaseLoader("https://docs.smith.langchain.com/overview")
loader = WebBaseLoader("https://docs.smith.langchain.com/user_guide")
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)

@@ -9,7 +9,7 @@
"\n",
"## Use case\n",
"\n",
"The popularity of projects like [PrivateGPT](https://github.com/imartinez/privateGPT), [llama.cpp](https://github.com/ggerganov/llama.cpp), and [GPT4All](https://github.com/nomic-ai/gpt4all) underscore the demand to run LLMs locally (on your own device).\n",
"The popularity of projects like [PrivateGPT](https://github.com/imartinez/privateGPT), [llama.cpp](https://github.com/ggerganov/llama.cpp), [GPT4All](https://github.com/nomic-ai/gpt4all), and [llamafile](https://github.com/Mozilla-Ocho/llamafile) underscore the demand to run LLMs locally (on your own device).\n",
"\n",
"This has at least two important benefits:\n",
"\n",
@@ -46,7 +46,8 @@
"\n",
"1. [`llama.cpp`](https://github.com/ggerganov/llama.cpp): C++ implementation of llama inference code with [weight optimization / quantization](https://finbarr.ca/how-is-llama-cpp-possible/)\n",
"2. [`gpt4all`](https://docs.gpt4all.io/index.html): Optimized C backend for inference\n",
"3. [`Ollama`](https://ollama.ai/): Bundles model weights and environment into an app that runs on device and serves the LLM \n",
"3. [`Ollama`](https://ollama.ai/): Bundles model weights and environment into an app that runs on device and serves the LLM\n",
"4. [`llamafile`](https://github.com/Mozilla-Ocho/llamafile): Bundles model weights and everything needed to run the model in a single file, allowing you to run the LLM locally from this file without any additional installation steps\n",
"\n",
"In general, these frameworks will do a few things:\n",
"\n",
@@ -157,7 +158,7 @@
"\n",
"### Running Apple silicon GPU\n",
"\n",
"`Ollama` will automatically utilize the GPU on Apple devices.\n",
"`Ollama` and [`llamafile`](https://github.com/Mozilla-Ocho/llamafile?tab=readme-ov-file#gpu-support) will automatically utilize the GPU on Apple devices.\n",
" \n",
"Other frameworks require the user to set up the environment to utilize the Apple GPU.\n",
"\n",
@@ -191,7 +192,7 @@
"\n",
"There are various ways to gain access to quantized model weights.\n",
"\n",
"1. [`HuggingFace`](https://huggingface.co/TheBloke) - Many quantized models are available for download and can be run with frameworks such as [`llama.cpp`](https://github.com/ggerganov/llama.cpp)\n",
"1. [`HuggingFace`](https://huggingface.co/TheBloke) - Many quantized models are available for download and can be run with frameworks such as [`llama.cpp`](https://github.com/ggerganov/llama.cpp). You can also download models in [`llamafile` format](https://huggingface.co/models?other=llamafile) from HuggingFace.\n",
"2. [`gpt4all`](https://gpt4all.io/index.html) - The model explorer offers a leaderboard of metrics and associated quantized models available for download \n",
"3. [`Ollama`](https://github.com/jmorganca/ollama) - Several models can be accessed directly via `pull`\n",
"\n",
@@ -428,6 +429,62 @@
"llm(\"The first man on the moon was ... Let's think step by step\")"
]
},
{
"cell_type": "markdown",
"id": "056854e2-5e4b-4a03-be7e-03192e5c4e1e",
"metadata": {},
"source": [
"### llamafile\n",
"\n",
"One of the simplest ways to run an LLM locally is using a [llamafile](https://github.com/Mozilla-Ocho/llamafile). All you need to do is:\n",
"\n",
"1) Download a llamafile from [HuggingFace](https://huggingface.co/models?other=llamafile)\n",
"2) Make the file executable\n",
"3) Run the file\n",
"\n",
"llamafiles bundle model weights and a [specially-compiled](https://github.com/Mozilla-Ocho/llamafile?tab=readme-ov-file#technical-details) version of [`llama.cpp`](https://github.com/ggerganov/llama.cpp) into a single file that can run on most computers without any additional dependencies. They also come with an embedded inference server that provides an [API](https://github.com/Mozilla-Ocho/llamafile/blob/main/llama.cpp/server/README.md#api-endpoints) for interacting with your model. \n",
"\n",
"Here's a simple bash script that shows all 3 setup steps:\n",
"\n",
"```bash\n",
"# Download a llamafile from HuggingFace\n",
"wget https://huggingface.co/jartine/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile\n",
"\n",
"# Make the file executable. On Windows, instead just rename the file to end in \".exe\".\n",
"chmod +x TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile\n",
"\n",
"# Start the model server. Listens at http://localhost:8080 by default.\n",
"./TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile --server --nobrowser\n",
"```\n",
"\n",
"After you run the above setup steps, you can use LangChain to interact with your model:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "002e655c-ba18-4db3-ac7b-f33e825d14b6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"\\nFirstly, let's imagine the scene where Neil Armstrong stepped onto the moon. This happened in 1969. The first man on the moon was Neil Armstrong. We already know that.\\n2nd, let's take a step back. Neil Armstrong didn't have any special powers. He had to land his spacecraft safely on the moon without injuring anyone or causing any damage. If he failed to do this, he would have been killed along with all those people who were on board the spacecraft.\\n3rd, let's imagine that Neil Armstrong successfully landed his spacecraft on the moon and made it back to Earth safely. The next step was for him to be hailed as a hero by his people back home. It took years before Neil Armstrong became an American hero.\\n4th, let's take another step back. Let's imagine that Neil Armstrong wasn't hailed as a hero, and instead, he was just forgotten. This happened in the 1970s. Neil Armstrong wasn't recognized for his remarkable achievement on the moon until after he died.\\n5th, let's take another step back. Let's imagine that Neil Armstrong didn't die in the 1970s and instead, lived to be a hundred years old. This happened in 2036. In the year 2036, Neil Armstrong would have been a centenarian.\\nNow, let's think about the present. Neil Armstrong is still alive. He turned 95 years old on July 20th, 2018. If he were to die now, his achievement of becoming the first human being to set foot on the moon would remain an unforgettable moment in history.\\nI hope this helps you understand the significance and importance of Neil Armstrong's achievement on the moon!\""
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_community.llms.llamafile import Llamafile\n",
"\n",
"llm = Llamafile()\n",
"\n",
"llm.invoke(\"The first man on the moon was ... Let's think step by step.\")"
]
},
{
"cell_type": "markdown",
"id": "6b84e543",
@@ -611,7 +668,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.11.7"
}
},
"nbformat": 4,

@@ -643,9 +643,9 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.vectorstores import FAISS\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"# 2. Load the data: In our case data's already loaded\n",
"# 3. Anonymize the data before indexing\n",

391 docs/docs/guides/structured_output.ipynb Normal file
@@ -0,0 +1,391 @@
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6e3f0f72",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# [beta] Structured Output\n",
|
||||
"\n",
|
||||
"It is often crucial to have LLMs return structured output. This is because oftentimes the outputs of the LLMs are used in downstream applications, where specific arguments are required. Having the LLM return structured output reliably is necessary for that.\n",
|
||||
"\n",
|
||||
"There are a few different high level strategies that are used to do this:\n",
|
||||
"\n",
|
||||
"- Prompting: This is when you ask the LLM (very nicely) to return output in the desired format (JSON, XML). This is nice because it works with all LLMs. It is not nice because there is no guarantee that the LLM returns the output in the right format.\n",
|
||||
"- Function calling: This is when the LLM is fine-tuned to be able to not just generate a completion, but also generate a function call. The functions the LLM can call are generally passed as extra parameters to the model API. The function names and descriptions should be treated as part of the prompt (they usually count against token counts, and are used by the LLM to decide what to do).\n",
|
||||
"- Tool calling: A technique similar to function calling, but it allows the LLM to call multiple functions at the same time.\n",
|
||||
"- JSON mode: This is when the LLM is guaranteed to return JSON.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Different models may support different variants of these, with slightly different parameters. In order to make it easy to get LLMs to return structured output, we have added a common interface to LangChain models: `.with_structured_output`. \n",
|
||||
"\n",
|
||||
"By invoking this method (and passing in a JSON schema or a Pydantic model) the model will add whatever model parameters + output parsers are necessary to get back the structured output. There may be more than one way to do this (e.g., function calling vs JSON mode) - you can configure which method to use by passing into that method.\n",
|
||||
"\n",
|
||||
"Let's look at some examples of this in action!\n",
|
||||
"\n",
|
||||
"We will use Pydantic to easily structure the response schema."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "08029f4e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "070bf702",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class Joke(BaseModel):\n",
|
||||
" setup: str = Field(description=\"The setup of the joke\")\n",
|
||||
" punchline: str = Field(description=\"The punchline to the joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98f6edfa",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## OpenAI\n",
|
||||
"\n",
|
||||
"OpenAI exposes a few different ways to get structured outputs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "3fe7caf0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "deddb6d3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Function Calling\n",
|
||||
"\n",
|
||||
"By default, we will use `function_calling`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "6700994a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = ChatOpenAI()\n",
|
||||
"model_with_structure = model.with_structured_output(Joke)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "c55a61b8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Joke(setup='Why was the cat sitting on the computer?', punchline='It wanted to keep an eye on the mouse!')"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_with_structure.invoke(\"Tell me a joke about cats\")"
|
||||
]
|
||||
},
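As noted above, `.with_structured_output` accepts a JSON schema as well as a Pydantic model. A rough sketch of the dict-schema variant, assuming the OpenAI integration accepts a JSON-schema-style dict (in which case the output is a plain `dict` rather than a `Joke` instance):

```python
from langchain_openai import ChatOpenAI

# The Joke Pydantic model from earlier, expressed as a JSON schema dict.
json_schema = {
    "title": "Joke",
    "description": "A joke to tell the user.",
    "type": "object",
    "properties": {
        "setup": {"type": "string", "description": "The setup of the joke"},
        "punchline": {"type": "string", "description": "The punchline to the joke"},
    },
    "required": ["setup", "punchline"],
}

model_with_dict_structure = ChatOpenAI().with_structured_output(json_schema)
# Returns e.g. {"setup": "...", "punchline": "..."} instead of a Joke object.
model_with_dict_structure.invoke("Tell me a joke about cats")
```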
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "39d7a555",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### JSON Mode\n",
|
||||
"\n",
|
||||
"We also support JSON mode. Note that we need to specify in the prompt the format that it should respond in."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "df0370e3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_with_structure = model.with_structured_output(Joke, method=\"json_mode\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "23844a26",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Joke(setup=\"Why don't cats play poker in the jungle?\", punchline='Too many cheetahs!')"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_with_structure.invoke(\n",
|
||||
" \"Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8f3cce9e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Fireworks\n",
|
||||
"\n",
|
||||
"[Fireworks](https://fireworks.ai/) similarly supports function calling and JSON mode for select models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "ad45fdd8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_fireworks import ChatFireworks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "36270ed5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Function Calling\n",
|
||||
"\n",
|
||||
"By default, we will use `function_calling`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "49a20847",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = ChatFireworks(model=\"accounts/fireworks/models/firefunction-v1\")\n",
|
||||
"model_with_structure = model.with_structured_output(Joke)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "e3093a6c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Joke(setup=\"Why don't cats play poker in the jungle?\", punchline='Too many cheetahs!')"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_with_structure.invoke(\"Tell me a joke about cats\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ddb6b3ba",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### JSON Mode\n",
|
||||
"\n",
|
||||
"We also support JSON mode. Note that we need to specify in the prompt the format that it should respond in."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "ea0c22c1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_with_structure = model.with_structured_output(Joke, method=\"json_mode\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "649f9632",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Joke(setup='Why did the dog sit in the shade?', punchline='To avoid getting burned.')"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_with_structure.invoke(\n",
|
||||
" \"Tell me a joke about dogs, respond in JSON with `setup` and `punchline` keys\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ff70609a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Mistral\n",
|
||||
"\n",
|
||||
"We also support structured output with Mistral models, although we only support function calling."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "bffd3fad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_mistralai import ChatMistralAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "c8bd7549",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = ChatMistralAI(model=\"mistral-large-latest\")\n",
|
||||
"model_with_structure = model.with_structured_output(Joke)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "17b15816",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_with_structure.invoke(\"Tell me a joke about cats\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6bbbb698",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Together\n",
|
||||
"\n",
|
||||
"Since [TogetherAI](https://www.together.ai/) is just a drop in replacement for OpenAI, we can just use the OpenAI integration"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "9b9617e3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "90549664",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = ChatOpenAI(\n",
|
||||
" base_url=\"https://api.together.xyz/v1\",\n",
|
||||
" api_key=os.environ[\"TOGETHER_API_KEY\"],\n",
|
||||
" model=\"mistralai/Mixtral-8x7B-Instruct-v0.1\",\n",
|
||||
")\n",
|
||||
"model_with_structure = model.with_structured_output(Joke)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "01da39be",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Joke(setup='Why did the cat sit on the computer?', punchline='To keep an eye on the mouse!')"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_with_structure.invoke(\"Tell me a joke about cats\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3066b2af",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -177,7 +177,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks import ArgillaCallbackHandler\n",
|
||||
"from langchain_community.callbacks.argilla_callback import ArgillaCallbackHandler\n",
|
||||
"\n",
|
||||
"argilla_callback = ArgillaCallbackHandler(\n",
|
||||
" dataset_name=\"langchain-dataset\",\n",
|
||||
@@ -213,7 +213,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.callbacks import ArgillaCallbackHandler, StdOutCallbackHandler\n",
|
||||
"from langchain_core.callbacks.stdout import StdOutCallbackHandler\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"argilla_callback = ArgillaCallbackHandler(\n",
|
||||
@@ -277,9 +277,9 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.callbacks import ArgillaCallbackHandler, StdOutCallbackHandler\n",
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain_core.callbacks.stdout import StdOutCallbackHandler\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"argilla_callback = ArgillaCallbackHandler(\n",
|
||||
@@ -361,7 +361,7 @@
|
||||
],
|
||||
"source": [
|
||||
"from langchain.agents import AgentType, initialize_agent, load_tools\n",
|
||||
"from langchain.callbacks import ArgillaCallbackHandler, StdOutCallbackHandler\n",
|
||||
"from langchain_core.callbacks.stdout import StdOutCallbackHandler\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"argilla_callback = ArgillaCallbackHandler(\n",
|
||||
|
||||
@@ -97,7 +97,7 @@
|
||||
"if \"LANGCHAIN_COMET_TRACING\" in os.environ:\n",
|
||||
" del os.environ[\"LANGCHAIN_COMET_TRACING\"]\n",
|
||||
"\n",
|
||||
"from langchain.callbacks.tracers.comet import CometTracer\n",
|
||||
"from langchain_community.callbacks.tracers.comet import CometTracer\n",
|
||||
"\n",
|
||||
"tracer = CometTracer()\n",
|
||||
"\n",
|
||||
@@ -130,7 +130,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -118,7 +118,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks.confident_callback import DeepEvalCallbackHandler\n",
|
||||
"from langchain_community.callbacks.confident_callback import DeepEvalCallbackHandler\n",
|
||||
"\n",
|
||||
"deepeval_callback = DeepEvalCallbackHandler(\n",
|
||||
" implementation_name=\"langchainQuickstart\", metrics=[answer_relevancy_metric]\n",
|
||||
@@ -215,10 +215,10 @@
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"from langchain.chains import RetrievalQA\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_community.vectorstores import Chroma\n",
|
||||
"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"text_file_url = \"https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt\"\n",
|
||||
"\n",
|
||||
@@ -296,7 +296,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.12"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
@@ -65,6 +65,23 @@
|
||||
"Ensure you have installed the `context-python` package before using the handler."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2024-03-06T19:05:26.534124Z",
|
||||
"iopub.status.busy": "2024-03-06T19:05:26.533924Z",
|
||||
"iopub.status.idle": "2024-03-06T19:05:26.798727Z",
|
||||
"shell.execute_reply": "2024-03-06T19:05:26.798135Z",
|
||||
"shell.execute_reply.started": "2024-03-06T19:05:26.534109Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.callbacks.context_callback import ContextCallbackHandler"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
@@ -73,8 +90,6 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain.callbacks import ContextCallbackHandler\n",
|
||||
"\n",
|
||||
"token = os.environ[\"CONTEXT_API_TOKEN\"]\n",
|
||||
"\n",
|
||||
"context_callback = ContextCallbackHandler(token)"
|
||||
@@ -99,7 +114,6 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain.callbacks import ContextCallbackHandler\n",
|
||||
"from langchain.schema import (\n",
|
||||
" HumanMessage,\n",
|
||||
" SystemMessage,\n",
|
||||
@@ -155,7 +169,6 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain.callbacks import ContextCallbackHandler\n",
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain.prompts.chat import (\n",
|
||||
|
||||
docs/docs/integrations/callbacks/fiddler.ipynb (new file, 215 lines)
@@ -0,0 +1,215 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0cebf93b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Fiddler\n",
|
||||
"\n",
|
||||
">[Fiddler](https://www.fiddler.ai/) is the pioneer in enterprise Generative and Predictive system ops, offering a unified platform that enables Data Science, MLOps, Risk, Compliance, Analytics, and other LOB teams to monitor, explain, analyze, and improve ML deployments at enterprise scale. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "38d746c2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Installation and Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e0151955",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install langchain langchain-community langchain-openai fiddler-client"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5662f2e5-d510-4eef-b44b-fa929e5b4ad4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Fiddler connection details "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "64fac323",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"*Before you can add information about your model with Fiddler*\n",
|
||||
"\n",
|
||||
"1. The URL you're using to connect to Fiddler\n",
|
||||
"2. Your organization ID\n",
|
||||
"3. Your authorization token\n",
|
||||
"\n",
|
||||
"These can be found by navigating to the *Settings* page of your Fiddler environment."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f6f8b73e-d350-40f0-b7a4-fb1e68a65a22",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"URL = \"\" # Your Fiddler instance URL, Make sure to include the full URL (including https://). For example: https://demo.fiddler.ai\n",
|
||||
"ORG_NAME = \"\"\n",
|
||||
"AUTH_TOKEN = \"\" # Your Fiddler instance auth token\n",
|
||||
"\n",
|
||||
"# Fiddler project and model names, used for model registration\n",
|
||||
"PROJECT_NAME = \"\"\n",
|
||||
"MODEL_NAME = \"\" # Model name in Fiddler"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0645805a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Create a fiddler callback handler instance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "13de4f9a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.callbacks.fiddler_callback import FiddlerCallbackHandler\n",
|
||||
"\n",
|
||||
"fiddler_handler = FiddlerCallbackHandler(\n",
|
||||
" url=URL,\n",
|
||||
" org=ORG_NAME,\n",
|
||||
" project=PROJECT_NAME,\n",
|
||||
" model=MODEL_NAME,\n",
|
||||
" api_key=AUTH_TOKEN,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2276368e-f1dc-46be-afe3-18796e7a66f2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example 1 : Basic Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c9de0fd1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"# Note : Make sure openai API key is set in the environment variable OPENAI_API_KEY\n",
|
||||
"llm = OpenAI(temperature=0, streaming=True, callbacks=[fiddler_handler])\n",
|
||||
"output_parser = StrOutputParser()\n",
|
||||
"\n",
|
||||
"chain = llm | output_parser\n",
|
||||
"\n",
|
||||
"# Invoke the chain. Invocation will be logged to Fiddler, and metrics automatically generated\n",
|
||||
"chain.invoke(\"How far is moon from earth?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "309bde0b-e1ce-446c-98ac-3690c26a2676",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Few more invocations\n",
|
||||
"chain.invoke(\"What is the temperature on Mars?\")\n",
|
||||
"chain.invoke(\"How much is 2 + 200000?\")\n",
|
||||
"chain.invoke(\"Which movie won the oscars this year?\")\n",
|
||||
"chain.invoke(\"Can you write me a poem about insomnia?\")\n",
|
||||
"chain.invoke(\"How are you doing today?\")\n",
|
||||
"chain.invoke(\"What is the meaning of life?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "48fa4782-c867-4510-9430-4ffa3de3b5eb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example 2 : Chain with prompt templates"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2aa2c220-8946-4844-8d3c-8f69d744d13f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import (\n",
|
||||
" ChatPromptTemplate,\n",
|
||||
" FewShotChatMessagePromptTemplate,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"examples = [\n",
|
||||
" {\"input\": \"2+2\", \"output\": \"4\"},\n",
|
||||
" {\"input\": \"2+3\", \"output\": \"5\"},\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"example_prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" (\"ai\", \"{output}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"few_shot_prompt = FewShotChatMessagePromptTemplate(\n",
|
||||
" example_prompt=example_prompt,\n",
|
||||
" examples=examples,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"final_prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You are a wondrous wizard of math.\"),\n",
|
||||
" few_shot_prompt,\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Note : Make sure openai API key is set in the environment variable OPENAI_API_KEY\n",
|
||||
"llm = OpenAI(temperature=0, streaming=True, callbacks=[fiddler_handler])\n",
|
||||
"\n",
|
||||
"chain = final_prompt | llm\n",
|
||||
"\n",
|
||||
"# Invoke the chain. Invocation will be logged to Fiddler, and metrics automatically generated\n",
|
||||
"chain.invoke({\"input\": \"What's the square of a triangle?\"})"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -39,6 +39,16 @@
|
||||
"%pip install --upgrade --quiet tiktoken"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3c9d9424-0879-4f14-91e5-1292e22820d7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.callbacks.infino_callback import InfinoCallbackHandler"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
@@ -53,7 +63,6 @@
|
||||
"import matplotlib.dates as md\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from infinopy import InfinoClient\n",
|
||||
"from langchain.callbacks import InfinoCallbackHandler\n",
|
||||
"from langchain_openai import OpenAI"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -160,6 +160,25 @@
|
||||
"You can collect input LLM prompts and output responses in a LabelStudio project, connecting it via `LabelStudioCallbackHandler`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2024-03-06T19:07:34.462103Z",
|
||||
"iopub.status.busy": "2024-03-06T19:07:34.461651Z",
|
||||
"iopub.status.idle": "2024-03-06T19:07:34.661936Z",
|
||||
"shell.execute_reply": "2024-03-06T19:07:34.661284Z",
|
||||
"shell.execute_reply.started": "2024-03-06T19:07:34.462067Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.callbacks.labelstudio_callback import (\n",
|
||||
" LabelStudioCallbackHandler,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -170,7 +189,6 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks import LabelStudioCallbackHandler\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"llm = OpenAI(\n",
|
||||
@@ -241,7 +259,6 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks import LabelStudioCallbackHandler\n",
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
|
||||
@@ -19,7 +19,7 @@ export LLMONITOR_APP_ID="..."
|
||||
If you'd prefer not to set an environment variable, you can pass the key directly when initializing the callback handler:
|
||||
|
||||
```python
|
||||
from langchain.callbacks import LLMonitorCallbackHandler
|
||||
from langchain_community.callbacks.llmonitor_callback import LLMonitorCallbackHandler
|
||||
|
||||
handler = LLMonitorCallbackHandler(app_id="...")
|
||||
```
|
||||
@@ -29,7 +29,6 @@ handler = LLMonitorCallbackHandler(app_id="...")
|
||||
```python
|
||||
from langchain_openai import OpenAI
|
||||
from langchain_openai import ChatOpenAI
|
||||
from langchain.callbacks import LLMonitorCallbackHandler
|
||||
|
||||
handler = LLMonitorCallbackHandler()
|
||||
|
||||
@@ -53,9 +52,9 @@ Example:
|
||||
|
||||
```python
|
||||
from langchain_openai import ChatOpenAI
|
||||
from langchain_community.callbacks.llmonitor_callback import LLMonitorCallbackHandler
|
||||
from langchain_core.messages import SystemMessage, HumanMessage
|
||||
from langchain.agents import OpenAIFunctionsAgent, AgentExecutor, tool
|
||||
from langchain.callbacks import LLMonitorCallbackHandler
|
||||
|
||||
llm = ChatOpenAI(temperature=0)
|
||||
|
||||
@@ -86,7 +85,8 @@ Another example:
|
||||
```python
|
||||
from langchain.agents import load_tools, initialize_agent, AgentType
|
||||
from langchain_openai import OpenAI
|
||||
from langchain.callbacks import LLMonitorCallbackHandler
|
||||
from langchain_community.callbacks.llmonitor_callback import LLMonitorCallbackHandler
|
||||
|
||||
|
||||
handler = LLMonitorCallbackHandler()
|
||||
|
||||
@@ -104,7 +104,7 @@ agent.run(
|
||||
User tracking allows you to identify your users, track their costs, conversations, and more.
|
||||
|
||||
```python
|
||||
from langchain.callbacks.llmonitor_callback import LLMonitorCallbackHandler, identify
|
||||
from langchain_community.callbacks.llmonitor_callback import LLMonitorCallbackHandler, identify
|
||||
|
||||
with identify("user-123"):
|
||||
llm("Tell me a joke")
|
||||
|
||||
@@ -68,14 +68,32 @@
|
||||
"In this simple example we use `PromptLayerCallbackHandler` with `ChatOpenAI`. We add a PromptLayer tag named `chatopenai`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2024-03-06T19:10:56.673622Z",
|
||||
"iopub.status.busy": "2024-03-06T19:10:56.673421Z",
|
||||
"iopub.status.idle": "2024-03-06T19:10:56.887519Z",
|
||||
"shell.execute_reply": "2024-03-06T19:10:56.886895Z",
|
||||
"shell.execute_reply.started": "2024-03-06T19:10:56.673608Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import promptlayer # Don't forget this 🍰\n",
|
||||
"from langchain_community.callbacks.promptlayer_callback import (\n",
|
||||
" PromptLayerCallbackHandler,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import promptlayer # Don't forget this 🍰\n",
|
||||
"from langchain.callbacks import PromptLayerCallbackHandler\n",
|
||||
"from langchain.schema import (\n",
|
||||
" HumanMessage,\n",
|
||||
")\n",
|
||||
@@ -108,8 +126,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import promptlayer # Don't forget this 🍰\n",
|
||||
"from langchain.callbacks import PromptLayerCallbackHandler\n",
|
||||
"from langchain_community.llms import GPT4All\n",
|
||||
"\n",
|
||||
"model = GPT4All(model=\"./models/gpt4all-model.bin\", n_ctx=512, n_threads=8)\n",
|
||||
@@ -140,8 +156,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import promptlayer # Don't forget this 🍰\n",
|
||||
"from langchain.callbacks import PromptLayerCallbackHandler\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"\n",
|
||||
|
||||
@@ -70,6 +70,16 @@
|
||||
"os.environ[\"SERPAPI_API_KEY\"] = \"<ADD-KEY-HERE>\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e79dc1c0-b9dc-4652-9059-f3a8aa97b74a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.callbacks.sagemaker_callback import SageMakerCallbackHandler"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -80,7 +90,6 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import initialize_agent, load_tools\n",
|
||||
"from langchain.callbacks import SageMakerCallbackHandler\n",
|
||||
"from langchain.chains import LLMChain, SimpleSequentialChain\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
|
||||
@@ -28,7 +28,9 @@ You can run `streamlit hello` to load a sample app and validate your install suc
|
||||
To create a `StreamlitCallbackHandler`, you just need to provide a parent container to render the output.
|
||||
|
||||
```python
|
||||
from langchain_community.callbacks import StreamlitCallbackHandler
|
||||
from langchain_community.callbacks.streamlit import (
|
||||
StreamlitCallbackHandler,
|
||||
)
|
||||
import streamlit as st
|
||||
|
||||
st_callback = StreamlitCallbackHandler(st.container())
|
||||
@@ -47,7 +49,6 @@ thoughts and actions live in your app.
|
||||
import streamlit as st
|
||||
from langchain import hub
|
||||
from langchain.agents import AgentExecutor, create_react_agent, load_tools
|
||||
from langchain_community.callbacks import StreamlitCallbackHandler
|
||||
from langchain_openai import OpenAI
|
||||
|
||||
llm = OpenAI(temperature=0, streaming=True)
|
||||
|
||||
@@ -65,6 +65,16 @@
|
||||
"os.environ[\"TRUBRICS_PASSWORD\"] = \"***\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "36fa67da-8a05-4d54-b0a3-dc173f3107a0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.callbacks.trubrics_callback import TrubricsCallbackHandler"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cd7177b0-a9e8-45ae-adb0-ea779376511b",
|
||||
@@ -148,7 +158,6 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks import TrubricsCallbackHandler\n",
|
||||
"from langchain_openai import OpenAI"
|
||||
]
|
||||
},
|
||||
@@ -266,7 +275,6 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks import TrubricsCallbackHandler\n",
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -5,9 +5,13 @@
|
||||
"id": "5125a1e3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Anthropic Functions\n",
|
||||
"# Anthropic Tools\n",
|
||||
"\n",
|
||||
"This notebook shows how to use an experimental wrapper around Anthropic that gives it the same API as OpenAI Functions."
|
||||
"This notebook shows how to use an experimental wrapper around Anthropic that gives it tool calling and structured output capabilities. It follows Anthropic's guide [here](https://docs.anthropic.com/claude/docs/functions-external-tools)\n",
|
||||
"\n",
|
||||
"The wrapper is available from the `langchain-anthropic` package, and it also requires the optional dependency `defusedxml` for parsing XML output from the llm.\n",
|
||||
"\n",
|
||||
"Note: this is a beta feature that will be replaced by Anthropic's formal implementation of tool calling, but it is useful for testing and experimentation in the meantime."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -17,7 +21,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_experimental.llms.anthropic_functions import AnthropicFunctions"
|
||||
"%pip install -qU langchain-anthropic defusedxml\n",
|
||||
"from langchain_anthropic.experimental import ChatAnthropicTools"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -25,19 +30,39 @@
|
||||
"id": "65499965",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize Model\n",
|
||||
"## Tool Binding\n",
|
||||
"\n",
|
||||
"You can initialize this wrapper the same way you'd initialize ChatAnthropic"
|
||||
"`ChatAnthropicTools` exposes a `bind_tools` method that allows you to pass in Pydantic models or BaseTools to the llm."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"id": "e1d535f6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='', additional_kwargs={'tool_calls': [{'function': {'name': 'Person', 'arguments': '{\"name\": \"Erick\", \"age\": \"27\"}'}, 'type': 'function'}]})"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model = AnthropicFunctions(model=\"claude-2\")"
|
||||
"from langchain_core.pydantic_v1 import BaseModel\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Person(BaseModel):\n",
|
||||
" name: str\n",
|
||||
" age: int\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"model = ChatAnthropicTools(model=\"claude-3-opus-20240229\").bind_tools(tools=[Person])\n",
|
||||
"model.invoke(\"I am a 27 year old named Erick\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -45,197 +70,33 @@
|
||||
"id": "fcc9eaf4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Passing in functions\n",
|
||||
"## Structured Output\n",
|
||||
"\n",
|
||||
"You can now pass in functions in a similar way"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "0779c320",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"functions = [\n",
|
||||
" {\n",
|
||||
" \"name\": \"get_current_weather\",\n",
|
||||
" \"description\": \"Get the current weather in a given location\",\n",
|
||||
" \"parameters\": {\n",
|
||||
" \"type\": \"object\",\n",
|
||||
" \"properties\": {\n",
|
||||
" \"location\": {\n",
|
||||
" \"type\": \"string\",\n",
|
||||
" \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
|
||||
" },\n",
|
||||
" \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
|
||||
" },\n",
|
||||
" \"required\": [\"location\"],\n",
|
||||
" },\n",
|
||||
" }\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "ad75a933",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.messages import HumanMessage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "fc703085",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"response = model.invoke(\n",
|
||||
" [HumanMessage(content=\"whats the weater in boston?\")], functions=functions\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "04d7936a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=' ', additional_kwargs={'function_call': {'name': 'get_current_weather', 'arguments': '{\"location\": \"Boston, MA\", \"unit\": \"fahrenheit\"}'}}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0072fdba",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using for extraction\n",
|
||||
"\n",
|
||||
"You can now use this for extraction."
|
||||
"`ChatAnthropicTools` also implements the [`with_structured_output` spec](/docs/guides/structured_output) for extracting values. Note: this may not be as stable as with models that explicitly offer tool calling."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "7af5c567",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import create_extraction_chain\n",
|
||||
"\n",
|
||||
"schema = {\n",
|
||||
" \"properties\": {\n",
|
||||
" \"name\": {\"type\": \"string\"},\n",
|
||||
" \"height\": {\"type\": \"integer\"},\n",
|
||||
" \"hair_color\": {\"type\": \"string\"},\n",
|
||||
" },\n",
|
||||
" \"required\": [\"name\", \"height\"],\n",
|
||||
"}\n",
|
||||
"inp = \"\"\"\n",
|
||||
"Alex is 5 feet tall. Claudia is 1 feet taller Alex and jumps higher than him. Claudia is a brunette and Alex is blonde.\n",
|
||||
" \"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bd01082a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = create_extraction_chain(schema, model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b5a23e9f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke(inp)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "90ec959e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using for tagging\n",
|
||||
"\n",
|
||||
"You can now use this for tagging"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "03c1eb0d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import create_tagging_chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "581c0ece",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"schema = {\n",
|
||||
" \"properties\": {\n",
|
||||
" \"sentiment\": {\"type\": \"string\"},\n",
|
||||
" \"aggressiveness\": {\"type\": \"integer\"},\n",
|
||||
" \"language\": {\"type\": \"string\"},\n",
|
||||
" }\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "d9a8570e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = create_tagging_chain(schema, model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "cf37d679",
|
||||
"id": "0779c320",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'sentiment': 'positive', 'aggressiveness': '0', 'language': 'english'}"
|
||||
"Person(name='Erick', age=27)"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.invoke(\"this is really cool\")"
|
||||
"chain = ChatAnthropicTools(model=\"claude-3-opus-20240229\").with_structured_output(\n",
|
||||
" Person\n",
|
||||
")\n",
|
||||
"chain.invoke(\"I am a 27 year old named Erick\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -255,7 +116,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.0"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -23,6 +23,14 @@
|
||||
"This example goes over how to use LangChain to interact with `ChatFireworks` models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "4a7c795e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"%pip install langchain-fireworks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
@@ -35,10 +43,8 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain_community.chat_models.fireworks import ChatFireworks\n",
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage"
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage\n",
|
||||
"from langchain_fireworks import ChatFireworks"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -48,7 +54,7 @@
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"\n",
|
||||
"1. Make sure the `fireworks-ai` package is installed in your environment.\n",
|
||||
"1. Make sure the `langchain-fireworks` package is installed in your environment.\n",
|
||||
"2. Sign in to [Fireworks AI](http://fireworks.ai) for the an API Key to access our models, and make sure it is set as the `FIREWORKS_API_KEY` environment variable.\n",
|
||||
"3. Set up your model using a model id. If the model is not set, the default model is fireworks-llama-v2-7b-chat. See the full, most up-to-date model list on [app.fireworks.ai](https://app.fireworks.ai)."
|
||||
]
|
||||
@@ -67,7 +73,7 @@
|
||||
" os.environ[\"FIREWORKS_API_KEY\"] = getpass.getpass(\"Fireworks API Key:\")\n",
|
||||
"\n",
|
||||
"# Initialize a Fireworks chat model\n",
|
||||
"chat = ChatFireworks(model=\"accounts/fireworks/models/llama-v2-13b-chat\")"
|
||||
"chat = ChatFireworks(model=\"accounts/fireworks/models/mixtral-8x7b-instruct\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -82,17 +88,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 3,
|
||||
"id": "72340871-ae2f-415f-b399-0777d32dc379",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"Hello! My name is LLaMA, I'm a large language model trained by a team of researcher at Meta AI. My primary function is to assist and converse with users like you, answering questions and engaging in discussion to the best of my ability. I'm here to help and provide information on a wide range of topics, so feel free to ask me anything!\", additional_kwargs={}, example=False)"
|
||||
"AIMessage(content=\"Hello! I'm an AI language model, a helpful assistant designed to chat and assist you with any questions or information you might need. I'm here to make your experience as smooth and enjoyable as possible. How can I assist you today?\")"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -102,22 +108,22 @@
|
||||
"system_message = SystemMessage(content=\"You are to chat with the user.\")\n",
|
||||
"human_message = HumanMessage(content=\"Who are you?\")\n",
|
||||
"\n",
|
||||
"chat([system_message, human_message])"
|
||||
"chat.invoke([system_message, human_message])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"id": "68c6b1fa-2ff7-4a63-8d88-3cec302180b8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"Oh hello there! *giggle* It's such a beautiful day today, isn\", additional_kwargs={}, example=False)"
|
||||
"AIMessage(content=\"I'm an AI and do not have the ability to experience the weather firsthand. However,\")"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -125,200 +131,70 @@
|
||||
"source": [
|
||||
"# Setting additional parameters: temperature, max_tokens, top_p\n",
|
||||
"chat = ChatFireworks(\n",
|
||||
" model=\"accounts/fireworks/models/llama-v2-13b-chat\",\n",
|
||||
" model_kwargs={\"temperature\": 1, \"max_tokens\": 20, \"top_p\": 1},\n",
|
||||
" model=\"accounts/fireworks/models/mixtral-8x7b-instruct\",\n",
|
||||
" temperature=1,\n",
|
||||
" max_tokens=20,\n",
|
||||
")\n",
|
||||
"system_message = SystemMessage(content=\"You are to chat with the user.\")\n",
|
||||
"human_message = HumanMessage(content=\"How's the weather today?\")\n",
|
||||
"chat([system_message, human_message])"
|
||||
"chat.invoke([system_message, human_message])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d93aa186-39cf-4e1a-aa32-01ed31d43bc8",
|
||||
"id": "8c44cb36",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Simple Chat Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "28763fbc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can use chat models on fireworks, with system prompts and memory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "cbe29efc-37c3-4c83-8b84-b8bba1a1e589",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain_community.chat_models import ChatFireworks\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"# Tool Calling\n",
|
||||
"\n",
|
||||
"llm = ChatFireworks(\n",
|
||||
" model=\"accounts/fireworks/models/llama-v2-13b-chat\",\n",
|
||||
" model_kwargs={\"temperature\": 0, \"max_tokens\": 64, \"top_p\": 1.0},\n",
|
||||
")\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You are a helpful chatbot that speaks like a pirate.\"),\n",
|
||||
" MessagesPlaceholder(variable_name=\"history\"),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "02991e05-a38e-47d4-9ab3-7e630a8ead55",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Initially, there is no chat memory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "e2fd186f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'history': []}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"memory = ConversationBufferMemory(return_messages=True)\n",
|
||||
"memory.load_memory_variables({})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bee461da",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create a simple chain with memory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "86972e54",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = (\n",
|
||||
" RunnablePassthrough.assign(\n",
|
||||
" history=memory.load_memory_variables | (lambda x: x[\"history\"])\n",
|
||||
" )\n",
|
||||
" | prompt\n",
|
||||
" | llm.bind(stop=[\"\\n\\n\"])\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f48cb142",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run the chain with a simple question, expecting an answer aligned with the system message provided."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "db3ad5b1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"Ahoy there, me hearty! Yer a fine lookin' swashbuckler, I can see that! *adjusts eye patch* What be bringin' ye to these waters? Are ye here to plunder some booty or just to enjoy the sea breeze?\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"inputs = {\"input\": \"hi im bob\"}\n",
|
||||
"response = chain.invoke(inputs)\n",
|
||||
"response"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "338f4bae",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Save the memory context, then read it back to inspect contents"
|
||||
"Fireworks offers the [`FireFunction-v1` tool calling model](https://fireworks.ai/blog/firefunction-v1-gpt-4-level-function-calling). You can use it for structured output and function calling use cases:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "257eec01",
|
||||
"id": "ee2db682",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'history': [HumanMessage(content='hi im bob', additional_kwargs={}, example=False),\n",
|
||||
" AIMessage(content=\"Ahoy there, me hearty! Yer a fine lookin' swashbuckler, I can see that! *adjusts eye patch* What be bringin' ye to these waters? Are ye here to plunder some booty or just to enjoy the sea breeze?\", additional_kwargs={}, example=False)]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'function': {'arguments': '{\"name\": \"Erick\", \"age\": 27}',\n",
|
||||
" 'name': 'ExtractFields'},\n",
|
||||
" 'id': 'call_J0WYP2TLenaFw3UeVU0UnWqx',\n",
|
||||
" 'index': 0,\n",
|
||||
" 'type': 'function'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"memory.save_context(inputs, {\"output\": response.content})\n",
|
||||
"memory.load_memory_variables({})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "08441347",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now as another question that requires use of the memory."
|
||||
"from pprint import pprint\n",
|
||||
"\n",
|
||||
"from langchain_core.pydantic_v1 import BaseModel\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class ExtractFields(BaseModel):\n",
|
||||
" name: str\n",
|
||||
" age: int\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chat = ChatFireworks(\n",
|
||||
" model=\"accounts/fireworks/models/firefunction-v1\",\n",
|
||||
").bind_tools([ExtractFields])\n",
|
||||
"\n",
|
||||
"result = chat.invoke(\"I am a 27 year old named Erick\")\n",
|
||||
"\n",
|
||||
"pprint(result.additional_kwargs[\"tool_calls\"][0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "7f5f2820",
|
||||
"execution_count": null,
|
||||
"id": "2321a4e6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"Arrrr, ye be askin' about yer name, eh? Well, me matey, I be knowin' ye as Bob, the scurvy dog! *winks* But if ye want me to call ye somethin' else, just let me know, and I\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"inputs = {\"input\": \"whats my name\"}\n",
|
||||
"chain.invoke(inputs)"
|
||||
]
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -337,7 +213,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -98,9 +98,7 @@
|
||||
"prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n",
|
||||
"\n",
|
||||
"chain = prompt | chat\n",
|
||||
"chain.invoke({\n",
|
||||
" \"text\": \"Explain the importance of low latency LLMs.\"\n",
|
||||
"})"
|
||||
"chain.invoke({\"text\": \"Explain the importance of low latency LLMs.\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
docs/docs/integrations/chat/maritalk.ipynb (new file, 201 lines)
@@ -0,0 +1,201 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a href=\"https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/docs/integrations/chat/maritalk.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
|
||||
"\n",
|
||||
"# Maritalk\n",
|
||||
"\n",
|
||||
"## Introduction\n",
|
||||
"\n",
|
||||
"MariTalk is an assistant developed by the Brazilian company [Maritaca AI](https://www.maritaca.ai).\n",
|
||||
"MariTalk is based on language models that have been specially trained to understand Portuguese well.\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how to use MariTalk with LangChain through two examples:\n",
|
||||
"\n",
|
||||
"1. A simple example of how to use MariTalk to perform a task.\n",
|
||||
"2. LLM + RAG: The second example shows how to answer a question whose answer is found in a long document that does not fit within the token limit of MariTalk. For this, we will use a simple searcher (BM25) to first search the document for the most relevant sections and then feed them to MariTalk for answering."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation\n",
|
||||
"First, install the LangChain library (and all its dependencies) using the following command:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install langchain langchain-core langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API Key\n",
|
||||
"You will need an API key that can be obtained from chat.maritaca.ai (\"Chaves da API\" section)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"### Example 1 - Pet Name Suggestions\n",
|
||||
"\n",
|
||||
"Let's define our language model, ChatMaritalk, and configure it with your API key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts.chat import ChatPromptTemplate\n",
|
||||
"from langchain_community.chat_models import ChatMaritalk\n",
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"\n",
|
||||
"llm = ChatMaritalk(\n",
|
||||
" api_key=\"\", # Insert your API key here\n",
|
||||
" temperature=0.7,\n",
|
||||
" max_tokens=100,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"output_parser = StrOutputParser()\n",
|
||||
"\n",
|
||||
"chat_prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are an assistant specialized in suggesting pet names. Given the animal, you must suggest 4 names.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"I have a {animal}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = chat_prompt | llm | output_parser\n",
|
||||
"\n",
|
||||
"response = chain.invoke({\"animal\": \"dog\"})\n",
|
||||
"print(response) # should answer something like \"1. Max\\n2. Bella\\n3. Charlie\\n4. Rocky\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example 2 - RAG + LLM: UNICAMP 2024 Entrance Exam Question Answering System\n",
|
||||
"For this example, we need to install some extra libraries:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install unstructured rank_bm25 pdf2image pdfminer-six pikepdf pypdf unstructured_inference fastapi kaleido uvicorn \"pillow<10.1.0\" pillow_heif -q"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Loading the database\n",
|
||||
"\n",
|
||||
"The first step is to create a database with the information from the notice. For this, we will download the notice from the COMVEST website and segment the extracted text into 500-character windows."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import OnlinePDFLoader\n",
|
||||
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
||||
"\n",
|
||||
"# Loading the COMVEST 2024 notice\n",
|
||||
"loader = OnlinePDFLoader(\n",
|
||||
" \"https://www.comvest.unicamp.br/wp-content/uploads/2023/10/31-2023-Dispoe-sobre-o-Vestibular-Unicamp-2024_com-retificacao.pdf\"\n",
|
||||
")\n",
|
||||
"data = loader.load()\n",
|
||||
"\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(\n",
|
||||
" chunk_size=500, chunk_overlap=100, separators=[\"\\n\", \" \", \"\"]\n",
|
||||
")\n",
|
||||
"texts = text_splitter.split_documents(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Creating a Searcher\n",
|
||||
"Now that we have our database, we need a searcher. For this example, we will use a simple BM25 as a search system, but this could be replaced by any other searcher (such as search via embeddings)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.retrievers import BM25Retriever\n",
|
||||
"\n",
|
||||
"retriever = BM25Retriever.from_documents(texts)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Combining Search System + LLM\n",
|
||||
"Now that we have our searcher, we just need to implement a prompt specifying the task and invoke the chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.question_answering import load_qa_chain\n",
|
||||
"\n",
|
||||
"prompt = \"\"\"Baseado nos seguintes documentos, responda a pergunta abaixo.\n",
|
||||
"\n",
|
||||
"{context}\n",
|
||||
"\n",
|
||||
"Pergunta: {query}\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"qa_prompt = ChatPromptTemplate.from_messages([(\"human\", prompt)])\n",
|
||||
"\n",
|
||||
"chain = load_qa_chain(llm, chain_type=\"stuff\", verbose=True, prompt=qa_prompt)\n",
|
||||
"\n",
|
||||
"query = \"Qual o tempo máximo para realização da prova?\"\n",
|
||||
"\n",
|
||||
"docs = retriever.get_relevant_documents(query)\n",
|
||||
"\n",
|
||||
"chain.invoke(\n",
|
||||
" {\"input_documents\": docs, \"query\": query}\n",
|
||||
") # Should output something like: \"O tempo máximo para realização da prova é de 5 horas.\""
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
@@ -29,12 +29,12 @@
},
"outputs": [],
"source": [
-"from langchain.prompts.chat import (\n",
+"from langchain_core.messages import HumanMessage, SystemMessage\n",
+"from langchain_core.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
")\n",
-"from langchain_core.messages import HumanMessage, SystemMessage\n",
"from langchain_openai import ChatOpenAI"
]
},
@@ -91,7 +91,7 @@
" content=\"Translate this sentence from English to French. I love programming.\"\n",
" ),\n",
"]\n",
-"chat(messages)"
+"chat.invoke(messages)"
]
},
{
@@ -144,7 +144,7 @@
")\n",
"\n",
"# get a chat completion from the formatted messages\n",
-"chat(\n",
+"chat.invoke(\n",
" chat_prompt.format_prompt(\n",
" input_language=\"English\", output_language=\"French\", text=\"I love programming.\"\n",
" ).to_messages()\n",
229 docs/docs/integrations/chat/perplexity.ipynb Normal file
@@ -0,0 +1,229 @@
{
"cells": [
{
"cell_type": "raw",
"id": "a016701c",
"metadata": {},
"source": [
"---\n",
"sidebar_label: Perplexity\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "bf733a38-db84-4363-89e2-de6735c37230",
"metadata": {},
"source": [
"# ChatPerplexity\n",
"\n",
"This notebook covers how to get started with Perplexity chat models."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "d4a7c55d-b235-4ca4-a579-c90cc9570da9",
"metadata": {
"ExecuteTime": {
"end_time": "2024-01-19T11:25:00.590587Z",
"start_time": "2024-01-19T11:25:00.127293Z"
},
"tags": []
},
"outputs": [],
"source": [
"from langchain_community.chat_models import ChatPerplexity\n",
"from langchain_core.prompts import ChatPromptTemplate"
]
},
{
"cell_type": "markdown",
"id": "97a8ce3a",
"metadata": {},
"source": [
"The code provided assumes that your PPLX_API_KEY is set in your environment variables. If you would like to manually specify your API key and also choose a different model, you can use the following code:\n",
"\n",
"```python\n",
"chat = ChatPerplexity(temperature=0, pplx_api_key=\"YOUR_API_KEY\", model=\"pplx-70b-online\")\n",
"```\n",
"\n",
"You can check a list of available models [here](https://docs.perplexity.ai/docs/model-cards). You can also set the API key dynamically by taking it as an input in this notebook."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d3e49d78",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"PPLX_API_KEY = getpass()\n",
"os.environ[\"PPLX_API_KEY\"] = PPLX_API_KEY"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "70cf04e8-423a-4ff6-8b09-f11fb711c817",
"metadata": {
"ExecuteTime": {
"end_time": "2024-01-19T11:25:04.349676Z",
"start_time": "2024-01-19T11:25:03.964930Z"
},
"tags": []
},
"outputs": [],
"source": [
"chat = ChatPerplexity(temperature=0, model=\"pplx-70b-online\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8199ef8f-eb8b-4253-9ea0-6c24a013ca4c",
"metadata": {
"ExecuteTime": {
"end_time": "2024-01-19T11:25:07.274418Z",
"start_time": "2024-01-19T11:25:05.898031Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'The Higgs Boson is an elementary subatomic particle that plays a crucial role in the Standard Model of particle physics, which accounts for three of the four fundamental forces governing the behavior of our universe: the strong and weak nuclear forces, electromagnetism, and gravity. The Higgs Boson is important for several reasons:\\n\\n1. **Final Elementary Particle**: The Higgs Boson is the last elementary particle waiting to be discovered under the Standard Model. Its detection helps complete the Standard Model and further our understanding of the fundamental forces in the universe.\\n\\n2. **Mass Generation**: The Higgs Boson is responsible for giving mass to other particles, a process that occurs through its interaction with the Higgs field. This mass generation is essential for the formation of atoms, molecules, and the visible matter we observe in the universe.\\n\\n3. **Implications for New Physics**: While the detection of the Higgs Boson has confirmed many aspects of the Standard Model, it also opens up new possibilities for discoveries beyond the Standard Model. Further research on the Higgs Boson could reveal insights into the nature of dark matter, supersymmetry, and other exotic phenomena.\\n\\n4. **Advancements in Technology**: The search for the Higgs Boson has led to significant advancements in technology, such as the development of artificial intelligence and machine learning algorithms used in particle accelerators like the Large Hadron Collider (LHC). These advancements have not only contributed to the discovery of the Higgs Boson but also have potential applications in various other fields.\\n\\nIn summary, the Higgs Boson is important because it completes the Standard Model, plays a crucial role in mass generation, hints at new physics phenomena beyond the Standard Model, and drives advancements in technology.\\n'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"system = \"You are a helpful assistant.\"\n",
"human = \"{input}\"\n",
"prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n",
"\n",
"chain = prompt | chat\n",
"response = chain.invoke({\"input\": \"Why is the Higgs Boson important?\"})\n",
"response.content"
]
},
{
"cell_type": "markdown",
"id": "de6d8d5a",
"metadata": {},
"source": [
"You can format and structure the prompts like you would typically. In the following example, we ask the model to tell us a joke about cats."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c5fac0e9-05a4-4fc1-a3b3-e5bbb24b971b",
"metadata": {
"ExecuteTime": {
"end_time": "2024-01-19T11:25:10.448733Z",
"start_time": "2024-01-19T11:25:08.866277Z"
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'Here\\'s a joke about cats:\\n\\nWhy did the cat want math lessons from a mermaid?\\n\\nBecause it couldn\\'t find its \"core purpose\" in life!\\n\\nRemember, cats are unique and fascinating creatures, and each one has its own special traits and abilities. While some may see them as mysterious or even a bit aloof, they are still beloved pets that bring joy and companionship to their owners. So, if your cat ever seeks guidance from a mermaid, just remember that they are on their own journey to self-discovery!\\n'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chat = ChatPerplexity(temperature=0, model=\"pplx-70b-online\")\n",
"prompt = ChatPromptTemplate.from_messages([(\"human\", \"Tell me a joke about {topic}\")])\n",
"chain = prompt | chat\n",
"response = chain.invoke({\"topic\": \"cats\"})\n",
"response.content"
]
},
{
"cell_type": "markdown",
"id": "13d93dc4",
"metadata": {},
"source": [
"## `ChatPerplexity` also supports streaming functionality:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "025be980-e50d-4a68-93dc-c9c7b500ce34",
"metadata": {
"ExecuteTime": {
"end_time": "2024-01-19T11:25:24.438696Z",
"start_time": "2024-01-19T11:25:14.687480Z"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Here is a list of some famous tourist attractions in Pakistan:\n",
"\n",
"1. **Minar-e-Pakistan**: A 62-meter high minaret in Lahore that represents the history of Pakistan.\n",
"2. **Badshahi Mosque**: A historic mosque in Lahore with a capacity of 10,000 worshippers.\n",
"3. **Shalimar Gardens**: A beautiful garden in Lahore with landscaped grounds and a series of cascading pools.\n",
"4. **Pakistan Monument**: A national monument in Islamabad representing the four provinces and three districts of Pakistan.\n",
"5. **National Museum of Pakistan**: A museum in Karachi showcasing the country's cultural history.\n",
"6. **Faisal Mosque**: A large mosque in Islamabad that can accommodate up to 300,000 worshippers.\n",
"7. **Clifton Beach**: A popular beach in Karachi offering water activities and recreational facilities.\n",
"8. **Kartarpur Corridor**: A visa-free border crossing and religious corridor connecting Gurdwara Darbar Sahib in Pakistan to Gurudwara Sri Kartarpur Sahib in India.\n",
"9. **Mohenjo-daro**: An ancient Indus Valley civilization site in Sindh, Pakistan, dating back to around 2500 BCE.\n",
"10. **Hunza Valley**: A picturesque valley in Gilgit-Baltistan known for its stunning mountain scenery and unique culture.\n",
"\n",
"These attractions showcase the rich history, diverse culture, and natural beauty of Pakistan, making them popular destinations for both local and international tourists.\n"
]
}
],
"source": [
"chat = ChatPerplexity(temperature=0.7, model=\"pplx-70b-online\")\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [(\"human\", \"Give me a list of famous tourist attractions in Pakistan\")]\n",
")\n",
"chain = prompt | chat\n",
"for chunk in chain.stream({}):\n",
" print(chunk.content, end=\"\", flush=True)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
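Since `chain = prompt | chat` returns an `AIMessage`, appending an output parser yields plain strings directly. A small sketch; `StrOutputParser` is a standard langchain-core class, though the notebook above never uses it:

```python
from langchain_community.chat_models import ChatPerplexity
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

chat = ChatPerplexity(temperature=0, model="pplx-70b-online")
prompt = ChatPromptTemplate.from_messages([("human", "Tell me a joke about {topic}")])

# The parser extracts .content, so invoke() returns a str instead of an AIMessage.
chain = prompt | chat | StrOutputParser()
print(chain.invoke({"topic": "cats"}))
```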
@@ -14,6 +14,7 @@
},
{
"cell_type": "markdown",
"id": "b895d98989d4de01",
"metadata": {},
"source": [
"## Basic use"
@@ -21,13 +22,25 @@
},
{
"cell_type": "code",
-"execution_count": null,
+"execution_count": 10,
"id": "43daa39972d4c533",
"metadata": {
-"collapsed": false,
-"is_executing": true
+"ExecuteTime": {
+"end_time": "2024-02-17T07:55:44.526904Z",
+"start_time": "2024-02-17T07:55:43.166698Z"
+},
+"collapsed": false
},
-"outputs": [],
+"outputs": [
+{
+"data": {
+"text/plain": "AIMessage(content='Hello! How can I help you today?')"
+},
+"execution_count": 10,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
"source": [
"\"\"\"For basic init and call\"\"\"\n",
"from langchain_community.chat_models import ChatSparkLLM\n",
@@ -62,14 +75,27 @@
},
{
"cell_type": "code",
-"execution_count": null,
+"execution_count": 13,
"id": "7dc162bd65fec08f",
"metadata": {
"collapsed": false
},
-"outputs": [],
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"Hello! How can I help you today?"
+]
+}
+],
"source": [
-"chat = ChatSparkLLM(streaming=True)\n",
+"chat = ChatSparkLLM(\n",
+" spark_app_id=\"<app_id>\",\n",
+" spark_api_key=\"<api_key>\",\n",
+" spark_api_secret=\"<api_secret>\",\n",
+" streaming=True,\n",
+")\n",
"for chunk in chat.stream(\"Hello!\"):\n",
" print(chunk.content, end=\"\")"
]
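For comparison with the streaming cell in the hunk above, a non-streaming call would go through `invoke` and return a whole `AIMessage` at once. A sketch using the same placeholder credentials as the diff (this variant is not part of the commit):

```python
from langchain_community.chat_models import ChatSparkLLM

chat = ChatSparkLLM(
    spark_app_id="<app_id>",
    spark_api_key="<api_key>",
    spark_api_secret="<api_secret>",
)

# Returns e.g. AIMessage(content='Hello! How can I help you today?')
response = chat.invoke("Hello!")
print(response.content)
```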
297 docs/docs/integrations/document_loaders/airbyte.ipynb Normal file
@@ -0,0 +1,297 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "1f3a5ebf",
"metadata": {},
"source": [
"# AirbyteLoader"
]
},
{
"cell_type": "markdown",
"id": "35ac77b1-449b-44f7-b8f3-3494d55c286e",
"metadata": {},
"source": [
">[Airbyte](https://github.com/airbytehq/airbyte) is a data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes. It has the largest catalog of ELT connectors to data warehouses and databases.\n",
"\n",
"This covers how to load any source from Airbyte into LangChain documents.\n",
"\n",
"## Installation\n",
"\n",
"In order to use `AirbyteLoader` you need to install the `langchain-airbyte` integration package."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "180c8b74",
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain-airbyte"
]
},
{
"cell_type": "markdown",
"id": "3dd92c62",
"metadata": {},
"source": [
"Note: Currently, the `airbyte` library does not support Pydantic v2.\n",
"Please downgrade to Pydantic v1 to use this package.\n",
"\n",
"Note: This package also currently requires Python 3.10+.\n",
"\n",
"## Loading Documents\n",
"\n",
"By default, the `AirbyteLoader` will load any structured data from a stream and output yaml-formatted documents."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "721d9316",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"```yaml\n",
"academic_degree: PhD\n",
"address:\n",
" city: Lauderdale Lakes\n",
" country_code: FI\n",
" postal_code: '75466'\n",
" province: New Jersey\n",
" state: Hawaii\n",
" street_name: Stoneyford\n",
" street_number: '1112'\n",
"age: 44\n",
"blood_type: \"O\\u2212\"\n",
"created_at: '2004-04-02T13:05:27+00:00'\n",
"email: bread2099+1@outlook.com\n",
"gender: Fluid\n",
"height: '1.62'\n",
"id: 1\n",
"language: Belarusian\n",
"name: Moses\n",
"nationality: Dutch\n",
"occupation: Track Worker\n",
"telephone: 1-467-194-2318\n",
"title: M.Sc.Tech.\n",
"updated_at: '2024-02-27T16:41:01+00:00'\n",
"weight: 6\n"
]
}
],
"source": [
"from langchain_airbyte import AirbyteLoader\n",
"\n",
"loader = AirbyteLoader(\n",
" source=\"source-faker\",\n",
" stream=\"users\",\n",
" config={\"count\": 10},\n",
")\n",
"docs = loader.load()\n",
"print(docs[0].page_content[:500])"
]
},
{
"cell_type": "markdown",
"id": "fca024cb",
"metadata": {
"scrolled": true
},
"source": [
"You can also specify a custom prompt template for formatting documents:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "9fa002a5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"My name is Verdie and I am 1.73 meters tall.\n"
]
}
],
"source": [
"from langchain_core.prompts import PromptTemplate\n",
"\n",
"loader_templated = AirbyteLoader(\n",
" source=\"source-faker\",\n",
" stream=\"users\",\n",
" config={\"count\": 10},\n",
" template=PromptTemplate.from_template(\n",
" \"My name is {name} and I am {height} meters tall.\"\n",
" ),\n",
")\n",
"docs_templated = loader_templated.load()\n",
"print(docs_templated[0].page_content)"
]
},
{
"cell_type": "markdown",
"id": "d3e6d887",
"metadata": {},
"source": [
"## Lazy Loading Documents\n",
"\n",
"One of the powerful features of `AirbyteLoader` is its ability to load large documents from upstream sources. When working with large datasets, the default `.load()` behavior can be slow and memory-intensive. To avoid this, you can use the `.lazy_load()` method to load documents in a more memory-efficient manner."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "684b9187",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Just calling lazy load is quick! This took 0.0001 seconds\n"
]
}
],
"source": [
"import time\n",
"\n",
"loader = AirbyteLoader(\n",
" source=\"source-faker\",\n",
" stream=\"users\",\n",
" config={\"count\": 3},\n",
" template=PromptTemplate.from_template(\n",
" \"My name is {name} and I am {height} meters tall.\"\n",
" ),\n",
")\n",
"\n",
"start_time = time.time()\n",
"my_iterator = loader.lazy_load()\n",
"print(\n",
" f\"Just calling lazy load is quick! This took {time.time() - start_time:.4f} seconds\"\n",
")"
]
},
{
"cell_type": "markdown",
"id": "6b24a64b",
"metadata": {},
"source": [
"And you can iterate over documents as they're yielded:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "3e8355d0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"My name is Andera and I am 1.91 meters tall.\n",
"My name is Jody and I am 1.85 meters tall.\n",
"My name is Zonia and I am 1.53 meters tall.\n"
]
}
],
"source": [
"for doc in my_iterator:\n",
" print(doc.page_content)"
]
},
{
"cell_type": "markdown",
"id": "d1040d81",
"metadata": {},
"source": [
"You can also lazy load documents in an async manner with `.alazy_load()`:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "dc5d0911",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"My name is Carmelina and I am 1.74 meters tall.\n",
"My name is Ali and I am 1.90 meters tall.\n",
"My name is Rochell and I am 1.83 meters tall.\n"
]
}
],
"source": [
"loader = AirbyteLoader(\n",
" source=\"source-faker\",\n",
" stream=\"users\",\n",
" config={\"count\": 3},\n",
" template=PromptTemplate.from_template(\n",
" \"My name is {name} and I am {height} meters tall.\"\n",
" ),\n",
")\n",
"\n",
"my_async_iterator = loader.alazy_load()\n",
"\n",
"async for doc in my_async_iterator:\n",
" print(doc.page_content)"
]
},
{
"cell_type": "markdown",
"id": "ba4ede33",
"metadata": {},
"source": [
"## Configuration\n",
"\n",
"`AirbyteLoader` can be configured with the following options:\n",
"\n",
"- `source` (str, required): The name of the Airbyte source to load from.\n",
"- `stream` (str, required): The name of the stream to load from (Airbyte sources can return multiple streams)\n",
"- `config` (dict, required): The configuration for the Airbyte source\n",
"- `template` (PromptTemplate, optional): A custom prompt template for formatting documents\n",
"- `include_metadata` (bool, optional, default True): Whether to include all fields as metadata in the output documents\n",
"\n",
"The majority of the configuration will be in `config`, and you can find the specific configuration options in the \"Config field reference\" for each source in the [Airbyte documentation](https://docs.airbyte.com/integrations/)."
]
},
{
"cell_type": "markdown",
"id": "2e2ed269",
"metadata": {},
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
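The configuration list above mentions `include_metadata`, but the notebook never exercises it. A hedged sketch of turning it off so documents carry only the templated page content (the behavior is assumed from the option's description):

```python
from langchain_airbyte import AirbyteLoader
from langchain_core.prompts import PromptTemplate

loader = AirbyteLoader(
    source="source-faker",
    stream="users",
    config={"count": 3},
    template=PromptTemplate.from_template("{name} is {height} m tall."),
    # Assumed per the description above: record fields stay out of doc.metadata.
    include_metadata=False,
)
for doc in loader.lazy_load():
    print(doc.page_content, doc.metadata)
```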
@@ -0,0 +1,40 @@
-- Provisioning table "mlb_teams_2012".
--
-- psql postgresql://postgres@localhost < mlb_teams_2012.sql

DROP TABLE IF EXISTS mlb_teams_2012;
CREATE TABLE mlb_teams_2012 ("Team" VARCHAR, "Payroll (millions)" FLOAT, "Wins" BIGINT);
INSERT INTO mlb_teams_2012
("Team", "Payroll (millions)", "Wins")
VALUES
('Nationals', 81.34, 98),
('Reds', 82.20, 97),
('Yankees', 197.96, 95),
('Giants', 117.62, 94),
('Braves', 83.31, 94),
('Athletics', 55.37, 94),
('Rangers', 120.51, 93),
('Orioles', 81.43, 93),
('Rays', 64.17, 90),
('Angels', 154.49, 89),
('Tigers', 132.30, 88),
('Cardinals', 110.30, 88),
('Dodgers', 95.14, 86),
('White Sox', 96.92, 85),
('Brewers', 97.65, 83),
('Phillies', 174.54, 81),
('Diamondbacks', 74.28, 81),
('Pirates', 63.43, 79),
('Padres', 55.24, 76),
('Mariners', 81.97, 75),
('Mets', 93.35, 74),
('Blue Jays', 75.48, 73),
('Royals', 60.91, 72),
('Marlins', 118.07, 69),
('Red Sox', 173.18, 69),
('Indians', 78.43, 68),
('Twins', 94.08, 66),
('Rockies', 78.06, 64),
('Cubs', 88.19, 61),
('Astros', 60.65, 55)
;
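A quick way to sanity-check the provisioned fixture from Python, assuming the same local `postgres` server the header comment targets (the connection URL and an installed `psycopg2` driver are assumptions, not part of this file):

```python
import sqlalchemy

# Mirrors the psql invocation in the header comment.
engine = sqlalchemy.create_engine("postgresql://postgres@localhost/postgres")
with engine.connect() as conn:
    rows = conn.execute(
        sqlalchemy.text(
            'SELECT "Team", "Wins" FROM mlb_teams_2012 ORDER BY "Wins" DESC LIMIT 3'
        )
    )
    for team, wins in rows:
        print(team, wins)  # Nationals 98, Reds 97, Yankees 95
```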
380 docs/docs/integrations/document_loaders/google_alloydb.ipynb Normal file
@@ -0,0 +1,380 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "E_RJy7C1bpCT"
},
"source": [
"# Google AlloyDB for PostgreSQL\n",
"\n",
"> [AlloyDB](https://cloud.google.com/alloydb) is a fully managed relational database service that offers high performance, seamless integration, and impressive scalability. AlloyDB is 100% compatible with PostgreSQL. Extend your database application to build AI-powered experiences leveraging AlloyDB's Langchain integrations.\n",
"\n",
"This notebook goes over how to use `AlloyDB for PostgreSQL` to load Documents with the `AlloyDBLoader` class."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "xjcxaw6--Xyy"
},
"source": [
"## Before you begin\n",
"\n",
"To run this notebook, you will need to do the following:\n",
"\n",
" * [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
" * [Enable the AlloyDB Admin API.](https://console.cloud.google.com/flows/enableapi?apiid=alloydb.googleapis.com)\n",
" * [Create an AlloyDB cluster and instance.](https://cloud.google.com/alloydb/docs/cluster-create)\n",
" * [Create an AlloyDB database.](https://cloud.google.com/alloydb/docs/quickstart/create-and-connect)\n",
" * [Add a User to the database.](https://cloud.google.com/alloydb/docs/database-users/about)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "IR54BmgvdHT_"
},
"source": [
"### 🦜🔗 Library Installation\n",
"Install the integration library, `langchain-google-alloydb-pg`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "0ZITIDE160OD",
"outputId": "90e0636e-ff34-4e1e-ad37-d2a6db4a317e"
},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain-google-alloydb-pg"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "v40bB_GMcr9f"
},
"source": [
"**Colab only:** Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "6o0iGVIdDD6K"
},
"outputs": [],
"source": [
"# # Automatically restart kernel after installs so that your environment can access the new packages\n",
"# import IPython\n",
"\n",
"# app = IPython.Application.instance()\n",
"# app.kernel.do_shutdown(True)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "cTXTbj4UltKf"
},
"source": [
"### 🔐 Authentication\n",
"Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
"\n",
"* If you are using Colab to run this notebook, use the cell below and continue.\n",
"* If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from google.colab import auth\n",
"\n",
"auth.authenticate_user()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Uj02bMRAc9_c"
},
"source": [
"### ☁ Set Your Google Cloud Project\n",
"Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
"\n",
"If you don't know your project ID, try the following:\n",
"\n",
"* Run `gcloud config list`.\n",
"* Run `gcloud projects list`.\n",
"* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wnp1R1PYc9_c",
"outputId": "6502c721-a2fd-451f-b946-9f7b850d5966"
},
"outputs": [],
"source": [
"# @title Project { display-mode: \"form\" }\n",
"PROJECT_ID = \"gcp_project_id\" # @param {type:\"string\"}\n",
"\n",
"# Set the project id\n",
"! gcloud config set project {PROJECT_ID}"
]
},
{
"cell_type": "markdown",
"id": "rEWWNoNnKOgq",
"metadata": {
"id": "rEWWNoNnKOgq"
},
"source": [
"### 💡 API Enablement\n",
"The `langchain-google-alloydb-pg` package requires that you [enable the AlloyDB Admin API](https://console.cloud.google.com/flows/enableapi?apiid=alloydb.googleapis.com) in your Google Cloud Project."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "5utKIdq7KYi5",
"metadata": {
"id": "5utKIdq7KYi5"
},
"outputs": [],
"source": [
"# enable AlloyDB Admin API\n",
"!gcloud services enable alloydb.googleapis.com"
]
},
{
"cell_type": "markdown",
"id": "f8f2830ee9ca1e01",
"metadata": {
"id": "f8f2830ee9ca1e01"
},
"source": [
"## Basic Usage"
]
},
{
"cell_type": "markdown",
"id": "OMvzMWRrR6n7",
"metadata": {
"id": "OMvzMWRrR6n7"
},
"source": [
"### Set AlloyDB database variables\n",
"Find your database values in the [AlloyDB Instances page](https://console.cloud.google.com/alloydb/clusters)."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "irl7eMFnSPZr",
"metadata": {
"id": "irl7eMFnSPZr"
},
"outputs": [],
"source": [
"# @title Set Your Values Here { display-mode: \"form\" }\n",
"REGION = \"us-central1\" # @param {type: \"string\"}\n",
"CLUSTER = \"my-cluster\" # @param {type: \"string\"}\n",
"INSTANCE = \"my-primary\" # @param {type: \"string\"}\n",
"DATABASE = \"my-database\" # @param {type: \"string\"}\n",
"TABLE_NAME = \"vector_store\" # @param {type: \"string\"}"
]
},
{
"cell_type": "markdown",
"id": "QuQigs4UoFQ2",
"metadata": {
"id": "QuQigs4UoFQ2"
},
"source": [
"### AlloyDBEngine Connection Pool\n",
"\n",
"One of the required arguments for connecting to AlloyDB is an `AlloyDBEngine` object. The `AlloyDBEngine` configures a connection pool to your AlloyDB database, enabling successful connections from your application and following industry best practices.\n",
"\n",
"To create an `AlloyDBEngine` using `AlloyDBEngine.from_instance()` you need to provide only 5 things:\n",
"\n",
"1. `project_id` : Project ID of the Google Cloud Project where the AlloyDB instance is located.\n",
"1. `region` : Region where the AlloyDB instance is located.\n",
"1. `cluster`: The name of the AlloyDB cluster.\n",
"1. `instance` : The name of the AlloyDB instance.\n",
"1. `database` : The name of the database to connect to on the AlloyDB instance.\n",
"\n",
"By default, [IAM database authentication](https://cloud.google.com/alloydb/docs/connect-iam) will be used as the method of database authentication. This library uses the IAM principal belonging to the [Application Default Credentials (ADC)](https://cloud.google.com/docs/authentication/application-default-credentials) sourced from the environment.\n",
"\n",
"Optionally, [built-in database authentication](https://cloud.google.com/alloydb/docs/database-users/about) using a username and password to access the AlloyDB database can also be used. Just provide the optional `user` and `password` arguments to `AlloyDBEngine.from_instance()`:\n",
"\n",
"* `user` : Database user to use for built-in database authentication and login.\n",
"* `password` : Database password to use for built-in database authentication and login.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Note**: This tutorial demonstrates the async interface. All async methods have corresponding sync methods."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_google_alloydb_pg import AlloyDBEngine\n",
"\n",
"engine = await AlloyDBEngine.afrom_instance(\n",
" project_id=PROJECT_ID,\n",
" region=REGION,\n",
" cluster=CLUSTER,\n",
" instance=INSTANCE,\n",
" database=DATABASE,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "e1tl0aNx7SWy"
},
"source": [
"### Create AlloyDBLoader"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "z-AZyzAQ7bsf"
},
"outputs": [],
"source": [
"from langchain_google_alloydb_pg import AlloyDBLoader\n",
"\n",
"# Creating a basic AlloyDBLoader object\n",
"loader = await AlloyDBLoader.create(engine, table_name=TABLE_NAME)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "PeOMpftjc9_e"
},
"source": [
"### Load Documents via default table\n",
"The loader returns a list of Documents from the table, using the first column as page_content and all other columns as metadata. The default table will have the first column as\n",
"page_content and the second column as metadata (JSON). Each row becomes a document."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "cwvi_O5Wc9_e"
},
"outputs": [],
"source": [
"docs = await loader.aload()\n",
"print(docs)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "kSkL9l1Hc9_e"
},
"source": [
"### Load documents via custom table/metadata or custom page content columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"loader = await AlloyDBLoader.create(\n",
" engine,\n",
" table_name=TABLE_NAME,\n",
" content_columns=[\"product_name\"], # Optional\n",
" metadata_columns=[\"id\"], # Optional\n",
")\n",
"docs = await loader.aload()\n",
"print(docs)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "5R6h0_Cvc9_f"
},
"source": [
"### Set page content format\n",
"The loader returns a list of Documents, with one document per row, with the page content in the specified string format, i.e. text (space-separated concatenation), JSON, YAML, or CSV. JSON and YAML formats include headers, while text and CSV do not include field headers.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NGNdS7cqc9_f"
},
"outputs": [],
"source": [
"loader = await AlloyDBLoader.create(\n",
" engine,\n",
" table_name=\"products\",\n",
" content_columns=[\"product_name\", \"description\"],\n",
" format=\"YAML\",\n",
")\n",
"docs = await loader.aload()\n",
"print(docs)"
]
}
],
"metadata": {
"colab": {
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
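The note in this notebook says every async method has a sync counterpart. A sketch of the same flow without `await`; the sync names (`from_instance`, `create_sync`, `load`) are inferred from the library's async/sync naming pattern and should be treated as assumptions:

```python
from langchain_google_alloydb_pg import AlloyDBEngine, AlloyDBLoader

# Sync counterpart of afrom_instance() (assumed naming).
engine = AlloyDBEngine.from_instance(
    project_id=PROJECT_ID,
    region=REGION,
    cluster=CLUSTER,
    instance=INSTANCE,
    database=DATABASE,
)

# Sync counterpart of the `await AlloyDBLoader.create(...)` call above (assumed naming).
loader = AlloyDBLoader.create_sync(engine, table_name=TABLE_NAME)
docs = loader.load()
print(docs)
```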
469 docs/docs/integrations/document_loaders/google_bigtable.ipynb Normal file
@@ -0,0 +1,469 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Google Bigtable\n",
"\n",
"> [Bigtable](https://cloud.google.com/bigtable) is a key-value and wide-column store, ideal for fast access to structured, semi-structured, or unstructured data. Extend your database application to build AI-powered experiences leveraging Bigtable's Langchain integrations.\n",
"\n",
"This notebook goes over how to use [Bigtable](https://cloud.google.com/bigtable) to [save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/) with `BigtableLoader` and `BigtableSaver`.\n",
"\n",
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googleapis/langchain-google-bigtable-python/blob/main/docs/document_loader.ipynb)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Before You Begin\n",
"\n",
"To run this notebook, you will need to do the following:\n",
"\n",
"* [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
"* [Create a Bigtable instance](https://cloud.google.com/bigtable/docs/creating-instance)\n",
"* [Create a Bigtable table](https://cloud.google.com/bigtable/docs/managing-tables)\n",
"* [Create Bigtable access credentials](https://developers.google.com/workspace/guides/create-credentials)\n",
"\n",
"After confirming access to the database in the runtime environment of this notebook, fill in the following values and run the cell before running the example scripts."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# @markdown Please specify an instance and a table for demo purpose.\n",
"INSTANCE_ID = \"my_instance\" # @param {type:\"string\"}\n",
"TABLE_ID = \"my_table\" # @param {type:\"string\"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 🦜🔗 Library Installation\n",
"\n",
"The integration lives in its own `langchain-google-bigtable` package, so we need to install it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain-google-bigtable"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Colab only**: Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# # Automatically restart kernel after installs so that your environment can access the new packages\n",
"# import IPython\n",
"\n",
"# app = IPython.Application.instance()\n",
"# app.kernel.do_shutdown(True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### ☁ Set Your Google Cloud Project\n",
"Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
"\n",
"If you don't know your project ID, try the following:\n",
"\n",
"* Run `gcloud config list`.\n",
"* Run `gcloud projects list`.\n",
"* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# @markdown Please fill in the value below with your Google Cloud project ID and then run the cell.\n",
"\n",
"PROJECT_ID = \"my-project-id\" # @param {type:\"string\"}\n",
"\n",
"# Set the project id\n",
"!gcloud config set project {PROJECT_ID}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 🔐 Authentication\n",
"\n",
"Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
"\n",
"- If you are using Colab to run this notebook, use the cell below and continue.\n",
"- If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from google.colab import auth\n",
"\n",
"auth.authenticate_user()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Basic Usage"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Using the saver\n",
"\n",
"Save langchain documents with `BigtableSaver.add_documents(<documents>)`. To initialize the `BigtableSaver` class you need to provide 2 things:\n",
"\n",
"1. `instance_id` - The ID of the Bigtable instance.\n",
"1. `table_id` - The name of the table within the Bigtable instance to store langchain documents."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.documents import Document\n",
"from langchain_google_bigtable import BigtableSaver\n",
"\n",
"test_docs = [\n",
" Document(\n",
" page_content=\"Apple Granny Smith 150 0.99 1\",\n",
" metadata={\"fruit_id\": 1},\n",
" ),\n",
" Document(\n",
" page_content=\"Banana Cavendish 200 0.59 0\",\n",
" metadata={\"fruit_id\": 2},\n",
" ),\n",
" Document(\n",
" page_content=\"Orange Navel 80 1.29 1\",\n",
" metadata={\"fruit_id\": 3},\n",
" ),\n",
"]\n",
"\n",
"saver = BigtableSaver(\n",
" instance_id=INSTANCE_ID,\n",
" table_id=TABLE_ID,\n",
")\n",
"\n",
"saver.add_documents(test_docs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Querying for Documents from Bigtable\n",
"For more details on connecting to a Bigtable table, please check the [Python SDK documentation](https://cloud.google.com/python/docs/reference/bigtable/latest/client)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Load documents from table\n",
"\n",
"Load langchain documents with `BigtableLoader.load()` or `BigtableLoader.lazy_load()`. `lazy_load` returns a generator that only queries the database during iteration. To initialize the `BigtableLoader` class you need to provide:\n",
"\n",
"1. `instance_id` - The ID of the Bigtable instance.\n",
"1. `table_id` - The name of the table within the Bigtable instance to store langchain documents."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_google_bigtable import BigtableLoader\n",
"\n",
"loader = BigtableLoader(\n",
" instance_id=INSTANCE_ID,\n",
" table_id=TABLE_ID,\n",
")\n",
"\n",
"for doc in loader.lazy_load():\n",
" print(doc)\n",
" break"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Delete documents\n",
"\n",
"Delete a list of langchain documents from a Bigtable table with `BigtableSaver.delete(<documents>)`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_google_bigtable import BigtableSaver\n",
"\n",
"docs = loader.load()\n",
"print(\"Documents before delete: \", docs)\n",
"\n",
"onedoc = test_docs[0]\n",
"saver.delete([onedoc])\n",
"print(\"Documents after delete: \", loader.load())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Advanced Usage"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Limiting the returned rows\n",
"There are two ways to limit the returned rows:\n",
"\n",
"1. Using a [filter](https://cloud.google.com/python/docs/reference/bigtable/latest/row-filters)\n",
"2. Using a [row_set](https://cloud.google.com/python/docs/reference/bigtable/latest/row-set#google.cloud.bigtable.row_set.RowSet)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import google.cloud.bigtable.row_filters as row_filters\n",
"\n",
"filter_loader = BigtableLoader(\n",
" INSTANCE_ID, TABLE_ID, filter=row_filters.ColumnQualifierRegexFilter(b\"os_build\")\n",
")\n",
"\n",
"\n",
"from google.cloud.bigtable.row_set import RowSet\n",
"\n",
"row_set = RowSet()\n",
"row_set.add_row_range_from_keys(\n",
" start_key=\"phone#4c410523#20190501\", end_key=\"phone#4c410523#201906201\"\n",
")\n",
"\n",
"row_set_loader = BigtableLoader(\n",
" INSTANCE_ID,\n",
" TABLE_ID,\n",
" row_set=row_set,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Custom client\n",
"The client created by default uses only the admin=True option. To use a non-default client, a [custom client](https://cloud.google.com/python/docs/reference/bigtable/latest/client#class-googlecloudbigtableclientclientprojectnone-credentialsnone-readonlyfalse-adminfalse-clientinfonone-clientoptionsnone-adminclientoptionsnone-channelnone) can be passed to the constructor."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from google.cloud import bigtable\n",
"\n",
"custom_client_loader = BigtableLoader(\n",
" INSTANCE_ID,\n",
" TABLE_ID,\n",
" client=bigtable.Client(...),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Custom content\n",
"The BigtableLoader assumes there is a column family called `langchain` with a column called `content` that contains values encoded in UTF-8. These defaults can be changed like so:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_google_bigtable import Encoding\n",
"\n",
"custom_content_loader = BigtableLoader(\n",
" INSTANCE_ID,\n",
" TABLE_ID,\n",
" content_encoding=Encoding.ASCII,\n",
" content_column_family=\"my_content_family\",\n",
" content_column_name=\"my_content_column_name\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Metadata mapping\n",
"By default, the `metadata` map on the `Document` object will contain a single key, `rowkey`, whose value is the row's rowkey. To add more items to that map, use metadata_mapping."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"from langchain_google_bigtable import MetadataMapping\n",
"\n",
"metadata_mapping_loader = BigtableLoader(\n",
" INSTANCE_ID,\n",
" TABLE_ID,\n",
" metadata_mappings=[\n",
" MetadataMapping(\n",
" column_family=\"my_int_family\",\n",
" column_name=\"my_int_column\",\n",
" metadata_key=\"key_in_metadata_map\",\n",
" encoding=Encoding.INT_BIG_ENDIAN,\n",
" ),\n",
" MetadataMapping(\n",
" column_family=\"my_custom_family\",\n",
" column_name=\"my_custom_column\",\n",
" metadata_key=\"custom_key\",\n",
" encoding=Encoding.CUSTOM,\n",
" custom_decoding_func=lambda input: json.loads(input.decode()),\n",
" custom_encoding_func=lambda input: str.encode(json.dumps(input)),\n",
" ),\n",
" ],\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Metadata as JSON\n",
"\n",
"If there is a column in Bigtable that contains a JSON string that you would like to have added to the output document metadata, it is possible to add the following parameters to BigtableLoader. Note, the default value for `metadata_as_json_encoding` is UTF-8."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"metadata_as_json_loader = BigtableLoader(\n",
" INSTANCE_ID,\n",
" TABLE_ID,\n",
" metadata_as_json_encoding=Encoding.ASCII,\n",
" metadata_as_json_family=\"my_metadata_as_json_family\",\n",
" metadata_as_json_name=\"my_metadata_as_json_column_name\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Customize BigtableSaver\n",
"\n",
"The BigtableSaver is also customizable, similarly to the BigtableLoader."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"saver = BigtableSaver(\n",
" INSTANCE_ID,\n",
" TABLE_ID,\n",
" client=bigtable.Client(...),\n",
" content_encoding=Encoding.ASCII,\n",
" content_column_family=\"my_content_family\",\n",
" content_column_name=\"my_content_column_name\",\n",
" metadata_mappings=[\n",
" MetadataMapping(\n",
" column_family=\"my_int_family\",\n",
" column_name=\"my_int_column\",\n",
" metadata_key=\"key_in_metadata_map\",\n",
" encoding=Encoding.INT_BIG_ENDIAN,\n",
" ),\n",
" MetadataMapping(\n",
" column_family=\"my_custom_family\",\n",
" column_name=\"my_custom_column\",\n",
" metadata_key=\"custom_key\",\n",
" encoding=Encoding.CUSTOM,\n",
" custom_decoding_func=lambda input: json.loads(input.decode()),\n",
" custom_encoding_func=lambda input: str.encode(json.dumps(input)),\n",
" ),\n",
" ],\n",
" metadata_as_json_encoding=Encoding.ASCII,\n",
" metadata_as_json_family=\"my_metadata_as_json_family\",\n",
" metadata_as_json_name=\"my_metadata_as_json_column_name\",\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
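The advanced options in this notebook compose: filter, row set, and custom content settings are all constructor keyword arguments, so a single loader can combine them. A sketch using only values that already appear above:

```python
import google.cloud.bigtable.row_filters as row_filters
from google.cloud.bigtable.row_set import RowSet
from langchain_google_bigtable import BigtableLoader, Encoding

row_set = RowSet()
row_set.add_row_range_from_keys(
    start_key="phone#4c410523#20190501", end_key="phone#4c410523#201906201"
)

# Combines the separately demonstrated kwargs in one constructor call.
combined_loader = BigtableLoader(
    INSTANCE_ID,
    TABLE_ID,
    filter=row_filters.ColumnQualifierRegexFilter(b"os_build"),
    row_set=row_set,
    content_encoding=Encoding.ASCII,
    content_column_family="my_content_family",
    content_column_name="my_content_column_name",
)
```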
@@ -0,0 +1,629 @@
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Cloud SQL for SQL Server\n",
|
||||
"\n",
|
||||
"> [Cloud SQL](https://cloud.google.com/sql) is a fully managed relational database service that offers high performance, seamless integration, and impressive scalability. It offers [MySQL](https://cloud.google.com/sql/mysql), [PostgreSQL](https://cloud.google.com/sql/postgres), and [SQL Server](https://cloud.google.com/sql/sqlserver) database engines. Extend your database application to build AI-powered experiences leveraging Cloud SQL's Langchain integrations.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use [Cloud SQL for SQL Server](https://cloud.google.com/sql/sqlserver) to [save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/) with `MSSQLLoader` and `MSSQLDocumentSaver`.\n",
|
||||
"\n",
|
||||
"[](https://colab.research.google.com/github/googleapis/langchain-google-cloud-sql-mssql-python/blob/main/docs/document_loader.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Before You Begin\n",
|
||||
"\n",
|
||||
"To run this notebook, you will need to do the following:\n",
|
||||
"\n",
|
||||
"* [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
|
||||
"* [Create a Cloud SQL for SQL Server instance](https://cloud.google.com/sql/docs/sqlserver/create-instance)\n",
|
||||
"* [Create a Cloud SQL database](https://cloud.google.com/sql/docs/mssql/create-manage-databases)\n",
|
||||
"* [Add an IAM database user to the database](https://cloud.google.com/sql/docs/sqlserver/add-manage-iam-users#creating-a-database-user) (Optional)\n",
|
||||
"\n",
|
||||
"After confirmed access to database in the runtime environment of this notebook, filling the following values and run the cell before running example scripts."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please fill in the both the Google Cloud region and name of your Cloud SQL instance.\n",
|
||||
"REGION = \"us-central1\" # @param {type:\"string\"}\n",
|
||||
"INSTANCE = \"test-instance\" # @param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"# @markdown Please fill in user name and password of your Cloud SQL instance.\n",
|
||||
"DB_USER = \"sqlserver\" # @param {type:\"string\"}\n",
|
||||
"DB_PASS = \"password\" # @param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"# @markdown Please specify a database and a table for demo purpose.\n",
|
||||
"DATABASE = \"test\" # @param {type:\"string\"}\n",
|
||||
"TABLE_NAME = \"test-default\" # @param {type:\"string\"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🦜🔗 Library Installation\n",
|
||||
"\n",
|
||||
"The integration lives in its own `langchain-google-cloud-sql-mssql` package, so we need to install it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-google-cloud-sql-mssql"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Colab only**: Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # Automatically restart kernel after installs so that your environment can access the new packages\n",
|
||||
"# import IPython\n",
|
||||
"\n",
|
||||
"# app = IPython.Application.instance()\n",
|
||||
"# app.kernel.do_shutdown(True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🔐 Authentication\n",
|
||||
"\n",
|
||||
"Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
|
||||
"\n",
|
||||
"- If you are using Colab to run this notebook, use the cell below and continue.\n",
|
||||
"- If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.colab import auth\n",
|
||||
"\n",
|
||||
"auth.authenticate_user()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ☁ Set Your Google Cloud Project\n",
|
||||
"Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
|
||||
"\n",
|
||||
"If you don't know your project ID, try the following:\n",
|
||||
"\n",
|
||||
"* Run `gcloud config list`.\n",
|
||||
"* Run `gcloud projects list`.\n",
|
||||
"* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please fill in the value below with your Google Cloud project ID and then run the cell.\n",
|
||||
"\n",
|
||||
"PROJECT_ID = \"my-project-id\" # @param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"# Set the project id\n",
|
||||
"!gcloud config set project {PROJECT_ID}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 💡 API Enablement\n",
|
||||
"The `langchain-google-cloud-sql-mssql` package requires that you [enable the Cloud SQL Admin API](https://console.cloud.google.com/flows/enableapi?apiid=sqladmin.googleapis.com) in your Google Cloud Project."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# enable Cloud SQL Admin API\n",
|
||||
"!gcloud services enable sqladmin.googleapis.com"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Basic Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### MSSQLEngine Connection Pool\n",
|
||||
"\n",
|
||||
"Before saving or loading documents from MSSQL table, we need first configures a connection pool to Cloud SQL database. The `MSSQLEngine` configures a [SQLAlchemy connection pool](https://docs.sqlalchemy.org/en/20/core/pooling.html#module-sqlalchemy.pool) to your Cloud SQL database, enabling successful connections from your application and following industry best practices.\n",
|
||||
"\n",
|
||||
"To create a `MSSQLEngine` using `MSSQLEngine.from_instance()` you need to provide only 6 things:\n",
|
||||
"\n",
|
||||
"1. `project_id` : Project ID of the Google Cloud Project where the Cloud SQL instance is located.\n",
|
||||
"1. `region` : Region where the Cloud SQL instance is located.\n",
|
||||
"1. `instance` : The name of the Cloud SQL instance.\n",
|
||||
"1. `database` : The name of the database to connect to on the Cloud SQL instance.\n",
|
||||
"1. `user` : Database user to use for built-in database authentication and login.\n",
|
||||
"1. `password` : Database password to use for built-in database authentication and login."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_mssql import MSSQLEngine\n",
|
||||
"\n",
|
||||
"engine = MSSQLEngine.from_instance(\n",
|
||||
" project_id=PROJECT_ID,\n",
|
||||
" region=REGION,\n",
|
||||
" instance=INSTANCE,\n",
|
||||
" database=DATABASE,\n",
|
||||
" user=DB_USER,\n",
|
||||
" password=DB_PASS,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Initialize a table\n",
|
||||
"\n",
|
||||
"Initialize a table of default schema via `MSSQLEngine.init_document_table(<table_name>)`. Table Columns:\n",
|
||||
"- page_content (type: text)\n",
|
||||
"- langchain_metadata (type: JSON)\n",
|
||||
"\n",
|
||||
"`overwrite_existing=True` flag means the newly initialized table will replace any existing table of the same name."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"engine.init_document_table(TABLE_NAME, overwrite_existing=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Save documents\n",
|
||||
"\n",
|
||||
"Save langchain documents with `MSSQLDocumentSaver.add_documents(<documents>)`. To initialize `MSSQLDocumentSaver` class you need to provide 2 things:\n",
|
||||
"1. `engine` - An instance of a `MSSQLEngine` engine.\n",
|
||||
"2. `table_name` - The name of the table within the Cloud SQL database to store langchain documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain_google_cloud_sql_mssql import MSSQLDocumentSaver\n",
|
||||
"\n",
|
||||
"test_docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Apple Granny Smith 150 0.99 1\",\n",
|
||||
" metadata={\"fruit_id\": 1},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Banana Cavendish 200 0.59 0\",\n",
|
||||
" metadata={\"fruit_id\": 2},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Orange Navel 80 1.29 1\",\n",
|
||||
" metadata={\"fruit_id\": 3},\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"saver = MSSQLDocumentSaver(engine=engine, table_name=TABLE_NAME)\n",
|
||||
"saver.add_documents(test_docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Load langchain documents with `MSSQLLoader.load()` or `MSSQLLoader.lazy_load()`. `lazy_load` returns a generator that only queries database during the iteration. To initialize `MSSQLDocumentSaver` class you need to provide:\n",
|
||||
"1. `engine` - An instance of a `MSSQLEngine` engine.\n",
|
||||
"2. `table_name` - The name of the table within the Cloud SQL database to store langchain documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_mssql import MSSQLLoader\n",
|
||||
"\n",
|
||||
"loader = MSSQLLoader(engine=engine, table_name=TABLE_NAME)\n",
|
||||
"docs = loader.lazy_load()\n",
|
||||
"for doc in docs:\n",
|
||||
" print(\"Loaded documents:\", doc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load documents via query"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Other than loading documents from a table, we can also choose to load documents from a view generated from a SQL query. For example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_mssql import MSSQLLoader\n",
|
||||
"\n",
|
||||
"loader = MSSQLLoader(\n",
|
||||
" engine=engine,\n",
|
||||
" query=f\"select * from \\\"{TABLE_NAME}\\\" where JSON_VALUE(langchain_metadata, '$.fruit_id') = 1;\",\n",
|
||||
")\n",
|
||||
"onedoc = loader.load()\n",
|
||||
"onedoc"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The view generated from SQL query can have different schema than default table. In such cases, the behavior of MSSQLLoader is the same as loading from table with non-default schema. Please refer to section [Load documents with customized document page content & metadata](#Load-documents-with-customized-document-page-content-&-metadata)."
|
||||
]
|
||||
},
|
||||
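{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For instance, here is a minimal sketch of loading from a view. Note that `fruit_view` is a hypothetical name: you would need to create such a view in your database first."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sketch only: assumes a view named `fruit_view` was created beforehand, e.g.\n",
|
||||
"# CREATE VIEW fruit_view AS SELECT page_content, langchain_metadata FROM ...\n",
|
||||
"loader = MSSQLLoader(\n",
|
||||
"    engine=engine,\n",
|
||||
"    query=\"select * from fruit_view;\",\n",
|
||||
")\n",
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||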
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Delete a list of langchain documents from MSSQL table with `MSSQLDocumentSaver.delete(<documents>)`.\n",
|
||||
"\n",
|
||||
"For table with default schema (page_content, langchain_metadata), the deletion criteria is:\n",
|
||||
"\n",
|
||||
"A `row` should be deleted if there exists a `document` in the list, such that\n",
|
||||
"- `document.page_content` equals `row[page_content]`\n",
|
||||
"- `document.metadata` equals `row[langchain_metadata]`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_mssql import MSSQLLoader\n",
|
||||
"\n",
|
||||
"loader = MSSQLLoader(engine=engine, table_name=TABLE_NAME)\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(\"Documents before delete:\", docs)\n",
|
||||
"saver.delete(onedoc)\n",
|
||||
"print(\"Documents after delete:\", loader.load())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Advanced Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load documents with customized document page content & metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First we prepare an example table with non-default schema, and populate it with some arbitary data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sqlalchemy\n",
|
||||
"\n",
|
||||
"with engine.connect() as conn:\n",
|
||||
" conn.execute(sqlalchemy.text(f'DROP TABLE IF EXISTS \"{TABLE_NAME}\"'))\n",
|
||||
" conn.commit()\n",
|
||||
" conn.execute(\n",
|
||||
" sqlalchemy.text(\n",
|
||||
" f\"\"\"\n",
|
||||
" IF NOT EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[{TABLE_NAME}]') AND type in (N'U'))\n",
|
||||
" BEGIN\n",
|
||||
" CREATE TABLE [dbo].[{TABLE_NAME}](\n",
|
||||
" fruit_id INT IDENTITY(1,1) PRIMARY KEY,\n",
|
||||
" fruit_name VARCHAR(100) NOT NULL,\n",
|
||||
" variety VARCHAR(50),\n",
|
||||
" quantity_in_stock INT NOT NULL,\n",
|
||||
" price_per_unit DECIMAL(6,2) NOT NULL,\n",
|
||||
" organic BIT NOT NULL\n",
|
||||
" )\n",
|
||||
" END\n",
|
||||
" \"\"\"\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" conn.execute(\n",
|
||||
" sqlalchemy.text(\n",
|
||||
" f\"\"\"\n",
|
||||
" INSERT INTO \"{TABLE_NAME}\" (fruit_name, variety, quantity_in_stock, price_per_unit, organic)\n",
|
||||
" VALUES\n",
|
||||
" ('Apple', 'Granny Smith', 150, 0.99, 1),\n",
|
||||
" ('Banana', 'Cavendish', 200, 0.59, 0),\n",
|
||||
" ('Orange', 'Navel', 80, 1.29, 1);\n",
|
||||
" \"\"\"\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" conn.commit()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If we still load langchain documents with default parameters of `MSSQLLoader` from this example table, the `page_content` of loaded documents will be the first column of the table, and `metadata` will be consisting of key-value pairs of all the other columns."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = MSSQLLoader(\n",
|
||||
" engine=engine,\n",
|
||||
" table_name=TABLE_NAME,\n",
|
||||
")\n",
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can specify the content and metadata we want to load by setting the `content_columns` and `metadata_columns` when initializing the `MSSQLLoader`.\n",
|
||||
"1. `content_columns`: The columns to write into the `page_content` of the document.\n",
|
||||
"2. `metadata_columns`: The columns to write into the `metadata` of the document.\n",
|
||||
"\n",
|
||||
"For example here, the values of columns in `content_columns` will be joined together into a space-separated string, as `page_content` of loaded documents, and `metadata` of loaded documents will only contain key-value pairs of columns specified in `metadata_columns`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = MSSQLLoader(\n",
|
||||
" engine=engine,\n",
|
||||
" table_name=TABLE_NAME,\n",
|
||||
" content_columns=[\n",
|
||||
" \"variety\",\n",
|
||||
" \"quantity_in_stock\",\n",
|
||||
" \"price_per_unit\",\n",
|
||||
" \"organic\",\n",
|
||||
" ],\n",
|
||||
" metadata_columns=[\"fruit_id\", \"fruit_name\"],\n",
|
||||
")\n",
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Save document with customized page content & metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In order to save langchain document into table with customized metadata fields. We need first create such a table via `MSSQLEngine.init_document_table()`, and specify the list of `metadata_columns` we want it to have. In this example, the created table will have table columns:\n",
|
||||
"- description (type: text): for storing fruit description.\n",
|
||||
"- fruit_name (type text): for storing fruit name.\n",
|
||||
"- organic (type tinyint(1)): to tell if the fruit is organic.\n",
|
||||
"- other_metadata (type: JSON): for storing other metadata information of the fruit.\n",
|
||||
"\n",
|
||||
"We can use the following parameters with `MSSQLEngine.init_document_table()` to create the table:\n",
|
||||
"1. `table_name`: The name of the table within the Cloud SQL database to store langchain documents.\n",
|
||||
"2. `metadata_columns`: A list of `sqlalchemy.Column` indicating the list of metadata columns we need.\n",
|
||||
"3. `content_column`: The name of column to store `page_content` of langchain document. Default: `page_content`.\n",
|
||||
"4. `metadata_json_column`: The name of JSON column to store extra `metadata` of langchain document. Default: `langchain_metadata`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"engine.init_document_table(\n",
|
||||
" TABLE_NAME,\n",
|
||||
" metadata_columns=[\n",
|
||||
" sqlalchemy.Column(\n",
|
||||
" \"fruit_name\",\n",
|
||||
" sqlalchemy.UnicodeText,\n",
|
||||
" primary_key=False,\n",
|
||||
" nullable=True,\n",
|
||||
" ),\n",
|
||||
" sqlalchemy.Column(\n",
|
||||
" \"organic\",\n",
|
||||
" sqlalchemy.Boolean,\n",
|
||||
" primary_key=False,\n",
|
||||
" nullable=True,\n",
|
||||
" ),\n",
|
||||
" ],\n",
|
||||
" content_column=\"description\",\n",
|
||||
" metadata_json_column=\"other_metadata\",\n",
|
||||
" overwrite_existing=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Save documents with `MSSQLDocumentSaver.add_documents(<documents>)`. As you can see in this example, \n",
|
||||
"- `document.page_content` will be saved into `description` column.\n",
|
||||
"- `document.metadata.fruit_name` will be saved into `fruit_name` column.\n",
|
||||
"- `document.metadata.organic` will be saved into `organic` column.\n",
|
||||
"- `document.metadata.fruit_id` will be saved into `other_metadata` column in JSON format."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Granny Smith 150 0.99\",\n",
|
||||
" metadata={\"fruit_id\": 1, \"fruit_name\": \"Apple\", \"organic\": 1},\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"saver = MSSQLDocumentSaver(\n",
|
||||
" engine=engine,\n",
|
||||
" table_name=TABLE_NAME,\n",
|
||||
" content_column=\"description\",\n",
|
||||
" metadata_json_column=\"other_metadata\",\n",
|
||||
")\n",
|
||||
"saver.add_documents(test_docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with engine.connect() as conn:\n",
|
||||
" result = conn.execute(sqlalchemy.text(f'select * from \"{TABLE_NAME}\";'))\n",
|
||||
" print(result.keys())\n",
|
||||
" print(result.fetchall())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete documents with customized page content & metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can also delete documents from table with customized metadata columns via `MSSQLDocumentSaver.delete(<documents>)`. The deletion criteria is:\n",
|
||||
"\n",
|
||||
"A `row` should be deleted if there exists a `document` in the list, such that\n",
|
||||
"- `document.page_content` equals `row[page_content]`\n",
|
||||
"- For every metadata field `k` in `document.metadata`\n",
|
||||
" - `document.metadata[k]` equals `row[k]` or `document.metadata[k]` equals `row[langchain_metadata][k]`\n",
|
||||
"- There no extra metadata field presents in `row` but not in `document.metadata`.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = MSSQLLoader(engine=engine, table_name=TABLE_NAME)\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(\"Documents before delete:\", docs)\n",
|
||||
"saver.delete(docs)\n",
|
||||
"print(\"Documents after delete:\", loader.load())"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,642 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Cloud SQL for MySQL\n",
|
||||
"\n",
|
||||
"> [Cloud SQL](https://cloud.google.com/sql) is a fully managed relational database service that offers high performance, seamless integration, and impressive scalability. It offers [MySQL](https://cloud.google.com/sql/mysql), [PostgreSQL](https://cloud.google.com/sql/postgres), and [SQL Server](https://cloud.google.com/sql/sqlserver) database engines. Extend your database application to build AI-powered experiences leveraging Cloud SQL's Langchain integrations.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use [Cloud SQL for MySQL](https://cloud.google.com/sql/mysql) to [save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/) with `MySQLLoader` and `MySQLDocumentSaver`.\n",
|
||||
"\n",
|
||||
"[](https://colab.research.google.com/github/googleapis/langchain-google-cloud-sql-mysql-python/blob/main/docs/document_loader.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Before You Begin\n",
|
||||
"\n",
|
||||
"To run this notebook, you will need to do the following:\n",
|
||||
"\n",
|
||||
"* [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
|
||||
"* [Create a Cloud SQL for MySQL instance](https://cloud.google.com/sql/docs/mysql/create-instance)\n",
|
||||
"* [Create a Cloud SQL database](https://cloud.google.com/sql/docs/mysql/create-manage-databases)\n",
|
||||
"* [Add an IAM database user to the database](https://cloud.google.com/sql/docs/mysql/add-manage-iam-users#creating-a-database-user) (Optional)\n",
|
||||
"\n",
|
||||
"After confirmed access to database in the runtime environment of this notebook, filling the following values and run the cell before running example scripts."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please fill in the both the Google Cloud region and name of your Cloud SQL instance.\n",
|
||||
"REGION = \"us-central1\" # @param {type:\"string\"}\n",
|
||||
"INSTANCE = \"test-instance\" # @param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"# @markdown Please specify a database and a table for demo purpose.\n",
|
||||
"DATABASE = \"test\" # @param {type:\"string\"}\n",
|
||||
"TABLE_NAME = \"test-default\" # @param {type:\"string\"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🦜🔗 Library Installation\n",
|
||||
"\n",
|
||||
"The integration lives in its own `langchain-google-cloud-sql-mysql` package, so we need to install it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -upgrade --quiet langchain-google-cloud-sql-mysql"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Colab only**: Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # Automatically restart kernel after installs so that your environment can access the new packages\n",
|
||||
"# import IPython\n",
|
||||
"\n",
|
||||
"# app = IPython.Application.instance()\n",
|
||||
"# app.kernel.do_shutdown(True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ☁ Set Your Google Cloud Project\n",
|
||||
"Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
|
||||
"\n",
|
||||
"If you don't know your project ID, try the following:\n",
|
||||
"\n",
|
||||
"* Run `gcloud config list`.\n",
|
||||
"* Run `gcloud projects list`.\n",
|
||||
"* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please fill in the value below with your Google Cloud project ID and then run the cell.\n",
|
||||
"\n",
|
||||
"PROJECT_ID = \"my-project-id\" # @param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"# Set the project id\n",
|
||||
"!gcloud config set project {PROJECT_ID}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🔐 Authentication\n",
|
||||
"\n",
|
||||
"Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
|
||||
"\n",
|
||||
"- If you are using Colab to run this notebook, use the cell below and continue.\n",
|
||||
"- If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.colab import auth\n",
|
||||
"\n",
|
||||
"auth.authenticate_user()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### API Enablement\n",
|
||||
"The `langchain-google-cloud-sql-mysql` package requires that you [enable the Cloud SQL Admin API](https://console.cloud.google.com/flows/enableapi?apiid=sqladmin.googleapis.com) in your Google Cloud Project."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# enable Cloud SQL Admin API\n",
|
||||
"!gcloud services enable sqladmin.googleapis.com"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Basic Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### MySQLEngine Connection Pool\n",
|
||||
"\n",
|
||||
"Before saving or loading documents from MySQL table, we need first configures a connection pool to Cloud SQL database. The `MySQLEngine` configures a connection pool to your Cloud SQL database, enabling successful connections from your application and following industry best practices.\n",
|
||||
"\n",
|
||||
"To create a `MySQLEngine` using `MySQLEngine.from_instance()` you need to provide only 4 things:\n",
|
||||
"\n",
|
||||
"1. `project_id` : Project ID of the Google Cloud Project where the Cloud SQL instance is located.\n",
|
||||
"2. `region` : Region where the Cloud SQL instance is located.\n",
|
||||
"3. `instance` : The name of the Cloud SQL instance.\n",
|
||||
"4. `database` : The name of the database to connect to on the Cloud SQL instance.\n",
|
||||
"\n",
|
||||
"By default, [IAM database authentication](https://cloud.google.com/sql/docs/mysql/iam-authentication#iam-db-auth) will be used as the method of database authentication. This library uses the IAM principal belonging to the [Application Default Credentials (ADC)](https://cloud.google.com/docs/authentication/application-default-credentials) sourced from the envionment.\n",
|
||||
"\n",
|
||||
"For more informatin on IAM database authentication please see:\n",
|
||||
"\n",
|
||||
"* [Configure an instance for IAM database authentication](https://cloud.google.com/sql/docs/mysql/create-edit-iam-instances)\n",
|
||||
"* [Manage users with IAM database authentication](https://cloud.google.com/sql/docs/mysql/add-manage-iam-users)\n",
|
||||
"\n",
|
||||
"Optionally, [built-in database authentication](https://cloud.google.com/sql/docs/mysql/built-in-authentication) using a username and password to access the Cloud SQL database can also be used. Just provide the optional `user` and `password` arguments to `MySQLEngine.from_instance()`:\n",
|
||||
"\n",
|
||||
"* `user` : Database user to use for built-in database authentication and login\n",
|
||||
"* `password` : Database password to use for built-in database authentication and login."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_mysql import MySQLEngine\n",
|
||||
"\n",
|
||||
"engine = MySQLEngine.from_instance(\n",
|
||||
" project_id=PROJECT_ID, region=REGION, instance=INSTANCE, database=DATABASE\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Initialize a table\n",
|
||||
"\n",
|
||||
"Initialize a table of default schema via `MySQLEngine.init_document_table(<table_name>)`. Table Columns:\n",
|
||||
"\n",
|
||||
"- page_content (type: text)\n",
|
||||
"- langchain_metadata (type: JSON)\n",
|
||||
"\n",
|
||||
"`overwrite_existing=True` flag means the newly initialized table will replace any existing table of the same name."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"engine.init_document_table(TABLE_NAME, overwrite_existing=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Save documents\n",
|
||||
"\n",
|
||||
"Save langchain documents with `MySQLDocumentSaver.add_documents(<documents>)`. To initialize `MySQLDocumentSaver` class you need to provide 2 things:\n",
|
||||
"\n",
|
||||
"1. `engine` - An instance of a `MySQLEngine` engine.\n",
|
||||
"2. `table_name` - The name of the table within the Cloud SQL database to store langchain documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain_google_cloud_sql_mysql import MySQLDocumentSaver\n",
|
||||
"\n",
|
||||
"test_docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Apple Granny Smith 150 0.99 1\",\n",
|
||||
" metadata={\"fruit_id\": 1},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Banana Cavendish 200 0.59 0\",\n",
|
||||
" metadata={\"fruit_id\": 2},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Orange Navel 80 1.29 1\",\n",
|
||||
" metadata={\"fruit_id\": 3},\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"saver = MySQLDocumentSaver(engine=engine, table_name=TABLE_NAME)\n",
|
||||
"saver.add_documents(test_docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Load langchain documents with `MySQLLoader.load()` or `MySQLLoader.lazy_load()`. `lazy_load` returns a generator that only queries database during the iteration. To initialize `MySQLLoader` class you need to provide:\n",
|
||||
"\n",
|
||||
"1. `engine` - An instance of a `MySQLEngine` engine.\n",
|
||||
"2. `table_name` - The name of the table within the Cloud SQL database to store langchain documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_mysql import MySQLLoader\n",
|
||||
"\n",
|
||||
"loader = MySQLLoader(engine=engine, table_name=TABLE_NAME)\n",
|
||||
"docs = loader.lazy_load()\n",
|
||||
"for doc in docs:\n",
|
||||
" print(\"Loaded documents:\", doc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load documents via query"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Other than loading documents from a table, we can also choose to load documents from a view generated from a SQL query. For example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_mysql import MySQLLoader\n",
|
||||
"\n",
|
||||
"loader = MySQLLoader(\n",
|
||||
" engine=engine,\n",
|
||||
" query=f\"select * from `{TABLE_NAME}` where JSON_EXTRACT(langchain_metadata, '$.fruit_id') = 1;\",\n",
|
||||
")\n",
|
||||
"onedoc = loader.load()\n",
|
||||
"onedoc"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The view generated from SQL query can have different schema than default table. In such cases, the behavior of MySQLLoader is the same as loading from table with non-default schema. Please refer to section [Load documents with customized document page content & metadata](#Load-documents-with-customized-document-page-content-&-metadata)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Delete a list of langchain documents from MySQL table with `MySQLDocumentSaver.delete(<documents>)`.\n",
|
||||
"\n",
|
||||
"For table with default schema (page_content, langchain_metadata), the deletion criteria is:\n",
|
||||
"\n",
|
||||
"A `row` should be deleted if there exists a `document` in the list, such that\n",
|
||||
"\n",
|
||||
"- `document.page_content` equals `row[page_content]`\n",
|
||||
"- `document.metadata` equals `row[langchain_metadata]`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_mysql import MySQLLoader\n",
|
||||
"\n",
|
||||
"loader = MySQLLoader(engine=engine, table_name=TABLE_NAME)\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(\"Documents before delete:\", docs)\n",
|
||||
"saver.delete(onedoc)\n",
|
||||
"print(\"Documents after delete:\", loader.load())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Advanced Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load documents with customized document page content & metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First we prepare an example table with non-default schema, and populate it with some arbitary data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sqlalchemy\n",
|
||||
"\n",
|
||||
"with engine.connect() as conn:\n",
|
||||
" conn.execute(sqlalchemy.text(f\"DROP TABLE IF EXISTS `{TABLE_NAME}`\"))\n",
|
||||
" conn.commit()\n",
|
||||
" conn.execute(\n",
|
||||
" sqlalchemy.text(\n",
|
||||
" f\"\"\"\n",
|
||||
" CREATE TABLE IF NOT EXISTS `{TABLE_NAME}`(\n",
|
||||
" fruit_id INT AUTO_INCREMENT PRIMARY KEY,\n",
|
||||
" fruit_name VARCHAR(100) NOT NULL,\n",
|
||||
" variety VARCHAR(50),\n",
|
||||
" quantity_in_stock INT NOT NULL,\n",
|
||||
" price_per_unit DECIMAL(6,2) NOT NULL,\n",
|
||||
" organic TINYINT(1) NOT NULL\n",
|
||||
" )\n",
|
||||
" \"\"\"\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" conn.execute(\n",
|
||||
" sqlalchemy.text(\n",
|
||||
" f\"\"\"\n",
|
||||
" INSERT INTO `{TABLE_NAME}` (fruit_name, variety, quantity_in_stock, price_per_unit, organic)\n",
|
||||
" VALUES\n",
|
||||
" ('Apple', 'Granny Smith', 150, 0.99, 1),\n",
|
||||
" ('Banana', 'Cavendish', 200, 0.59, 0),\n",
|
||||
" ('Orange', 'Navel', 80, 1.29, 1);\n",
|
||||
" \"\"\"\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" conn.commit()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If we still load langchain documents with default parameters of `MySQLLoader` from this example table, the `page_content` of loaded documents will be the first column of the table, and `metadata` will be consisting of key-value pairs of all the other columns."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = MySQLLoader(\n",
|
||||
" engine=engine,\n",
|
||||
" table_name=TABLE_NAME,\n",
|
||||
")\n",
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can specify the content and metadata we want to load by setting the `content_columns` and `metadata_columns` when initializing the `MySQLLoader`.\n",
|
||||
"\n",
|
||||
"1. `content_columns`: The columns to write into the `page_content` of the document.\n",
|
||||
"2. `metadata_columns`: The columns to write into the `metadata` of the document.\n",
|
||||
"\n",
|
||||
"For example here, the values of columns in `content_columns` will be joined together into a space-separated string, as `page_content` of loaded documents, and `metadata` of loaded documents will only contain key-value pairs of columns specified in `metadata_columns`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = MySQLLoader(\n",
|
||||
" engine=engine,\n",
|
||||
" table_name=TABLE_NAME,\n",
|
||||
" content_columns=[\n",
|
||||
" \"variety\",\n",
|
||||
" \"quantity_in_stock\",\n",
|
||||
" \"price_per_unit\",\n",
|
||||
" \"organic\",\n",
|
||||
" ],\n",
|
||||
" metadata_columns=[\"fruit_id\", \"fruit_name\"],\n",
|
||||
")\n",
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Save document with customized page content & metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In order to save langchain document into table with customized metadata fields. We need first create such a table via `MySQLEngine.init_document_table()`, and specify the list of `metadata_columns` we want it to have. In this example, the created table will have table columns:\n",
|
||||
"\n",
|
||||
"- description (type: text): for storing fruit description.\n",
|
||||
"- fruit_name (type text): for storing fruit name.\n",
|
||||
"- organic (type tinyint(1)): to tell if the fruit is organic.\n",
|
||||
"- other_metadata (type: JSON): for storing other metadata information of the fruit.\n",
|
||||
"\n",
|
||||
"We can use the following parameters with `MySQLEngine.init_document_table()` to create the table:\n",
|
||||
"\n",
|
||||
"1. `table_name`: The name of the table within the Cloud SQL database to store langchain documents.\n",
|
||||
"2. `metadata_columns`: A list of `sqlalchemy.Column` indicating the list of metadata columns we need.\n",
|
||||
"3. `content_column`: The name of column to store `page_content` of langchain document. Default: `page_content`.\n",
|
||||
"4. `metadata_json_column`: The name of JSON column to store extra `metadata` of langchain document. Default: `langchain_metadata`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"engine.init_document_table(\n",
|
||||
" TABLE_NAME,\n",
|
||||
" metadata_columns=[\n",
|
||||
" sqlalchemy.Column(\n",
|
||||
" \"fruit_name\",\n",
|
||||
" sqlalchemy.UnicodeText,\n",
|
||||
" primary_key=False,\n",
|
||||
" nullable=True,\n",
|
||||
" ),\n",
|
||||
" sqlalchemy.Column(\n",
|
||||
" \"organic\",\n",
|
||||
" sqlalchemy.Boolean,\n",
|
||||
" primary_key=False,\n",
|
||||
" nullable=True,\n",
|
||||
" ),\n",
|
||||
" ],\n",
|
||||
" content_column=\"description\",\n",
|
||||
" metadata_json_column=\"other_metadata\",\n",
|
||||
" overwrite_existing=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Save documents with `MySQLDocumentSaver.add_documents(<documents>)`. As you can see in this example, \n",
|
||||
"\n",
|
||||
"- `document.page_content` will be saved into `description` column.\n",
|
||||
"- `document.metadata.fruit_name` will be saved into `fruit_name` column.\n",
|
||||
"- `document.metadata.organic` will be saved into `organic` column.\n",
|
||||
"- `document.metadata.fruit_id` will be saved into `other_metadata` column in JSON format."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Granny Smith 150 0.99\",\n",
|
||||
" metadata={\"fruit_id\": 1, \"fruit_name\": \"Apple\", \"organic\": 1},\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"saver = MySQLDocumentSaver(\n",
|
||||
" engine=engine,\n",
|
||||
" table_name=TABLE_NAME,\n",
|
||||
" content_column=\"description\",\n",
|
||||
" metadata_json_column=\"other_metadata\",\n",
|
||||
")\n",
|
||||
"saver.add_documents(test_docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with engine.connect() as conn:\n",
|
||||
" result = conn.execute(sqlalchemy.text(f\"select * from `{TABLE_NAME}`;\"))\n",
|
||||
" print(result.keys())\n",
|
||||
" print(result.fetchall())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete documents with customized page content & metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can also delete documents from table with customized metadata columns via `MySQLDocumentSaver.delete(<documents>)`. The deletion criteria is:\n",
|
||||
"\n",
|
||||
"A `row` should be deleted if there exists a `document` in the list, such that\n",
|
||||
"\n",
|
||||
"- `document.page_content` equals `row[page_content]`\n",
|
||||
"- For every metadata field `k` in `document.metadata`\n",
|
||||
" - `document.metadata[k]` equals `row[k]` or `document.metadata[k]` equals `row[langchain_metadata][k]`\n",
|
||||
"- There no extra metadata field presents in `row` but not in `document.metadata`.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = MySQLLoader(engine=engine, table_name=TABLE_NAME)\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(\"Documents before delete:\", docs)\n",
|
||||
"saver.delete(docs)\n",
|
||||
"print(\"Documents after delete:\", loader.load())"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,382 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "E_RJy7C1bpCT"
|
||||
},
|
||||
"source": [
|
||||
"# Google Cloud SQL for PostgreSQL\n",
|
||||
"\n",
|
||||
"> [Cloud SQL for PostgreSQL](https://cloud.google.com/sql/docs/postgres) is a fully-managed database service that helps you set up, maintain, manage, and administer your PostgreSQL relational databases on Google Cloud Platform. Extend your database application to build AI-powered experiences leveraging Cloud SQL for PostgreSQL's Langchain integrations.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use `Cloud SQL for PostgreSQL` to load Documents with the `PostgreSQLLoader` class."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "xjcxaw6--Xyy"
|
||||
},
|
||||
"source": [
|
||||
"## Before you begin\n",
|
||||
"\n",
|
||||
"To run this notebook, you will need to do the following:\n",
|
||||
"\n",
|
||||
" * [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
|
||||
" * [Enable the Cloud SQL Admin API.](https://console.cloud.google.com/marketplace/product/google/sqladmin.googleapis.com)\n",
|
||||
" * [Create a Cloud SQL for PostgreSQL instance.](https://cloud.google.com/sql/docs/postgres/create-instance)\n",
|
||||
" * [Create a Cloud SQL for PostgreSQL database.](https://cloud.google.com/sql/docs/postgres/create-manage-databases)\n",
|
||||
" * [Add a User to the database.](https://cloud.google.com/sql/docs/postgres/create-manage-users)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "IR54BmgvdHT_"
|
||||
},
|
||||
"source": [
|
||||
"### 🦜🔗 Library Installation\n",
|
||||
"Install the integration library, `langchain-google-cloud-sql-pg`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 1000
|
||||
},
|
||||
"id": "0ZITIDE160OD",
|
||||
"outputId": "90e0636e-ff34-4e1e-ad37-d2a6db4a317e"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-google-cloud-sql-pg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "v40bB_GMcr9f"
|
||||
},
|
||||
"source": [
|
||||
"**Colab only:** Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "6o0iGVIdDD6K"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # Automatically restart kernel after installs so that your environment can access the new packages\n",
|
||||
"# import IPython\n",
|
||||
"\n",
|
||||
"# app = IPython.Application.instance()\n",
|
||||
"# app.kernel.do_shutdown(True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "cTXTbj4UltKf"
|
||||
},
|
||||
"source": [
|
||||
"### 🔐 Authentication\n",
|
||||
"Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
|
||||
"\n",
|
||||
"* If you are using Colab to run this notebook, use the cell below and continue.\n",
|
||||
"* If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.colab import auth\n",
|
||||
"\n",
|
||||
"auth.authenticate_user()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "Uj02bMRAc9_c"
|
||||
},
|
||||
"source": [
|
||||
"### ☁ Set Your Google Cloud Project\n",
|
||||
"Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
|
||||
"\n",
|
||||
"If you don't know your project ID, try the following:\n",
|
||||
"\n",
|
||||
"* Run `gcloud config list`.\n",
|
||||
"* Run `gcloud projects list`.\n",
|
||||
"* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "wnp1R1PYc9_c",
|
||||
"outputId": "6502c721-a2fd-451f-b946-9f7b850d5966"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @title Project { display-mode: \"form\" }\n",
|
||||
"PROJECT_ID = \"gcp_project_id\" # @param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"# Set the project id\n",
|
||||
"! gcloud config set project {PROJECT_ID}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "rEWWNoNnKOgq",
|
||||
"metadata": {
|
||||
"id": "rEWWNoNnKOgq"
|
||||
},
|
||||
"source": [
|
||||
"### 💡 API Enablement\n",
|
||||
"The `langchain_google_cloud_sql_pg` package requires that you [enable the Cloud SQL Admin API](https://console.cloud.google.com/flows/enableapi?apiid=sqladmin.googleapis.com) in your Google Cloud Project."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "5utKIdq7KYi5",
|
||||
"metadata": {
|
||||
"id": "5utKIdq7KYi5"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# enable Cloud SQL Admin API\n",
|
||||
"!gcloud services enable sqladmin.googleapis.com"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f8f2830ee9ca1e01",
|
||||
"metadata": {
|
||||
"id": "f8f2830ee9ca1e01"
|
||||
},
|
||||
"source": [
|
||||
"## Basic Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "OMvzMWRrR6n7",
|
||||
"metadata": {
|
||||
"id": "OMvzMWRrR6n7"
|
||||
},
|
||||
"source": [
|
||||
"### Set Cloud SQL database values\n",
|
||||
"Find your database variables, in the [Cloud SQL Instances page](https://console.cloud.google.com/sql/instances)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "irl7eMFnSPZr",
|
||||
"metadata": {
|
||||
"id": "irl7eMFnSPZr"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @title Set Your Values Here { display-mode: \"form\" }\n",
|
||||
"REGION = \"us-central1\" # @param {type: \"string\"}\n",
|
||||
"INSTANCE = \"my-primary\" # @param {type: \"string\"}\n",
|
||||
"DATABASE = \"my-database\" # @param {type: \"string\"}\n",
|
||||
"TABLE_NAME = \"vector_store\" # @param {type: \"string\"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "QuQigs4UoFQ2",
|
||||
"metadata": {
|
||||
"id": "QuQigs4UoFQ2"
|
||||
},
|
||||
"source": [
|
||||
"### Cloud SQL Engine\n",
|
||||
"\n",
|
||||
"One of the requirements and arguments to establish PostgreSQL as a document loader is a `PostgresEngine` object. The `PostgresEngine` configures a connection pool to your Cloud SQL for PostgreSQL database, enabling successful connections from your application and following industry best practices.\n",
|
||||
"\n",
|
||||
"To create a `PostgresEngine` using `PostgresEngine.from_instance()` you need to provide only 4 things:\n",
|
||||
"\n",
|
||||
"1. `project_id` : Project ID of the Google Cloud Project where the Cloud SQL instance is located.\n",
|
||||
"1. `region` : Region where the Cloud SQL instance is located.\n",
|
||||
"1. `instance` : The name of the Cloud SQL instance.\n",
|
||||
"1. `database` : The name of the database to connect to on the Cloud SQL instance.\n",
|
||||
"\n",
|
||||
"By default, [IAM database authentication](https://cloud.google.com/sql/docs/postgres/iam-authentication) will be used as the method of database authentication. This library uses the IAM principal belonging to the [Application Default Credentials (ADC)](https://cloud.google.com/docs/authentication/application-default-credentials) sourced from the environment.\n",
|
||||
"\n",
|
||||
"Optionally, [built-in database authentication](https://cloud.google.com/sql/docs/postgres/users) using a username and password to access the Cloud SQL database can also be used. Just provide the optional `user` and `password` arguments to `PostgresEngine.from_instance()`:\n",
|
||||
"\n",
|
||||
"* `user` : Database user to use for built-in database authentication and login\n",
|
||||
"* `password` : Database password to use for built-in database authentication and login.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Note**: This tutorial demonstrates the async interface. All async methods have corresponding sync methods."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_pg import PostgresEngine\n",
|
||||
"\n",
|
||||
"engine = await PostgresEngine.afrom_instance(\n",
|
||||
" project_id=PROJECT_ID,\n",
|
||||
" region=REGION,\n",
|
||||
" instance=INSTANCE,\n",
|
||||
" database=DATABASE,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
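{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For example, a sketch of the corresponding sync interface, which creates the same engine without `await`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sketch only: the sync counterpart of the async engine creation above.\n",
|
||||
"engine = PostgresEngine.from_instance(\n",
|
||||
"    project_id=PROJECT_ID,\n",
|
||||
"    region=REGION,\n",
|
||||
"    instance=INSTANCE,\n",
|
||||
"    database=DATABASE,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||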
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "e1tl0aNx7SWy"
|
||||
},
|
||||
"source": [
|
||||
"### Create PostgresLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "z-AZyzAQ7bsf"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_pg import PostgresLoader\n",
|
||||
"\n",
|
||||
"# Creating a basic PostgreSQL object\n",
|
||||
"loader = await PostgresLoader.create(engine, table_name=TABLE_NAME)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "PeOMpftjc9_e"
|
||||
},
|
||||
"source": [
|
||||
"### Load Documents via default table\n",
|
||||
"The loader returns a list of Documents from the table using the first column as page_content and all other columns as metadata. The default table will have the first column as\n",
|
||||
"page_content and the second column as metadata (JSON). Each row becomes a document. Please note that if you want your documents to have ids you will need to add them in."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "cwvi_O5Wc9_e"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_pg import PostgresLoader\n",
|
||||
"\n",
|
||||
"# Creating a basic PostgresLoader object\n",
|
||||
"loader = await PostgresLoader.create(engine, table_name=TABLE_NAME)\n",
|
||||
"\n",
|
||||
"docs = await loader.aload()\n",
|
||||
"print(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "kSkL9l1Hc9_e"
|
||||
},
|
||||
"source": [
|
||||
"### Load documents via custom table/metadata or custom page content columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = await PostgresLoader.create(\n",
|
||||
" engine,\n",
|
||||
" table_name=TABLE_NAME,\n",
|
||||
" content_columns=[\"product_name\"], # Optional\n",
|
||||
" metadata_columns=[\"id\"], # Optional\n",
|
||||
")\n",
|
||||
"docs = await loader.aload()\n",
|
||||
"print(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "5R6h0_Cvc9_f"
|
||||
},
|
||||
"source": [
|
||||
"### Set page content format\n",
|
||||
"The loader returns a list of Documents, with one document per row, with page content in specified string format, i.e. text (space separated concatenation), JSON, YAML, CSV, etc. JSON and YAML formats include headers, while text and CSV do not include field headers.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "NGNdS7cqc9_f"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = await PostgresLoader.create(\n",
|
||||
" engine,\n",
|
||||
" table_name=\"products\",\n",
|
||||
" content_columns=[\"product_name\", \"description\"],\n",
|
||||
" format=\"YAML\",\n",
|
||||
")\n",
|
||||
"docs = await loader.aload()\n",
|
||||
"print(docs)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": [],
|
||||
"toc_visible": true
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
411
docs/docs/integrations/document_loaders/google_datastore.ipynb
Normal file
@@ -0,0 +1,411 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Google Firestore in Datastore mode\n",
    "\n",
    "> [Firestore in Datastore mode](https://cloud.google.com/datastore) is a serverless document-oriented database that scales to meet any demand. Extend your database application to build AI-powered experiences leveraging Datastore's Langchain integrations.\n",
    "\n",
    "This notebook goes over how to use [Firestore in Datastore mode](https://cloud.google.com/datastore) to [save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/) with `DatastoreLoader` and `DatastoreSaver`.\n",
    "\n",
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googleapis/langchain-google-datastore-python/blob/main/docs/document_loader.ipynb)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Before You Begin\n",
    "\n",
    "To run this notebook, you will need to do the following:\n",
    "\n",
    "* [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
    "* [Create a Datastore database](https://cloud.google.com/datastore/docs/manage-databases)\n",
    "\n",
    "After confirming access to the database in the runtime environment of this notebook, fill in the following values and run the cell before running the example scripts."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @markdown Please specify a source for demo purpose.\n",
    "SOURCE = \"test\"  # @param {type:\"Query\"|\"CollectionGroup\"|\"DocumentReference\"|\"string\"}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 🦜🔗 Library Installation\n",
    "\n",
    "The integration lives in its own `langchain-google-datastore` package, so we need to install it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%pip install --upgrade --quiet langchain-google-datastore"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Colab only**: Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # Automatically restart kernel after installs so that your environment can access the new packages\n",
    "# import IPython\n",
    "\n",
    "# app = IPython.Application.instance()\n",
    "# app.kernel.do_shutdown(True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### ☁ Set Your Google Cloud Project\n",
    "Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
    "\n",
    "If you don't know your project ID, try the following:\n",
    "\n",
    "* Run `gcloud config list`.\n",
    "* Run `gcloud projects list`.\n",
    "* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @markdown Please fill in the value below with your Google Cloud project ID and then run the cell.\n",
    "\n",
    "PROJECT_ID = \"my-project-id\"  # @param {type:\"string\"}\n",
    "\n",
    "# Set the project id\n",
    "!gcloud config set project {PROJECT_ID}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 🔐 Authentication\n",
    "\n",
    "Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
    "\n",
    "- If you are using Colab to run this notebook, use the cell below and continue.\n",
    "- If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from google.colab import auth\n",
    "\n",
    "auth.authenticate_user()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### API Enablement\n",
    "The `langchain-google-datastore` package requires that you [enable the Datastore API](https://console.cloud.google.com/flows/enableapi?apiid=datastore.googleapis.com) in your Google Cloud Project."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# enable Datastore API\n",
    "!gcloud services enable datastore.googleapis.com"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Basic Usage"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Save documents\n",
    "\n",
    "`DatastoreSaver` can store Documents into Datastore. By default it will try to extract the Document reference from the metadata.\n",
    "\n",
    "Save langchain documents with `DatastoreSaver.upsert_documents(<documents>)`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.documents import Document\n",
    "from langchain_google_datastore import DatastoreSaver\n",
    "\n",
    "data = [Document(page_content=\"Hello, World!\")]\n",
    "saver = DatastoreSaver()\n",
    "saver.upsert_documents(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Save documents without reference\n",
    "\n",
    "If a collection is specified, the documents will be stored with an auto-generated id."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "saver = DatastoreSaver(\"Collection\")\n",
    "\n",
    "saver.upsert_documents(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Save documents with other references"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "doc_ids = [\"AnotherCollection/doc_id\", \"foo/bar\"]\n",
    "saver = DatastoreSaver()\n",
    "\n",
    "saver.upsert_documents(documents=data, document_ids=doc_ids)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load from Collection or SubCollection"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load langchain documents with `DatastoreLoader.load()` or `DatastoreLoader.lazy_load()`. `lazy_load` returns a generator that only queries the database during iteration. To initialize the `DatastoreLoader` class you need to provide:\n",
    "\n",
    "1. `source` - An instance of a Query, CollectionGroup, DocumentReference, or a single `/`-delimited path to a Datastore collection."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_google_datastore import DatastoreLoader\n",
    "\n",
    "loader_collection = DatastoreLoader(\"Collection\")\n",
    "loader_subcollection = DatastoreLoader(\"Collection/doc/SubCollection\")\n",
    "\n",
    "\n",
    "data_collection = loader_collection.load()\n",
    "data_subcollection = loader_subcollection.load()"
   ]
  },
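  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a minimal sketch of the lazy path (reusing `loader_collection` from the cell above): since `lazy_load` returns a generator, each document is only fetched from the database as you iterate."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stream documents one at a time instead of materializing the whole list\n",
    "for doc in loader_collection.lazy_load():\n",
    "    print(doc)"
   ]
  },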
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load a single Document"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from google.cloud import datastore\n",
    "\n",
    "client = datastore.Client()\n",
    "doc_ref = client.collection(\"foo\").document(\"bar\")\n",
    "\n",
    "loader_document = DatastoreLoader(doc_ref)\n",
    "\n",
    "data = loader_document.load()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load from CollectionGroup or Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from google.cloud.datastore import CollectionGroup, FieldFilter, Query\n",
    "\n",
    "col_ref = client.collection(\"col_group\")\n",
    "collection_group = CollectionGroup(col_ref)\n",
    "\n",
    "loader_group = DatastoreLoader(collection_group)\n",
    "\n",
    "col_ref = client.collection(\"collection\")\n",
    "query = col_ref.where(filter=FieldFilter(\"region\", \"==\", \"west_coast\"))\n",
    "\n",
    "loader_query = DatastoreLoader(query)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Delete documents\n",
    "\n",
    "Delete a list of langchain documents from a Datastore collection with `DatastoreSaver.delete_documents(<documents>)`.\n",
    "\n",
    "If document ids are provided, the Documents will be ignored."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "saver = DatastoreSaver()\n",
    "\n",
    "saver.delete_documents(data)\n",
    "\n",
    "# The Documents will be ignored and only the document ids will be used.\n",
    "saver.delete_documents(data, doc_ids)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Advanced Usage"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load documents with customized document page content & metadata\n",
    "\n",
    "The `page_content_fields` and `metadata_fields` arguments specify the Datastore Document fields to be written into the LangChain Document's `page_content` and `metadata`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "loader = DatastoreLoader(\n",
    "    source=\"foo/bar/subcol\",\n",
    "    page_content_fields=[\"data_field\"],\n",
    "    metadata_fields=[\"metadata_field\"],\n",
    ")\n",
    "\n",
    "data = loader.load()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Customize Page Content Format\n",
    "\n",
    "When the `page_content` contains only one field, it will be the field value only. Otherwise the `page_content` will be in JSON format."
   ]
  },
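  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal sketch of the difference (`data_field` comes from the example above; `extra_field` is a hypothetical second field): with a single field the `page_content` is the raw field value, while with several fields it is a JSON-encoded string."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# page_content is the raw value of data_field\n",
    "loader_single = DatastoreLoader(\n",
    "    source=\"foo/bar/subcol\",\n",
    "    page_content_fields=[\"data_field\"],\n",
    ")\n",
    "\n",
    "# page_content is a JSON object containing both fields\n",
    "loader_multi = DatastoreLoader(\n",
    "    source=\"foo/bar/subcol\",\n",
    "    page_content_fields=[\"data_field\", \"extra_field\"],\n",
    ")"
   ]
  },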
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Customize Connection & Authentication"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from google.auth import compute_engine\n",
    "from google.cloud.datastore import Client\n",
    "\n",
    "client = Client(database=\"non-default-db\", credentials=compute_engine.Credentials())\n",
    "loader = DatastoreLoader(\n",
    "    source=\"foo\",\n",
    "    client=client,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
613  docs/docs/integrations/document_loaders/google_el_carro.ipynb  Normal file
@@ -0,0 +1,613 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# Google El Carro Oracle Operator\n",
    ">\n",
    "Google [El Carro Oracle Operator](https://github.com/GoogleCloudPlatform/elcarro-oracle-operator)\n",
    "offers a way to run Oracle databases in Kubernetes as a portable, open source,\n",
    "community-driven container orchestration system with no vendor lock-in. El Carro\n",
    "provides a powerful declarative API for comprehensive and consistent\n",
    "configuration and deployment as well as for real-time operations and\n",
    "monitoring.\n",
    "Extend your Oracle database's capabilities to build AI-powered experiences by\n",
    "leveraging the El Carro Langchain integration.\n",
    "\n",
    "This guide goes over how to use the El Carro Langchain integration to\n",
    "[save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/)\n",
    "with `ElCarroLoader` and `ElCarroDocumentSaver`. This integration works for\n",
    "any Oracle database, regardless of where it is running."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "ZqONzXRcWMJg"
   },
   "source": [
    "## Before You Begin\n",
    "\n",
    "Please complete\n",
    "the [Getting Started](https://github.com/googleapis/langchain-google-el-carro-python/tree/main/README.md#getting-started)\n",
    "section of\n",
    "the README to set up your El Carro Oracle database."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "imbbHxKfWPso"
   },
   "source": [
    "### 🦜🔗 Library Installation\n",
    "\n",
    "The integration lives in its own `langchain-google-el-carro` package, so\n",
    "we need to install it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Su5BMP2zWRwM"
   },
   "outputs": [],
   "source": [
    "%pip install --upgrade --quiet langchain-google-el-carro"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "azV0k45WWSVI"
   },
   "source": [
    "## Basic Usage\n",
    "\n",
    "### Set Up Oracle Database Connection\n",
    "Fill out the following variables with your Oracle database connection details."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# @title Set Your Values Here { display-mode: \"form\" }\n",
    "HOST = \"127.0.0.1\"  # @param {type: \"string\"}\n",
    "PORT = 3307  # @param {type: \"integer\"}\n",
    "DATABASE = \"my-database\"  # @param {type: \"string\"}\n",
    "TABLE_NAME = \"message_store\"  # @param {type: \"string\"}\n",
    "USER = \"my-user\"  # @param {type: \"string\"}\n",
    "PASSWORD = input(\"Please provide a password to be used for the database user: \")"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "\n",
    "If you are using El Carro, you can find the hostname and port values in the\n",
    "status of the El Carro Kubernetes instance.\n",
    "Use the user password you created for your PDB.\n",
    "For example:"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "```\n",
    "kubectl get -w instances.oracle.db.anthosapis.com -n db\n",
    "NAME   DB ENGINE   VERSION   EDITION   ENDPOINT      URL                DB NAMES   BACKUP ID   READYSTATUS   READYREASON        DBREADYSTATUS   DBREADYREASON\n",
    "mydb   Oracle      18c       Express   mydb-svc.db   34.71.69.25:6021                          False         CreateInProgress\n",
    "```"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### ElCarroEngine Connection Pool\n",
    "\n",
    "`ElCarroEngine` configures a connection pool to your Oracle database, enabling successful connections from your application and following industry best practices."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "xG1mYFkEWbkp"
   },
   "outputs": [],
   "source": [
    "from langchain_google_el_carro import ElCarroEngine\n",
    "\n",
    "elcarro_engine = ElCarroEngine.from_instance(\n",
    "    db_host=\"127.0.0.1\",\n",
    "    db_port=3307,\n",
    "    db_name=\"PDB1\",\n",
    "    db_user=\"scott\",\n",
    "    db_password=\"tiger\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "ICW3k_qUWgyv"
   },
   "source": [
    "### Initialize a table\n",
    "\n",
    "Initialize a table with the default schema\n",
    "via `elcarro_engine.init_document_table(<table_name>)`. Table columns:\n",
    "\n",
    "- page_content (type: text)\n",
    "- langchain_metadata (type: JSON)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "JmlGLukoWdfS"
   },
   "outputs": [],
   "source": [
    "TABLE_NAME = \"doc_table\"\n",
    "elcarro_engine.init_document_table(\n",
    "    table_name=TABLE_NAME,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "kaI3avj5Wn5O"
   },
   "source": [
    "### Save documents\n",
    "\n",
    "Save langchain documents with `ElCarroDocumentSaver.add_documents(<documents>)`.\n",
    "To initialize the `ElCarroDocumentSaver` class you need to provide 2 things:\n",
    "\n",
    "1. `elcarro_engine` - An instance of an `ElCarroEngine` engine.\n",
    "2. `table_name` - The name of the table within the Oracle database to store\n",
    "   langchain documents."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "skaXpthSWpeg"
   },
   "outputs": [],
   "source": [
    "from langchain_core.documents import Document\n",
    "from langchain_google_el_carro import ElCarroDocumentSaver\n",
    "\n",
    "doc = Document(\n",
    "    page_content=\"Banana\",\n",
    "    metadata={\"type\": \"fruit\", \"weight\": 100, \"organic\": 1},\n",
    ")\n",
    "\n",
    "saver = ElCarroDocumentSaver(\n",
    "    elcarro_engine=elcarro_engine,\n",
    "    table_name=TABLE_NAME,\n",
    ")\n",
    "saver.add_documents([doc])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "owTYQdNyWs9s"
   },
   "source": [
    "### Load documents\n",
    "\n",
    "Load langchain documents with `ElCarroLoader.load()`\n",
    "or `ElCarroLoader.lazy_load()`.\n",
    "`lazy_load` returns a generator that only queries the database during iteration.\n",
    "To initialize the `ElCarroLoader` class you need to provide:\n",
    "\n",
    "1. `elcarro_engine` - An instance of an `ElCarroEngine` engine.\n",
    "2. `table_name` - The name of the table within the Oracle database to store\n",
    "   langchain documents.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "CM6p11amWvYp"
   },
   "outputs": [],
   "source": [
    "from langchain_google_el_carro import ElCarroLoader\n",
    "\n",
    "loader = ElCarroLoader(elcarro_engine=elcarro_engine, table_name=TABLE_NAME)\n",
    "docs = loader.lazy_load()\n",
    "for doc in docs:\n",
    "    print(\"Loaded documents:\", doc)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "OTIDGiZ8WyS3"
   },
   "source": [
    "### Load documents via query\n",
    "\n",
    "Other than loading documents from a table, we can also choose to load documents\n",
    "from a view generated from a SQL query. For example:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "p3OB9AwgWzrq"
   },
   "outputs": [],
   "source": [
    "from langchain_google_el_carro import ElCarroLoader\n",
    "\n",
    "loader = ElCarroLoader(\n",
    "    elcarro_engine=elcarro_engine,\n",
    "    query=f\"SELECT * FROM {TABLE_NAME} WHERE json_value(extra_json_metadata, '$.shape') = 'round'\",\n",
    ")\n",
    "onedoc = loader.load()\n",
    "print(onedoc)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "E6Fl7YNvW3Ep"
   },
   "source": [
    "A view generated from a SQL query can have a different schema than the default table.\n",
    "In such cases, the behavior of `ElCarroLoader` is the same as loading from a table\n",
    "with a non-default schema. Please refer to\n",
    "the section [Load documents with customized document page content & metadata](#load-documents-with-customized-document-page-content--metadata)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "QgsP78MhW4wc"
   },
   "source": [
    "### Delete documents\n",
    "\n",
    "Delete a list of langchain documents from an Oracle table\n",
    "with `ElCarroDocumentSaver.delete(<documents>)`.\n",
    "\n",
    "For a table with a default schema (page_content, langchain_metadata), the\n",
    "deletion criterion is:\n",
    "\n",
    "A `row` should be deleted if there exists a `document` in the list, such that\n",
    "\n",
    "- `document.page_content` equals `row[page_content]`\n",
    "- `document.metadata` equals `row[langchain_metadata]`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "QSYRHGHXW6IN"
   },
   "outputs": [],
   "source": [
    "docs = loader.load()\n",
    "print(\"Documents before delete:\", docs)\n",
    "saver.delete(onedoc)\n",
    "print(\"Documents after delete:\", loader.load())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "RerPkBRAW8yR"
   },
   "source": [
    "## Advanced Usage\n",
    "\n",
    "### Load documents with customized document page content & metadata\n",
    "\n",
    "First we prepare an example table with a non-default schema, and populate it with\n",
    "some arbitrary data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "u0Fd46aqW-8k"
   },
   "outputs": [],
   "source": [
    "import sqlalchemy\n",
    "\n",
    "create_table_query = f\"\"\"CREATE TABLE {TABLE_NAME} (\n",
    "    fruit_id NUMBER GENERATED BY DEFAULT AS IDENTITY (START WITH 1),\n",
    "    fruit_name VARCHAR2(100) NOT NULL,\n",
    "    variety VARCHAR2(50),\n",
    "    quantity_in_stock NUMBER(10) NOT NULL,\n",
    "    price_per_unit NUMBER(6,2) NOT NULL,\n",
    "    organic NUMBER(3) NOT NULL\n",
    ")\"\"\"\n",
    "\n",
    "with elcarro_engine.connect() as conn:\n",
    "    conn.execute(sqlalchemy.text(create_table_query))\n",
    "    conn.commit()\n",
    "    conn.execute(\n",
    "        sqlalchemy.text(\n",
    "            f\"\"\"\n",
    "            INSERT INTO {TABLE_NAME} (fruit_name, variety, quantity_in_stock, price_per_unit, organic)\n",
    "            VALUES ('Apple', 'Granny Smith', 150, 0.99, 1)\n",
    "            \"\"\"\n",
    "        )\n",
    "    )\n",
    "    conn.execute(\n",
    "        sqlalchemy.text(\n",
    "            f\"\"\"\n",
    "            INSERT INTO {TABLE_NAME} (fruit_name, variety, quantity_in_stock, price_per_unit, organic)\n",
    "            VALUES ('Banana', 'Cavendish', 200, 0.59, 0)\n",
    "            \"\"\"\n",
    "        )\n",
    "    )\n",
    "    conn.execute(\n",
    "        sqlalchemy.text(\n",
    "            f\"\"\"\n",
    "            INSERT INTO {TABLE_NAME} (fruit_name, variety, quantity_in_stock, price_per_unit, organic)\n",
    "            VALUES ('Orange', 'Navel', 80, 1.29, 1)\n",
    "            \"\"\"\n",
    "        )\n",
    "    )\n",
    "    conn.commit()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "hGPYiTu7XBh3"
   },
   "source": [
    "If we still load langchain documents with default parameters of `ElCarroLoader`\n",
    "from this example table, the `page_content` of loaded documents will be the\n",
    "first column of the table, and `metadata` will consist of key-value pairs\n",
    "of all the other columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "eQbRapM_XC1S"
   },
   "outputs": [],
   "source": [
    "loader = ElCarroLoader(\n",
    "    elcarro_engine=elcarro_engine,\n",
    "    table_name=TABLE_NAME,\n",
    ")\n",
    "loaded_docs = loader.load()\n",
    "print(f\"Loaded Documents: [{loaded_docs}]\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "tOH6i2jWXFqz"
   },
   "source": [
    "We can specify the content and metadata we want to load by setting\n",
    "the `content_columns` and `metadata_columns` when initializing\n",
    "the `ElCarroLoader`.\n",
    "\n",
    "1. `content_columns`: The columns to write into the `page_content` of the\n",
    "   document.\n",
    "2. `metadata_columns`: The columns to write into the `metadata` of the document.\n",
    "\n",
    "For example here, the values of columns in `content_columns` will be joined\n",
    "together into a space-separated string, as `page_content` of loaded documents,\n",
    "and `metadata` of loaded documents will only contain key-value pairs of columns\n",
    "specified in `metadata_columns`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "9gCFWqgGXHD3"
   },
   "outputs": [],
   "source": [
    "loader = ElCarroLoader(\n",
    "    elcarro_engine=elcarro_engine,\n",
    "    table_name=TABLE_NAME,\n",
    "    content_columns=[\n",
    "        \"variety\",\n",
    "        \"quantity_in_stock\",\n",
    "        \"price_per_unit\",\n",
    "        \"organic\",\n",
    "    ],\n",
    "    metadata_columns=[\"fruit_id\", \"fruit_name\"],\n",
    ")\n",
    "loaded_docs = loader.load()\n",
    "print(f\"Loaded Documents: [{loaded_docs}]\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "4KlSfvPJXKgM"
   },
   "source": [
    "### Save document with customized page content & metadata\n",
    "\n",
    "To save a langchain document into a table with customized metadata fields, we\n",
    "first need to create such a table via `ElCarroEngine.init_document_table()`, and\n",
    "specify the list of `metadata_columns` we want it to have. In this example, the\n",
    "created table will have table columns:\n",
    "\n",
    "- content (type: text): for storing fruit description.\n",
    "- type (type: text): for storing fruit type.\n",
    "- weight (type: INT): for storing fruit weight.\n",
    "- extra_json_metadata (type: JSON): for storing other metadata information of the\n",
    "  fruit.\n",
    "\n",
    "We can use the following parameters\n",
    "with `elcarro_engine.init_document_table()` to create the table:\n",
    "\n",
    "1. `table_name`: The name of the table within the Oracle database to store\n",
    "   langchain documents.\n",
    "2. `metadata_columns`: A list of `sqlalchemy.Column` indicating the list of\n",
    "   metadata columns we need.\n",
    "3. `content_column`: column name to store `page_content` of langchain\n",
    "   document. Default: `\"page_content\", \"VARCHAR2(4000)\"`\n",
    "4. `metadata_json_column`: column name to store extra\n",
    "   JSON `metadata` of langchain document.\n",
    "   Default: `\"langchain_metadata\", \"VARCHAR2(4000)\"`.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "1Wqs05gpXMW9"
   },
   "outputs": [],
   "source": [
    "elcarro_engine.init_document_table(\n",
    "    table_name=TABLE_NAME,\n",
    "    metadata_columns=[\n",
    "        sqlalchemy.Column(\"type\", sqlalchemy.dialects.oracle.VARCHAR2(200)),\n",
    "        sqlalchemy.Column(\"weight\", sqlalchemy.INT),\n",
    "    ],\n",
    "    content_column=\"content\",\n",
    "    metadata_json_column=\"extra_json_metadata\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "bVEWHYU-XPFt"
   },
   "source": [
    "Save documents with `ElCarroDocumentSaver.add_documents(<documents>)`. As you\n",
    "can see in this example,\n",
    "\n",
    "- `document.page_content` will be saved into the `content` column.\n",
    "- `document.metadata.type` will be saved into the `type` column.\n",
    "- `document.metadata.weight` will be saved into the `weight` column.\n",
    "- `document.metadata.organic` will be saved into the `extra_json_metadata` column in\n",
    "  JSON format.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Iy4wRZLPXQn5"
   },
   "outputs": [],
   "source": [
    "doc = Document(\n",
    "    page_content=\"Banana\",\n",
    "    metadata={\"type\": \"fruit\", \"weight\": 100, \"organic\": 1},\n",
    ")\n",
    "\n",
    "print(f\"Original Document: [{doc}]\")\n",
    "\n",
    "saver = ElCarroDocumentSaver(\n",
    "    elcarro_engine=elcarro_engine,\n",
    "    table_name=TABLE_NAME,\n",
    "    content_column=\"content\",\n",
    "    metadata_json_column=\"extra_json_metadata\",\n",
    ")\n",
    "saver.add_documents([doc])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "x0vkL7PKXUmU"
   },
   "source": [
    "### Delete documents with customized page content & metadata\n",
    "\n",
    "We can also delete documents from a table with customized metadata columns\n",
    "via `ElCarroDocumentSaver.delete(<documents>)`. The deletion criterion is:\n",
    "\n",
    "A `row` should be deleted if there exists a `document` in the list, such that\n",
    "\n",
    "- `document.page_content` equals `row[page_content]`\n",
    "- For every metadata field `k` in `document.metadata`\n",
    "  - `document.metadata[k]` equals `row[k]` or `document.metadata[k]`\n",
    "    equals `row[langchain_metadata][k]`\n",
    "- There is no extra metadata field present in `row` but not\n",
    "  in `document.metadata`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "OcJPeCuKXWSa"
   },
   "outputs": [],
   "source": [
    "saver.delete(loaded_docs)\n",
    "print(f\"Documents left: {len(loader.load())}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "S4SxUoY-XsPN"
   },
   "source": [
    "## More examples\n",
    "\n",
    "Please look at\n",
    "[demo_doc_loader_basic.py](https://github.com/googleapis/langchain-google-el-carro-python/tree/main/samples/demo_doc_loader_basic.py)\n",
    "and [demo_doc_loader_advanced.py](https://github.com/googleapis/langchain-google-el-carro-python/tree/main/samples/demo_doc_loader_advanced.py)\n",
    "for complete code examples.\n"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
413  docs/docs/integrations/document_loaders/google_firestore.ipynb  Normal file
@@ -0,0 +1,413 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Google Firestore (Native Mode)\n",
    "\n",
    "> [Firestore](https://cloud.google.com/firestore) is a serverless document-oriented database that scales to meet any demand. Extend your database application to build AI-powered experiences leveraging Firestore's Langchain integrations.\n",
    "\n",
    "This notebook goes over how to use [Firestore](https://cloud.google.com/firestore) to [save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/) with `FirestoreLoader` and `FirestoreSaver`.\n",
    "\n",
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googleapis/langchain-google-firestore-python/blob/main/docs/document_loader.ipynb)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Before You Begin\n",
    "\n",
    "To run this notebook, you will need to do the following:\n",
    "\n",
    "* [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
    "* [Create a Firestore database](https://cloud.google.com/firestore/docs/manage-databases)\n",
    "\n",
    "After confirming access to the database in the runtime environment of this notebook, fill in the following values and run the cell before running the example scripts."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @markdown Please specify a source for demo purpose.\n",
    "SOURCE = \"test\"  # @param {type:\"Query\"|\"CollectionGroup\"|\"DocumentReference\"|\"string\"}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 🦜🔗 Library Installation\n",
    "\n",
    "The integration lives in its own `langchain-google-firestore` package, so we need to install it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%pip install --upgrade --quiet langchain-google-firestore"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Colab only**: Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # Automatically restart kernel after installs so that your environment can access the new packages\n",
    "# import IPython\n",
    "\n",
    "# app = IPython.Application.instance()\n",
    "# app.kernel.do_shutdown(True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### ☁ Set Your Google Cloud Project\n",
    "Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
    "\n",
    "If you don't know your project ID, try the following:\n",
    "\n",
    "* Run `gcloud config list`.\n",
    "* Run `gcloud projects list`.\n",
    "* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @markdown Please fill in the value below with your Google Cloud project ID and then run the cell.\n",
    "\n",
    "PROJECT_ID = \"my-project-id\"  # @param {type:\"string\"}\n",
    "\n",
    "# Set the project id\n",
    "!gcloud config set project {PROJECT_ID}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 🔐 Authentication\n",
    "\n",
    "Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
    "\n",
    "- If you are using Colab to run this notebook, use the cell below and continue.\n",
    "- If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from google.colab import auth\n",
    "\n",
    "auth.authenticate_user()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### API Enablement\n",
    "The `langchain-google-firestore` package requires that you [enable the Firestore Admin API](https://console.cloud.google.com/flows/enableapi?apiid=firestore.googleapis.com) in your Google Cloud Project."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# enable Firestore Admin API\n",
    "!gcloud services enable firestore.googleapis.com"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Basic Usage"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Save documents\n",
    "\n",
    "`FirestoreSaver` can store Documents into Firestore. By default it will try to extract the Document reference from the metadata.\n",
    "\n",
    "Save langchain documents with `FirestoreSaver.upsert_documents(<documents>)`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.documents.base import Document\n",
    "from langchain_google_firestore import FirestoreSaver\n",
    "\n",
    "saver = FirestoreSaver()\n",
    "\n",
    "data = [Document(page_content=\"Hello, World!\")]\n",
    "\n",
    "saver.upsert_documents(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Save documents without reference\n",
    "\n",
    "If a collection is specified, the documents will be stored with an auto-generated id."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "saver = FirestoreSaver(\"Collection\")\n",
    "\n",
    "saver.upsert_documents(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Save documents with other references"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "doc_ids = [\"AnotherCollection/doc_id\", \"foo/bar\"]\n",
    "saver = FirestoreSaver()\n",
    "\n",
    "saver.upsert_documents(documents=data, document_ids=doc_ids)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load from Collection or SubCollection"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load langchain documents with `FirestoreLoader.load()` or `FirestoreLoader.lazy_load()`. `lazy_load` returns a generator that only queries the database during iteration. To initialize the `FirestoreLoader` class you need to provide:\n",
    "\n",
    "1. `source` - An instance of a Query, CollectionGroup, DocumentReference, or a single `/`-delimited path to a Firestore collection."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_google_firestore import FirestoreLoader\n",
    "\n",
    "loader_collection = FirestoreLoader(\"Collection\")\n",
    "loader_subcollection = FirestoreLoader(\"Collection/doc/SubCollection\")\n",
    "\n",
    "\n",
    "data_collection = loader_collection.load()\n",
    "data_subcollection = loader_subcollection.load()"
   ]
  },
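  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a minimal sketch of the lazy path (reusing `loader_collection` from the cell above): since `lazy_load` returns a generator, each document is only fetched from the database as you iterate."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stream documents one at a time instead of materializing the whole list\n",
    "for doc in loader_collection.lazy_load():\n",
    "    print(doc)"
   ]
  },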
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load a single Document"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from google.cloud import firestore\n",
    "\n",
    "client = firestore.Client()\n",
    "doc_ref = client.collection(\"foo\").document(\"bar\")\n",
    "\n",
    "loader_document = FirestoreLoader(doc_ref)\n",
    "\n",
    "data = loader_document.load()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load from CollectionGroup or Query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from google.cloud.firestore import CollectionGroup, FieldFilter, Query\n",
    "\n",
    "col_ref = client.collection(\"col_group\")\n",
    "collection_group = CollectionGroup(col_ref)\n",
    "\n",
    "loader_group = FirestoreLoader(collection_group)\n",
    "\n",
    "col_ref = client.collection(\"collection\")\n",
    "query = col_ref.where(filter=FieldFilter(\"region\", \"==\", \"west_coast\"))\n",
    "\n",
    "loader_query = FirestoreLoader(query)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Delete documents\n",
    "\n",
    "Delete a list of langchain documents from a Firestore collection with `FirestoreSaver.delete_documents(<documents>)`.\n",
    "\n",
    "If document ids are provided, the Documents will be ignored."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "saver = FirestoreSaver()\n",
    "\n",
    "saver.delete_documents(data)\n",
    "\n",
    "# The Documents will be ignored and only the document ids will be used.\n",
    "saver.delete_documents(data, doc_ids)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Advanced Usage"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load documents with customized document page content & metadata\n",
    "\n",
    "The `page_content_fields` and `metadata_fields` arguments specify the Firestore Document fields to be written into the LangChain Document's `page_content` and `metadata`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "loader = FirestoreLoader(\n",
    "    source=\"foo/bar/subcol\",\n",
    "    page_content_fields=[\"data_field\"],\n",
    "    metadata_fields=[\"metadata_field\"],\n",
    ")\n",
    "\n",
    "data = loader.load()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Customize Page Content Format\n",
    "\n",
    "When the `page_content` contains only one field, it will be the field value only. Otherwise the `page_content` will be in JSON format."
   ]
  },
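  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal sketch of the difference (`data_field` comes from the example above; `extra_field` is a hypothetical second field): with a single field the `page_content` is the raw field value, while with several fields it is a JSON-encoded string."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# page_content is the raw value of data_field\n",
    "loader_single = FirestoreLoader(\n",
    "    source=\"foo/bar/subcol\",\n",
    "    page_content_fields=[\"data_field\"],\n",
    ")\n",
    "\n",
    "# page_content is a JSON object containing both fields\n",
    "loader_multi = FirestoreLoader(\n",
    "    source=\"foo/bar/subcol\",\n",
    "    page_content_fields=[\"data_field\", \"extra_field\"],\n",
    ")"
   ]
  },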
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Customize Connection & Authentication"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from google.auth import compute_engine\n",
    "from google.cloud.firestore import Client\n",
    "\n",
    "client = Client(database=\"non-default-db\", credentials=compute_engine.Credentials())\n",
    "loader = FirestoreLoader(\n",
    "    source=\"foo\",\n",
    "    client=client,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
@@ -0,0 +1,318 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "6-0_o3DxsFGi"
   },
   "source": [
    "# Google Memorystore for Redis\n",
    "\n",
    "> [Google Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis/memorystore-for-redis-overview) is a fully-managed service that is powered by the Redis in-memory data store to build application caches that provide sub-millisecond data access. Extend your database application to build AI-powered experiences leveraging Memorystore for Redis's Langchain integrations.\n",
    "\n",
    "This notebook goes over how to use [Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis/memorystore-for-redis-overview) to [save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/) with `MemorystoreDocumentLoader` and `MemorystoreDocumentSaver`.\n",
    "\n",
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googleapis/langchain-google-memorystore-redis-python/blob/main/docs/document_loader.ipynb)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Before You Begin\n",
    "\n",
    "To run this notebook, you will need to do the following:\n",
    "\n",
    "* [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
    "* [Create a Memorystore for Redis instance](https://cloud.google.com/memorystore/docs/redis/create-instance-console). Ensure that the version is greater than or equal to 5.0.\n",
    "\n",
    "After confirming access to the database in the runtime environment of this notebook, fill in the following values and run the cell before running the example scripts."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @markdown Please specify an endpoint associated with the instance and a key prefix for demo purpose.\n",
    "ENDPOINT = \"redis://127.0.0.1:6379\"  # @param {type:\"string\"}\n",
    "KEY_PREFIX = \"doc:\"  # @param {type:\"string\"}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 🦜🔗 Library Installation\n",
    "\n",
    "The integration lives in its own `langchain-google-memorystore-redis` package, so we need to install it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install --upgrade --quiet langchain-google-memorystore-redis"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Colab only**: Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# # Automatically restart kernel after installs so that your environment can access the new packages\n",
    "# import IPython\n",
    "\n",
    "# app = IPython.Application.instance()\n",
    "# app.kernel.do_shutdown(True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### ☁ Set Your Google Cloud Project\n",
    "Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
    "\n",
    "If you don't know your project ID, try the following:\n",
    "\n",
    "* Run `gcloud config list`.\n",
    "* Run `gcloud projects list`.\n",
    "* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @markdown Please fill in the value below with your Google Cloud project ID and then run the cell.\n",
    "\n",
    "PROJECT_ID = \"my-project-id\"  # @param {type:\"string\"}\n",
    "\n",
    "# Set the project id\n",
    "!gcloud config set project {PROJECT_ID}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 🔐 Authentication\n",
    "\n",
    "Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
    "\n",
    "- If you are using Colab to run this notebook, use the cell below and continue.\n",
    "- If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from google.colab import auth\n",
    "\n",
    "auth.authenticate_user()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "2L7kMu__sFGl"
   },
   "source": [
    "## Basic Usage"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Save documents\n",
    "\n",
    "Save langchain documents with `MemorystoreDocumentSaver.add_documents(<documents>)`. To initialize the `MemorystoreDocumentSaver` class you need to provide 2 things:\n",
    "\n",
    "1. `client` - A `redis.Redis` client object.\n",
    "1. `key_prefix` - A prefix for the keys to store Documents in Redis.\n",
    "\n",
    "The Documents will be stored under randomly generated keys with the specified prefix `key_prefix`. Alternatively, you can designate the suffixes of the keys by specifying `ids` in the `add_documents` method."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import redis\n",
    "from langchain_core.documents.base import Document\n",
    "from langchain_google_memorystore_redis import MemorystoreDocumentSaver\n",
    "\n",
    "test_docs = [\n",
    "    Document(\n",
    "        page_content=\"Apple Granny Smith 150 0.99 1\",\n",
    "        metadata={\"fruit_id\": 1},\n",
    "    ),\n",
    "    Document(\n",
    "        page_content=\"Banana Cavendish 200 0.59 0\",\n",
    "        metadata={\"fruit_id\": 2},\n",
    "    ),\n",
    "    Document(\n",
    "        page_content=\"Orange Navel 80 1.29 1\",\n",
    "        metadata={\"fruit_id\": 3},\n",
    "    ),\n",
    "]\n",
    "doc_ids = [f\"{i}\" for i in range(len(test_docs))]\n",
    "\n",
    "redis_client = redis.from_url(ENDPOINT)\n",
    "saver = MemorystoreDocumentSaver(\n",
    "    client=redis_client,\n",
    "    key_prefix=KEY_PREFIX,\n",
    "    content_field=\"page_content\",\n",
    ")\n",
    "saver.add_documents(test_docs, ids=doc_ids)"
   ]
  },
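  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick sanity check (a sketch using the plain redis-py client, not part of the integration API): the documents should now be stored under keys of the form `key_prefix` + id, e.g. `doc:0`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# List the keys the saver just wrote, e.g. [b\"doc:0\", b\"doc:1\", b\"doc:2\"]\n",
    "print(redis_client.keys(f\"{KEY_PREFIX}*\"))"
   ]
  },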
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "A2fT1iEhsFGl"
   },
   "source": [
    "### Load documents\n",
    "\n",
    "Initialize a loader that loads all documents stored in the Memorystore for Redis instance with a specific prefix.\n",
    "\n",
    "Load langchain documents with `MemorystoreDocumentLoader.load()` or `MemorystoreDocumentLoader.lazy_load()`. `lazy_load` returns a generator that only queries the database during iteration. To initialize the `MemorystoreDocumentLoader` class you need to provide:\n",
    "\n",
    "1. `client` - A `redis.Redis` client object.\n",
    "1. `key_prefix` - A prefix for the keys to store Documents in Redis."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "YEDKWR6asFGl"
   },
   "outputs": [],
   "source": [
    "import redis\n",
    "from langchain_google_memorystore_redis import MemorystoreDocumentLoader\n",
    "\n",
    "redis_client = redis.from_url(ENDPOINT)\n",
    "loader = MemorystoreDocumentLoader(\n",
    "    client=redis_client,\n",
    "    key_prefix=KEY_PREFIX,\n",
    "    content_fields=set([\"page_content\"]),\n",
    ")\n",
    "for doc in loader.lazy_load():\n",
    "    print(\"Loaded documents:\", doc)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Delete documents\n",
    "\n",
    "Delete all keys with the specified prefix in the Memorystore for Redis instance with `MemorystoreDocumentSaver.delete()`. You can also specify the suffixes of the keys if you know them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "docs = loader.load()\n",
    "print(\"Documents before delete:\", docs)\n",
    "\n",
    "saver.delete(ids=[\"0\"])\n",
    "print(\"Documents after delete:\", loader.load())\n",
    "\n",
    "saver.delete()\n",
    "print(\"Documents after delete all:\", loader.load())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Advanced Usage"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "02xxvmzTsFGm"
   },
   "source": [
    "### Customize Document Page Content & Metadata\n",
    "\n",
    "When initializing a loader with more than 1 content field, the `page_content` of the loaded Documents will contain a JSON-encoded string with top level fields equal to the specified fields in `content_fields`.\n",
    "\n",
    "If the `metadata_fields` are specified, the `metadata` field of the loaded Documents will only have the top level fields equal to the specified `metadata_fields`. If any of the values of the metadata fields is stored as a JSON-encoded string, it will be decoded prior to being loaded to the metadata fields."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "BvS3UFsysFGm"
   },
   "outputs": [],
   "source": [
    "loader = MemorystoreDocumentLoader(\n",
    "    client=redis_client,\n",
    "    key_prefix=KEY_PREFIX,\n",
    "    content_fields=set([\"content_field_1\", \"content_field_2\"]),\n",
    "    metadata_fields=set([\"title\", \"author\"]),\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "provenance": [
    {
     "file_id": "1kuFhDfyzOdzS1apxQ--1efXB1pJ79yVY",
     "timestamp": 1708033015250
    }
   ]
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
512  docs/docs/integrations/document_loaders/google_spanner.ipynb  Normal file
@@ -0,0 +1,512 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Spanner\n",
|
||||
"\n",
|
||||
"> [Spanner](https://cloud.google.com/spanner) is a highly scalable database that combines unlimited scalability with relational semantics, such as secondary indexes, strong consistency, schemas, and SQL providing 99.999% availability in one easy solution. Extend your database application to build AI-powered experiences leveraging Spanner's Langchain integrations.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use [Spanner](https://cloud.google.com/spanner) to [save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/) with `SpannerLoader` and `SpannerDocumentSaver`.\n",
|
||||
"\n",
|
||||
"[](https://colab.research.google.com/github/googleapis/langchain-google-spanner-python/blob/main/docs/document_loader.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Before You Begin\n",
|
||||
"\n",
|
||||
"To run this notebook, you will need to do the following:\n",
|
||||
"\n",
|
||||
"* [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
|
||||
"* [Create a Spanner instance](https://cloud.google.com/spanner/docs/create-manage-instances)\n",
|
||||
"* [Create a Spanner database](https://cloud.google.com/spanner/docs/create-manage-databases)\n",
|
||||
"* [Create a Spanner table](https://cloud.google.com/spanner/docs/create-query-database-console#create-schema)\n",
|
||||
"\n",
|
||||
"After confirmed access to database in the runtime environment of this notebook, filling the following values and run the cell before running example scripts."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please specify an instance id, a database, and a table for demo purpose.\n",
|
||||
"INSTANCE_ID = \"test_instance\" # @param {type:\"string\"}\n",
|
||||
"DATABASE_ID = \"test_database\" # @param {type:\"string\"}\n",
|
||||
"TABLE_NAME = \"test_table\" # @param {type:\"string\"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🦜🔗 Library Installation\n",
|
||||
"\n",
|
||||
"The integration lives in its own `langchain-google-spanner` package, so we need to install it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -upgrade --quiet langchain-google-spanner"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Colab only**: Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # Automatically restart kernel after installs so that your environment can access the new packages\n",
|
||||
"# import IPython\n",
|
||||
"\n",
|
||||
"# app = IPython.Application.instance()\n",
|
||||
"# app.kernel.do_shutdown(True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ☁ Set Your Google Cloud Project\n",
|
||||
"Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
|
||||
"\n",
|
||||
"If you don't know your project ID, try the following:\n",
|
||||
"\n",
|
||||
"* Run `gcloud config list`.\n",
|
||||
"* Run `gcloud projects list`.\n",
|
||||
"* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please fill in the value below with your Google Cloud project ID and then run the cell.\n",
|
||||
"\n",
|
||||
"PROJECT_ID = \"my-project-id\" # @param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"# Set the project id\n",
|
||||
"!gcloud config set project {PROJECT_ID}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🔐 Authentication\n",
|
||||
"\n",
|
||||
"Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
|
||||
"\n",
|
||||
"- If you are using Colab to run this notebook, use the cell below and continue.\n",
|
||||
"- If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.colab import auth\n",
|
||||
"\n",
|
||||
"auth.authenticate_user()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Basic Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Save documents\n",
|
||||
"\n",
|
||||
"Save langchain documents with `SpannerDocumentSaver.add_documents(<documents>)`. To initialize `SpannerDocumentSaver` class you need to provide 3 things:\n",
|
||||
"\n",
|
||||
"1. `instance_id` - An instance of Spanner to load data from.\n",
|
||||
"1. `database_id` - An instance of Spanner database to load data from.\n",
|
||||
"1. `table_name` - The name of the table within the Spanner database to store langchain documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain_google_spanner import SpannerDocumentSaver\n",
|
||||
"\n",
|
||||
"test_docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Apple Granny Smith 150 0.99 1\",\n",
|
||||
" metadata={\"fruit_id\": 1},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Banana Cavendish 200 0.59 0\",\n",
|
||||
" metadata={\"fruit_id\": 2},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Orange Navel 80 1.29 1\",\n",
|
||||
" metadata={\"fruit_id\": 3},\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"saver = SpannerDocumentSaver(\n",
|
||||
" instance_id=INSTANCE_ID,\n",
|
||||
" database_id=DATABASE_ID,\n",
|
||||
" table_name=TABLE_NAME,\n",
|
||||
")\n",
|
||||
"saver.add_documents(test_docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Querying for Documents from Spanner\n",
|
||||
"\n",
|
||||
"For more details on connecting to a Spanner table, please check the [Python SDK documentation](https://cloud.google.com/python/docs/reference/spanner/latest)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Load documents from table\n",
|
||||
"\n",
|
||||
"Load langchain documents with `SpannerLoader.load()` or `SpannerLoader.lazy_load()`. `lazy_load` returns a generator that only queries database during the iteration. To initialize `SpannerLoader` class you need to provide:\n",
|
||||
"\n",
|
||||
"1. `instance_id` - An instance of Spanner to load data from.\n",
|
||||
"1. `database_id` - An instance of Spanner database to load data from.\n",
|
||||
"1. `query` - A query of the database dialect."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_spanner import SpannerLoader\n",
|
||||
"\n",
|
||||
"query = f\"SELECT * from {TABLE_NAME}\"\n",
|
||||
"loader = SpannerLoader(\n",
|
||||
" instance_id=INSTANCE_ID,\n",
|
||||
" database_id=DATABASE_ID,\n",
|
||||
" query=query,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" print(doc)\n",
|
||||
" break"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete documents\n",
|
||||
"\n",
|
||||
"Delete a list of langchain documents from the table with `SpannerDocumentSaver.delete(<documents>)`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"print(\"Documents before delete:\", docs)\n",
|
||||
"\n",
|
||||
"doc = test_docs[0]\n",
|
||||
"saver.delete([doc])\n",
|
||||
"print(\"Documents after delete:\", loader.load())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Advanced Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Customize Document Page Content & Metadata\n",
|
||||
"\n",
|
||||
"The loader will returns a list of Documents with page content from a specific data columns. All other data columns will be added to metadata. Each row becomes a document."
|
||||
]
|
||||
},
|
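As a minimal sketch of that default behavior, assuming a table with a `page_content` column plus a `fruit_id` column (matching the rows saved earlier), a loaded row would look roughly like:

```python
from langchain_core.documents import Document

# The `page_content` column becomes the page content; every other
# column (here, fruit_id) lands in the Document's metadata.
doc = Document(
    page_content="Apple Granny Smith 150 0.99 1",
    metadata={"fruit_id": 1},
)
```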
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Customize page content format\n",
|
||||
"\n",
|
||||
"The SpannerLoader assumes there is a column called `page_content`. These defaults can be changed like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"custom_content_loader = SpannerLoader(\n",
|
||||
" INSTANCE_ID, DATABASE_ID, query, content_columns=[\"custom_content\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If multiple columns are specified, the page content's string format will default to `text` (space-separated string concatenation). There are other format that user can specify, including `text`, `JSON`, `YAML`, `CSV`."
|
||||
]
|
||||
},
|
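As a hedged sketch of selecting one of those formats (the `format` keyword below is an assumption; check the `langchain-google-spanner` reference for the exact parameter name):

```python
from langchain_google_spanner import SpannerLoader

json_content_loader = SpannerLoader(
    INSTANCE_ID,
    DATABASE_ID,
    query,
    content_columns=["content_field_1", "content_field_2"],
    format="JSON",  # assumed keyword; one of "text", "JSON", "YAML", "CSV"
)
```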
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Customize metadata format\n",
|
||||
"\n",
|
||||
"The SpannerLoader assumes there is a metadata column called `langchain_metadata` that store JSON data. The metadata column will be used as the base dictionary. By default, all other column data will be added and may overwrite the original value. These defaults can be changed like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"custom_metadata_loader = SpannerLoader(\n",
|
||||
" INSTANCE_ID, DATABASE_ID, query, metadata_columns=[\"column1\", \"column2\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Customize JSON metadata column name\n",
|
||||
"\n",
|
||||
"By default, the loader uses `langchain_metadata` as the base dictionary. This can be customized to select a JSON column to use as base dictionary for the Document's metadata."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"custom_metadata_json_loader = SpannerLoader(\n",
|
||||
" INSTANCE_ID, DATABASE_ID, query, metadata_json_column=\"another-json-column\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Custom staleness\n",
|
||||
"\n",
|
||||
"The default [staleness](https://cloud.google.com/python/docs/reference/spanner/latest/snapshot-usage#beginning-a-snapshot) is 15s. This can be customized by specifying a weaker bound (which can either be to perform all reads as of a given timestamp), or as of a given duration in the past."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import datetime\n",
|
||||
"\n",
|
||||
"timestamp = datetime.datetime.utcnow()\n",
|
||||
"custom_timestamp_loader = SpannerLoader(\n",
|
||||
" INSTANCE_ID,\n",
|
||||
" DATABASE_ID,\n",
|
||||
" query,\n",
|
||||
" staleness=timestamp,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"duration = 20.0\n",
|
||||
"custom_duration_loader = SpannerLoader(\n",
|
||||
" INSTANCE_ID,\n",
|
||||
" DATABASE_ID,\n",
|
||||
" query,\n",
|
||||
" staleness=duration,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Turn on data boost\n",
|
||||
"\n",
|
||||
"By default, the loader will not use [data boost](https://cloud.google.com/spanner/docs/databoost/databoost-overview) since it has additional costs associated, and require additional IAM permissions. However, user can choose to turn it on."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"custom_databoost_loader = SpannerLoader(\n",
|
||||
" INSTANCE_ID,\n",
|
||||
" DATABASE_ID,\n",
|
||||
" query,\n",
|
||||
" databoost=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Custom client\n",
|
||||
"\n",
|
||||
"The client created by default is the default client. To pass in `credentials` and `project` explicitly, a custom client can be passed to the constructor."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.cloud import spanner\n",
|
||||
"from google.oauth2 import service_account\n",
|
||||
"\n",
|
||||
"creds = service_account.Credentials.from_service_account_file(\"/path/to/key.json\")\n",
|
||||
"custom_client = spanner.Client(project=\"my-project\", credentials=creds)\n",
|
||||
"saver = SpannerDocumentSaver(\n",
|
||||
" INSTANCE_ID,\n",
|
||||
" DATABASE_ID,\n",
|
||||
" TABLE_NAME,\n",
|
||||
" client=custom_client,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Custom initialization for SpannerDocumentSaver\n",
|
||||
"\n",
|
||||
"The SpannerDocumentSaver allows custom initialization. This allows user to specify how the Document is saved into the table.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"content_column: This will be used as the column name for the Document's page content. Defaulted to `page_content`.\n",
|
||||
"\n",
|
||||
"metadata_columns: These metadata will be saved into specific columns if the key exists in the Document's metadata.\n",
|
||||
"\n",
|
||||
"metadata_json_column: This will be the column name for the spcial JSON column. Defaulted to `langchain_metadata`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"custom_saver = SpannerDocumentSaver(\n",
|
||||
" INSTANCE_ID,\n",
|
||||
" DATABASE_ID,\n",
|
||||
" TABLE_NAME,\n",
|
||||
" content_column=\"my-content\",\n",
|
||||
" metadata_columns=[\"foo\"],\n",
|
||||
" metadata_json_column=\"my-special-json-column\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Initialize custom schema for Spanner\n",
|
||||
"\n",
|
||||
"The SpannerDocumentSaver will have a `init_document_table` method to create a new table to store docs with custom schema."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_spanner import Column\n",
|
||||
"\n",
|
||||
"new_table_name = \"my_new_table\"\n",
|
||||
"\n",
|
||||
"SpannerDocumentSaver.init_document_table(\n",
|
||||
" INSTANCE_ID,\n",
|
||||
" DATABASE_ID,\n",
|
||||
" new_table_name,\n",
|
||||
" content_column=\"my-page-content\",\n",
|
||||
" metadata_columns=[\n",
|
||||
" Column(\"category\", \"STRING(36)\", True),\n",
|
||||
" Column(\"price\", \"FLOAT64\", False),\n",
|
||||
" ],\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -78,9 +78,9 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import RetrievalQAWithSourcesChain\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain_community.vectorstores import Chroma\n",
|
||||
"from langchain_openai import OpenAI, OpenAIEmbeddings"
|
||||
"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -62,9 +62,9 @@
|
||||
"warnings.filterwarnings(\"ignore\")\n",
|
||||
"from pprint import pprint\n",
|
||||
"\n",
|
||||
"from langchain.text_splitter import Language\n",
|
||||
"from langchain_community.document_loaders.generic import GenericLoader\n",
|
||||
"from langchain_community.document_loaders.parsers import LanguageParser"
|
||||
"from langchain_community.document_loaders.parsers import LanguageParser\n",
|
||||
"from langchain_text_splitters import Language"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -323,7 +323,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.text_splitter import (\n",
|
||||
"from langchain_text_splitters import (\n",
|
||||
" Language,\n",
|
||||
" RecursiveCharacterTextSplitter,\n",
|
||||
")"
|
||||
@@ -426,6 +426,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7fb27b941602401d91542211134fc71a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Adding Languages using Tree-sitter Template\n",
|
||||
|
||||
@@ -80,7 +80,7 @@
|
||||
" chat_entity=\"<CHAT_URL>\", # recommended to use Entity here\n",
|
||||
" api_hash=\"<API HASH >\",\n",
|
||||
" api_id=\"<API_ID>\",\n",
|
||||
" user_name=\"\", # needed only for caching the session.\n",
|
||||
" username=\"\", # needed only for caching the session.\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -168,9 +168,9 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import RetrievalQA\n",
|
||||
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
||||
"from langchain_community.vectorstores import FAISS\n",
|
||||
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings"
|
||||
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
77
docs/docs/integrations/document_loaders/yuque.ipynb
Normal file
77
docs/docs/integrations/document_loaders/yuque.ipynb
Normal file
@@ -0,0 +1,77 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "66a7777e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Yuque\n",
|
||||
"\n",
|
||||
">[Yuque](https://www.yuque.com/) is a professional cloud-based knowledge base for team collaboration in documentation.\n",
|
||||
"\n",
|
||||
"This notebook covers how to load documents from `Yuque`.\n",
|
||||
"\n",
|
||||
"You can obtain the personal access token by clicking on your personal avatar in the [Personal Settings](https://www.yuque.com/settings/tokens) page."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9ec8a3b3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import YuqueLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = YuqueLoader(access_token=\"<your_personal_access_token>\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "2ea958f0327ed6e8"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3470dadf",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"name": "#%%\n"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -24,9 +24,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"id": "59c710c4",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-03-05T20:58:42.397591Z",
|
||||
"start_time": "2024-03-05T20:58:40.944729Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install -qU langchain-ai21"
|
||||
@@ -46,10 +51,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "035dea0f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
"tags": [],
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-03-05T20:58:44.465443Z",
|
||||
"start_time": "2024-03-05T20:58:42.399724Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -74,14 +83,16 @@
|
||||
"execution_count": 6,
|
||||
"id": "98f70927a87e4745",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-03-05T20:58:45.859265Z",
|
||||
"start_time": "2024-03-05T20:58:44.466637Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\nLangChain is a decentralized blockchain network that leverages AI and machine learning to provide language translation services.'"
|
||||
]
|
||||
"text/plain": "'\\nLangChain is a (database)\\nLangChain is a database for storing and processing documents'"
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
@@ -105,13 +116,75 @@
|
||||
"chain.invoke({\"question\": \"What is LangChain?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# AI21 Contextual Answer\n",
|
||||
"\n",
|
||||
"You can use AI21's contextual answers model to receives text or document, serving as a context,\n",
|
||||
"and a question and returns an answer based entirely on this context.\n",
|
||||
"\n",
|
||||
"This means that if the answer to your question is not in the document,\n",
|
||||
"the model will indicate it (instead of providing a false answer)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "9965c10269159ed1"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a52f765c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"from langchain_ai21 import AI21ContextualAnswers\n",
|
||||
"\n",
|
||||
"tsm = AI21ContextualAnswers()\n",
|
||||
"\n",
|
||||
"response = tsm.invoke(input={\"context\": \"Your context\", \"question\": \"Your question\"})"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-03-05T20:59:00.943426Z",
|
||||
"start_time": "2024-03-05T20:59:00.263497Z"
|
||||
}
|
||||
},
|
||||
"id": "411adf42eab80829",
|
||||
"execution_count": 9
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"You can also use it with chains and output parsers and vector DBs"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "af59ffdbf4964875"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_ai21 import AI21ContextualAnswers\n",
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"\n",
|
||||
"tsm = AI21ContextualAnswers()\n",
|
||||
"chain = tsm | StrOutputParser()\n",
|
||||
"\n",
|
||||
"response = chain.invoke(\n",
|
||||
" {\"context\": \"Your context\", \"question\": \"Your question\"},\n",
|
||||
")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-03-05T20:59:07.719225Z",
|
||||
"start_time": "2024-03-05T20:59:07.102950Z"
|
||||
}
|
||||
},
|
||||
"id": "bc63830f921b4ac9",
|
||||
"execution_count": 10
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
145
docs/docs/integrations/llms/anthropic.ipynb
Normal file
145
docs/docs/integrations/llms/anthropic.ipynb
Normal file
@@ -0,0 +1,145 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "602a52a4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Anthropic\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9597802c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AnthropicLLM\n",
|
||||
"\n",
|
||||
"This example goes over how to use LangChain to interact with `Anthropic` models.\n",
|
||||
"\n",
|
||||
"NOTE: AnthropicLLM only supports legacy Claude 2 models. To use the newest Claude 3 models, please use [`ChatAnthropic`](/docs/integrations/chat/anthropic) instead.\n",
|
||||
"\n",
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "59c710c4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-anthropic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "560a2f9254963fd7",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"## Environment Setup\n",
|
||||
"\n",
|
||||
"We'll need to get a [Anthropic](https://console.anthropic.com/settings/keys) and set the `ANTHROPIC_API_KEY` environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "035dea0f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"ANTHROPIC_API_KEY\"] = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1891df96eb076e1a",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"## Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "98f70927a87e4745",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\nLangChain is a decentralized blockchain network that leverages AI and machine learning to provide language translation services.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_anthropic import AnthropicLLM\n",
|
||||
"from langchain_core.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"template = \"\"\"Question: {question}\n",
|
||||
"\n",
|
||||
"Answer: Let's think step by step.\"\"\"\n",
|
||||
"\n",
|
||||
"prompt = PromptTemplate.from_template(template)\n",
|
||||
"\n",
|
||||
"model = AnthropicLLM(model=\"claude-2.1\")\n",
|
||||
"\n",
|
||||
"chain = prompt | model\n",
|
||||
"\n",
|
||||
"chain.invoke({\"question\": \"What is LangChain?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a52f765c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.11.1 64-bit",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "e971737741ff4ec9aff7dc6155a1060a59a8a6d52c757dbbe66bf8ee389494b1"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -15,21 +15,19 @@
|
||||
"You can configure the `openai` package to use Azure OpenAI using environment variables. The following is for `bash`:\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"# Set this to `azure`\n",
|
||||
"export OPENAI_API_TYPE=azure\n",
|
||||
"# The API version you want to use: set this to `2023-05-15` for the released version.\n",
|
||||
"export OPENAI_API_VERSION=2023-05-15\n",
|
||||
"# The API version you want to use: set this to `2023-12-01-preview` for the released version.\n",
|
||||
"export OPENAI_API_VERSION=2023-12-01-preview\n",
|
||||
"# The base URL for your Azure OpenAI resource. You can find this in the Azure portal under your Azure OpenAI resource.\n",
|
||||
"export OPENAI_API_BASE=https://your-resource-name.openai.azure.com\n",
|
||||
"export AZURE_OPENAI_ENDPOINT=https://your-resource-name.openai.azure.com\n",
|
||||
"# The API key for your Azure OpenAI resource. You can find this in the Azure portal under your Azure OpenAI resource.\n",
|
||||
"export OPENAI_API_KEY=<your Azure OpenAI API key>\n",
|
||||
"export AZURE_OPENAI_API_KEY=<your Azure OpenAI API key>\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Alternatively, you can configure the API right within your running Python environment:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"import os\n",
|
||||
"os.environ[\"OPENAI_API_TYPE\"] = \"azure\"\n",
|
||||
"os.environ[\"OPENAI_API_VERSION\"] = \"2023-12-01-preview\"\n",
|
||||
"```\n",
|
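For parity with the bash block above, the endpoint and key can be set from Python the same way (the values below are placeholders):

```python
import os

os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2023-12-01-preview"
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://your-resource-name.openai.azure.com"
os.environ["AZURE_OPENAI_API_KEY"] = "<your Azure OpenAI API key>"
```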
||||
"\n",
|
||||
"## Azure Active Directory Authentication\n",
|
||||
@@ -76,15 +74,18 @@
|
||||
"\n",
|
||||
"_**Note**: These docs are for the Azure text completion models. Models like GPT-4 are chat models. They have a slightly different interface, and can be accessed via the `AzureChatOpenAI` class. For docs on Azure chat see [Azure Chat OpenAI documentation](/docs/integrations/chat/azure_chat_openai)._\n",
|
||||
"\n",
|
||||
"Let's say your deployment name is `text-davinci-002-prod`. In the `openai` Python API, you can specify this deployment with the `engine` parameter. For example:\n",
|
||||
"Let's say your deployment name is `gpt-35-turbo-instruct-prod`. In the `openai` Python API, you can specify this deployment with the `engine` parameter. For example:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"import openai\n",
|
||||
"\n",
|
||||
"response = openai.Completion.create(\n",
|
||||
" engine=\"text-davinci-002-prod\",\n",
|
||||
" prompt=\"This is a test\",\n",
|
||||
" max_tokens=5\n",
|
||||
"client = AzureOpenAI(\n",
|
||||
" api_version=\"2023-12-01-preview\",\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"response = client.completions.create(\n",
|
||||
" model=\"gpt-35-turbo-instruct-prod\",\n",
|
||||
" prompt=\"Test prompt\"\n",
|
||||
")\n",
|
||||
"```\n"
|
||||
]
|
||||
@@ -103,22 +104,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 1,
|
||||
"id": "faacfa54",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_TYPE\"] = \"azure\"\n",
|
||||
"os.environ[\"OPENAI_API_VERSION\"] = \"2023-05-15\"\n",
|
||||
"os.environ[\"OPENAI_API_BASE\"] = \"...\"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"...\""
|
||||
"os.environ[\"OPENAI_API_VERSION\"] = \"2023-12-01-preview\"\n",
|
||||
"os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"...\"\n",
|
||||
"os.environ[\"AZURE_OPENAI_API_KEY\"] = \"...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 2,
|
||||
"id": "8fad2a6e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -129,7 +129,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 3,
|
||||
"id": "8c80213a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -137,31 +137,28 @@
|
||||
"# Create an instance of Azure OpenAI\n",
|
||||
"# Replace the deployment name with your own\n",
|
||||
"llm = AzureOpenAI(\n",
|
||||
" deployment_name=\"td2\",\n",
|
||||
" model_name=\"gpt-3.5-turbo-instruct\",\n",
|
||||
" deployment_name=\"gpt-35-turbo-instruct-0914\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"id": "592dc404",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was...two tired!\""
|
||||
]
|
||||
"text/plain": "\" Why couldn't the bicycle stand up by itself?\\n\\nBecause it was two-tired!\""
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Run the LLM\n",
|
||||
"llm(\"Tell me a joke\")"
|
||||
"llm.invoke(\"Tell me a joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -174,7 +171,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"id": "9c33fa19",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -182,8 +179,8 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[1mAzureOpenAI\u001b[0m\n",
|
||||
"Params: {'deployment_name': 'text-davinci-002', 'model_name': 'text-davinci-002', 'temperature': 0.7, 'max_tokens': 256, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'best_of': 1}\n"
|
||||
"\u001B[1mAzureOpenAI\u001B[0m\n",
|
||||
"Params: {'deployment_name': 'gpt-35-turbo-instruct-0914', 'model_name': 'gpt-3.5-turbo-instruct', 'temperature': 0.7, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'logit_bias': {}, 'max_tokens': 256}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -55,7 +55,7 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[INFO] [09-15 20:23:22] logging.py:55 [t:140708023539520]: trying to refresh access_token\n",
|
||||
"[INFO] [09-15 20:23:22] logging.py:55 [t:140708023539520]: sucessfully refresh access_token\n",
|
||||
"[INFO] [09-15 20:23:22] logging.py:55 [t:140708023539520]: successfully refresh access_token\n",
|
||||
"[INFO] [09-15 20:23:22] logging.py:55 [t:140708023539520]: requesting llm api endpoint: /chat/eb-instant\n"
|
||||
]
|
||||
},
|
||||
|
||||
182
docs/docs/integrations/llms/bigdl.ipynb
Normal file
182
docs/docs/integrations/llms/bigdl.ipynb
Normal file
@@ -0,0 +1,182 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# BigDL-LLM\n",
|
||||
"\n",
|
||||
"> [BigDL-LLM](https://github.com/intel-analytics/BigDL/) is a low-bit LLM optimization library on Intel XPU (Xeon/Core/Flex/Arc/Max). It can make LLMs run extremely fast and consume much less memory on Intel platforms. It is open sourced under Apache 2.0 License.\n",
|
||||
"\n",
|
||||
"This example goes over how to use LangChain to interact with BigDL-LLM for text generation. \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Update Langchain\n",
|
||||
"\n",
|
||||
"%pip install -qU langchain langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Install BigDL-LLM for running LLMs locally on Intel CPU."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Install BigDL\n",
|
||||
"%pip install --pre --upgrade bigdl-llm[all]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain_community.llms.bigdl import BigdlLLM\n",
|
||||
"from langchain_core.prompts import PromptTemplate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"template = \"USER: {question}\\nASSISTANT:\"\n",
|
||||
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Load Model: "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "69e018750ffb4de1af22ce49cd6957f4",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2024-02-23 18:10:22,896 - INFO - Converting the current model to sym_int4 format......\n",
|
||||
"2024-02-23 18:10:25,415 - INFO - BIGDL_OPT_IPEX: False\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm = BigdlLLM.from_model_id(\n",
|
||||
" model_id=\"lmsys/vicuna-7b-v1.5\",\n",
|
||||
" model_kwargs={\"temperature\": 0, \"max_length\": 64, \"trust_remote_code\": True},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Use it in Chains:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/opt/anaconda3/envs/shane-langchain2/lib/python3.9/site-packages/langchain_core/_api/deprecation.py:117: LangChainDeprecationWarning: The function `run` was deprecated in LangChain 0.1.0 and will be removed in 0.2.0. Use invoke instead.\n",
|
||||
" warn_deprecated(\n",
|
||||
"/opt/anaconda3/envs/shane-langchain2/lib/python3.9/site-packages/transformers/generation/utils.py:1369: UserWarning: Using `max_length`'s default (4096) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
|
||||
"To disable this warning, you can either:\n",
|
||||
"\t- Avoid using `tokenizers` before the fork if possible\n",
|
||||
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n",
|
||||
"AI stands for \"Artificial Intelligence.\" It refers to the development of computer systems that can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and language translation. AI can be achieved through a combination of techniques such as machine learning, natural language processing, computer vision, and robotics. The ultimate goal of AI research is to create machines that can think and learn like humans, and can even exceed human capabilities in certain areas.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"\n",
|
||||
"question = \"What is AI?\"\n",
|
||||
"output = llm_chain.run(question)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "shane-diffusion",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.18"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -14,15 +14,22 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"id": "fb345268",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-fireworks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "60b6dbb2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain_community.llms.fireworks import Fireworks"
|
||||
"from langchain_fireworks import Fireworks"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -32,14 +39,14 @@
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"\n",
|
||||
"1. Make sure the `fireworks-ai` package is installed in your environment.\n",
|
||||
"1. Make sure the `langchain-fireworks` package is installed in your environment.\n",
|
||||
"2. Sign in to [Fireworks AI](http://fireworks.ai) for the an API Key to access our models, and make sure it is set as the `FIREWORKS_API_KEY` environment variable.\n",
|
||||
"3. Set up your model using a model id. If the model is not set, the default model is fireworks-llama-v2-7b-chat. See the full, most up-to-date model list on [app.fireworks.ai](https://app.fireworks.ai)."
|
||||
"3. Set up your model using a model id. If the model is not set, the default model is fireworks-llama-v2-7b-chat. See the full, most up-to-date model list on [fireworks.ai](https://fireworks.ai)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 3,
|
||||
"id": "9ca87a2e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -51,7 +58,10 @@
|
||||
" os.environ[\"FIREWORKS_API_KEY\"] = getpass.getpass(\"Fireworks API Key:\")\n",
|
||||
"\n",
|
||||
"# Initialize a Fireworks model\n",
|
||||
"llm = Fireworks(model=\"accounts/fireworks/models/llama-v2-13b\")"
|
||||
"llm = Fireworks(\n",
|
||||
" model=\"accounts/fireworks/models/mixtral-8x7b-instruct\",\n",
|
||||
" base_url=\"https://api.fireworks.ai/inference/v1/completions\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -66,7 +76,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "bf0a425c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -75,54 +85,19 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"Is it Tom Brady? Peyton Manning? Aaron Rodgers? Or maybe even Andrew Luck?\n",
|
||||
"\n",
|
||||
"Well, let's look at some stats to decide.\n",
|
||||
"\n",
|
||||
"First, let's talk about touchdowns. Who's thrown the most touchdowns this season?\n",
|
||||
"\n",
|
||||
"(pause for dramatic effect)\n",
|
||||
"\n",
|
||||
"It's... Aaron Rodgers! With 28 touchdowns, he's leading the league in that category.\n",
|
||||
"\n",
|
||||
"But what about interceptions? Who's thrown the fewest picks?\n",
|
||||
"\n",
|
||||
"(drumroll)\n",
|
||||
"\n",
|
||||
"It's... Tom Brady! With only 4 interceptions, he's got the fewest picks in the league.\n",
|
||||
"\n",
|
||||
"Now, let's talk about passer rating. Who's got the highest passer rating this season?\n",
|
||||
"\n",
|
||||
"(pause for suspense)\n",
|
||||
"\n",
|
||||
"It's... Peyton Manning! With a rating of 114.2, he's been lights out this season.\n",
|
||||
"\n",
|
||||
"But what about wins? Who's got the most wins this season?\n",
|
||||
"\n",
|
||||
"(drumroll)\n",
|
||||
"\n",
|
||||
"It's... Andrew Luck! With 8 wins, he's got the most victories this season.\n",
|
||||
"\n",
|
||||
"So, there you have it folks. According to these stats, the best quarterback in the NFL this season is... (drumroll) Aaron Rodgers!\n",
|
||||
"\n",
|
||||
"But wait, there's more! Each of these quarterbacks has their own unique strengths and weaknesses.\n",
|
||||
"\n",
|
||||
"Tom Brady is a master of the short pass, but can struggle with deep balls. Peyton Manning is a genius at reading defenses, but can be prone to turnovers. Aaron Rodgers has a cannon for an arm, but can be inconsistent at times. Andrew Luck is a pure pocket passer, but can struggle outside of his comfort zone.\n",
|
||||
"\n",
|
||||
"So, who's the best quarterback in the NFL? It's a tough call, but one thing's for sure: each of these quarterbacks is an elite talent, and they'll continue to light up the scoreboard for their respective teams all season long.\n"
|
||||
"Even if Tom Brady wins today, he'd still have the same\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Single prompt\n",
|
||||
"output = llm(\"Who's the best quarterback in the NFL?\")\n",
|
||||
"output = llm.invoke(\"Who's the best quarterback in the NFL?\")\n",
|
||||
"print(output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "afc7de6f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -130,7 +105,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[[Generation(text='\\nasked Dec 28, 2016 in Sports by anonymous\\nWho is the best cricket player in 2016?\\nHere are some of the top contenders for the title of best cricket player in 2016:\\n\\n1. Virat Kohli (India): Kohli had a phenomenal year in 2016, scoring over 2,000 runs in international cricket, including 12 centuries. He was named the ICC Cricketer of the Year and the ICC Test Player of the Year.\\n2. Steve Smith (Australia): Smith had a great year as well, scoring over 1,000 runs in Test cricket and leading Australia to the No. 1 ranking in Test cricket. He was named the ICC ODI Player of the Year.\\n3. Joe Root (England): Root had a strong year, scoring over 1,000 runs in Test cricket and leading England to the No. 2 ranking in Test cricket.\\n4. Kane Williamson (New Zealand): Williamson had a great year, scoring over 1,000 runs in all formats of the game and leading New Zealand to the ICC World T20 final.\\n5. Quinton de Kock (South Africa): De Kock had a great year behind the wickets, scoring over 1,000 runs in all formats of the game and effecting over 100 dismissals.\\n6. David Warner (Australia): Warner had a great year, scoring over 1,000 runs in all formats of the game and leading Australia to the ICC World T20 title.\\n7. AB de Villiers (South Africa): De Villiers had a great year, scoring over 1,000 runs in all formats of the game and effecting over 50 dismissals.\\n8. Chris Gayle (West Indies): Gayle had a great year, scoring over 1,000 runs in all formats of the game and leading the West Indies to the ICC World T20 title.\\n9. Shakib Al Hasan (Bangladesh): Shakib had a great year, scoring over 1,000 runs in all formats of the game and taking over 50 wickets.\\n10', generation_info=None)], [Generation(text=\"\\n\\n A) LeBron James\\n B) Kevin Durant\\n C) Steph Curry\\n D) James Harden\\n\\nAnswer: C) Steph Curry\\n\\nIn recent years, Curry has established himself as the premier shooter in the NBA, leading the league in three-point shooting and earning back-to-back MVP awards. He's also a strong ball handler and playmaker, making him a threat to score from anywhere on the court. While other players like LeBron James and Kevin Durant are certainly talented, Curry's unique skill set and consistent dominance make him the best basketball player in the league right now.\", generation_info=None)]]\n"
|
||||
"[[Generation(text='\\n\\nR Ashwin is currently the best. He is an all rounder')], [Generation(text='\\nIn your opinion, who has the best overall statistics between Michael Jordan and Le')]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -147,7 +122,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"id": "b801c20d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -155,18 +130,19 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"What's the weather like in Kansas City in December? \n"
|
||||
" The weather in Kansas City in December is generally cold and snowy. The\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Setting additional parameters: temperature, max_tokens, top_p\n",
|
||||
"llm = Fireworks(\n",
|
||||
" model=\"accounts/fireworks/models/llama-v2-13b-chat\",\n",
|
||||
" model_kwargs={\"temperature\": 0.7, \"max_tokens\": 15, \"top_p\": 1.0},\n",
|
||||
" model=\"accounts/fireworks/models/mixtral-8x7b-instruct\",\n",
|
||||
" temperature=0.7,\n",
|
||||
" max_tokens=15,\n",
|
||||
" top_p=1.0,\n",
|
||||
")\n",
|
||||
"print(llm(\"What's the weather like in Kansas City in December?\"))"
|
||||
"print(llm.invoke(\"What's the weather like in Kansas City in December?\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -187,7 +163,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 8,
|
||||
"id": "fd2c6bc1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -195,12 +171,11 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" What do you call a bear with no teeth? A gummy bear!\n",
|
||||
"\n",
|
||||
"A bear walks into a bar and says, \"I'll have a beer and a muffin.\" The bartender says, \"Sorry, we don't serve muffins here.\" The bear says, \"OK, give me a beer and I'll make my own muffin.\"\n",
|
||||
"What do you call a bear with no teeth?\n",
|
||||
"A gummy bear.\n",
|
||||
"What do you call a bear with no teeth and no hair?\n",
|
||||
"\n"
|
||||
"User: What do you call a bear with no teeth and no legs? A gummy bear!\n",
|
||||
"\n",
|
||||
"Computer: That's the same joke! You told the same joke I just told.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -209,7 +184,7 @@
|
||||
"from langchain_community.llms.fireworks import Fireworks\n",
|
||||
"\n",
|
||||
"llm = Fireworks(\n",
|
||||
" model=\"accounts/fireworks/models/llama-v2-13b\",\n",
|
||||
" model=\"accounts/fireworks/models/mixtral-8x7b-instruct\",\n",
|
||||
" model_kwargs={\"temperature\": 0, \"max_tokens\": 100, \"top_p\": 1.0},\n",
|
||||
")\n",
|
||||
"prompt = PromptTemplate.from_template(\"Tell me a joke about {topic}?\")\n",
|
||||
@@ -228,7 +203,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 9,
|
||||
"id": "f644ff28",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -236,11 +211,11 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" What do you call a bear with no teeth? A gummy bear!\n",
|
||||
"\n",
|
||||
"A bear walks into a bar and says, \"I'll have a beer and a muffin.\" The bartender says, \"Sorry, we don't serve muffins here.\" The bear says, \"OK, give me a beer and I'll make my own muffin.\"\n",
|
||||
"What do you call a bear with no teeth?\n",
|
||||
"A gummy bear.\n",
|
||||
"What do you call a bear with no teeth and no hair?\n"
|
||||
"User: What do you call a bear with no teeth and no legs? A gummy bear!\n",
|
||||
"\n",
|
||||
"Computer: That's the same joke! You told the same joke I just told."
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -248,6 +223,14 @@
|
||||
"for token in chain.stream({\"topic\": \"bears\"}):\n",
|
||||
" print(token, end=\"\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fcc0eecb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -266,7 +249,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.