mirror of https://github.com/hwchase17/langchain.git
synced 2026-02-08 02:00:06 +00:00

Compare commits: erick/ci-t ... jacob/peop (1 commit)

| Author | SHA1 | Date |
|---|---|---|
|  | d06cf8e10f |  |
.github/DISCUSSION_TEMPLATE/q-a.yml (vendored) — 10 changes

@@ -3,18 +3,18 @@ body:
   - type: markdown
     attributes:
       value: |
-        Thanks for your interest in LangChain 🦜️🔗!
+        Thanks for your interest in 🦜️🔗 LangChain!

         Please follow these instructions, fill every question, and do every step. 🙏

         We're asking for this because answering questions and solving problems in GitHub takes a lot of time --
-        this is time that we cannot spend on adding new features, fixing bugs, writing documentation or reviewing pull requests.
+        this is time that we cannot spend on adding new features, fixing bugs, write documentation or reviewing pull requests.

-        By asking questions in a structured way (following this) it will be much easier for us to help you.
+        By asking questions in a structured way (following this) it will be much easier to help you.

-        There's a high chance that by following this process, you'll find the solution on your own, eliminating the need to submit a question and wait for an answer. 😎
+        And there's a high chance that you will find the solution along the way and you won't even have to submit it and wait for an answer. 😎

-        As there are many questions submitted every day, we will **DISCARD** and close the incomplete ones.
+        As there are too many questions, we will **DISCARD** and close the incomplete ones.

         That will allow us (and others) to focus on helping people like you that follow the whole process. 🤓
.github/ISSUE_TEMPLATE/bug-report.yml (vendored) — 2 changes

@@ -35,8 +35,6 @@ body:
         required: true
       - label: I am sure that this is a bug in LangChain rather than my code.
         required: true
-      - label: The bug is not resolved by updating to the latest stable version of LangChain (or the specific integration package).
-        required: true
   - type: textarea
     id: reproduction
     validations:
.github/ISSUE_TEMPLATE/privileged.yml (vendored) — 2 changes

@@ -9,7 +9,7 @@ body:
       If you are not a LangChain maintainer or were not asked directly by a maintainer to create an issue, then please start the conversation in a [Question in GitHub Discussions](https://github.com/langchain-ai/langchain/discussions/categories/q-a) instead.

       You are a LangChain maintainer if you maintain any of the packages inside of the LangChain repository
-      or are a regular contributor to LangChain with previous merged pull requests.
+      or are a regular contributor to LangChain with previous merged merged pull requests.
   - type: checkboxes
     id: privileged
     attributes:
.github/scripts/check_diff.py (vendored) — 61 changes

@@ -1,23 +1,17 @@
 import json
 import sys
 import os
-from typing import Dict

-LANGCHAIN_DIRS = [
+LANGCHAIN_DIRS = {
     "libs/core",
     "libs/langchain",
     "libs/experimental",
     "libs/community",
-]
+}

 if __name__ == "__main__":
     files = sys.argv[1:]

-    dirs_to_run: Dict[str, set] = {
-        "lint": set(),
-        "test": set(),
-        "extended-test": set(),
-    }
+    dirs_to_run = set()

     if len(files) == 300:
         # max diff length is 300 files - there are likely files missing
@@ -30,42 +24,27 @@ if __name__ == "__main__":
             ".github/workflows",
             ".github/tools",
             ".github/actions",
             "libs/core",
-            ".github/scripts/check_diff.py",
         )
     ):
-        # add all LANGCHAIN_DIRS for infra changes
-        dirs_to_run["extended-test"].update(LANGCHAIN_DIRS)
-        dirs_to_run["lint"].add(".")
-
-    if any(file.startswith(dir_) for dir_ in LANGCHAIN_DIRS):
-        # add that dir and all dirs after in LANGCHAIN_DIRS
-        # for extended testing
-        found = False
-        for dir_ in LANGCHAIN_DIRS:
-            if file.startswith(dir_):
-                found = True
-            if found:
-                dirs_to_run["extended-test"].add(dir_)
-    elif file.startswith("libs/partners"):
+        dirs_to_run.update(LANGCHAIN_DIRS)
+    elif "libs/community" in file:
+        dirs_to_run.update(
+            ("libs/community", "libs/langchain", "libs/experimental")
+        )
+    elif "libs/partners" in file:
         partner_dir = file.split("/")[2]
         if os.path.isdir(f"libs/partners/{partner_dir}"):
-            dirs_to_run["test"].add(f"libs/partners/{partner_dir}")
+            dirs_to_run.add(f"libs/partners/{partner_dir}")
         # Skip if the directory was deleted
+    elif "libs/langchain" in file:
+        dirs_to_run.update(("libs/langchain", "libs/experimental"))
+    elif "libs/experimental" in file:
+        dirs_to_run.add("libs/experimental")
     elif file.startswith("libs/"):
-        raise ValueError(
-            f"Unknown lib: {file}. check_diff.py likely needs "
-            "an update for this new library!"
-        )
-    elif any(file.startswith(p) for p in ["docs/", "templates/", "cookbook/"]):
-        dirs_to_run["lint"].add(".")
-
-    outputs = {
-        "dirs-to-lint": list(
-            dirs_to_run["lint"] | dirs_to_run["test"] | dirs_to_run["extended-test"]
-        ),
-        "dirs-to-test": list(dirs_to_run["test"] | dirs_to_run["extended-test"]),
-        "dirs-to-extended-test": list(dirs_to_run["extended-test"]),
-    }
-    for key, value in outputs.items():
-        json_output = json.dumps(value)
-        print(f"{key}={json_output}")  # noqa: T201
+        dirs_to_run.update(LANGCHAIN_DIRS)
+    else:
+        pass
+    json_output = json.dumps(list(dirs_to_run))
+    print(f"dirs-to-run={json_output}")  # noqa: T201
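Note: pieced together from the two hunks above, the incoming simplified script reads roughly as follows. This is a sketch: the `for file in files:` line and the body of the 300-file guard sit between the hunks and are not shown in the diff, so they are assumed here.

```python
import json
import os
import sys

LANGCHAIN_DIRS = {
    "libs/core",
    "libs/langchain",
    "libs/experimental",
    "libs/community",
}

if __name__ == "__main__":
    files = sys.argv[1:]

    dirs_to_run = set()

    # (the 300-file diff-length guard shown in the hunk is elided here)
    for file in files:  # assumed loop; it falls between the two hunks
        if any(
            file.startswith(dir_)
            for dir_ in (
                ".github/workflows",
                ".github/tools",
                ".github/actions",
                "libs/core",
            )
        ):
            # infra or core changes trigger CI for every tracked lib
            dirs_to_run.update(LANGCHAIN_DIRS)
        elif "libs/community" in file:
            dirs_to_run.update(
                ("libs/community", "libs/langchain", "libs/experimental")
            )
        elif "libs/partners" in file:
            partner_dir = file.split("/")[2]
            if os.path.isdir(f"libs/partners/{partner_dir}"):
                dirs_to_run.add(f"libs/partners/{partner_dir}")
            # Skip if the directory was deleted
        elif "libs/langchain" in file:
            dirs_to_run.update(("libs/langchain", "libs/experimental"))
        elif "libs/experimental" in file:
            dirs_to_run.add("libs/experimental")
        elif file.startswith("libs/"):
            dirs_to_run.update(LANGCHAIN_DIRS)

    # a single JSON list, consumed by check_diffs.yml via fromJson()
    json_output = json.dumps(list(dirs_to_run))
    print(f"dirs-to-run={json_output}")  # noqa: T201
```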
.github/workflows/_all_ci.yml (vendored, new file) — 110 additions

@@ -0,0 +1,110 @@
---
name: langchain CI

on:
  workflow_call:
    inputs:
      working-directory:
        required: true
        type: string
        description: "From which folder this pipeline executes"
  workflow_dispatch:
    inputs:
      working-directory:
        required: true
        type: choice
        default: 'libs/langchain'
        options:
          - libs/langchain
          - libs/core
          - libs/experimental
          - libs/community


# If another push to the same PR or branch happens while this workflow is still running,
# cancel the earlier run in favor of the next run.
#
# There's no point in testing an outdated version of the code. GitHub only allows
# a limited number of job runners to be active at the same time, so it's better to cancel
# pointless jobs early so that more useful jobs can run sooner.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ inputs.working-directory }}
  cancel-in-progress: true

env:
  POETRY_VERSION: "1.7.1"

jobs:
  lint:
    name: "-"
    uses: ./.github/workflows/_lint.yml
    with:
      working-directory: ${{ inputs.working-directory }}
    secrets: inherit

  test:
    name: "-"
    uses: ./.github/workflows/_test.yml
    with:
      working-directory: ${{ inputs.working-directory }}
    secrets: inherit

  compile-integration-tests:
    name: "-"
    uses: ./.github/workflows/_compile_integration_test.yml
    with:
      working-directory: ${{ inputs.working-directory }}
    secrets: inherit

  dependencies:
    name: "-"
    uses: ./.github/workflows/_dependencies.yml
    with:
      working-directory: ${{ inputs.working-directory }}
    secrets: inherit

  extended-tests:
    name: "make extended_tests #${{ matrix.python-version }}"
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
          - "3.11"
    defaults:
      run:
        working-directory: ${{ inputs.working-directory }}
    if: ${{ ! startsWith(inputs.working-directory, 'libs/partners/') }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
        uses: "./.github/actions/poetry_setup"
        with:
          python-version: ${{ matrix.python-version }}
          poetry-version: ${{ env.POETRY_VERSION }}
          working-directory: ${{ inputs.working-directory }}
          cache-key: extended

      - name: Install dependencies
        shell: bash
        run: |
          echo "Running extended tests, installing dependencies with poetry..."
          poetry install -E extended_testing --with test

      - name: Run extended tests
        run: make extended_tests

      - name: Ensure the tests did not create any additional files
        shell: bash
        run: |
          set -eu

          STATUS="$(git status)"
          echo "$STATUS"

          # grep will exit non-zero if the target message isn't found,
          # and `set -e` above will cause the step to fail.
          echo "$STATUS" | grep 'nothing to commit, working tree clean'
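Note: the final step's grep trick is easy to miss — under `set -e`, a non-matching grep exits non-zero and fails the job, so the step passes only when `git status` reports a clean tree. A rough Python equivalent of that cleanliness check:

```python
# Fail unless `git status` reports a clean working tree, mirroring the
# "Ensure the tests did not create any additional files" step.
import subprocess
import sys

status = subprocess.run(
    ["git", "status"], capture_output=True, text=True, check=True
).stdout
print(status)

# grep-like check: exit non-zero when the clean-tree message is absent
sys.exit(0 if "nothing to commit, working tree clean" in status else 1)
```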
.github/workflows/_dependencies.yml (vendored) — 4 changes

@@ -63,8 +63,6 @@ jobs:
       - name: Install the opposite major version of pydantic
        # If normal tests use pydantic v1, here we'll use v2, and vice versa.
        shell: bash
-        # airbyte currently doesn't support pydantic v2
-        if: ${{ !startsWith(inputs.working-directory, 'libs/partners/airbyte') }}
        run: |
          # Determine the major part of pydantic version
          REGULAR_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
@@ -99,8 +97,6 @@
          fi
          echo "Found pydantic version ${CURRENT_VERSION}, as expected"
      - name: Run pydantic compatibility tests
-        # airbyte currently doesn't support pydantic v2
-        if: ${{ !startsWith(inputs.working-directory, 'libs/partners/airbyte') }}
        shell: bash
        run: make test
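Note: the step's shell pipeline (`python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1`) just extracts the installed major version so the job can install the opposite one. A minimal sketch of that selection logic:

```python
# Sketch of the "opposite major version of pydantic" selection.
import pydantic

regular_major = int(pydantic.__version__.split(".")[0])  # the `cut -d. -f1` part
test_major = 1 if regular_major == 2 else 2
print(f"normal tests use pydantic v{regular_major}; compatibility run installs v{test_major}")
```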
.github/workflows/_integration_test.yml (vendored) — 4 changes

@@ -70,10 +70,6 @@ jobs:
          ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
          ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
          ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }}
-          ES_URL: ${{ secrets.ES_URL }}
-          ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
-          ES_API_KEY: ${{ secrets.ES_API_KEY }}
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # for airbyte
        run: |
          make integration_tests
.github/workflows/_release.yml (vendored) — 5 changes

@@ -181,7 +181,6 @@ jobs:
          NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
          GOOGLE_SEARCH_API_KEY: ${{ secrets.GOOGLE_SEARCH_API_KEY }}
          GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
-          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
          EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
          NOMIC_API_KEY: ${{ secrets.NOMIC_API_KEY }}
          WATSONX_APIKEY: ${{ secrets.WATSONX_APIKEY }}
@@ -191,10 +190,6 @@
          ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
          ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
          ASTRA_DB_KEYSPACE: ${{ secrets.ASTRA_DB_KEYSPACE }}
-          ES_URL: ${{ secrets.ES_URL }}
-          ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
-          ES_API_KEY: ${{ secrets.ES_API_KEY }}
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # for airbyte
        run: make integration_tests
        working-directory: ${{ inputs.working-directory }}
.github/workflows/api_doc_build.yml (vendored) — 21 changes

@@ -15,48 +15,32 @@ jobs:
      - uses: actions/checkout@v4
-        with:
-          ref: bagatur/api_docs_build
-          path: langchain
-      - uses: actions/checkout@v4
-        with:
-          repository: langchain-ai/langchain-google
-          path: langchain-google
-      - name: Move google libs
-        run: |
-          rm -rf langchain/libs/partners/google-genai langchain/libs/partners/google-vertexai
-          mv langchain-google/libs/genai langchain/libs/partners/google-genai
-          mv langchain-google/libs/vertexai langchain/libs/partners/google-vertexai
-
-      - name: Set Git config
-        working-directory: langchain
-        run: |
-          git config --local user.email "actions@github.com"
-          git config --local user.name "Github Actions"
-
-      - name: Merge master
-        working-directory: langchain
-        run: |
-          git fetch origin master
-          git merge origin/master -m "Merge master" --allow-unrelated-histories -X theirs

      - name: Set up Python ${{ env.PYTHON_VERSION }} + Poetry ${{ env.POETRY_VERSION }}
-        uses: "./langchain/.github/actions/poetry_setup"
+        uses: "./.github/actions/poetry_setup"
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          poetry-version: ${{ env.POETRY_VERSION }}
          cache-key: api-docs
-          working-directory: langchain

      - name: Install dependencies
-        working-directory: langchain
        run: |
          poetry run python -m pip install --upgrade --no-cache-dir pip setuptools
          poetry run python -m pip install --upgrade --no-cache-dir sphinx readthedocs-sphinx-ext
-          # skip airbyte and ibm due to pandas dependency issue
-          poetry run python -m pip install $(ls ./libs/partners | grep -vE "airbyte|ibm" | xargs -I {} echo "./libs/partners/{}")
+          poetry run python -m pip install ./libs/partners/*
          poetry run python -m pip install --exists-action=w --no-cache-dir -r docs/api_reference/requirements.txt

      - name: Build docs
-        working-directory: langchain
        run: |
          poetry run python -m pip install --upgrade --no-cache-dir pip setuptools
          poetry run python docs/api_reference/create_api_rst.py
@@ -65,5 +49,4 @@
      # https://github.com/marketplace/actions/add-commit
      - uses: EndBug/add-and-commit@v9
        with:
-          cwd: langchain
          message: 'Update API docs build'
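Note: the deleted install line builds its argument list with `ls ./libs/partners | grep -vE "airbyte|ibm" | xargs ...`. A small sketch of what that pipeline expands to:

```python
# Expand every libs/partners/<pkg> path except the excluded integrations.
import os

EXCLUDED = ("airbyte", "ibm")  # skipped due to the pandas dependency issue

partner_paths = [
    f"./libs/partners/{name}"
    for name in sorted(os.listdir("./libs/partners"))
    if not any(ex in name for ex in EXCLUDED)  # mirrors `grep -vE "airbyte|ibm"`
]
print(" ".join(partner_paths))  # the argument list handed to `pip install`
```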
.github/workflows/check_diffs.yml (vendored) — 121 changes

@@ -16,9 +16,6 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

-env:
-  POETRY_VERSION: "1.7.1"
-
jobs:
  build:
    runs-on: ubuntu-latest
@@ -33,125 +30,15 @@ jobs:
        run: |
          python .github/scripts/check_diff.py ${{ steps.files.outputs.all }} >> $GITHUB_OUTPUT
    outputs:
-      dirs-to-lint: ${{ steps.set-matrix.outputs.dirs-to-lint }}
-      dirs-to-test: ${{ steps.set-matrix.outputs.dirs-to-test }}
-      dirs-to-extended-test: ${{ steps.set-matrix.outputs.dirs-to-extended-test }}
-  lint:
+      dirs-to-run: ${{ steps.set-matrix.outputs.dirs-to-run }}
+  ci:
    name: cd ${{ matrix.working-directory }}
    needs: [ build ]
-    if: ${{ needs.build.outputs.dirs-to-lint != '[]' }}
    strategy:
      matrix:
-        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-lint) }}
-    uses: ./.github/workflows/_lint.yml
+        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-run) }}
+    uses: ./.github/workflows/_all_ci.yml
    with:
      working-directory: ${{ matrix.working-directory }}
    secrets: inherit
-
-  test:
-    name: cd ${{ matrix.working-directory }}
-    needs: [ build ]
-    if: ${{ needs.build.outputs.dirs-to-test != '[]' }}
-    strategy:
-      matrix:
-        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-test) }}
-    uses: ./.github/workflows/_test.yml
-    with:
-      working-directory: ${{ matrix.working-directory }}
-    secrets: inherit
-
-  compile-integration-tests:
-    name: cd ${{ matrix.working-directory }}
-    needs: [ build ]
-    if: ${{ needs.build.outputs.dirs-to-test != '[]' }}
-    strategy:
-      matrix:
-        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-test) }}
-    uses: ./.github/workflows/_compile_integration_test.yml
-    with:
-      working-directory: ${{ matrix.working-directory }}
-    secrets: inherit
-
-  dependencies:
-    name: cd ${{ matrix.working-directory }}
-    needs: [ build ]
-    if: ${{ needs.build.outputs.dirs-to-test != '[]' }}
-    strategy:
-      matrix:
-        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-test) }}
-    uses: ./.github/workflows/_dependencies.yml
-    with:
-      working-directory: ${{ matrix.working-directory }}
-    secrets: inherit
-
-  extended-tests:
-    name: "cd ${{ matrix.working-directory }} / make extended_tests #${{ matrix.python-version }}"
-    needs: [ build ]
-    if: ${{ needs.build.outputs.dirs-to-extended-test != '[]' }}
-    strategy:
-      matrix:
-        # note different variable for extended test dirs
-        working-directory: ${{ fromJson(needs.build.outputs.dirs-to-extended-test) }}
-        python-version:
-          - "3.8"
-          - "3.9"
-          - "3.10"
-          - "3.11"
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: ${{ matrix.working-directory }}
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
-        uses: "./.github/actions/poetry_setup"
-        with:
-          python-version: ${{ matrix.python-version }}
-          poetry-version: ${{ env.POETRY_VERSION }}
-          working-directory: ${{ matrix.working-directory }}
-          cache-key: extended
-
-      - name: Install dependencies
-        shell: bash
-        run: |
-          echo "Running extended tests, installing dependencies with poetry..."
-          poetry install -E extended_testing --with test
-
-      - name: Run extended tests
-        run: |
-          echo "sleeping 150"
-          sleep 150
-          echo "sleeping 151"
-          sleep 151
-          echo "done sleeping lets test"
-          make extended_tests
-
-      - name: Ensure the tests did not create any additional files
-        shell: bash
-        run: |
-          set -eu
-
-          STATUS="$(git status)"
-          echo "$STATUS"
-
-          # grep will exit non-zero if the target message isn't found,
-          # and `set -e` above will cause the step to fail.
-          echo "$STATUS" | grep 'nothing to commit, working tree clean'
-  ci_success:
-    name: "CI Success"
-    needs: [build, lint, test, compile-integration-tests, dependencies, extended-tests]
-    if: |
-      always()
-    runs-on: ubuntu-latest
-    env:
-      JOBS_JSON: ${{ toJSON(needs) }}
-      RESULTS_JSON: ${{ toJSON(needs.*.result) }}
-      EXIT_CODE: ${{!contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') && '0' || '1'}}
-    steps:
-      - name: "CI Success"
-        run: |
-          echo $JOBS_JSON
-          echo $RESULTS_JSON
-          echo "Exiting with $EXIT_CODE"
-          exit $EXIT_CODE
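Note: the contract between check_diff.py and the new `ci` job is a single `key=value` line appended to `$GITHUB_OUTPUT`, which `fromJson()` then fans out into one `_all_ci.yml` invocation per directory. A sketch of that handoff, with a hypothetical output line:

```python
# Parse the kind of line check_diff.py appends to $GITHUB_OUTPUT.
import json

line = 'dirs-to-run=["libs/core", "libs/partners/openai"]'  # hypothetical example
key, _, value = line.partition("=")
matrix_dirs = json.loads(value)  # what fromJson(needs.build.outputs.dirs-to-run) sees

for wd in matrix_dirs:
    # one reusable-workflow invocation per matrix entry
    print(f"cd {wd}  ->  uses ./.github/workflows/_all_ci.yml")
```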
.github/workflows/codespell.yml (vendored) — 2 changes

@@ -32,6 +32,6 @@ jobs:
      - name: Codespell
        uses: codespell-project/actions-codespell@v2
        with:
-          skip: guide_imports.json,*.ambr,./cookbook/data/imdb_top_1000.csv
+          skip: guide_imports.json
          ignore_words_list: ${{ steps.extract_ignore_words.outputs.ignore_words_list }}
          exclude_file: libs/community/langchain_community/llms/yuan2.py
.github/workflows/doc_lint.yml (vendored, new file) — 37 additions

@@ -0,0 +1,37 @@
---
name: CI / cd .

on:
  push:
    branches: [ master ]
  pull_request:
    paths:
      - 'docs/**'
      - 'templates/**'
      - 'cookbook/**'
      - '.github/workflows/_lint.yml'
      - '.github/workflows/doc_lint.yml'
  workflow_dispatch:

jobs:
  check:
    name: Check for "from langchain import x" imports
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Run import check
        run: |
          # We should not encourage imports directly from main init file
          # Expect for hub
          git grep 'from langchain import' {docs/docs,templates,cookbook} | grep -vE 'from langchain import (hub)' && exit 1 || exit 0

  lint:
    name: "-"
    uses:
      ./.github/workflows/_lint.yml
    with:
      working-directory: "."
    secrets: inherit
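Note: `git grep ... && exit 1 || exit 0` inverts grep's normal exit status — a hit (a forbidden top-level `from langchain import`) fails the job, while no hit passes it. A rough Python equivalent of the check:

```python
# Fail when any `from langchain import ...` line other than a `hub` import
# appears under docs/docs, templates, or cookbook.
import subprocess
import sys

result = subprocess.run(
    ["git", "grep", "from langchain import", "--",
     "docs/docs", "templates", "cookbook"],
    capture_output=True,
    text=True,
)
offending = [
    line
    for line in result.stdout.splitlines()
    if "from langchain import hub" not in line  # mirrors grep -vE '(hub)'
]
sys.exit(1 if offending else 0)
```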
.gitignore (vendored) — 5 changes

@@ -115,10 +115,13 @@ celerybeat.pid
 # Environments
 .env
-.envrc
+.venv*
+.venv
+.venvs
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/

 # Spyder project settings
 .spyderproject
Makefile — 2 changes

@@ -50,13 +50,11 @@ lint lint_package lint_tests:
	poetry run ruff docs templates cookbook
	poetry run ruff format docs templates cookbook --diff
	poetry run ruff --select I docs templates cookbook
-	git grep 'from langchain import' {docs/docs,templates,cookbook} | grep -vE 'from langchain import (hub)' && exit 1 || exit 0
-
format format_diff:
	poetry run ruff format docs templates cookbook
	poetry run ruff --select I --fix docs templates cookbook


######################
# HELP
######################
@@ -520,7 +520,7 @@
   "source": [
    "import re\n",
    "\n",
-    "from langchain_core.documents import Document\n",
+    "from langchain.schema import Document\n",
    "from langchain_core.runnables import RunnableLambda\n",
    "\n",
    "\n",
@@ -1,200 +0,0 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "%pip install -qU langchain-airbyte"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import getpass\n",
    "\n",
    "GITHUB_TOKEN = getpass.getpass()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_airbyte import AirbyteLoader\n",
    "from langchain_core.prompts import PromptTemplate\n",
    "\n",
    "loader = AirbyteLoader(\n",
    "    source=\"source-github\",\n",
    "    stream=\"pull_requests\",\n",
    "    config={\n",
    "        \"credentials\": {\"personal_access_token\": GITHUB_TOKEN},\n",
    "        \"repositories\": [\"langchain-ai/langchain\"],\n",
    "    },\n",
    "    template=PromptTemplate.from_template(\n",
    "        \"\"\"# {title}\n",
    "by {user[login]}\n",
    "\n",
    "{body}\"\"\"\n",
    "    ),\n",
    "    include_metadata=False,\n",
    ")\n",
    "docs = loader.load()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "# Updated partners/ibm README\n",
      "by williamdevena\n",
      "\n",
      "## PR title\n",
      "partners: changed the README file for the IBM Watson AI integration in the libs/partners/ibm folder.\n",
      "\n",
      "## PR message\n",
      "Description: Changed the README file of partners/ibm following the docs on https://python.langchain.com/docs/integrations/llms/ibm_watsonx\n",
      "\n",
      "The README includes:\n",
      "\n",
      "- Brief description\n",
      "- Installation\n",
      "- Setting-up instructions (API key, project id, ...)\n",
      "- Basic usage:\n",
      "  - Loading the model\n",
      "  - Direct inference\n",
      "  - Chain invoking\n",
      "  - Streaming the model output\n",
      "  \n",
      "Issue: https://github.com/langchain-ai/langchain/issues/17545\n",
      "\n",
      "Dependencies: None\n",
      "\n",
      "Twitter handle: None\n"
     ]
    }
   ],
   "source": [
    "print(docs[-2].page_content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10283"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tiktoken\n",
    "from langchain_community.vectorstores import Chroma\n",
    "from langchain_openai import OpenAIEmbeddings\n",
    "\n",
    "enc = tiktoken.get_encoding(\"cl100k_base\")\n",
    "\n",
    "vectorstore = Chroma.from_documents(\n",
    "    docs,\n",
    "    embedding=OpenAIEmbeddings(\n",
    "        disallowed_special=(enc.special_tokens_set - {\"<|endofprompt|>\"})\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "retriever = vectorstore.as_retriever()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(page_content='# Updated partners/ibm README\\nby williamdevena\\n\\n## PR title\\r\\npartners: changed the README file for the IBM Watson AI integration in the libs/partners/ibm folder.\\r\\n\\r\\n## PR message\\r\\nDescription: Changed the README file of partners/ibm following the docs on https://python.langchain.com/docs/integrations/llms/ibm_watsonx\\r\\n\\r\\nThe README includes:\\r\\n\\r\\n- Brief description\\r\\n- Installation\\r\\n- Setting-up instructions (API key, project id, ...)\\r\\n- Basic usage:\\r\\n - Loading the model\\r\\n - Direct inference\\r\\n - Chain invoking\\r\\n - Streaming the model output\\r\\n \\r\\nIssue: https://github.com/langchain-ai/langchain/issues/17545\\r\\n\\r\\nDependencies: None\\r\\n\\r\\nTwitter handle: None'),\n",
       " Document(page_content='# Updated partners/ibm README\\nby williamdevena\\n\\n## PR title\\r\\npartners: changed the README file for the IBM Watson AI integration in the `libs/partners/ibm` folder. \\r\\n\\r\\n\\r\\n\\r\\n## PR message\\r\\n- **Description:** Changed the README file of partners/ibm following the docs on https://python.langchain.com/docs/integrations/llms/ibm_watsonx\\r\\n\\r\\n  The README includes:\\r\\n  - Brief description\\r\\n  - Installation\\r\\n  - Setting-up instructions (API key, project id, ...)\\r\\n  - Basic usage:\\r\\n    - Loading the model\\r\\n    - Direct inference\\r\\n    - Chain invoking\\r\\n    - Streaming the model output\\r\\n\\r\\n\\r\\n- **Issue:** #17545\\r\\n- **Dependencies:** None\\r\\n- **Twitter handle:** None'),\n",
       " Document(page_content='# IBM: added partners package `langchain_ibm`, added llm\\nby MateuszOssGit\\n\\n  - **Description:** Added `langchain_ibm` as an langchain partners package of IBM [watsonx.ai](https://www.ibm.com/products/watsonx-ai) LLM provider (`WatsonxLLM`)\\r\\n  - **Dependencies:** [ibm-watsonx-ai](https://pypi.org/project/ibm-watsonx-ai/),\\r\\n  - **Tag maintainer:** : \\r\\n\\r\\nPlease make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally. ✅'),\n",
       " Document(page_content='# Add WatsonX support\\nby baptistebignaud\\n\\nIt is a connector to use a LLM from WatsonX.\\r\\nIt requires python SDK \"ibm-generative-ai\"\\r\\n\\r\\n(It might not be perfect since it is my first PR on a public repository 😄)')]"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "retriever.invoke(\"pull requests related to IBM\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
@@ -167,7 +167,7 @@
    "from langchain.llms import LlamaCpp\n",
    "from langchain.memory import ConversationTokenBufferMemory\n",
    "from langchain.prompts import PromptTemplate, load_prompt\n",
-    "from langchain_core.messages import SystemMessage\n",
+    "from langchain.schema import SystemMessage\n",
    "from langchain_experimental.chat_models import Llama2Chat\n",
    "from quixstreams import Application, State, message_key\n",
    "\n",
@@ -42,9 +42,9 @@
    ")\n",
    "from langchain.chains import LLMChain\n",
    "from langchain.prompts import StringPromptTemplate\n",
+    "from langchain.schema import AgentAction, AgentFinish\n",
    "from langchain_community.agent_toolkits import NLAToolkit\n",
    "from langchain_community.tools.plugin import AIPlugin\n",
-    "from langchain_core.agents import AgentAction, AgentFinish\n",
    "from langchain_openai import OpenAI"
   ]
  },
@@ -114,8 +114,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "from langchain.schema import Document\n",
    "from langchain_community.vectorstores import FAISS\n",
-    "from langchain_core.documents import Document\n",
    "from langchain_openai import OpenAIEmbeddings"
   ]
  },
@@ -67,9 +67,9 @@
    ")\n",
    "from langchain.chains import LLMChain\n",
    "from langchain.prompts import StringPromptTemplate\n",
+    "from langchain.schema import AgentAction, AgentFinish\n",
    "from langchain_community.agent_toolkits import NLAToolkit\n",
    "from langchain_community.tools.plugin import AIPlugin\n",
-    "from langchain_core.agents import AgentAction, AgentFinish\n",
    "from langchain_openai import OpenAI"
   ]
  },
@@ -138,8 +138,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "from langchain.schema import Document\n",
    "from langchain_community.vectorstores import FAISS\n",
-    "from langchain_core.documents import Document\n",
    "from langchain_openai import OpenAIEmbeddings"
   ]
  },
@@ -40,8 +40,8 @@
    ")\n",
    "from langchain.chains import LLMChain\n",
    "from langchain.prompts import StringPromptTemplate\n",
+    "from langchain.schema import AgentAction, AgentFinish\n",
    "from langchain_community.utilities import SerpAPIWrapper\n",
-    "from langchain_core.agents import AgentAction, AgentFinish\n",
    "from langchain_openai import OpenAI"
   ]
  },
@@ -103,8 +103,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
+    "from langchain.schema import Document\n",
    "from langchain_community.vectorstores import FAISS\n",
-    "from langchain_core.documents import Document\n",
    "from langchain_openai import OpenAIEmbeddings"
   ]
  },
@@ -72,7 +72,7 @@
   "source": [
    "from typing import Any, List, Tuple, Union\n",
    "\n",
-    "from langchain_core.agents import AgentAction, AgentFinish\n",
+    "from langchain.schema import AgentAction, AgentFinish\n",
    "\n",
    "\n",
    "class FakeAgent(BaseMultiActionAgent):\n",
File diff suppressed because it is too large
@@ -1,245 +0,0 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0fc0309d-4d49-4bb5-bec0-bd92c6fddb28",
   "metadata": {},
   "source": [
    "## Fireworks.AI + LangChain + RAG\n",
    " \n",
    "[Fireworks AI](https://python.langchain.com/docs/integrations/llms/fireworks) wants to provide the best experience when working with LangChain, and here is an example of Fireworks + LangChain doing RAG\n",
    "\n",
    "See [our models page](https://fireworks.ai/models) for the full list of models. We use `accounts/fireworks/models/mixtral-8x7b-instruct` for RAG In this tutorial.\n",
    "\n",
    "For the RAG target, we will use the Gemma technical report https://storage.googleapis.com/deepmind-media/gemma/gemma-report.pdf "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d12fb75a-f707-48d5-82a5-efe2d041813c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
      "Note: you may need to restart the kernel to use updated packages.\n",
      "Found existing installation: langchain-fireworks 0.0.1\n",
      "Uninstalling langchain-fireworks-0.0.1:\n",
      "  Successfully uninstalled langchain-fireworks-0.0.1\n",
      "Note: you may need to restart the kernel to use updated packages.\n",
      "Obtaining file:///mnt/disks/data/langchain/libs/partners/fireworks\n",
      "  Installing build dependencies ... \u001b[?25ldone\n",
      "\u001b[?25h  Checking if build backend supports build_editable ... \u001b[?25ldone\n",
      "\u001b[?25h  Getting requirements to build editable ... \u001b[?25ldone\n",
      "\u001b[?25h  Preparing editable metadata (pyproject.toml) ... \u001b[?25ldone\n",
      "\u001b[?25hRequirement already satisfied: aiohttp<4.0.0,>=3.9.1 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-fireworks==0.0.1) (3.9.3)\n",
      "Requirement already satisfied: fireworks-ai<0.13.0,>=0.12.0 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-fireworks==0.0.1) (0.12.0)\n",
      "Requirement already satisfied: langchain-core<0.2,>=0.1 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-fireworks==0.0.1) (0.1.23)\n",
      "Requirement already satisfied: requests<3,>=2 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-fireworks==0.0.1) (2.31.0)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.9.1->langchain-fireworks==0.0.1) (1.3.1)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.9.1->langchain-fireworks==0.0.1) (23.1.0)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.9.1->langchain-fireworks==0.0.1) (1.4.0)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.9.1->langchain-fireworks==0.0.1) (6.0.4)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.9.1->langchain-fireworks==0.0.1) (1.9.2)\n",
      "Requirement already satisfied: async-timeout<5.0,>=4.0 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.9.1->langchain-fireworks==0.0.1) (4.0.3)\n",
      "Requirement already satisfied: httpx in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (0.26.0)\n",
      "Requirement already satisfied: httpx-sse in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (0.4.0)\n",
      "Requirement already satisfied: pydantic in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (2.4.2)\n",
      "Requirement already satisfied: Pillow in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (10.2.0)\n",
      "Requirement already satisfied: PyYAML>=5.3 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (6.0.1)\n",
      "Requirement already satisfied: anyio<5,>=3 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (3.7.1)\n",
      "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (1.33)\n",
      "Requirement already satisfied: langsmith<0.2.0,>=0.1.0 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (0.1.5)\n",
      "Requirement already satisfied: packaging<24.0,>=23.2 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (23.2)\n",
      "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (8.2.3)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from requests<3,>=2->langchain-fireworks==0.0.1) (3.3.0)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from requests<3,>=2->langchain-fireworks==0.0.1) (3.4)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from requests<3,>=2->langchain-fireworks==0.0.1) (2.0.6)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from requests<3,>=2->langchain-fireworks==0.0.1) (2023.7.22)\n",
      "Requirement already satisfied: sniffio>=1.1 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from anyio<5,>=3->langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (1.3.0)\n",
      "Requirement already satisfied: exceptiongroup in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from anyio<5,>=3->langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (1.1.3)\n",
      "Requirement already satisfied: jsonpointer>=1.9 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.2,>=0.1->langchain-fireworks==0.0.1) (2.4)\n",
      "Requirement already satisfied: annotated-types>=0.4.0 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from pydantic->fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (0.5.0)\n",
      "Requirement already satisfied: pydantic-core==2.10.1 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from pydantic->fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (2.10.1)\n",
      "Requirement already satisfied: typing-extensions>=4.6.1 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from pydantic->fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (4.8.0)\n",
      "Requirement already satisfied: httpcore==1.* in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from httpx->fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (1.0.2)\n",
      "Requirement already satisfied: h11<0.15,>=0.13 in /mnt/disks/data/langchain/.venv/lib/python3.9/site-packages (from httpcore==1.*->httpx->fireworks-ai<0.13.0,>=0.12.0->langchain-fireworks==0.0.1) (0.14.0)\n",
      "Building wheels for collected packages: langchain-fireworks\n",
      "  Building editable for langchain-fireworks (pyproject.toml) ... \u001b[?25ldone\n",
      "\u001b[?25h  Created wheel for langchain-fireworks: filename=langchain_fireworks-0.0.1-py3-none-any.whl size=2228 sha256=564071b120b09ec31f2dc737733448a33bbb26e40b49fcde0c129ad26045259d\n",
      "  Stored in directory: /tmp/pip-ephem-wheel-cache-oz368vdk/wheels/e0/ad/31/d7e76dd73d61905ff7f369f5b0d21a4b5e7af4d3cb7487aece\n",
      "Successfully built langchain-fireworks\n",
      "Installing collected packages: langchain-fireworks\n",
      "Successfully installed langchain-fireworks-0.0.1\n",
      "\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "%pip install --quiet pypdf chromadb tiktoken openai \n",
    "%pip uninstall -y langchain-fireworks\n",
    "%pip install --editable /mnt/disks/data/langchain/libs/partners/fireworks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cf719376",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<module 'fireworks' from '/mnt/disks/data/langchain/.venv/lib/python3.9/site-packages/fireworks/__init__.py'>\n"
     ]
    }
   ],
   "source": [
    "import fireworks\n",
    "\n",
    "print(fireworks)\n",
    "import fireworks.client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9ab49327-0532-4480-804c-d066c302a322",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load\n",
    "import requests\n",
    "from langchain_community.document_loaders import PyPDFLoader\n",
    "\n",
    "# Download the PDF from a URL and save it to a temporary location\n",
    "url = \"https://storage.googleapis.com/deepmind-media/gemma/gemma-report.pdf\"\n",
    "response = requests.get(url, stream=True)\n",
    "file_name = \"temp_file.pdf\"\n",
    "with open(file_name, \"wb\") as pdf:\n",
    "    pdf.write(response.content)\n",
    "\n",
    "loader = PyPDFLoader(file_name)\n",
    "data = loader.load()\n",
    "\n",
    "# Split\n",
    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
    "\n",
    "text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)\n",
    "all_splits = text_splitter.split_documents(data)\n",
    "\n",
    "# Add to vectorDB\n",
    "from langchain_community.vectorstores import Chroma\n",
    "from langchain_fireworks.embeddings import FireworksEmbeddings\n",
    "\n",
    "vectorstore = Chroma.from_documents(\n",
    "    documents=all_splits,\n",
    "    collection_name=\"rag-chroma\",\n",
    "    embedding=FireworksEmbeddings(),\n",
    ")\n",
    "\n",
    "retriever = vectorstore.as_retriever()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "4efaddd9-3dbb-455c-ba54-0ad7f2d2ce0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "from langchain_core.pydantic_v1 import BaseModel\n",
    "from langchain_core.runnables import RunnableParallel, RunnablePassthrough\n",
    "\n",
    "# RAG prompt\n",
    "template = \"\"\"Answer the question based only on the following context:\n",
    "{context}\n",
    "\n",
    "Question: {question}\n",
    "\"\"\"\n",
    "prompt = ChatPromptTemplate.from_template(template)\n",
    "\n",
    "# LLM\n",
    "from langchain_together import Together\n",
    "\n",
    "llm = Together(\n",
    "    model=\"mistralai/Mixtral-8x7B-Instruct-v0.1\",\n",
    "    temperature=0.0,\n",
    "    max_tokens=2000,\n",
    "    top_k=1,\n",
    ")\n",
    "\n",
    "# RAG chain\n",
    "chain = (\n",
    "    RunnableParallel({\"context\": retriever, \"question\": RunnablePassthrough()})\n",
    "    | prompt\n",
    "    | llm\n",
    "    | StrOutputParser()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "88b1ee51-1b0f-4ebf-bb32-e50e843f0eeb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\nAnswer: The architectural details of Mixtral are as follows:\\n- Dimension (dim): 4096\\n- Number of layers (n\\\\_layers): 32\\n- Dimension of each head (head\\\\_dim): 128\\n- Hidden dimension (hidden\\\\_dim): 14336\\n- Number of heads (n\\\\_heads): 32\\n- Number of kv heads (n\\\\_kv\\\\_heads): 8\\n- Context length (context\\\\_len): 32768\\n- Vocabulary size (vocab\\\\_size): 32000\\n- Number of experts (num\\\\_experts): 8\\n- Number of top k experts (top\\\\_k\\\\_experts): 2\\n\\nMixtral is based on a transformer architecture and uses the same modifications as described in [18], with the notable exceptions that Mixtral supports a fully dense context length of 32k tokens, and the feedforward block picks from a set of 8 distinct groups of parameters. At every layer, for every token, a router network chooses two of these groups (the “experts”) to process the token and combine their output additively. This technique increases the number of parameters of a model while controlling cost and latency, as the model only uses a fraction of the total set of parameters per token. Mixtral is pretrained with multilingual data using a context size of 32k tokens. It either matches or exceeds the performance of Llama 2 70B and GPT-3.5, over several benchmarks. In particular, Mixtral vastly outperforms Llama 2 70B on mathematics, code generation, and multilingual benchmarks.'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chain.invoke(\"What are the Architectural details of Mixtral?\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "755cf871-26b7-4e30-8b91-9ffd698470f4",
   "metadata": {},
   "source": [
    "Trace: \n",
    "\n",
    "https://smith.langchain.com/public/935fd642-06a6-4b42-98e3-6074f93115cd/r"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
@@ -73,9 +73,8 @@
    "    AsyncCallbackManagerForRetrieverRun,\n",
    "    CallbackManagerForRetrieverRun,\n",
    ")\n",
+    "from langchain.schema import BaseRetriever, Document\n",
    "from langchain_community.utilities import GoogleSerperAPIWrapper\n",
-    "from langchain_core.documents import Document\n",
-    "from langchain_core.retrievers import BaseRetriever\n",
    "from langchain_openai import ChatOpenAI, OpenAI"
   ]
  },
@@ -358,7 +358,7 @@
    "\n",
    "from langchain.chains.openai_functions import create_qa_with_structure_chain\n",
    "from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate\n",
-    "from langchain_core.messages import HumanMessage, SystemMessage\n",
+    "from langchain.schema import HumanMessage, SystemMessage\n",
    "from pydantic import BaseModel, Field"
   ]
  },
@@ -1,648 +0,0 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c7fe38bc",
   "metadata": {},
   "source": [
    "# Optimization\n",
    "\n",
    "This notebook goes over how to optimize chains using LangChain and [LangSmith](https://smith.langchain.com)."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2f87ccd5",
   "metadata": {},
   "source": [
    "## Set up\n",
    "\n",
    "We will set an environment variable for LangSmith, and load the relevant data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "236bedc5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"LANGCHAIN_PROJECT\"] = \"movie-qa\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a3fed0dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "7cfff337",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"data/imdb_top_1000.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2d20fb9c",
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"Released_Year\"] = df[\"Released_Year\"].astype(int, errors=\"ignore\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "09fc8fe2",
   "metadata": {},
   "source": [
    "## Create the initial retrieval chain\n",
    "\n",
    "We will use a self-query retriever"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f71e24e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.schema import Document\n",
    "from langchain_community.vectorstores import Chroma\n",
    "from langchain_openai import OpenAIEmbeddings\n",
    "\n",
    "embeddings = OpenAIEmbeddings()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8881ea8e",
   "metadata": {},
   "outputs": [],
   "source": [
    "records = df.to_dict(\"records\")\n",
    "documents = [Document(page_content=d[\"Overview\"], metadata=d) for d in records]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "8f495423",
   "metadata": {},
   "outputs": [],
   "source": [
    "vectorstore = Chroma.from_documents(documents, embeddings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "31d33d62",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.chains.query_constructor.base import AttributeInfo\n",
    "from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "metadata_field_info = [\n",
    "    AttributeInfo(\n",
    "        name=\"Released_Year\",\n",
    "        description=\"The year the movie was released\",\n",
    "        type=\"int\",\n",
    "    ),\n",
    "    AttributeInfo(\n",
    "        name=\"Series_Title\",\n",
    "        description=\"The title of the movie\",\n",
    "        type=\"str\",\n",
    "    ),\n",
    "    AttributeInfo(\n",
    "        name=\"Genre\",\n",
    "        description=\"The genre of the movie\",\n",
    "        type=\"string\",\n",
    "    ),\n",
    "    AttributeInfo(\n",
    "        name=\"IMDB_Rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n",
    "    ),\n",
    "]\n",
    "document_content_description = \"Brief summary of a movie\"\n",
    "llm = ChatOpenAI(temperature=0)\n",
    "retriever = SelfQueryRetriever.from_llm(\n",
    "    llm, vectorstore, document_content_description, metadata_field_info, verbose=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "a731533b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.runnables import RunnablePassthrough"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "05181849",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.prompts import ChatPromptTemplate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "feed4be6",
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = ChatPromptTemplate.from_template(\n",
    "    \"\"\"Answer the user's question based on the below information:\n",
    "\n",
    "Information:\n",
    "\n",
    "{info}\n",
    "\n",
    "Question: {question}\"\"\"\n",
    ")\n",
    "generator = (prompt | ChatOpenAI() | StrOutputParser()).with_config(\n",
    "    run_name=\"generator\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "eb16cc9a",
   "metadata": {},
   "outputs": [],
   "source": [
    "chain = (\n",
    "    RunnablePassthrough.assign(info=(lambda x: x[\"question\"]) | retriever) | generator\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c70911cc",
   "metadata": {},
   "source": [
    "## Run examples\n",
    "\n",
    "Run examples through the chain. This can either be manually, or using a list of examples, or production traffic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "19a88d13",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'One of the horror movies released in the early 2000s is \"The Ring\" (2002), directed by Gore Verbinski.'"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "chain.invoke({\"question\": \"what is a horror movie released in early 2000s\"})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "17f9cdae",
   "metadata": {},
   "source": [
    "## Annotate\n",
    "\n",
    "Now, go to LangSmitha and annotate those examples as correct or incorrect"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5e211da6",
   "metadata": {},
   "source": [
    "## Create Dataset\n",
    "\n",
    "We can now create a dataset from those runs.\n",
    "\n",
    "What we will do is find the runs marked as correct, then grab the sub-chains from them. Specifically, the query generator sub chain and the final generation step"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "e4024267",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langsmith import Client\n",
    "\n",
    "client = Client()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "3814efc5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "14"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "runs = list(\n",
    "    client.list_runs(\n",
    "        project_name=\"movie-qa\",\n",
    "        execution_order=1,\n",
    "        filter=\"and(eq(feedback_key, 'correctness'), eq(feedback_score, 1))\",\n",
    "    )\n",
    ")\n",
    "\n",
    "len(runs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "3eb123e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "gen_runs = []\n",
    "query_runs = []\n",
    "for r in runs:\n",
    "    gen_runs.extend(\n",
    "        list(\n",
    "            client.list_runs(\n",
    "                project_name=\"movie-qa\",\n",
    "                filter=\"eq(name, 'generator')\",\n",
    "                trace_id=r.trace_id,\n",
    "            )\n",
    "        )\n",
    "    )\n",
    "    query_runs.extend(\n",
    "        list(\n",
    "            client.list_runs(\n",
    "                project_name=\"movie-qa\",\n",
    "                filter=\"eq(name, 'query_constructor')\",\n",
    "                trace_id=r.trace_id,\n",
    "            )\n",
    "        )\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "a4397026",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'question': 'what is a high school comedy released in early 2000s'}"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "runs[0].inputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "3fa6ad2a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'output': 'One high school comedy released in the early 2000s is \"Mean Girls\" starring Lindsay Lohan, Rachel McAdams, and Tina Fey.'}"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "runs[0].outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "1fda5b4b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'query': 'what is a high school comedy released in early 2000s'}"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query_runs[0].inputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "1a1a51e6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'output': {'query': 'high school comedy',\n",
       " 'filter': {'operator': 'and',\n",
       "  'arguments': [{'comparator': 'eq', 'attribute': 'Genre', 'value': 'comedy'},\n",
       "   {'operator': 'and',\n",
       "    'arguments': [{'comparator': 'gte',\n",
       "      'attribute': 'Released_Year',\n",
       "      'value': 2000},\n",
       "     {'comparator': 'lt', 'attribute': 'Released_Year', 'value': 2010}]}]}}}"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query_runs[0].outputs"
   ]
  },
  {
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "e9d9966b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'question': 'what is a high school comedy released in early 2000s',\n",
|
||||
" 'info': []}"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"gen_runs[0].inputs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "bc113f3d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'output': 'One high school comedy released in the early 2000s is \"Mean Girls\" starring Lindsay Lohan, Rachel McAdams, and Tina Fey.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"gen_runs[0].outputs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6cca74e5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create datasets\n",
|
||||
"\n",
|
||||
"We can now create datasets for the query generation and final generation step.\n",
|
||||
"We do this so that (1) we can inspect the datapoints, (2) we can edit them if needed, (3) we can add to them over time"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "69966f0e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"client.create_dataset(\"movie-query_constructor\")\n",
|
||||
"\n",
|
||||
"inputs = [r.inputs for r in query_runs]\n",
|
||||
"outputs = [r.outputs for r in query_runs]\n",
|
||||
"\n",
|
||||
"client.create_examples(\n",
|
||||
" inputs=inputs, outputs=outputs, dataset_name=\"movie-query_constructor\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "7e15770e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"client.create_dataset(\"movie-generator\")\n",
|
||||
"\n",
|
||||
"inputs = [r.inputs for r in gen_runs]\n",
|
||||
"outputs = [r.outputs for r in gen_runs]\n",
|
||||
"\n",
|
||||
"client.create_examples(inputs=inputs, outputs=outputs, dataset_name=\"movie-generator\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "61cf9bcd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use as few shot examples\n",
|
||||
"\n",
|
||||
"We can now pull down a dataset and use them as few shot examples in a future chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "d9c79173",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"examples = list(client.list_examples(dataset_name=\"movie-query_constructor\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "a1771dd0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"\n",
|
||||
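"# Recursively render a structured query filter as a string,\n",
"# e.g. and(eq(\"Genre\", \"comedy\"),gte(\"Released_Year\", 2000))\n",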
"def filter_to_string(_filter):\n",
|
||||
" if \"operator\" in _filter:\n",
|
||||
" args = [filter_to_string(f) for f in _filter[\"arguments\"]]\n",
|
||||
" return f\"{_filter['operator']}({','.join(args)})\"\n",
|
||||
" else:\n",
|
||||
" comparator = _filter[\"comparator\"]\n",
|
||||
" attribute = json.dumps(_filter[\"attribute\"])\n",
|
||||
" value = json.dumps(_filter[\"value\"])\n",
|
||||
" return f\"{comparator}({attribute}, {value})\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "e67a3530",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_examples = []\n",
|
||||
"\n",
|
||||
"for e in examples:\n",
|
||||
" if \"filter\" in e.outputs[\"output\"]:\n",
|
||||
" string_filter = filter_to_string(e.outputs[\"output\"][\"filter\"])\n",
|
||||
" else:\n",
|
||||
" string_filter = \"NO_FILTER\"\n",
|
||||
" model_examples.append(\n",
|
||||
" (\n",
|
||||
" e.inputs[\"query\"],\n",
|
||||
" {\"query\": e.outputs[\"output\"][\"query\"], \"filter\": string_filter},\n",
|
||||
" )\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "84593135",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever1 = SelfQueryRetriever.from_llm(\n",
|
||||
" llm,\n",
|
||||
" vectorstore,\n",
|
||||
" document_content_description,\n",
|
||||
" metadata_field_info,\n",
|
||||
" verbose=True,\n",
|
||||
" chain_kwargs={\"examples\": model_examples},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "4ec9bb92",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain1 = (\n",
|
||||
" RunnablePassthrough.assign(info=(lambda x: x[\"question\"]) | retriever1) | generator\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "64eb88e2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'1. \"Saving Private Ryan\" (1998) - Directed by Steven Spielberg, this war film follows a group of soldiers during World War II as they search for a missing paratrooper.\\n\\n2. \"The Matrix\" (1999) - Directed by the Wachowskis, this science fiction action film follows a computer hacker who discovers the truth about the reality he lives in.\\n\\n3. \"Lethal Weapon 4\" (1998) - Directed by Richard Donner, this action-comedy film follows two mismatched detectives as they investigate a Chinese immigrant smuggling ring.\\n\\n4. \"The Fifth Element\" (1997) - Directed by Luc Besson, this science fiction action film follows a cab driver who must protect a mysterious woman who holds the key to saving the world.\\n\\n5. \"The Rock\" (1996) - Directed by Michael Bay, this action thriller follows a group of rogue military men who take over Alcatraz and threaten to launch missiles at San Francisco.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain1.invoke(\n",
|
||||
" {\"question\": \"what are good action movies made before 2000 but after 1997?\"}\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e1ee8b55",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -19,9 +19,7 @@
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"For this example, we will use Pinecone and some fake data. To configure Pinecone, set the following environment variable:\n",
|
||||
"\n",
|
||||
"- `PINECONE_API_KEY`: Your Pinecone API key"
|
||||
"For this example, we will use Pinecone and some fake data"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -31,8 +29,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pinecone\n",
|
||||
"from langchain_community.vectorstores import Pinecone\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"from langchain_pinecone import PineconeVectorStore"
|
||||
"\n",
|
||||
"pinecone.init(api_key=\"...\", environment=\"...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -63,7 +64,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vectorstore = PineconeVectorStore.from_texts(\n",
|
||||
"vectorstore = Pinecone.from_texts(\n",
|
||||
" list(all_documents.values()), OpenAIEmbeddings(), index_name=\"rag-fusion\"\n",
|
||||
")"
|
||||
]
|
||||
@@ -161,7 +162,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vectorstore = PineconeVectorStore.from_existing_index(\"rag-fusion\", OpenAIEmbeddings())\n",
|
||||
"vectorstore = Pinecone.from_existing_index(\"rag-fusion\", OpenAIEmbeddings())\n",
|
||||
"retriever = vectorstore.as_retriever()"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -51,10 +51,10 @@
|
||||
"from langchain.chains.base import Chain\n",
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain.prompts.base import StringPromptTemplate\n",
|
||||
"from langchain.schema import AgentAction, AgentFinish\n",
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain_community.llms import BaseLLM\n",
|
||||
"from langchain_community.vectorstores import Chroma\n",
|
||||
"from langchain_core.agents import AgentAction, AgentFinish\n",
|
||||
"from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings\n",
|
||||
"from pydantic import BaseModel, Field"
|
||||
]
|
||||
|
||||
@@ -1083,7 +1083,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_elasticsearch import ElasticsearchStore\n",
|
||||
"from langchain_community.vectorstores import ElasticsearchStore\n",
|
||||
"from langchain_openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
|
||||
@@ -401,7 +401,7 @@
|
||||
")\n",
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain.prompts import StringPromptTemplate\n",
|
||||
"from langchain_core.agents import AgentAction, AgentFinish"
|
||||
"from langchain.schema import AgentAction, AgentFinish"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,10 +1,5 @@
|
||||
# docker-compose to make it easier to spin up integration tests.
|
||||
# Services should use non-standard ports to avoid collisions with
|
||||
# any existing services that might be used for development.
|
||||
# ATTENTION: When adding a service below, use a non-standard port:
# increment by one from the preceding port.
|
||||
# For credentials always use `langchain` and `langchain` for the
|
||||
# username and password.
|
||||
version: "3"
|
||||
name: langchain-tests
|
||||
|
||||
@@ -24,34 +19,3 @@ services:
|
||||
image: graphdb
|
||||
ports:
|
||||
- "6021:7200"
|
||||
mongo:
|
||||
image: mongo:latest
|
||||
container_name: mongo_container
|
||||
ports:
|
||||
- "6022:27017"
|
||||
environment:
|
||||
MONGO_INITDB_ROOT_USERNAME: langchain
|
||||
MONGO_INITDB_ROOT_PASSWORD: langchain
|
||||
postgres:
|
||||
image: postgres:16
|
||||
environment:
|
||||
POSTGRES_DB: langchain
|
||||
POSTGRES_USER: langchain
|
||||
POSTGRES_PASSWORD: langchain
|
||||
ports:
|
||||
- "6023:5432"
|
||||
command: |
|
||||
postgres -c log_statement=all
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
"CMD-SHELL",
|
||||
"psql postgresql://langchain:langchain@localhost/langchain --command 'SELECT 1;' || exit 1",
|
||||
]
|
||||
interval: 5s
|
||||
retries: 60
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
|
||||
1
docs/.gitignore
vendored
@@ -1 +0,0 @@
|
||||
/.quarto/
|
||||
@@ -49,7 +49,7 @@ class ExampleLinksDirective(SphinxDirective):
|
||||
class_or_func_name = self.arguments[0]
|
||||
links = imported_classes.get(class_or_func_name, {})
|
||||
list_node = nodes.bullet_list()
|
||||
for doc_name, link in sorted(links.items()):
|
||||
for doc_name, link in links.items():
|
||||
item_node = nodes.list_item()
|
||||
para_node = nodes.paragraph()
|
||||
link_node = nodes.reference()
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
import importlib
|
||||
import inspect
|
||||
import os
|
||||
import sys
|
||||
import typing
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
@@ -345,29 +344,28 @@ def _doc_first_line(package_name: str) -> str:
|
||||
return f".. {package_name.replace('-', '_')}_api_reference:\n\n"
|
||||
|
||||
|
||||
def main(dirs: Optional[list] = None) -> None:
|
||||
def main() -> None:
|
||||
"""Generate the api_reference.rst file for each package."""
|
||||
print("Starting to build API reference files.")
|
||||
if not dirs:
|
||||
dirs = [
|
||||
dir_
|
||||
for dir_ in os.listdir(ROOT_DIR / "libs")
|
||||
if dir_ not in ("cli", "partners")
|
||||
]
|
||||
dirs += os.listdir(ROOT_DIR / "libs" / "partners")
|
||||
for dir_ in dirs:
|
||||
for dir in os.listdir(ROOT_DIR / "libs"):
|
||||
# Skip any hidden directories
|
||||
# Some of these could be present by mistake in the code base
|
||||
# e.g., .pytest_cache from running tests from the wrong location.
|
||||
if dir_.startswith("."):
|
||||
print("Skipping dir:", dir_)
|
||||
if not dir.startswith("."):
|
||||
print("Skipping dir:", dir)
|
||||
continue
|
||||
|
||||
if dir in ("cli", "partners"):
|
||||
continue
|
||||
else:
|
||||
print("Building package:", dir_)
|
||||
_build_rst_file(package_name=dir_)
|
||||
print("Building package:", dir)
|
||||
_build_rst_file(package_name=dir)
|
||||
partner_packages = os.listdir(ROOT_DIR / "libs" / "partners")
|
||||
print("Building partner packages:", partner_packages)
|
||||
for dir in partner_packages:
|
||||
_build_rst_file(package_name=dir)
|
||||
print("API reference files built.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
dirs = sys.argv[1:] or None
|
||||
main(dirs=dirs)
|
||||
main()
|
||||
|
||||
File diff suppressed because it is too large
@@ -25,7 +25,6 @@ Below are links to tutorials and courses on LangChain. For written guides on com
|
||||
⛓ [LangChain Cheatsheet](https://pub.towardsai.net/langchain-cheatsheet-all-secrets-on-a-single-page-8be26b721cde) by **Ivan Reznikov**
|
||||
|
||||
### Short Tutorials
|
||||
|
||||
[LangChain Explained in 13 Minutes | QuickStart Tutorial for Beginners](https://youtu.be/aywZrzNaKjs) by [Rabbitmetrics](https://www.youtube.com/@rabbitmetrics)
|
||||
|
||||
[LangChain Crash Course: Build an AutoGPT app in 25 minutes](https://youtu.be/MlK6SIjcjE8) by [Nicholas Renotte](https://www.youtube.com/@NicholasRenotte)
|
||||
@@ -34,14 +33,6 @@ Below are links to tutorials and courses on LangChain. For written guides on com
|
||||
|
||||
⛓ [LangChain 101 Course](https://medium.com/@ivanreznikov/langchain-101-course-updated-668f7b41d6cb) by **Ivan Reznikov**
|
||||
|
||||
### Code Alongs
|
||||
|
||||
DataCamp has developed a [Become a Generative AI Developer series](https://www.datacamp.com/ai-code-alongs) featuring 9 free code-alongs, including ones on building chatbots using LangChain and the OpenAI and Pinecone APIs. When you start a code along, you are launched into a fully configured notebook environment with an expert-led video to guide you through the project.
|
||||
|
||||
⛓ [Prompt Engineering with GPT & LangChain](https://www.datacamp.com/code-along/prompt-engineering-gpt-langchain)

⛓ [Retrieval Augmented Generation with the OpenAI API & Pinecone](https://www.datacamp.com/code-along/retrieval-augmented-generation-openai-api-pinecone)
|
||||
|
||||
## Tutorials
|
||||
|
||||
### [LangChain for Gen AI and LLMs](https://www.youtube.com/playlist?list=PLIUOU7oqGTLieV9uTIFMm6_4PXg-hlN6F) by [James Briggs](https://www.youtube.com/@jamesbriggs)
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"source": [
|
||||
"# Agents\n",
|
||||
"\n",
|
||||
"You can pass a Runnable into an agent. Make sure you have `langchainhub` installed: `pip install langchainhub`"
|
||||
"You can pass a Runnable into an agent."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -98,7 +98,7 @@
|
||||
"source": [
|
||||
"Building an agent from a runnable usually involves a few things:\n",
|
||||
"\n",
|
||||
"1. Data processing for the intermediate steps. These need to be represented in a way that the language model can recognize them. This should be pretty tightly coupled to the instructions in the prompt\n",
|
||||
"1. Data processing for the intermediate steps. These need to represented in a way that the language model can recognize them. This should be pretty tightly coupled to the instructions in the prompt\n",
|
||||
"\n",
|
||||
"2. The prompt itself\n",
|
||||
"\n",
|
||||
|
||||
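To make step 1 concrete, here is a short sketch of one common way to render the intermediate (action, observation) pairs as text for the model. This is an illustration, not the notebook's own code; the helper name is hypothetical, though `langchain.agents.format_scratchpad` ships similar utilities:

```python
from langchain_core.agents import AgentAction


def format_intermediate_steps(steps: list[tuple[AgentAction, str]]) -> str:
    """Render (AgentAction, observation) pairs as a ReAct-style scratchpad."""
    thoughts = ""
    for action, observation in steps:
        thoughts += action.log  # the model's reasoning and tool call so far
        thoughts += f"\nObservation: {observation}\nThought: "
    return thoughts
```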
@@ -47,7 +47,7 @@
|
||||
"source": [
|
||||
"from operator import itemgetter\n",
|
||||
"\n",
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain.schema import StrOutputParser\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
|
||||
@@ -169,8 +169,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema import format_document\n",
|
||||
"from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string\n",
|
||||
"from langchain_core.prompts import format_document\n",
|
||||
"from langchain_core.runnables import RunnableParallel"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain.schema import StrOutputParser\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"cell_type": "markdown",
|
||||
"id": "9e45e81c-e16e-4c6c-b6a3-2362e5193827",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -25,42 +25,53 @@
|
||||
"\n",
|
||||
"There are two ways to perform routing:\n",
|
||||
"\n",
|
||||
"1. Conditionally return runnables from a [`RunnableLambda`](./functions) (recommended)\n",
|
||||
"2. Using a `RunnableBranch`.\n",
|
||||
"1. Using a `RunnableBranch`.\n",
|
||||
"2. Writing custom factory function that takes the input of a previous step and returns a **runnable**. Importantly, this should return a **runnable** and NOT actually execute.\n",
|
||||
"\n",
|
||||
"We'll illustrate both methods using a two step sequence where the first step classifies an input question as being about `LangChain`, `Anthropic`, or `Other`, then routes to a corresponding prompt chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c1c6edac",
|
||||
"id": "f885113d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using a RunnableBranch\n",
|
||||
"\n",
|
||||
"A `RunnableBranch` is initialized with a list of (condition, runnable) pairs and a default runnable. It selects which branch by passing each condition the input it's invoked with. It selects the first condition to evaluate to True, and runs the corresponding runnable to that condition with the input. \n",
|
||||
"\n",
|
||||
"If no provided conditions match, it runs the default runnable.\n",
|
||||
"\n",
|
||||
"Here's an example of what it looks like in action:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "1aa13c1d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import PromptTemplate\n",
|
||||
"from langchain_community.chat_models import ChatAnthropic\n",
|
||||
"from langchain_core.output_parsers import StrOutputParser"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ed84c59a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example Setup\n",
|
||||
"First, let's create a chain that will identify incoming questions as being about `LangChain`, `Anthropic`, or `Other`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8a8a1967",
|
||||
"execution_count": 2,
|
||||
"id": "3ec03886",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' Anthropic'"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_models import ChatAnthropic\n",
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.prompts import PromptTemplate\n",
|
||||
"\n",
|
||||
"chain = (\n",
|
||||
" PromptTemplate.from_template(\n",
|
||||
" \"\"\"Given the user question below, classify it as either being about `LangChain`, `Anthropic`, or `Other`.\n",
|
||||
@@ -75,14 +86,33 @@
|
||||
" )\n",
|
||||
" | ChatAnthropic()\n",
|
||||
" | StrOutputParser()\n",
|
||||
")\n",
|
||||
"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "87ae7c1c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' Anthropic'"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chain.invoke({\"question\": \"how do I call Anthropic?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7655555f",
|
||||
"id": "8aa0a365",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's create three sub chains:"
|
||||
@@ -90,8 +120,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "89d7722d",
|
||||
"execution_count": 4,
|
||||
"id": "d479962a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -128,12 +158,101 @@
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "593eab06",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.runnables import RunnableBranch\n",
|
||||
"\n",
|
||||
"branch = RunnableBranch(\n",
|
||||
" (lambda x: \"anthropic\" in x[\"topic\"].lower(), anthropic_chain),\n",
|
||||
" (lambda x: \"langchain\" in x[\"topic\"].lower(), langchain_chain),\n",
|
||||
" general_chain,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "752c732e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"full_chain = {\"topic\": chain, \"question\": lambda x: x[\"question\"]} | branch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "29231bb8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\" As Dario Amodei told me, here are some ways to use Anthropic:\\n\\n- Sign up for an account on Anthropic's website to access tools like Claude, Constitutional AI, and Writer. \\n\\n- Use Claude for tasks like email generation, customer service chat, and QA. Claude can understand natural language prompts and provide helpful responses.\\n\\n- Use Constitutional AI if you need an AI assistant that is harmless, honest, and helpful. It is designed to be safe and aligned with human values.\\n\\n- Use Writer to generate natural language content for things like marketing copy, stories, reports, and more. Give it a topic and prompt and it will create high-quality written content.\\n\\n- Check out Anthropic's documentation and blog for tips, tutorials, examples, and announcements about new capabilities as they continue to develop their AI technology.\\n\\n- Follow Anthropic on social media or subscribe to their newsletter to stay up to date on new features and releases.\\n\\n- For most people, the easiest way to leverage Anthropic's technology is through their website - just create an account to get started!\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"full_chain.invoke({\"question\": \"how do I use Anthropic?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "c67d8733",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=' As Harrison Chase told me, here is how you use LangChain:\\n\\nLangChain is an AI assistant that can have conversations, answer questions, and generate text. To use LangChain, you simply type or speak your input and LangChain will respond. \\n\\nYou can ask LangChain questions, have discussions, get summaries or explanations about topics, and request it to generate text on a subject. Some examples of interactions:\\n\\n- Ask general knowledge questions and LangChain will try to answer factually. For example \"What is the capital of France?\"\\n\\n- Have conversations on topics by taking turns speaking. You can prompt the start of a conversation by saying something like \"Let\\'s discuss machine learning\"\\n\\n- Ask for summaries or high-level explanations on subjects. For example \"Can you summarize the main themes in Shakespeare\\'s Hamlet?\" \\n\\n- Give creative writing prompts or requests to have LangChain generate text in different styles. For example \"Write a short children\\'s story about a mouse\" or \"Generate a poem in the style of Robert Frost about nature\"\\n\\n- Correct LangChain if it makes an inaccurate statement and provide the right information. This helps train it.\\n\\nThe key is interacting naturally and giving it clear prompts and requests', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"full_chain.invoke({\"question\": \"how do I use LangChain?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "935ad949",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=' 2 + 2 = 4', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"full_chain.invoke({\"question\": \"whats 2 + 2\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6d8d042c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using a custom function (Recommended)\n",
|
||||
"## Using a custom function\n",
|
||||
"\n",
|
||||
"You can also use a custom function to route between different outputs. Here's an example:"
|
||||
]
|
||||
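The example code itself is elided from this hunk. A minimal sketch of the idea, reusing the `chain` and the three sub-chains defined above (the `route` name is illustrative):

```python
from langchain_core.runnables import RunnableLambda


def route(info):
    # `info` carries the classified topic along with the original question.
    if "anthropic" in info["topic"].lower():
        return anthropic_chain
    elif "langchain" in info["topic"].lower():
        return langchain_chain
    else:
        return general_chain


full_chain = {"topic": chain, "question": lambda x: x["question"]} | RunnableLambda(route)
full_chain.invoke({"question": "how do I use Anthropic?"})
```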
@@ -231,89 +350,13 @@
|
||||
"full_chain.invoke({\"question\": \"whats 2 + 2\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5147b827",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using a RunnableBranch\n",
|
||||
"\n",
|
||||
"A `RunnableBranch` is a special type of runnable that allows you to define a set of conditions and runnables to execute based on the input. It does **not** offer anything that you can't achieve in a custom function as described above, so we recommend using a custom function instead.\n",
|
||||
"\n",
|
||||
"A `RunnableBranch` is initialized with a list of (condition, runnable) pairs and a default runnable. It selects which branch by passing each condition the input it's invoked with. It selects the first condition to evaluate to True, and runs the corresponding runnable to that condition with the input. \n",
|
||||
"\n",
|
||||
"If no provided conditions match, it runs the default runnable.\n",
|
||||
"\n",
|
||||
"Here's an example of what it looks like in action:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2a101418",
|
||||
"id": "46802d04",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\" As Dario Amodei told me, here are some ways to use Anthropic:\\n\\n- Sign up for an account on Anthropic's website to access tools like Claude, Constitutional AI, and Writer. \\n\\n- Use Claude for tasks like email generation, customer service chat, and QA. Claude can understand natural language prompts and provide helpful responses.\\n\\n- Use Constitutional AI if you need an AI assistant that is harmless, honest, and helpful. It is designed to be safe and aligned with human values.\\n\\n- Use Writer to generate natural language content for things like marketing copy, stories, reports, and more. Give it a topic and prompt and it will create high-quality written content.\\n\\n- Check out Anthropic's documentation and blog for tips, tutorials, examples, and announcements about new capabilities as they continue to develop their AI technology.\\n\\n- Follow Anthropic on social media or subscribe to their newsletter to stay up to date on new features and releases.\\n\\n- For most people, the easiest way to leverage Anthropic's technology is through their website - just create an account to get started!\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.runnables import RunnableBranch\n",
|
||||
"\n",
|
||||
"branch = RunnableBranch(\n",
|
||||
" (lambda x: \"anthropic\" in x[\"topic\"].lower(), anthropic_chain),\n",
|
||||
" (lambda x: \"langchain\" in x[\"topic\"].lower(), langchain_chain),\n",
|
||||
" general_chain,\n",
|
||||
")\n",
|
||||
"full_chain = {\"topic\": chain, \"question\": lambda x: x[\"question\"]} | branch\n",
|
||||
"full_chain.invoke({\"question\": \"how do I use Anthropic?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8d8caf9b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=' As Harrison Chase told me, here is how you use LangChain:\\n\\nLangChain is an AI assistant that can have conversations, answer questions, and generate text. To use LangChain, you simply type or speak your input and LangChain will respond. \\n\\nYou can ask LangChain questions, have discussions, get summaries or explanations about topics, and request it to generate text on a subject. Some examples of interactions:\\n\\n- Ask general knowledge questions and LangChain will try to answer factually. For example \"What is the capital of France?\"\\n\\n- Have conversations on topics by taking turns speaking. You can prompt the start of a conversation by saying something like \"Let\\'s discuss machine learning\"\\n\\n- Ask for summaries or high-level explanations on subjects. For example \"Can you summarize the main themes in Shakespeare\\'s Hamlet?\" \\n\\n- Give creative writing prompts or requests to have LangChain generate text in different styles. For example \"Write a short children\\'s story about a mouse\" or \"Generate a poem in the style of Robert Frost about nature\"\\n\\n- Correct LangChain if it makes an inaccurate statement and provide the right information. This helps train it.\\n\\nThe key is interacting naturally and giving it clear prompts and requests', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"full_chain.invoke({\"question\": \"how do I use LangChain?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "26159af7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=' 2 + 2 = 4', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"full_chain.invoke({\"question\": \"whats 2 + 2\"})"
|
||||
]
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -68,7 +68,7 @@
|
||||
"source": [
|
||||
"# Showing the example using anthropic, but you can use\n",
|
||||
"# your favorite chat model!\n",
|
||||
"from langchain_community.chat_models import ChatAnthropic\n",
|
||||
"from langchain.chat_models import ChatAnthropic\n",
|
||||
"\n",
|
||||
"model = ChatAnthropic()\n",
|
||||
"\n",
|
||||
@@ -464,12 +464,12 @@
|
||||
"id": "6fd3e71b-439e-418f-8a8a-5232fba3d9fd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Stream just yielded the final result from that component.\n",
|
||||
"Stream just yielded the final result from that component. \n",
|
||||
"\n",
|
||||
"This is OK 🥹! Not all components have to implement streaming -- in some cases streaming is either unnecessary, difficult or just doesn't make sense.\n",
|
||||
"\n",
|
||||
":::{.callout-tip}\n",
|
||||
"An LCEL chain constructed using non-streaming components, will still be able to stream in a lot of cases, with streaming of partial output starting after the last non-streaming step in the chain.\n",
|
||||
"An LCEL chain constructed using using non-streaming components, will still be able to stream in a lot of cases, with streaming of partial output starting after the last non-streaming step in the chain.\n",
|
||||
":::"
|
||||
]
|
||||
},
|
||||
|
||||
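As a hedged illustration of that tip (assuming `OPENAI_API_KEY` is set; the prompt is arbitrary), a chain whose first step is a plain non-streaming function still streams tokens from the model that follows it:

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template("Write one sentence about {topic}.")
# The lambda below cannot stream, but every step after it can.
chain = (lambda x: {"topic": x.strip()}) | prompt | ChatOpenAI() | StrOutputParser()

for chunk in chain.stream("  parrots  "):
    print(chunk, end="", flush=True)
```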
@@ -58,17 +58,17 @@ LangChain enables building applications that connect external sources of data and
|
||||
In this quickstart, we will walk through a few different ways of doing that.
|
||||
We will start with a simple LLM chain, which just relies on information in the prompt template to respond.
|
||||
Next, we will build a retrieval chain, which fetches data from a separate database and passes that into the prompt template.
|
||||
We will then add in chat history, to create a conversation retrieval chain. This allows you to interact in a chat manner with this LLM, so it remembers previous questions.
|
||||
We will then add in chat history, to create a conversation retrieval chain. This allows you interact in a chat manner with this LLM, so it remembers previous questions.
|
||||
Finally, we will build an agent - which utilizes an LLM to determine whether or not it needs to fetch data to answer questions.
|
||||
We will cover these at a high level, but there are a lot of details to all of these!
|
||||
We will link to relevant docs.
|
||||
|
||||
## LLM Chain
|
||||
|
||||
We'll show how to use models available via API, like OpenAI and Cohere, and local open source models, using integrations like Ollama.
|
||||
For this getting started guide, we will provide two options: using OpenAI (a popular model available via API) or using a local open source model.
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="openai" label="OpenAI (API)" default>
|
||||
<TabItem value="openai" label="OpenAI" default>
|
||||
|
||||
First we'll need to import the LangChain x OpenAI integration package.
|
||||
|
||||
@@ -99,7 +99,7 @@ llm = ChatOpenAI(openai_api_key="...")
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="local" label="Local (using Ollama)">
|
||||
<TabItem value="local" label="Local">
|
||||
|
||||
[Ollama](https://ollama.ai/) allows you to run open-source large language models, such as Llama 2, locally.
|
||||
|
||||
@@ -112,37 +112,6 @@ Then, make sure the Ollama server is running. After that, you can do:
|
||||
```python
|
||||
from langchain_community.llms import Ollama
|
||||
llm = Ollama(model="llama2")
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="cohere" label="Cohere (API)" default>
|
||||
|
||||
First we'll need to import the Cohere SDK package.
|
||||
|
||||
```shell
|
||||
pip install cohere
|
||||
```
|
||||
|
||||
Accessing the API requires an API key, which you can get by creating an account and heading [here](https://dashboard.cohere.com/api-keys). Once we have a key we'll want to set it as an environment variable by running:
|
||||
|
||||
```shell
|
||||
export COHERE_API_KEY="..."
|
||||
```
|
||||
|
||||
We can then initialize the model:
|
||||
|
||||
```python
|
||||
from langchain_community.chat_models import ChatCohere
|
||||
|
||||
llm = ChatCohere()
|
||||
```
|
||||
|
||||
If you'd prefer not to set an environment variable you can pass the key in directly via the `cohere_api_key` named parameter when initiating the Cohere LLM class:
|
||||
|
||||
```python
|
||||
from langchain_community.chat_models import ChatCohere
|
||||
|
||||
llm = ChatCohere(cohere_api_key="...")
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
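Whichever provider you chose, the resulting `llm` exposes the same interface, so a quick smoke test looks the same everywhere (the prompt here is just an example):

```python
llm.invoke("how can langsmith help with testing?")
```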
@@ -231,10 +200,10 @@ docs = loader.load()
|
||||
|
||||
Next, we need to index it into a vectorstore. This requires a few components, namely an [embedding model](/docs/modules/data_connection/text_embedding) and a [vectorstore](/docs/modules/data_connection/vectorstores).
|
||||
|
||||
For embedding models, we once again provide examples for accessing via API or by running local models.
|
||||
For embedding models, we once again provide examples for accessing via OpenAI or via local models.
|
||||
|
||||
<Tabs>
|
||||
<TabItem value="openai" label="OpenAI (API)" default>
|
||||
<TabItem value="openai" label="OpenAI" default>
|
||||
|
||||
Make sure you have the `langchain_openai` package installed and the appropriate environment variables set (these are the same as needed for the LLM).
|
||||
|
||||
@@ -245,7 +214,7 @@ embeddings = OpenAIEmbeddings()
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="local" label="Local (using Ollama)">
|
||||
<TabItem value="local" label="Local">
|
||||
|
||||
Make sure you have Ollama running (same set up as with the LLM).
|
||||
|
||||
@@ -255,17 +224,6 @@ from langchain_community.embeddings import OllamaEmbeddings
|
||||
embeddings = OllamaEmbeddings()
|
||||
```
|
||||
</TabItem>
|
||||
<TabItem value="cohere" label="Cohere (API)" default>
|
||||
|
||||
Make sure you have the `cohere` package installed and the appropriate environment variables set (these are the same as needed for the LLM).
|
||||
|
||||
```python
|
||||
from langchain_community.embeddings import CohereEmbeddings
|
||||
|
||||
embeddings = CohereEmbeddings()
|
||||
```
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
Now, we can use this embedding model to ingest documents into a vectorstore.
|
||||
|
||||
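The ingestion code itself falls outside this hunk. A minimal sketch of that step, assuming a local vectorstore such as FAISS (`pip install faiss-cpu`) and the `docs` and `embeddings` objects from above:

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

# Split the loaded documents into chunks, then embed and index them.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
vector = FAISS.from_documents(documents, embeddings)
```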
@@ -35,7 +35,7 @@
|
||||
"\n",
|
||||
"from langchain.chains import LLMChain\n",
|
||||
"from langchain.evaluation import AgentTrajectoryEvaluator\n",
|
||||
"from langchain_core.agents import AgentAction\n",
|
||||
"from langchain.schema import AgentAction\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"\n",
|
||||
|
||||
@@ -90,7 +90,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain.schema import Document\n",
|
||||
"\n",
|
||||
"documents = [Document(page_content=document_content)]"
|
||||
]
|
||||
@@ -879,7 +879,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts.prompt import PromptTemplate\n",
|
||||
"from langchain_core.prompts import format_document\n",
|
||||
"from langchain.schema import format_document\n",
|
||||
"\n",
|
||||
"DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template=\"{page_content}\")\n",
|
||||
"\n",
|
||||
|
||||
@@ -1,391 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6e3f0f72",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# [beta] Structured Output\n",
|
||||
"\n",
|
||||
"It is often crucial to have LLMs return structured output. This is because often times the outputs of the LLMs are used in downstream applications, where specific arguments are required. Having the LLM return structured output reliably is necessary for that.\n",
|
||||
"\n",
|
||||
"There are a few different high level strategies that are used to do this:\n",
|
||||
"\n",
|
||||
"- Prompting: This is when you ask the LLM (very nicely) to return output in the desired format (JSON, XML). This is nice because works with all LLMs, this is not nice because it doesn't garuntee that the LLM returns in the right format.\n",
|
||||
"- Function calling: This is when the LLM is finetuned to be able to not just generate a completion, but also generate a function call. The functions the LLM can call are generally passed as extra parameters to the model API. The function names and descriptions should be treated as part of the prompt (they usually count against token counts, and are used by the LLM to decide what to do).\n",
|
||||
"- Tool calling: A technique similar to function calling, but it allows the LLM to call multiple functions at the same time.\n",
|
||||
"- JSON mode: This is when the LLM is garunteed to return JSON.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Different models may support different variants of these, with slightly different parameters. In order to make it easy to get LLMs to return structured output, we have added a common interface to LangChain models: `.with_structured_output`. \n",
|
||||
"\n",
|
||||
"By invoking this method (and passing in a JSON schema or a Pydantic model) the model will add whatever model parameters + output parsers are necessary to get back the structured output. There may be more than one way to do this (eg function calling vs JSON mode) - you can configure which method to use by passing into that method.\n",
|
||||
"\n",
|
||||
"Let's look at some examples of this in action!\n",
|
||||
"\n",
|
||||
"We will use Pydantic to easily structure the response schema."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "08029f4e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "070bf702",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class Joke(BaseModel):\n",
|
||||
" setup: str = Field(description=\"The setup of the joke\")\n",
|
||||
" punchline: str = Field(description=\"The punchline to the joke\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "98f6edfa",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## OpenAI\n",
|
||||
"\n",
|
||||
"OpenAI exposes a few different ways to get structured outputs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "3fe7caf0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "deddb6d3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Function Calling\n",
|
||||
"\n",
|
||||
"By default, we will use `function_calling`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "6700994a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = ChatOpenAI()\n",
|
||||
"model_with_structure = model.with_structured_output(Joke)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "c55a61b8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Joke(setup='Why was the cat sitting on the computer?', punchline='It wanted to keep an eye on the mouse!')"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_with_structure.invoke(\"Tell me a joke about cats\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "39d7a555",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### JSON Mode\n",
|
||||
"\n",
|
||||
"We also support JSON mode. Note that we need to specify in the prompt the format that it should respond in."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "df0370e3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_with_structure = model.with_structured_output(Joke, method=\"json_mode\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "23844a26",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Joke(setup=\"Why don't cats play poker in the jungle?\", punchline='Too many cheetahs!')"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_with_structure.invoke(\n",
|
||||
" \"Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8f3cce9e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Fireworks\n",
|
||||
"\n",
|
||||
"[Fireworks](https://fireworks.ai/) similarly supports function calling and JSON mode for select models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "ad45fdd8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_fireworks import ChatFireworks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "36270ed5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Function Calling\n",
|
||||
"\n",
|
||||
"By default, we will use `function_calling`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "49a20847",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = ChatFireworks(model=\"accounts/fireworks/models/firefunction-v1\")\n",
|
||||
"model_with_structure = model.with_structured_output(Joke)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "e3093a6c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Joke(setup=\"Why don't cats play poker in the jungle?\", punchline='Too many cheetahs!')"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_with_structure.invoke(\"Tell me a joke about cats\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ddb6b3ba",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### JSON Mode\n",
|
||||
"\n",
|
||||
"We also support JSON mode. Note that we need to specify in the prompt the format that it should respond in."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "ea0c22c1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_with_structure = model.with_structured_output(Joke, method=\"json_mode\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "649f9632",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Joke(setup='Why did the dog sit in the shade?', punchline='To avoid getting burned.')"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_with_structure.invoke(\n",
|
||||
" \"Tell me a joke about dogs, respond in JSON with `setup` and `punchline` keys\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ff70609a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Mistral\n",
|
||||
"\n",
|
||||
"We also support structured output with Mistral models, although we only support function calling."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "bffd3fad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_mistralai import ChatMistralAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "c8bd7549",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = ChatMistralAI(model=\"mistral-large-latest\")\n",
|
||||
"model_with_structure = model.with_structured_output(Joke)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "17b15816",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_with_structure.invoke(\"Tell me a joke about cats\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6bbbb698",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Together\n",
|
||||
"\n",
|
||||
"Since [TogetherAI](https://www.together.ai/) is just a drop in replacement for OpenAI, we can just use the OpenAI integration"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "9b9617e3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "90549664",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = ChatOpenAI(\n",
|
||||
" base_url=\"https://api.together.xyz/v1\",\n",
|
||||
" api_key=os.environ[\"TOGETHER_API_KEY\"],\n",
|
||||
" model=\"mistralai/Mixtral-8x7B-Instruct-v0.1\",\n",
|
||||
")\n",
|
||||
"model_with_structure = model.with_structured_output(Joke)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "01da39be",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Joke(setup='Why did the cat sit on the computer?', punchline='To keep an eye on the mouse!')"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_with_structure.invoke(\"Tell me a joke about cats\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3066b2af",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,215 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0cebf93b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Fiddler Langchain integration Quick Start Guide\n",
|
||||
"\n",
|
||||
"Fiddler is the pioneer in enterprise Generative and Predictive system ops, offering a unified platform that enables Data Science, MLOps, Risk, Compliance, Analytics, and other LOB teams to monitor, explain, analyze, and improve ML deployments at enterprise scale. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "38d746c2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Installation and Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e0151955",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# langchain langchain-community langchain-openai fiddler-client"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5662f2e5-d510-4eef-b44b-fa929e5b4ad4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Fiddler connection details "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "64fac323",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"*Before you can add information about your model with Fiddler*\n",
|
||||
"\n",
|
||||
"1. The URL you're using to connect to Fiddler\n",
|
||||
"2. Your organization ID\n",
|
||||
"3. Your authorization token\n",
|
||||
"\n",
|
||||
"These can be found by navigating to the *Settings* page of your Fiddler environment."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f6f8b73e-d350-40f0-b7a4-fb1e68a65a22",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"URL = \"\" # Your Fiddler instance URL, Make sure to include the full URL (including https://). For example: https://demo.fiddler.ai\n",
|
||||
"ORG_NAME = \"\"\n",
|
||||
"AUTH_TOKEN = \"\" # Your Fiddler instance auth token\n",
|
||||
"\n",
|
||||
"# Fiddler project and model names, used for model registration\n",
|
||||
"PROJECT_NAME = \"\"\n",
|
||||
"MODEL_NAME = \"\" # Model name in Fiddler"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0645805a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Create a fiddler callback handler instance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "13de4f9a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.callbacks.fiddler_callback import FiddlerCallbackHandler\n",
|
||||
"\n",
|
||||
"fiddler_handler = FiddlerCallbackHandler(\n",
|
||||
" url=URL,\n",
|
||||
" org=ORG_NAME,\n",
|
||||
" project=PROJECT_NAME,\n",
|
||||
" model=MODEL_NAME,\n",
|
||||
" api_key=AUTH_TOKEN,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2276368e-f1dc-46be-afe3-18796e7a66f2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example 1 : Basic Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c9de0fd1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"# Note : Make sure openai API key is set in the environment variable OPENAI_API_KEY\n",
|
||||
"llm = OpenAI(temperature=0, streaming=True, callbacks=[fiddler_handler])\n",
|
||||
"output_parser = StrOutputParser()\n",
|
||||
"\n",
|
||||
"chain = llm | output_parser\n",
|
||||
"\n",
|
||||
"# Invoke the chain. Invocation will be logged to Fiddler, and metrics automatically generated\n",
|
||||
"chain.invoke(\"How far is moon from earth?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "309bde0b-e1ce-446c-98ac-3690c26a2676",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Few more invocations\n",
|
||||
"chain.invoke(\"What is the temperature on Mars?\")\n",
|
||||
"chain.invoke(\"How much is 2 + 200000?\")\n",
|
||||
"chain.invoke(\"Which movie won the oscars this year?\")\n",
|
||||
"chain.invoke(\"Can you write me a poem about insomnia?\")\n",
|
||||
"chain.invoke(\"How are you doing today?\")\n",
|
||||
"chain.invoke(\"What is the meaning of life?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "48fa4782-c867-4510-9430-4ffa3de3b5eb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example 2 : Chain with prompt templates"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2aa2c220-8946-4844-8d3c-8f69d744d13f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import (\n",
|
||||
" ChatPromptTemplate,\n",
|
||||
" FewShotChatMessagePromptTemplate,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"examples = [\n",
|
||||
" {\"input\": \"2+2\", \"output\": \"4\"},\n",
|
||||
" {\"input\": \"2+3\", \"output\": \"5\"},\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"example_prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" (\"ai\", \"{output}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"few_shot_prompt = FewShotChatMessagePromptTemplate(\n",
|
||||
" example_prompt=example_prompt,\n",
|
||||
" examples=examples,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"final_prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You are a wondrous wizard of math.\"),\n",
|
||||
" few_shot_prompt,\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Note : Make sure openai API key is set in the environment variable OPENAI_API_KEY\n",
|
||||
"llm = OpenAI(temperature=0, streaming=True, callbacks=[fiddler_handler])\n",
|
||||
"\n",
|
||||
"chain = final_prompt | llm\n",
|
||||
"\n",
|
||||
"# Invoke the chain. Invocation will be logged to Fiddler, and metrics automatically generated\n",
|
||||
"chain.invoke({\"input\": \"What's the square of a triangle?\"})"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -242,7 +242,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks import LabelStudioCallbackHandler\n",
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage\n",
|
||||
"from langchain.schema import HumanMessage, SystemMessage\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"chat_llm = ChatOpenAI(\n",
|
||||
|
||||
@@ -53,7 +53,7 @@ Example:
|
||||
|
||||
```python
|
||||
from langchain_openai import ChatOpenAI
|
||||
from langchain_core.messages import SystemMessage, HumanMessage
|
||||
from langchain.schema import SystemMessage, HumanMessage
|
||||
from langchain.agents import OpenAIFunctionsAgent, AgentExecutor, tool
|
||||
from langchain.callbacks import LLMonitorCallbackHandler
|
||||
|
||||
|
||||
@@ -267,7 +267,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks import TrubricsCallbackHandler\n",
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage\n",
|
||||
"from langchain.schema import HumanMessage, SystemMessage\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -17,44 +17,40 @@
|
||||
"source": [
|
||||
"# ChatAnthropic\n",
|
||||
"\n",
|
||||
"This notebook covers how to get started with Anthropic chat models.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"For setup instructions, please see the Installation and Environment Setup sections of the [Anthropic Platform page](/docs/integrations/platforms/anthropic.mdx)."
|
||||
"This notebook covers how to get started with Anthropic chat models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "91be2e12",
|
||||
"metadata": {},
|
||||
"execution_count": 1,
|
||||
"id": "d4a7c55d-b235-4ca4-a579-c90cc9570da9",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-19T11:25:00.590587Z",
|
||||
"start_time": "2024-01-19T11:25:00.127293Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-anthropic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "584ed5ec",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Environment Setup\n",
|
||||
"\n",
|
||||
"We'll need to get a [Anthropic](https://console.anthropic.com/settings/keys) and set the `ANTHROPIC_API_KEY` environment variable:"
|
||||
"from langchain_community.chat_models import ChatAnthropic\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "01578ae3",
|
||||
"metadata": {},
|
||||
"execution_count": 2,
|
||||
"id": "70cf04e8-423a-4ff6-8b09-f11fb711c817",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-19T11:25:04.349676Z",
|
||||
"start_time": "2024-01-19T11:25:03.964930Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"ANTHROPIC_API_KEY\"] = getpass()"
|
||||
"chat = ChatAnthropic(temperature=0, model_name=\"claude-2\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -86,9 +82,7 @@
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=' 저는 파이썬을 좋아합니다.')"
|
||||
]
|
||||
"text/plain": "AIMessage(content=' 저는 파이썬을 좋아합니다.')"
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
@@ -96,11 +90,6 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_anthropic import ChatAnthropic\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"chat = ChatAnthropic(temperature=0, model_name=\"claude-2\")\n",
|
||||
"\n",
|
||||
"system = (\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\"\n",
|
||||
")\n",
|
||||
@@ -139,9 +128,7 @@
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\" Why don't bears like fast food? Because they can't catch it!\")"
|
||||
]
|
||||
"text/plain": "AIMessage(content=\" Why don't bears like fast food? Because they can't catch it!\")"
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
@@ -202,6 +189,154 @@
|
||||
"for chunk in chain.stream({}):\n",
|
||||
" print(chunk.content, end=\"\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3737fc8d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ChatAnthropicMessages\n",
|
||||
"\n",
|
||||
"LangChain also offers the beta Anthropic Messages endpoint through the new `langchain-anthropic` package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c253883f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-anthropic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "07c47c2a",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-19T11:25:25.288133Z",
|
||||
"start_time": "2024-01-19T11:25:24.438968Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "AIMessage(content='파이썬을 사랑합니다.')"
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_anthropic import ChatAnthropicMessages\n",
|
||||
"\n",
|
||||
"chat = ChatAnthropicMessages(model_name=\"claude-instant-1.2\")\n",
|
||||
"system = (\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\"\n",
|
||||
")\n",
|
||||
"human = \"{text}\"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n",
|
||||
"\n",
|
||||
"chain = prompt | chat\n",
|
||||
"chain.invoke(\n",
|
||||
" {\n",
|
||||
" \"input_language\": \"English\",\n",
|
||||
" \"output_language\": \"Korean\",\n",
|
||||
" \"text\": \"I love Python\",\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "19e53d75935143fd",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"ChatAnthropicMessages also requires the anthropic_api_key argument, or the ANTHROPIC_API_KEY environment variable must be set. \n",
|
||||
"\n",
|
||||
"ChatAnthropicMessages also supports async and streaming functionality:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "e20a139d30e3d333",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-19T11:25:26.012325Z",
|
||||
"start_time": "2024-01-19T11:25:25.288358Z"
|
||||
},
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "AIMessage(content='파이썬을 사랑합니다.')"
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"await chain.ainvoke(\n",
|
||||
" {\n",
|
||||
" \"input_language\": \"English\",\n",
|
||||
" \"output_language\": \"Korean\",\n",
|
||||
" \"text\": \"I love Python\",\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "6f34f1073d7e7120",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-01-19T11:25:28.323455Z",
|
||||
"start_time": "2024-01-19T11:25:26.012040Z"
|
||||
},
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Here are some of the most famous tourist attractions in Japan:\n",
|
||||
"\n",
|
||||
"- Tokyo Tower - A communication and observation tower in Tokyo modeled after the Eiffel Tower. It offers stunning views of the city.\n",
|
||||
"\n",
|
||||
"- Mount Fuji - Japan's highest and most famous mountain. It's a iconic symbol of Japan and a UNESCO World Heritage Site. \n",
|
||||
"\n",
|
||||
"- Itsukushima Shrine (Miyajima) - A shrine located on an island in Hiroshima prefecture, known for its \"floating\" torii gate that seems to float on water during high tide.\n",
|
||||
"\n",
|
||||
"- Himeji Castle - A UNESCO World Heritage Site famous for having withstood numerous battles without destruction to its intricate white walls and sloping, triangular roofs. \n",
|
||||
"\n",
|
||||
"- Kawaguchiko Station - Near Mount Fuji, this area is known for its scenic Fuji Five Lakes region. \n",
|
||||
"\n",
|
||||
"- Hiroshima Peace Memorial Park and Museum - Commemorates the world's first atomic bombing in Hiroshima on August 6, 1945. \n",
|
||||
"\n",
|
||||
"- Arashiyama Bamboo Grove - A renowned bamboo forest located in Kyoto that draws many visitors.\n",
|
||||
"\n",
|
||||
"- Kegon Falls - One of Japan's largest waterfalls"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [(\"human\", \"Give me a list of famous tourist attractions in Japan\")]\n",
|
||||
")\n",
|
||||
"chain = prompt | chat\n",
|
||||
"for chunk in chain.stream({}):\n",
|
||||
" print(chunk.content, end=\"\", flush=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
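A note on the ChatAnthropic hunk above: as stated there, `ChatAnthropicMessages` accepts the key as a constructor argument when the `ANTHROPIC_API_KEY` environment variable is not set. A minimal sketch of that form (the key value is a placeholder):

```python
from langchain_anthropic import ChatAnthropicMessages

# Pass the key explicitly; otherwise the ANTHROPIC_API_KEY environment
# variable must be set before constructing the model.
chat = ChatAnthropicMessages(
    model_name="claude-instant-1.2",
    anthropic_api_key="sk-...",  # placeholder, not a real key
)
```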
@@ -83,7 +83,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.messages import HumanMessage"
|
||||
"from langchain.schema import HumanMessage"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -109,7 +109,7 @@
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage\n",
|
||||
"from langchain.schema import HumanMessage, SystemMessage\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" SystemMessage(content=\"You are a helpful AI that shares everything you know.\"),\n",
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain_core.messages import HumanMessage\n",
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"from langchain_openai import AzureChatOpenAI"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -74,11 +74,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"from langchain_community.chat_models.azureml_endpoint import (\n",
|
||||
" AzureMLEndpointApiType,\n",
|
||||
" LlamaChatContentFormatter,\n",
|
||||
")\n",
|
||||
"from langchain_core.messages import HumanMessage"
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -105,8 +105,8 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"from langchain_community.chat_models.azureml_endpoint import LlamaContentFormatter\n",
|
||||
"from langchain_core.messages import HumanMessage\n",
|
||||
"\n",
|
||||
"chat = AzureMLChatOnlineEndpoint(\n",
|
||||
" endpoint_url=\"https://<your-endpoint>.<your_region>.inference.ml.azure.com/score\",\n",
|
||||
|
||||
@@ -29,8 +29,8 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_models import ChatBaichuan\n",
|
||||
"from langchain_core.messages import HumanMessage"
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"from langchain_community.chat_models import ChatBaichuan"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -47,8 +47,8 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_models import BedrockChat\n",
|
||||
"from langchain_core.messages import HumanMessage"
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"from langchain_community.chat_models import BedrockChat"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -68,8 +68,8 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_models import ChatDeepInfra\n",
|
||||
"from langchain_core.messages import HumanMessage"
|
||||
"from langchain.chat_models import ChatDeepInfra\n",
|
||||
"from langchain.schema import HumanMessage"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -216,7 +216,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -76,8 +76,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"from langchain_community.chat_models import ErnieBotChat\n",
|
||||
"from langchain_core.messages import HumanMessage\n",
|
||||
"\n",
|
||||
"chat = ErnieBotChat(\n",
|
||||
" ernie_client_id=\"YOUR_CLIENT_ID\", ernie_client_secret=\"YOUR_CLIENT_SECRET\"\n",
|
||||
|
||||
@@ -73,8 +73,8 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.schema import HumanMessage, SystemMessage\n",
|
||||
"from langchain_community.chat_models import ChatEverlyAI\n",
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" SystemMessage(content=\"You are a helpful AI that shares everything you know.\"),\n",
|
||||
@@ -127,8 +127,8 @@
|
||||
],
|
||||
"source": [
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
|
||||
"from langchain.schema import HumanMessage, SystemMessage\n",
|
||||
"from langchain_community.chat_models import ChatEverlyAI\n",
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" SystemMessage(content=\"You are a humorous AI that delights people.\"),\n",
|
||||
@@ -185,8 +185,8 @@
|
||||
],
|
||||
"source": [
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
|
||||
"from langchain.schema import HumanMessage, SystemMessage\n",
|
||||
"from langchain_community.chat_models import ChatEverlyAI\n",
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" SystemMessage(content=\"You are a humorous AI that delights people.\"),\n",
|
||||
|
||||
@@ -23,14 +23,6 @@
|
||||
"This example goes over how to use LangChain to interact with `ChatFireworks` models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "4a7c795e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"%pip install langchain-fireworks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
@@ -43,8 +35,10 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage\n",
|
||||
"from langchain_fireworks import ChatFireworks"
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain.schema import HumanMessage, SystemMessage\n",
|
||||
"from langchain_community.chat_models.fireworks import ChatFireworks"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -54,7 +48,7 @@
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"\n",
|
||||
"1. Make sure the `langchain-fireworks` package is installed in your environment.\n",
|
||||
"1. Make sure the `fireworks-ai` package is installed in your environment.\n",
|
||||
"2. Sign in to [Fireworks AI](http://fireworks.ai) for the an API Key to access our models, and make sure it is set as the `FIREWORKS_API_KEY` environment variable.\n",
|
||||
"3. Set up your model using a model id. If the model is not set, the default model is fireworks-llama-v2-7b-chat. See the full, most up-to-date model list on [app.fireworks.ai](https://app.fireworks.ai)."
|
||||
]
|
||||
@@ -73,7 +67,7 @@
|
||||
" os.environ[\"FIREWORKS_API_KEY\"] = getpass.getpass(\"Fireworks API Key:\")\n",
|
||||
"\n",
|
||||
"# Initialize a Fireworks chat model\n",
|
||||
"chat = ChatFireworks(model=\"accounts/fireworks/models/mixtral-8x7b-instruct\")"
|
||||
"chat = ChatFireworks(model=\"accounts/fireworks/models/llama-v2-13b-chat\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -88,17 +82,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"id": "72340871-ae2f-415f-b399-0777d32dc379",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"Hello! I'm an AI language model, a helpful assistant designed to chat and assist you with any questions or information you might need. I'm here to make your experience as smooth and enjoyable as possible. How can I assist you today?\")"
|
||||
"AIMessage(content=\"Hello! My name is LLaMA, I'm a large language model trained by a team of researcher at Meta AI. My primary function is to assist and converse with users like you, answering questions and engaging in discussion to the best of my ability. I'm here to help and provide information on a wide range of topics, so feel free to ask me anything!\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -108,22 +102,22 @@
|
||||
"system_message = SystemMessage(content=\"You are to chat with the user.\")\n",
|
||||
"human_message = HumanMessage(content=\"Who are you?\")\n",
|
||||
"\n",
|
||||
"chat.invoke([system_message, human_message])"
|
||||
"chat([system_message, human_message])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "68c6b1fa-2ff7-4a63-8d88-3cec302180b8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"I'm an AI and do not have the ability to experience the weather firsthand. However,\")"
|
||||
"AIMessage(content=\"Oh hello there! *giggle* It's such a beautiful day today, isn\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -131,70 +125,200 @@
|
||||
"source": [
|
||||
"# Setting additional parameters: temperature, max_tokens, top_p\n",
|
||||
"chat = ChatFireworks(\n",
|
||||
" model=\"accounts/fireworks/models/mixtral-8x7b-instruct\",\n",
|
||||
" temperature=1,\n",
|
||||
" max_tokens=20,\n",
|
||||
" model=\"accounts/fireworks/models/llama-v2-13b-chat\",\n",
|
||||
" model_kwargs={\"temperature\": 1, \"max_tokens\": 20, \"top_p\": 1},\n",
|
||||
")\n",
|
||||
"system_message = SystemMessage(content=\"You are to chat with the user.\")\n",
|
||||
"human_message = HumanMessage(content=\"How's the weather today?\")\n",
|
||||
"chat.invoke([system_message, human_message])"
|
||||
"chat([system_message, human_message])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8c44cb36",
|
||||
"id": "d93aa186-39cf-4e1a-aa32-01ed31d43bc8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Tool Calling\n",
|
||||
"# Simple Chat Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "28763fbc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can use chat models on fireworks, with system prompts and memory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "cbe29efc-37c3-4c83-8b84-b8bba1a1e589",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain_community.chat_models import ChatFireworks\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"\n",
|
||||
"Fireworks offers the [`FireFunction-v1` tool calling model](https://fireworks.ai/blog/firefunction-v1-gpt-4-level-function-calling). You can use it for structured output and function calling use cases:"
|
||||
"llm = ChatFireworks(\n",
|
||||
" model=\"accounts/fireworks/models/llama-v2-13b-chat\",\n",
|
||||
" model_kwargs={\"temperature\": 0, \"max_tokens\": 64, \"top_p\": 1.0},\n",
|
||||
")\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You are a helpful chatbot that speaks like a pirate.\"),\n",
|
||||
" MessagesPlaceholder(variable_name=\"history\"),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "02991e05-a38e-47d4-9ab3-7e630a8ead55",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Initially, there is no chat memory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "e2fd186f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'history': []}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"memory = ConversationBufferMemory(return_messages=True)\n",
|
||||
"memory.load_memory_variables({})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bee461da",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create a simple chain with memory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "86972e54",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = (\n",
|
||||
" RunnablePassthrough.assign(\n",
|
||||
" history=memory.load_memory_variables | (lambda x: x[\"history\"])\n",
|
||||
" )\n",
|
||||
" | prompt\n",
|
||||
" | llm.bind(stop=[\"\\n\\n\"])\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f48cb142",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run the chain with a simple question, expecting an answer aligned with the system message provided."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "db3ad5b1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"Ahoy there, me hearty! Yer a fine lookin' swashbuckler, I can see that! *adjusts eye patch* What be bringin' ye to these waters? Are ye here to plunder some booty or just to enjoy the sea breeze?\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"inputs = {\"input\": \"hi im bob\"}\n",
|
||||
"response = chain.invoke(inputs)\n",
|
||||
"response"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "338f4bae",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Save the memory context, then read it back to inspect contents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "ee2db682",
|
||||
"id": "257eec01",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'function': {'arguments': '{\"name\": \"Erick\", \"age\": 27}',\n",
|
||||
" 'name': 'ExtractFields'},\n",
|
||||
" 'id': 'call_J0WYP2TLenaFw3UeVU0UnWqx',\n",
|
||||
" 'index': 0,\n",
|
||||
" 'type': 'function'}\n"
|
||||
]
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'history': [HumanMessage(content='hi im bob', additional_kwargs={}, example=False),\n",
|
||||
" AIMessage(content=\"Ahoy there, me hearty! Yer a fine lookin' swashbuckler, I can see that! *adjusts eye patch* What be bringin' ye to these waters? Are ye here to plunder some booty or just to enjoy the sea breeze?\", additional_kwargs={}, example=False)]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from pprint import pprint\n",
|
||||
"\n",
|
||||
"from langchain_core.pydantic_v1 import BaseModel\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class ExtractFields(BaseModel):\n",
|
||||
" name: str\n",
|
||||
" age: int\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"chat = ChatFireworks(\n",
|
||||
" model=\"accounts/fireworks/models/firefunction-v1\",\n",
|
||||
").bind_tools([ExtractFields])\n",
|
||||
"\n",
|
||||
"result = chat.invoke(\"I am a 27 year old named Erick\")\n",
|
||||
"\n",
|
||||
"pprint(result.additional_kwargs[\"tool_calls\"][0])"
|
||||
"memory.save_context(inputs, {\"output\": response.content})\n",
|
||||
"memory.load_memory_variables({})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "08441347",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now as another question that requires use of the memory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2321a4e6",
|
||||
"execution_count": 12,
|
||||
"id": "7f5f2820",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"Arrrr, ye be askin' about yer name, eh? Well, me matey, I be knowin' ye as Bob, the scurvy dog! *winks* But if ye want me to call ye somethin' else, just let me know, and I\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"inputs = {\"input\": \"whats my name\"}\n",
|
||||
"chain.invoke(inputs)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -213,7 +337,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -75,7 +75,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage\n",
|
||||
"from langchain.schema import HumanMessage, SystemMessage\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" SystemMessage(\n",
|
||||
|
||||
@@ -70,9 +70,9 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"from langchain_community.chat_models import GPTRouter\n",
|
||||
"from langchain_community.chat_models.gpt_router import GPTRouterModel\n",
|
||||
"from langchain_core.messages import HumanMessage"
|
||||
"from langchain_community.chat_models.gpt_router import GPTRouterModel"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,179 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Groq\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Groq\n",
|
||||
"\n",
|
||||
"Install the langchain-groq package if not already installed:\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install langchain-groq\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Request an [API key](https://wow.groq.com) and set it as an environment variable:\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"export GROQ_API_KEY=<YOUR API KEY>\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Alternatively, you may configure the API key when you initialize ChatGroq."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Import the ChatGroq class and initialize it with a model:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_groq import ChatGroq"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chat = ChatGroq(temperature=0, model_name=\"mixtral-8x7b-32768\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can view the available models [here](https://console.groq.com/docs/models).\n",
|
||||
"\n",
|
||||
"If you do not want to set your API key in the environment, you can pass it directly to the client:\n",
|
||||
"```python\n",
|
||||
"chat = ChatGroq(temperature=0, groq_api_key=\"YOUR_API_KEY\", model_name=\"mixtral-8x7b-32768\")\n",
|
||||
"\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Write a prompt and invoke ChatGroq to create completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Low Latency Large Language Models (LLMs) are a type of artificial intelligence model that can understand and generate human-like text. The term \"low latency\" refers to the model\\'s ability to process and respond to inputs quickly, with minimal delay.\\n\\nThe importance of low latency in LLMs can be explained through the following points:\\n\\n1. Improved user experience: In real-time applications such as chatbots, virtual assistants, and interactive games, users expect quick and responsive interactions. Low latency LLMs can provide instant feedback and responses, creating a more seamless and engaging user experience.\\n\\n2. Better decision-making: In time-sensitive scenarios, such as financial trading or autonomous vehicles, low latency LLMs can quickly process and analyze vast amounts of data, enabling faster and more informed decision-making.\\n\\n3. Enhanced accessibility: For individuals with disabilities, low latency LLMs can help create more responsive and inclusive interfaces, such as voice-controlled assistants or real-time captioning systems.\\n\\n4. Competitive advantage: In industries where real-time data analysis and decision-making are crucial, low latency LLMs can provide a competitive edge by enabling businesses to react more quickly to market changes, customer needs, or emerging opportunities.\\n\\n5. Scalability: Low latency LLMs can efficiently handle a higher volume of requests and interactions, making them more suitable for large-scale applications and services.\\n\\nIn summary, low latency is an essential aspect of LLMs, as it significantly impacts user experience, decision-making, accessibility, competitiveness, and scalability. By minimizing delays and response times, low latency LLMs can unlock new possibilities and applications for artificial intelligence in various industries and scenarios.')"
|
||||
]
|
||||
},
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"system = \"You are a helpful assistant.\"\n",
|
||||
"human = \"{text}\"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n",
|
||||
"\n",
|
||||
"chain = prompt | chat\n",
|
||||
"chain.invoke({\"text\": \"Explain the importance of low latency LLMs.\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## `ChatGroq` also supports async and streaming functionality:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"There's a star that shines up in the sky,\\nThe Sun, that makes the day bright and spry.\\nIt rises and sets,\\nIn a daily, predictable bet,\\nGiving life to the world, oh my!\")"
|
||||
]
|
||||
},
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat = ChatGroq(temperature=0, model_name=\"mixtral-8x7b-32768\")\n",
|
||||
"prompt = ChatPromptTemplate.from_messages([(\"human\", \"Write a Limerick about {topic}\")])\n",
|
||||
"chain = prompt | chat\n",
|
||||
"await chain.ainvoke({\"topic\": \"The Sun\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The moon's gentle glow\n",
|
||||
"Illuminates the night sky\n",
|
||||
"Peaceful and serene"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat = ChatGroq(temperature=0, model_name=\"llama2-70b-4096\")\n",
|
||||
"prompt = ChatPromptTemplate.from_messages([(\"human\", \"Write a haiku about {topic}\")])\n",
|
||||
"chain = prompt | chat\n",
|
||||
"for chunk in chain.stream({\"topic\": \"The Moon\"}):\n",
|
||||
" print(chunk.content, end=\"\", flush=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -24,8 +24,8 @@
|
||||
" HumanMessagePromptTemplate,\n",
|
||||
" SystemMessagePromptTemplate,\n",
|
||||
")\n",
|
||||
"from langchain_community.chat_models import JinaChat\n",
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage"
|
||||
"from langchain.schema import HumanMessage, SystemMessage\n",
|
||||
"from langchain_community.chat_models import JinaChat"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,654 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Kinetica\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Kinetica SqlAssist LLM Demo\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how to use Kinetica to transform natural language into SQL\n",
|
||||
"and simplify the process of data retrieval. This demo is intended to show the mechanics\n",
|
||||
"of creating and using a chain as opposed to the capabilities of the LLM.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"With the Kinetica LLM workflow you create an LLM context in the database that provides\n",
|
||||
"information needed for infefencing that includes tables, annotations, rules, and\n",
|
||||
"samples. Invoking ``ChatKinetica.load_messages_from_context()`` will retrieve the\n",
|
||||
"context information from the database so that it can be used to create a chat prompt.\n",
|
||||
"\n",
|
||||
"The chat prompt consists of a ``SystemMessage`` and pairs of\n",
|
||||
"``HumanMessage``/``AIMessage`` that contain the samples which are question/SQL\n",
|
||||
"pairs. You can append pairs samples to this list but it is not intended to\n",
|
||||
"facilitate a typical natural language conversation.\n",
|
||||
"\n",
|
||||
"When you create a chain from the chat prompt and execute it, the Kinetica LLM will\n",
|
||||
"generate SQL from the input. Optionally you can use ``KineticaSqlOutputParser`` to\n",
|
||||
"execute the SQL and return the result as a dataframe.\n",
|
||||
"\n",
|
||||
"Currently, 2 LLM's are supported for SQL generation: \n",
|
||||
"\n",
|
||||
"1. **Kinetica SQL-GPT**: This LLM is based on OpenAI ChatGPT API.\n",
|
||||
"2. **Kinetica SqlAssist**: This LLM is purpose built to integrate with the Kinetica\n",
|
||||
" database and it can run in a secure customer premise.\n",
|
||||
"\n",
|
||||
"For this demo we will be using **SqlAssist**. See the [Kinetica Documentation\n",
|
||||
"site](https://docs.kinetica.com/7.1/sql-gpt/concepts/) for more information.\n",
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"To get started you will need a Kinetica DB instance. If you don't have one you can\n",
|
||||
"obtain a [free development instance](https://cloud.kinetica.com/trynow).\n",
|
||||
"\n",
|
||||
"You will need to install the following packages..."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Install Langchain community and core packages\n",
|
||||
"%pip install --upgrade --quiet langchain-core langchain-community\n",
|
||||
"\n",
|
||||
"# Install Kineitca DB connection package\n",
|
||||
"%pip install --upgrade --quiet gpudb typeguard\n",
|
||||
"\n",
|
||||
"# Install packages needed for this tutorial\n",
|
||||
"%pip install --upgrade --quiet faker"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Database Connection\n",
|
||||
"\n",
|
||||
"You must set the database connection in the following environment variables. If you are using a virtual environment you can set them in the `.env` file of the project:\n",
|
||||
"* `KINETICA_URL`: Database connection URL\n",
|
||||
"* `KINETICA_USER`: Database user\n",
|
||||
"* `KINETICA_PASSWD`: Secure password.\n",
|
||||
"\n",
|
||||
"If you can create an instance of `KineticaChatLLM` then you are successfully connected."
|
||||
]
|
||||
},
|
||||
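{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal sketch of setting the connection variables in-process instead\n",
"# of using a .env file. All values below are placeholders, not real\n",
"# endpoints or credentials.\n",
"import os\n",
"\n",
"os.environ[\"KINETICA_URL\"] = \"https://your-instance.kinetica.com\"\n",
"os.environ[\"KINETICA_USER\"] = \"your_user\"\n",
"os.environ[\"KINETICA_PASSWD\"] = \"your_password\""
]
},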
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_models.kinetica import ChatKinetica\n",
|
||||
"\n",
|
||||
"kinetica_llm = ChatKinetica()\n",
|
||||
"\n",
|
||||
"# Test table we will create\n",
|
||||
"table_name = \"demo.user_profiles\"\n",
|
||||
"\n",
|
||||
"# LLM Context we will create\n",
|
||||
"kinetica_ctx = \"demo.test_llm_ctx\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create test data\n",
|
||||
"\n",
|
||||
"Before we can generate SQL we will need to create a Kinetica table and an LLM context that can inference the table.\n",
|
||||
"\n",
|
||||
"### Create some fake user profiles\n",
|
||||
"\n",
|
||||
"We will use the `faker` package to create a dataframe with 100 fake profiles."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>username</th>\n",
|
||||
" <th>name</th>\n",
|
||||
" <th>sex</th>\n",
|
||||
" <th>address</th>\n",
|
||||
" <th>mail</th>\n",
|
||||
" <th>birthdate</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>id</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>eduardo69</td>\n",
|
||||
" <td>Haley Beck</td>\n",
|
||||
" <td>F</td>\n",
|
||||
" <td>59836 Carla Causeway Suite 939\\nPort Eugene, I...</td>\n",
|
||||
" <td>meltondenise@yahoo.com</td>\n",
|
||||
" <td>1997-09-09</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>lbarrera</td>\n",
|
||||
" <td>Joshua Stephens</td>\n",
|
||||
" <td>M</td>\n",
|
||||
" <td>3108 Christina Forges\\nPort Timothychester, KY...</td>\n",
|
||||
" <td>erica80@hotmail.com</td>\n",
|
||||
" <td>1924-05-05</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>bburton</td>\n",
|
||||
" <td>Paula Kaiser</td>\n",
|
||||
" <td>F</td>\n",
|
||||
" <td>Unit 7405 Box 3052\\nDPO AE 09858</td>\n",
|
||||
" <td>timothypotts@gmail.com</td>\n",
|
||||
" <td>1933-09-06</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>melissa49</td>\n",
|
||||
" <td>Wendy Reese</td>\n",
|
||||
" <td>F</td>\n",
|
||||
" <td>6408 Christopher Hill Apt. 459\\nNew Benjamin, ...</td>\n",
|
||||
" <td>dadams@gmail.com</td>\n",
|
||||
" <td>1988-07-28</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>melissacarter</td>\n",
|
||||
" <td>Manuel Rios</td>\n",
|
||||
" <td>M</td>\n",
|
||||
" <td>2241 Bell Gardens Suite 723\\nScottside, CA 38463</td>\n",
|
||||
" <td>williamayala@gmail.com</td>\n",
|
||||
" <td>1930-12-19</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" username name sex \\\n",
|
||||
"id \n",
|
||||
"0 eduardo69 Haley Beck F \n",
|
||||
"1 lbarrera Joshua Stephens M \n",
|
||||
"2 bburton Paula Kaiser F \n",
|
||||
"3 melissa49 Wendy Reese F \n",
|
||||
"4 melissacarter Manuel Rios M \n",
|
||||
"\n",
|
||||
" address mail \\\n",
|
||||
"id \n",
|
||||
"0 59836 Carla Causeway Suite 939\\nPort Eugene, I... meltondenise@yahoo.com \n",
|
||||
"1 3108 Christina Forges\\nPort Timothychester, KY... erica80@hotmail.com \n",
|
||||
"2 Unit 7405 Box 3052\\nDPO AE 09858 timothypotts@gmail.com \n",
|
||||
"3 6408 Christopher Hill Apt. 459\\nNew Benjamin, ... dadams@gmail.com \n",
|
||||
"4 2241 Bell Gardens Suite 723\\nScottside, CA 38463 williamayala@gmail.com \n",
|
||||
"\n",
|
||||
" birthdate \n",
|
||||
"id \n",
|
||||
"0 1997-09-09 \n",
|
||||
"1 1924-05-05 \n",
|
||||
"2 1933-09-06 \n",
|
||||
"3 1988-07-28 \n",
|
||||
"4 1930-12-19 "
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from typing import Generator\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"from faker import Faker\n",
|
||||
"\n",
|
||||
"Faker.seed(5467)\n",
|
||||
"faker = Faker(locale=\"en-US\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def profile_gen(count: int) -> Generator:\n",
|
||||
" for id in range(0, count):\n",
|
||||
" rec = dict(id=id, **faker.simple_profile())\n",
|
||||
" rec[\"birthdate\"] = pd.Timestamp(rec[\"birthdate\"])\n",
|
||||
" yield rec\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"load_df = pd.DataFrame.from_records(data=profile_gen(100), index=\"id\")\n",
|
||||
"load_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a Kinetica table from the Dataframe"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>name</th>\n",
|
||||
" <th>type</th>\n",
|
||||
" <th>properties</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>username</td>\n",
|
||||
" <td>string</td>\n",
|
||||
" <td>[char32]</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>name</td>\n",
|
||||
" <td>string</td>\n",
|
||||
" <td>[char32]</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>sex</td>\n",
|
||||
" <td>string</td>\n",
|
||||
" <td>[char1]</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>address</td>\n",
|
||||
" <td>string</td>\n",
|
||||
" <td>[char64]</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>mail</td>\n",
|
||||
" <td>string</td>\n",
|
||||
" <td>[char32]</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>5</th>\n",
|
||||
" <td>birthdate</td>\n",
|
||||
" <td>long</td>\n",
|
||||
" <td>[timestamp]</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" name type properties\n",
|
||||
"0 username string [char32]\n",
|
||||
"1 name string [char32]\n",
|
||||
"2 sex string [char1]\n",
|
||||
"3 address string [char64]\n",
|
||||
"4 mail string [char32]\n",
|
||||
"5 birthdate long [timestamp]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from gpudb import GPUdbTable\n",
|
||||
"\n",
|
||||
"gpudb_table = GPUdbTable.from_df(\n",
|
||||
" load_df,\n",
|
||||
" db=kinetica_llm.kdbc,\n",
|
||||
" table_name=table_name,\n",
|
||||
" clear_table=True,\n",
|
||||
" load_data=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# See the Kinetica column types\n",
|
||||
"gpudb_table.type_as_df()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create the LLM context\n",
|
||||
"\n",
|
||||
"You can create an LLM Context using the Kinetica Workbench UI or you can manually create it with the `CREATE OR REPLACE CONTEXT` syntax. \n",
|
||||
"\n",
|
||||
"Here we create a context from the SQL syntax referencing the table we created."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'status': 'OK',\n",
|
||||
" 'message': '',\n",
|
||||
" 'data_type': 'execute_sql_response',\n",
|
||||
" 'response_time': 0.0148}"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# create an LLM context for the table.\n",
|
||||
"\n",
|
||||
"from gpudb import GPUdbException\n",
|
||||
"\n",
|
||||
"sql = f\"\"\"\n",
|
||||
"CREATE OR REPLACE CONTEXT {kinetica_ctx}\n",
|
||||
"(\n",
|
||||
" TABLE = demo.test_profiles\n",
|
||||
" COMMENT = 'Contains user profiles.'\n",
|
||||
"),\n",
|
||||
"(\n",
|
||||
" SAMPLES = (\n",
|
||||
" 'How many male users are there?' = \n",
|
||||
" 'select count(1) as num_users\n",
|
||||
" from demo.test_profiles\n",
|
||||
" where sex = ''M'';')\n",
|
||||
")\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def _check_error(response: dict) -> None:\n",
|
||||
" status = response[\"status_info\"][\"status\"]\n",
|
||||
" if status != \"OK\":\n",
|
||||
" message = response[\"status_info\"][\"message\"]\n",
|
||||
" raise GPUdbException(\"[%s]: %s\" % (status, message))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"response = kinetica_llm.kdbc.execute_sql(sql)\n",
|
||||
"_check_error(response)\n",
|
||||
"response[\"status_info\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use Langchain for inferencing\n",
|
||||
"\n",
|
||||
"In the example below we will create a chain from the previously created table and LLM context. This chain will generate SQL and return the resulting data as a dataframe.\n",
|
||||
"\n",
|
||||
"### Load the chat prompt from the Kinetica DB\n",
|
||||
"\n",
|
||||
"The `load_messages_from_context()` function will retrieve a context from the DB and convert it into a list of chat messages that we use to create a ``ChatPromptTemplate``."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"================================\u001b[1m System Message \u001b[0m================================\n",
|
||||
"\n",
|
||||
"CREATE TABLE demo.test_profiles AS\n",
|
||||
"(\n",
|
||||
" username VARCHAR (32) NOT NULL,\n",
|
||||
" name VARCHAR (32) NOT NULL,\n",
|
||||
" sex VARCHAR (1) NOT NULL,\n",
|
||||
" address VARCHAR (64) NOT NULL,\n",
|
||||
" mail VARCHAR (32) NOT NULL,\n",
|
||||
" birthdate TIMESTAMP NOT NULL\n",
|
||||
");\n",
|
||||
"COMMENT ON TABLE demo.test_profiles IS 'Contains user profiles.';\n",
|
||||
"\n",
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"How many male users are there?\n",
|
||||
"\n",
|
||||
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
|
||||
"\n",
|
||||
"select count(1) as num_users\n",
|
||||
" from demo.test_profiles\n",
|
||||
" where sex = 'M';\n",
|
||||
"\n",
|
||||
"================================\u001b[1m Human Message \u001b[0m=================================\n",
|
||||
"\n",
|
||||
"\u001b[33;1m\u001b[1;3m{input}\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"# load the context from the database\n",
|
||||
"ctx_messages = kinetica_llm.load_messages_from_context(kinetica_ctx)\n",
|
||||
"\n",
|
||||
"# Add the input prompt. This is where input question will be substituted.\n",
|
||||
"ctx_messages.append((\"human\", \"{input}\"))\n",
|
||||
"\n",
|
||||
"# Create the prompt template.\n",
|
||||
"prompt_template = ChatPromptTemplate.from_messages(ctx_messages)\n",
|
||||
"prompt_template.pretty_print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create the chain\n",
|
||||
"\n",
|
||||
"The last element of this chain is `KineticaSqlOutputParser` that will execute the SQL and return a dataframe. This is optional and if we left it out then only SQL would be returned."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_models.kinetica import (\n",
|
||||
" KineticaSqlOutputParser,\n",
|
||||
" KineticaSqlResponse,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt_template | kinetica_llm | KineticaSqlOutputParser(kdbc=kinetica_llm.kdbc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate the SQL\n",
|
||||
"\n",
|
||||
"The chain we created will take a question as input and return a ``KineticaSqlResponse`` containing the generated SQL and data. The question must be relevant to the to LLM context we used to create the prompt."
|
||||
]
|
||||
},
|
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SQL: SELECT username, name\n",
"     FROM demo.test_profiles\n",
"     WHERE sex = 'F'\n",
"     ORDER BY username;\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>username</th>\n",
"      <th>name</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>alexander40</td>\n",
"      <td>Tina Ramirez</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>bburton</td>\n",
"      <td>Paula Kaiser</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>brian12</td>\n",
"      <td>Stefanie Williams</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>brownanna</td>\n",
"      <td>Jennifer Rowe</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>4</th>\n",
"      <td>carl19</td>\n",
"      <td>Amanda Potts</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"      username               name\n",
"0  alexander40       Tina Ramirez\n",
"1      bburton       Paula Kaiser\n",
"2      brian12  Stefanie Williams\n",
"3    brownanna      Jennifer Rowe\n",
"4       carl19       Amanda Potts"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Here you must ask a question relevant to the LLM context provided in the prompt template.\n",
"response: KineticaSqlResponse = chain.invoke(\n",
"    {\"input\": \"What are the female users ordered by username?\"}\n",
")\n",
"\n",
"print(f\"SQL: {response.sql}\")\n",
"response.dataframe.head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "langchain",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
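The "Create the chain" note above says that leaving `KineticaSqlOutputParser` off the chain yields only the generated SQL. A minimal sketch of that variant, reusing `prompt_template` and `kinetica_llm` from the cells above; the chat model returns a message, so the SQL is read off `.content`. This is an illustration, not a cell from the notebook:

```python
# SQL-only variant: the output parser is omitted, so no query is executed.
sql_only_chain = prompt_template | kinetica_llm

message = sql_only_chain.invoke({"input": "What are the female users ordered by username?"})
print(message.content)  # the generated SQL string; no dataframe is produced
```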
@@ -40,8 +40,8 @@
},
"outputs": [],
"source": [
"from langchain_community.chat_models import ChatKonko\n",
"from langchain_core.messages import HumanMessage, SystemMessage"
"from langchain.schema import HumanMessage, SystemMessage\n",
"from langchain_community.chat_models import ChatKonko"
]
},
{

@@ -32,8 +32,8 @@
},
"outputs": [],
"source": [
"from langchain_community.chat_models import ChatLiteLLM\n",
"from langchain_core.messages import HumanMessage"
"from langchain.schema import HumanMessage\n",
"from langchain_community.chat_models import ChatLiteLLM"
]
},
{

@@ -38,8 +38,8 @@
},
"outputs": [],
"source": [
"from langchain.schema import HumanMessage\n",
"from langchain_community.chat_models import ChatLiteLLMRouter\n",
"from langchain_core.messages import HumanMessage\n",
"from litellm import Router"
]
},

@@ -54,7 +54,7 @@
"    HumanMessagePromptTemplate,\n",
"    MessagesPlaceholder,\n",
")\n",
"from langchain_core.messages import SystemMessage\n",
"from langchain.schema import SystemMessage\n",
"\n",
"template_messages = [\n",
"    SystemMessage(content=\"You are a helpful assistant.\"),\n",

@@ -39,8 +39,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.chat_models import MiniMaxChat\n",
"from langchain_core.messages import HumanMessage"
"from langchain.schema import HumanMessage\n",
"from langchain_community.chat_models import MiniMaxChat"
]
},
{

@@ -278,7 +278,7 @@
}
],
"source": [
"from langchain_core.messages import HumanMessage\n",
"from langchain.schema import HumanMessage\n",
"\n",
"messages = [\n",
"    HumanMessage(\n",
@@ -313,8 +313,8 @@
"source": [
"import json\n",
"\n",
"from langchain.schema import HumanMessage\n",
"from langchain_community.chat_models import ChatOllama\n",
"from langchain_core.messages import HumanMessage\n",
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
@@ -463,8 +463,8 @@
}
],
"source": [
"from langchain.schema import HumanMessage\n",
"from langchain_community.chat_models import ChatOllama\n",
"from langchain_core.messages import HumanMessage\n",
"\n",
"llm = ChatOllama(model=\"bakllava\", temperature=0)\n",
"\n",

@@ -102,7 +102,7 @@
}
],
"source": [
"from langchain_core.messages import HumanMessage\n",
"from langchain.schema import HumanMessage\n",
"\n",
"model.invoke(\"what is the weather in Boston?\")"
]

@@ -34,7 +34,7 @@
"    HumanMessagePromptTemplate,\n",
"    SystemMessagePromptTemplate,\n",
")\n",
"from langchain_core.messages import HumanMessage, SystemMessage\n",
"from langchain.schema import HumanMessage, SystemMessage\n",
"from langchain_openai import ChatOpenAI"
]
},

@@ -62,8 +62,8 @@
"source": [
"import os\n",
"\n",
"from langchain_community.chat_models import PromptLayerChatOpenAI\n",
"from langchain_core.messages import HumanMessage"
"from langchain.schema import HumanMessage\n",
"from langchain_community.chat_models import PromptLayerChatOpenAI"
]
},
{

@@ -30,8 +30,8 @@
"outputs": [],
"source": [
"\"\"\"For basic init and call\"\"\"\n",
"from langchain_community.chat_models import ChatSparkLLM\n",
"from langchain_core.messages import HumanMessage\n",
"from langchain.chat_models import ChatSparkLLM\n",
"from langchain.schema import HumanMessage\n",
"\n",
"chat = ChatSparkLLM(\n",
"    spark_app_id=\"<app_id>\", spark_api_key=\"<api_key>\", spark_api_secret=\"<api_secret>\"\n",

@@ -36,8 +36,8 @@
},
"outputs": [],
"source": [
"from langchain_community.chat_models import ChatHunyuan\n",
"from langchain_core.messages import HumanMessage"
"from langchain.schema import HumanMessage\n",
"from langchain_community.chat_models import ChatHunyuan"
]
},
{

@@ -100,8 +100,8 @@
}
],
"source": [
"from langchain.schema import HumanMessage\n",
"from langchain_community.chat_models.tongyi import ChatTongyi\n",
"from langchain_core.messages import HumanMessage\n",
"\n",
"chatLLM = ChatTongyi(\n",
"    streaming=True,\n",
@@ -128,7 +128,7 @@
}
],
"source": [
"from langchain_core.messages import HumanMessage, SystemMessage\n",
"from langchain.schema import HumanMessage, SystemMessage\n",
"\n",
"messages = [\n",
"    SystemMessage(\n",

@@ -36,7 +36,7 @@
"    HumanMessagePromptTemplate,\n",
"    SystemMessagePromptTemplate,\n",
")\n",
"from langchain_core.messages import HumanMessage, SystemMessage\n",
"from langchain.schema import HumanMessage, SystemMessage\n",
"from langchain_openai import ChatOpenAI"
]
},

@@ -48,8 +48,8 @@
},
"outputs": [],
"source": [
"from langchain_community.chat_models import VolcEngineMaasChat\n",
"from langchain_core.messages import HumanMessage"
"from langchain.schema import HumanMessage\n",
"from langchain_community.chat_models import VolcEngineMaasChat"
]
},
{

@@ -58,8 +58,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.chat_models import ChatYandexGPT\n",
"from langchain_core.messages import HumanMessage, SystemMessage"
"from langchain.schema import HumanMessage, SystemMessage\n",
"from langchain_community.chat_models import ChatYandexGPT"
]
},
{

@@ -79,8 +79,8 @@
"import re\n",
"from typing import Iterator, List\n",
"\n",
"from langchain.schema import BaseMessage, HumanMessage\n",
"from langchain_community.chat_loaders import base as chat_loaders\n",
"from langchain_core.messages import BaseMessage, HumanMessage\n",
"\n",
"logger = logging.getLogger()\n",
"\n",

@@ -22,7 +22,7 @@
"import json\n",
"\n",
"from langchain.adapters.openai import convert_message_to_dict\n",
"from langchain_core.messages import AIMessage"
"from langchain.schema import AIMessage"
]
},
{

@@ -78,8 +78,8 @@
"import re\n",
"from typing import Iterator, List\n",
"\n",
"from langchain.schema import BaseMessage, HumanMessage\n",
"from langchain_community.chat_loaders import base as chat_loaders\n",
"from langchain_core.messages import BaseMessage, HumanMessage\n",
"\n",
"logger = logging.getLogger()\n",
"\n",
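Every hunk above follows the same pattern: this branch swaps message imports from `langchain_core.messages` back to `langchain.schema` (and, in the SparkLLM case, `ChatSparkLLM` from `langchain_community.chat_models` back to `langchain.chat_models`). For reference, a minimal sketch of the two spellings; `langchain.schema` re-exports the same classes, and the `langchain_core` form is the canonical one on current releases:

```python
# Canonical imports (the lines these hunks remove):
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage

# Legacy aliases (the lines these hunks reinstate); both resolve to the same classes:
from langchain.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage
```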
@@ -1,292 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "1f3a5ebf",
"metadata": {},
"source": [
"# AirbyteLoader"
]
},
{
"cell_type": "markdown",
"id": "35ac77b1-449b-44f7-b8f3-3494d55c286e",
"metadata": {},
"source": [
">[Airbyte](https://github.com/airbytehq/airbyte) is a data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes. It has the largest catalog of ELT connectors to data warehouses and databases.\n",
"\n",
"This covers how to load any source from Airbyte into LangChain documents.\n",
"\n",
"## Installation\n",
"\n",
"In order to use `AirbyteLoader` you need to install the `langchain-airbyte` integration package."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "180c8b74",
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain-airbyte"
]
},
{
"cell_type": "markdown",
"id": "3dd92c62",
"metadata": {},
"source": [
"## Loading Documents\n",
"\n",
"By default, the `AirbyteLoader` will load any structured data from a stream and output YAML-formatted documents."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "721d9316",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"```yaml\n",
"academic_degree: PhD\n",
"address:\n",
"  city: Lauderdale Lakes\n",
"  country_code: FI\n",
"  postal_code: '75466'\n",
"  province: New Jersey\n",
"  state: Hawaii\n",
"  street_name: Stoneyford\n",
"  street_number: '1112'\n",
"age: 44\n",
"blood_type: \"O\\u2212\"\n",
"created_at: '2004-04-02T13:05:27+00:00'\n",
"email: bread2099+1@outlook.com\n",
"gender: Fluid\n",
"height: '1.62'\n",
"id: 1\n",
"language: Belarusian\n",
"name: Moses\n",
"nationality: Dutch\n",
"occupation: Track Worker\n",
"telephone: 1-467-194-2318\n",
"title: M.Sc.Tech.\n",
"updated_at: '2024-02-27T16:41:01+00:00'\n",
"weight: 6\n"
]
}
],
"source": [
"from langchain_airbyte import AirbyteLoader\n",
"\n",
"loader = AirbyteLoader(\n",
"    source=\"source-faker\",\n",
"    stream=\"users\",\n",
"    config={\"count\": 10},\n",
")\n",
"docs = loader.load()\n",
"print(docs[0].page_content[:500])"
]
},
{
"cell_type": "markdown",
"id": "fca024cb",
"metadata": {
"scrolled": true
},
"source": [
"You can also specify a custom prompt template for formatting documents:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "9fa002a5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"My name is Verdie and I am 1.73 meters tall.\n"
]
}
],
"source": [
"from langchain_core.prompts import PromptTemplate\n",
"\n",
"loader_templated = AirbyteLoader(\n",
"    source=\"source-faker\",\n",
"    stream=\"users\",\n",
"    config={\"count\": 10},\n",
"    template=PromptTemplate.from_template(\n",
"        \"My name is {name} and I am {height} meters tall.\"\n",
"    ),\n",
")\n",
"docs_templated = loader_templated.load()\n",
"print(docs_templated[0].page_content)"
]
},
{
"cell_type": "markdown",
"id": "d3e6d887",
"metadata": {},
"source": [
"## Lazy Loading Documents\n",
"\n",
"One of the powerful features of `AirbyteLoader` is its ability to load large documents from upstream sources. When working with large datasets, the default `.load()` behavior can be slow and memory-intensive. To avoid this, you can use the `.lazy_load()` method to load documents in a more memory-efficient manner."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "684b9187",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Just calling lazy load is quick! This took 0.0001 seconds\n"
]
}
],
"source": [
"import time\n",
"\n",
"loader = AirbyteLoader(\n",
"    source=\"source-faker\",\n",
"    stream=\"users\",\n",
"    config={\"count\": 3},\n",
"    template=PromptTemplate.from_template(\n",
"        \"My name is {name} and I am {height} meters tall.\"\n",
"    ),\n",
")\n",
"\n",
"start_time = time.time()\n",
"my_iterator = loader.lazy_load()\n",
"print(\n",
"    f\"Just calling lazy load is quick! This took {time.time() - start_time:.4f} seconds\"\n",
")"
]
},
{
"cell_type": "markdown",
"id": "6b24a64b",
"metadata": {},
"source": [
"And you can iterate over documents as they're yielded:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "3e8355d0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"My name is Andera and I am 1.91 meters tall.\n",
"My name is Jody and I am 1.85 meters tall.\n",
"My name is Zonia and I am 1.53 meters tall.\n"
]
}
],
"source": [
"for doc in my_iterator:\n",
"    print(doc.page_content)"
]
},
{
"cell_type": "markdown",
"id": "d1040d81",
"metadata": {},
"source": [
"You can also lazy load documents in an async manner with `.alazy_load()`:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "dc5d0911",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"My name is Carmelina and I am 1.74 meters tall.\n",
"My name is Ali and I am 1.90 meters tall.\n",
"My name is Rochell and I am 1.83 meters tall.\n"
]
}
],
"source": [
"loader = AirbyteLoader(\n",
"    source=\"source-faker\",\n",
"    stream=\"users\",\n",
"    config={\"count\": 3},\n",
"    template=PromptTemplate.from_template(\n",
"        \"My name is {name} and I am {height} meters tall.\"\n",
"    ),\n",
")\n",
"\n",
"my_async_iterator = loader.alazy_load()\n",
"\n",
"async for doc in my_async_iterator:\n",
"    print(doc.page_content)"
]
},
{
"cell_type": "markdown",
"id": "ba4ede33",
"metadata": {},
"source": [
"## Configuration\n",
"\n",
"`AirbyteLoader` can be configured with the following options:\n",
"\n",
"- `source` (str, required): The name of the Airbyte source to load from.\n",
"- `stream` (str, required): The name of the stream to load from (Airbyte sources can return multiple streams)\n",
"- `config` (dict, required): The configuration for the Airbyte source\n",
"- `template` (PromptTemplate, optional): A custom prompt template for formatting documents\n",
"- `include_metadata` (bool, optional, default True): Whether to include all fields as metadata in the output documents\n",
"\n",
"The majority of the configuration will be in `config`, and you can find the specific configuration options in the \"Config field reference\" for each source in the [Airbyte documentation](https://docs.airbyte.com/integrations/)."
]
},
{
"cell_type": "markdown",
"id": "2e2ed269",
"metadata": {},
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
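Of the options listed in the notebook's Configuration section, `include_metadata` is the only one no cell exercises. A minimal sketch of how it would be passed, reusing the same source-faker stream as the earlier cells (an illustration, not part of the deleted notebook):

```python
from langchain_airbyte import AirbyteLoader

# include_metadata=False keeps record fields out of Document.metadata,
# leaving only the rendered page_content.
loader = AirbyteLoader(
    source="source-faker",
    stream="users",
    config={"count": 10},
    include_metadata=False,
)
docs = loader.load()
print(docs[0].metadata)  # expected to be empty
```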
@@ -1,40 +0,0 @@
-- Provisioning table "mlb_teams_2012".
--
-- psql postgresql://postgres@localhost < mlb_teams_2012.sql

DROP TABLE IF EXISTS mlb_teams_2012;
CREATE TABLE mlb_teams_2012 ("Team" VARCHAR, "Payroll (millions)" FLOAT, "Wins" BIGINT);
INSERT INTO mlb_teams_2012
    ("Team", "Payroll (millions)", "Wins")
VALUES
    ('Nationals', 81.34, 98),
    ('Reds', 82.20, 97),
    ('Yankees', 197.96, 95),
    ('Giants', 117.62, 94),
    ('Braves', 83.31, 94),
    ('Athletics', 55.37, 94),
    ('Rangers', 120.51, 93),
    ('Orioles', 81.43, 93),
    ('Rays', 64.17, 90),
    ('Angels', 154.49, 89),
    ('Tigers', 132.30, 88),
    ('Cardinals', 110.30, 88),
    ('Dodgers', 95.14, 86),
    ('White Sox', 96.92, 85),
    ('Brewers', 97.65, 83),
    ('Phillies', 174.54, 81),
    ('Diamondbacks', 74.28, 81),
    ('Pirates', 63.43, 79),
    ('Padres', 55.24, 76),
    ('Mariners', 81.97, 75),
    ('Mets', 93.35, 74),
    ('Blue Jays', 75.48, 73),
    ('Royals', 60.91, 72),
    ('Marlins', 118.07, 69),
    ('Red Sox', 173.18, 69),
    ('Indians', 78.43, 68),
    ('Twins', 94.08, 66),
    ('Rockies', 78.06, 64),
    ('Cubs', 88.19, 61),
    ('Astros', 60.65, 55)
;
@@ -1,380 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "E_RJy7C1bpCT"
},
"source": [
"# Google AlloyDB for PostgreSQL\n",
"\n",
"> [AlloyDB](https://cloud.google.com/alloydb) is a fully managed relational database service that offers high performance, seamless integration, and impressive scalability. AlloyDB is 100% compatible with PostgreSQL. Extend your database application to build AI-powered experiences leveraging AlloyDB's Langchain integrations.\n",
"\n",
"This notebook goes over how to use `AlloyDB for PostgreSQL` to load Documents with the `AlloyDBLoader` class."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "xjcxaw6--Xyy"
},
"source": [
"## Before you begin\n",
"\n",
"To run this notebook, you will need to do the following:\n",
"\n",
" * [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
" * [Enable the AlloyDB Admin API.](https://console.cloud.google.com/flows/enableapi?apiid=alloydb.googleapis.com)\n",
" * [Create an AlloyDB cluster and instance.](https://cloud.google.com/alloydb/docs/cluster-create)\n",
" * [Create an AlloyDB database.](https://cloud.google.com/alloydb/docs/quickstart/create-and-connect)\n",
" * [Add a User to the database.](https://cloud.google.com/alloydb/docs/database-users/about)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "IR54BmgvdHT_"
},
"source": [
"### 🦜🔗 Library Installation\n",
"Install the integration library, `langchain-google-alloydb-pg`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "0ZITIDE160OD",
"outputId": "90e0636e-ff34-4e1e-ad37-d2a6db4a317e"
},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain-google-alloydb-pg"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "v40bB_GMcr9f"
},
"source": [
"**Colab only:** Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "6o0iGVIdDD6K"
},
"outputs": [],
"source": [
"# # Automatically restart kernel after installs so that your environment can access the new packages\n",
"# import IPython\n",
"\n",
"# app = IPython.Application.instance()\n",
"# app.kernel.do_shutdown(True)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "cTXTbj4UltKf"
},
"source": [
"### 🔐 Authentication\n",
"Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
"\n",
"* If you are using Colab to run this notebook, use the cell below and continue.\n",
"* If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from google.colab import auth\n",
"\n",
"auth.authenticate_user()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Uj02bMRAc9_c"
},
"source": [
"### ☁ Set Your Google Cloud Project\n",
"Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
"\n",
"If you don't know your project ID, try the following:\n",
"\n",
"* Run `gcloud config list`.\n",
"* Run `gcloud projects list`.\n",
"* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wnp1R1PYc9_c",
"outputId": "6502c721-a2fd-451f-b946-9f7b850d5966"
},
"outputs": [],
"source": [
"# @title Project { display-mode: \"form\" }\n",
"PROJECT_ID = \"gcp_project_id\"  # @param {type:\"string\"}\n",
"\n",
"# Set the project id\n",
"! gcloud config set project {PROJECT_ID}"
]
},
{
"cell_type": "markdown",
"id": "rEWWNoNnKOgq",
"metadata": {
"id": "rEWWNoNnKOgq"
},
"source": [
"### 💡 API Enablement\n",
"The `langchain-google-alloydb-pg` package requires that you [enable the AlloyDB Admin API](https://console.cloud.google.com/flows/enableapi?apiid=alloydb.googleapis.com) in your Google Cloud Project."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "5utKIdq7KYi5",
"metadata": {
"id": "5utKIdq7KYi5"
},
"outputs": [],
"source": [
"# enable AlloyDB Admin API\n",
"!gcloud services enable alloydb.googleapis.com"
]
},
{
"cell_type": "markdown",
"id": "f8f2830ee9ca1e01",
"metadata": {
"id": "f8f2830ee9ca1e01"
},
"source": [
"## Basic Usage"
]
},
{
"cell_type": "markdown",
"id": "OMvzMWRrR6n7",
"metadata": {
"id": "OMvzMWRrR6n7"
},
"source": [
"### Set AlloyDB database variables\n",
"Find your database values in the [AlloyDB Instances page](https://console.cloud.google.com/alloydb/clusters)."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "irl7eMFnSPZr",
"metadata": {
"id": "irl7eMFnSPZr"
},
"outputs": [],
"source": [
"# @title Set Your Values Here { display-mode: \"form\" }\n",
"REGION = \"us-central1\"  # @param {type: \"string\"}\n",
"CLUSTER = \"my-cluster\"  # @param {type: \"string\"}\n",
"INSTANCE = \"my-primary\"  # @param {type: \"string\"}\n",
"DATABASE = \"my-database\"  # @param {type: \"string\"}\n",
"TABLE_NAME = \"vector_store\"  # @param {type: \"string\"}"
]
},
{
"cell_type": "markdown",
"id": "QuQigs4UoFQ2",
"metadata": {
"id": "QuQigs4UoFQ2"
},
"source": [
"### AlloyDBEngine Connection Pool\n",
"\n",
"One of the requirements and arguments to establish AlloyDB as a vector store is an `AlloyDBEngine` object. The `AlloyDBEngine` configures a connection pool to your AlloyDB database, enabling successful connections from your application and following industry best practices.\n",
"\n",
"To create an `AlloyDBEngine` using `AlloyDBEngine.from_instance()` you need to provide only 5 things:\n",
"\n",
"1. `project_id` : Project ID of the Google Cloud Project where the AlloyDB instance is located.\n",
"1. `region` : Region where the AlloyDB instance is located.\n",
"1. `cluster`: The name of the AlloyDB cluster.\n",
"1. `instance` : The name of the AlloyDB instance.\n",
"1. `database` : The name of the database to connect to on the AlloyDB instance.\n",
"\n",
"By default, [IAM database authentication](https://cloud.google.com/alloydb/docs/connect-iam) will be used as the method of database authentication. This library uses the IAM principal belonging to the [Application Default Credentials (ADC)](https://cloud.google.com/docs/authentication/application-default-credentials) sourced from the environment.\n",
"\n",
"Optionally, [built-in database authentication](https://cloud.google.com/alloydb/docs/database-users/about) using a username and password to access the AlloyDB database can also be used. Just provide the optional `user` and `password` arguments to `AlloyDBEngine.from_instance()`:\n",
"\n",
"* `user` : Database user to use for built-in database authentication and login\n",
"* `password` : Database password to use for built-in database authentication and login.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Note**: This tutorial demonstrates the async interface. All async methods have corresponding sync methods."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_google_alloydb_pg import AlloyDBEngine\n",
"\n",
"engine = await AlloyDBEngine.afrom_instance(\n",
"    project_id=PROJECT_ID,\n",
"    region=REGION,\n",
"    cluster=CLUSTER,\n",
"    instance=INSTANCE,\n",
"    database=DATABASE,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "e1tl0aNx7SWy"
},
"source": [
"### Create AlloyDBLoader"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "z-AZyzAQ7bsf"
},
"outputs": [],
"source": [
"from langchain_google_alloydb_pg import AlloyDBLoader\n",
"\n",
"# Creating a basic AlloyDBLoader object\n",
"loader = await AlloyDBLoader.create(engine, table_name=TABLE_NAME)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "PeOMpftjc9_e"
},
"source": [
"### Load Documents via default table\n",
"The loader returns a list of Documents from the table using the first column as page_content and all other columns as metadata. The default table will have the first column as\n",
"page_content and the second column as metadata (JSON). Each row becomes a document."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "cwvi_O5Wc9_e"
},
"outputs": [],
"source": [
"docs = await loader.aload()\n",
"print(docs)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "kSkL9l1Hc9_e"
},
"source": [
"### Load documents via custom table/metadata or custom page content columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"loader = await AlloyDBLoader.create(\n",
"    engine,\n",
"    table_name=TABLE_NAME,\n",
"    content_columns=[\"product_name\"],  # Optional\n",
"    metadata_columns=[\"id\"],  # Optional\n",
")\n",
"docs = await loader.aload()\n",
"print(docs)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "5R6h0_Cvc9_f"
},
"source": [
"### Set page content format\n",
"The loader returns a list of Documents, with one document per row, with the page content in the specified string format, e.g. text (space-separated concatenation), JSON, YAML, or CSV. The JSON and YAML formats include field headers, while text and CSV do not.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "NGNdS7cqc9_f"
},
"outputs": [],
"source": [
"loader = await AlloyDBLoader.create(\n",
"    engine,\n",
"    table_name=\"products\",\n",
"    content_columns=[\"product_name\", \"description\"],\n",
"    format=\"YAML\",\n",
")\n",
"docs = await loader.aload()\n",
"print(docs)"
]
}
],
"metadata": {
"colab": {
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
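The deleted notebook notes that every async method has a sync counterpart but only demonstrates the async path. A minimal sketch of the sync equivalents, assuming `AlloyDBEngine.from_instance()` (named in the prose above) and a sync loader factory such as `create_sync` (an assumption, not shown in this diff):

```python
from langchain_google_alloydb_pg import AlloyDBEngine, AlloyDBLoader

# Sync engine factory, named explicitly in the notebook's prose.
engine = AlloyDBEngine.from_instance(
    project_id=PROJECT_ID,
    region=REGION,
    cluster=CLUSTER,
    instance=INSTANCE,
    database=DATABASE,
)

# Assumed sync counterpart of `await AlloyDBLoader.create(...)`.
loader = AlloyDBLoader.create_sync(engine, table_name=TABLE_NAME)
docs = loader.load()
```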
@@ -1,469 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Google Bigtable\n",
"\n",
"> [Bigtable](https://cloud.google.com/bigtable) is a key-value and wide-column store, ideal for fast access to structured, semi-structured, or unstructured data. Extend your database application to build AI-powered experiences leveraging Bigtable's Langchain integrations.\n",
"\n",
"This notebook goes over how to use [Bigtable](https://cloud.google.com/bigtable) to [save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/) with `BigtableLoader` and `BigtableSaver`.\n",
"\n",
"[](https://colab.research.google.com/github/googleapis/langchain-google-bigtable-python/blob/main/docs/document_loader.ipynb)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Before You Begin\n",
"\n",
"To run this notebook, you will need to do the following:\n",
"\n",
"* [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
"* [Create a Bigtable instance](https://cloud.google.com/bigtable/docs/creating-instance)\n",
"* [Create a Bigtable table](https://cloud.google.com/bigtable/docs/managing-tables)\n",
"* [Create Bigtable access credentials](https://developers.google.com/workspace/guides/create-credentials)\n",
"\n",
"After confirming access to the database in the runtime environment of this notebook, fill in the following values and run the cell before running the example scripts."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# @markdown Please specify an instance and a table for demo purpose.\n",
"INSTANCE_ID = \"my_instance\"  # @param {type:\"string\"}\n",
"TABLE_ID = \"my_table\"  # @param {type:\"string\"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 🦜🔗 Library Installation\n",
"\n",
"The integration lives in its own `langchain-google-bigtable` package, so we need to install it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain-google-bigtable"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Colab only**: Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# # Automatically restart kernel after installs so that your environment can access the new packages\n",
"# import IPython\n",
"\n",
"# app = IPython.Application.instance()\n",
"# app.kernel.do_shutdown(True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### ☁ Set Your Google Cloud Project\n",
"Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
"\n",
"If you don't know your project ID, try the following:\n",
"\n",
"* Run `gcloud config list`.\n",
"* Run `gcloud projects list`.\n",
"* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# @markdown Please fill in the value below with your Google Cloud project ID and then run the cell.\n",
"\n",
"PROJECT_ID = \"my-project-id\"  # @param {type:\"string\"}\n",
"\n",
"# Set the project id\n",
"!gcloud config set project {PROJECT_ID}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 🔐 Authentication\n",
"\n",
"Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
"\n",
"- If you are using Colab to run this notebook, use the cell below and continue.\n",
"- If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from google.colab import auth\n",
"\n",
"auth.authenticate_user()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Basic Usage"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Using the saver\n",
"\n",
"Save langchain documents with `BigtableSaver.add_documents(<documents>)`. To initialize the `BigtableSaver` class you need to provide 2 things:\n",
"\n",
"1. `instance_id` - The ID of the Bigtable instance.\n",
"1. `table_id` - The name of the table within Bigtable to store langchain documents."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.documents import Document\n",
"from langchain_google_bigtable import BigtableSaver\n",
"\n",
"test_docs = [\n",
"    Document(\n",
"        page_content=\"Apple Granny Smith 150 0.99 1\",\n",
"        metadata={\"fruit_id\": 1},\n",
"    ),\n",
"    Document(\n",
"        page_content=\"Banana Cavendish 200 0.59 0\",\n",
"        metadata={\"fruit_id\": 2},\n",
"    ),\n",
"    Document(\n",
"        page_content=\"Orange Navel 80 1.29 1\",\n",
"        metadata={\"fruit_id\": 3},\n",
"    ),\n",
"]\n",
"\n",
"saver = BigtableSaver(\n",
"    instance_id=INSTANCE_ID,\n",
"    table_id=TABLE_ID,\n",
")\n",
"\n",
"saver.add_documents(test_docs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Querying for Documents from Bigtable\n",
"For more details on connecting to a Bigtable table, please check the [Python SDK documentation](https://cloud.google.com/python/docs/reference/bigtable/latest/client)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Load documents from table\n",
"\n",
"Load langchain documents with `BigtableLoader.load()` or `BigtableLoader.lazy_load()`. `lazy_load` returns a generator that only queries the database during the iteration. To initialize the `BigtableLoader` class you need to provide:\n",
"\n",
"1. `instance_id` - The ID of the Bigtable instance.\n",
"1. `table_id` - The name of the table within Bigtable to store langchain documents."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_google_bigtable import BigtableLoader\n",
"\n",
"loader = BigtableLoader(\n",
"    instance_id=INSTANCE_ID,\n",
"    table_id=TABLE_ID,\n",
")\n",
"\n",
"for doc in loader.lazy_load():\n",
"    print(doc)\n",
"    break"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Delete documents\n",
"\n",
"Delete a list of langchain documents from a Bigtable table with `BigtableSaver.delete(<documents>)`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_google_bigtable import BigtableSaver\n",
"\n",
"docs = loader.load()\n",
"print(\"Documents before delete: \", docs)\n",
"\n",
"onedoc = test_docs[0]\n",
"saver.delete([onedoc])\n",
"print(\"Documents after delete: \", loader.load())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Advanced Usage"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Limiting the returned rows\n",
"There are two ways to limit the returned rows:\n",
"\n",
"1. Using a [filter](https://cloud.google.com/python/docs/reference/bigtable/latest/row-filters)\n",
"2. Using a [row_set](https://cloud.google.com/python/docs/reference/bigtable/latest/row-set#google.cloud.bigtable.row_set.RowSet)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import google.cloud.bigtable.row_filters as row_filters\n",
"\n",
"filter_loader = BigtableLoader(\n",
"    INSTANCE_ID, TABLE_ID, filter=row_filters.ColumnQualifierRegexFilter(b\"os_build\")\n",
")\n",
"\n",
"\n",
"from google.cloud.bigtable.row_set import RowSet\n",
"\n",
"row_set = RowSet()\n",
"row_set.add_row_range_from_keys(\n",
"    start_key=\"phone#4c410523#20190501\", end_key=\"phone#4c410523#201906201\"\n",
")\n",
"\n",
"row_set_loader = BigtableLoader(\n",
"    INSTANCE_ID,\n",
"    TABLE_ID,\n",
"    row_set=row_set,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Custom client\n",
"The client created by default is the default client, using only the admin=True option. To use a non-default client, a [custom client](https://cloud.google.com/python/docs/reference/bigtable/latest/client#class-googlecloudbigtableclientclientprojectnone-credentialsnone-readonlyfalse-adminfalse-clientinfonone-clientoptionsnone-adminclientoptionsnone-channelnone) can be passed to the constructor."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from google.cloud import bigtable\n",
"\n",
"custom_client_loader = BigtableLoader(\n",
"    INSTANCE_ID,\n",
"    TABLE_ID,\n",
"    client=bigtable.Client(...),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Custom content\n",
"The BigtableLoader assumes there is a column family called `langchain` with a column called `content` that contains values encoded in UTF-8. These defaults can be changed like so:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_google_bigtable import Encoding\n",
"\n",
"custom_content_loader = BigtableLoader(\n",
"    INSTANCE_ID,\n",
"    TABLE_ID,\n",
"    content_encoding=Encoding.ASCII,\n",
"    content_column_family=\"my_content_family\",\n",
"    content_column_name=\"my_content_column_name\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Metadata mapping\n",
"By default, the `metadata` map on the `Document` object will contain a single key, `rowkey`, with the value of the row's rowkey value. To add more items to that map, use metadata_mapping."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"from langchain_google_bigtable import MetadataMapping\n",
"\n",
"metadata_mapping_loader = BigtableLoader(\n",
"    INSTANCE_ID,\n",
"    TABLE_ID,\n",
"    metadata_mappings=[\n",
"        MetadataMapping(\n",
"            column_family=\"my_int_family\",\n",
"            column_name=\"my_int_column\",\n",
"            metadata_key=\"key_in_metadata_map\",\n",
"            encoding=Encoding.INT_BIG_ENDIAN,\n",
"        ),\n",
"        MetadataMapping(\n",
"            column_family=\"my_custom_family\",\n",
"            column_name=\"my_custom_column\",\n",
"            metadata_key=\"custom_key\",\n",
"            encoding=Encoding.CUSTOM,\n",
"            custom_decoding_func=lambda input: json.loads(input.decode()),\n",
"            custom_encoding_func=lambda input: str.encode(json.dumps(input)),\n",
"        ),\n",
"    ],\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Metadata as JSON\n",
"\n",
"If there is a column in Bigtable that contains a JSON string that you would like to have added to the output document metadata, it is possible to add the following parameters to BigtableLoader. Note: the default value for `metadata_as_json_encoding` is UTF-8."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"metadata_as_json_loader = BigtableLoader(\n",
"    INSTANCE_ID,\n",
"    TABLE_ID,\n",
"    metadata_as_json_encoding=Encoding.ASCII,\n",
"    metadata_as_json_family=\"my_metadata_as_json_family\",\n",
"    metadata_as_json_name=\"my_metadata_as_json_column_name\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Customize BigtableSaver\n",
"\n",
"The BigtableSaver is also customizable, similar to BigtableLoader."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"saver = BigtableSaver(\n",
"    INSTANCE_ID,\n",
"    TABLE_ID,\n",
"    client=bigtable.Client(...),\n",
"    content_encoding=Encoding.ASCII,\n",
"    content_column_family=\"my_content_family\",\n",
"    content_column_name=\"my_content_column_name\",\n",
"    metadata_mappings=[\n",
"        MetadataMapping(\n",
"            column_family=\"my_int_family\",\n",
"            column_name=\"my_int_column\",\n",
"            metadata_key=\"key_in_metadata_map\",\n",
"            encoding=Encoding.INT_BIG_ENDIAN,\n",
"        ),\n",
"        MetadataMapping(\n",
"            column_family=\"my_custom_family\",\n",
"            column_name=\"my_custom_column\",\n",
"            metadata_key=\"custom_key\",\n",
"            encoding=Encoding.CUSTOM,\n",
"            custom_decoding_func=lambda input: json.loads(input.decode()),\n",
"            custom_encoding_func=lambda input: str.encode(json.dumps(input)),\n",
"        ),\n",
"    ],\n",
"    metadata_as_json_encoding=Encoding.ASCII,\n",
"    metadata_as_json_family=\"my_metadata_as_json_family\",\n",
"    metadata_as_json_name=\"my_metadata_as_json_column_name\",\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
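Since `BigtableSaver` accepts the same customization arguments as `BigtableLoader`, a saver/loader pair configured identically should round-trip documents. A minimal sketch using only arguments already shown in the notebook above:

```python
from langchain_google_bigtable import BigtableLoader, BigtableSaver, Encoding

shared_kwargs = dict(
    content_encoding=Encoding.ASCII,
    content_column_family="my_content_family",
    content_column_name="my_content_column_name",
)

writer = BigtableSaver(INSTANCE_ID, TABLE_ID, **shared_kwargs)
reader = BigtableLoader(INSTANCE_ID, TABLE_ID, **shared_kwargs)

writer.add_documents(test_docs)  # write with the custom column settings
print(reader.load())             # read back through the matching loader
```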
@@ -1,629 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Google Cloud SQL for SQL Server\n",
"\n",
"> [Cloud SQL](https://cloud.google.com/sql) is a fully managed relational database service that offers high performance, seamless integration, and impressive scalability. It offers [MySQL](https://cloud.google.com/sql/mysql), [PostgreSQL](https://cloud.google.com/sql/postgres), and [SQL Server](https://cloud.google.com/sql/sqlserver) database engines. Extend your database application to build AI-powered experiences leveraging Cloud SQL's Langchain integrations.\n",
"\n",
"This notebook goes over how to use [Cloud SQL for SQL Server](https://cloud.google.com/sql/sqlserver) to [save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/) with `MSSQLLoader` and `MSSQLDocumentSaver`.\n",
"\n",
"[](https://colab.research.google.com/github/googleapis/langchain-google-cloud-sql-mssql-python/blob/main/docs/document_loader.ipynb)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Before You Begin\n",
"\n",
"To run this notebook, you will need to do the following:\n",
"\n",
"* [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
"* [Create a Cloud SQL for SQL Server instance](https://cloud.google.com/sql/docs/sqlserver/create-instance)\n",
"* [Create a Cloud SQL database](https://cloud.google.com/sql/docs/mssql/create-manage-databases)\n",
"* [Add an IAM database user to the database](https://cloud.google.com/sql/docs/sqlserver/add-manage-iam-users#creating-a-database-user) (Optional)\n",
"\n",
"After confirming access to the database in the runtime environment of this notebook, fill in the following values and run the cell before running the example scripts."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# @markdown Please fill in both the Google Cloud region and name of your Cloud SQL instance.\n",
"REGION = \"us-central1\"  # @param {type:\"string\"}\n",
"INSTANCE = \"test-instance\"  # @param {type:\"string\"}\n",
"\n",
"# @markdown Please fill in user name and password of your Cloud SQL instance.\n",
"DB_USER = \"sqlserver\"  # @param {type:\"string\"}\n",
"DB_PASS = \"password\"  # @param {type:\"string\"}\n",
"\n",
"# @markdown Please specify a database and a table for demo purpose.\n",
"DATABASE = \"test\"  # @param {type:\"string\"}\n",
"TABLE_NAME = \"test-default\"  # @param {type:\"string\"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 🦜🔗 Library Installation\n",
"\n",
"The integration lives in its own `langchain-google-cloud-sql-mssql` package, so we need to install it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain-google-cloud-sql-mssql"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Colab only**: Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# # Automatically restart kernel after installs so that your environment can access the new packages\n",
"# import IPython\n",
"\n",
"# app = IPython.Application.instance()\n",
"# app.kernel.do_shutdown(True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 🔐 Authentication\n",
"\n",
"Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
"\n",
"- If you are using Colab to run this notebook, use the cell below and continue.\n",
"- If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from google.colab import auth\n",
"\n",
"auth.authenticate_user()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### ☁ Set Your Google Cloud Project\n",
"Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
"\n",
"If you don't know your project ID, try the following:\n",
"\n",
"* Run `gcloud config list`.\n",
"* Run `gcloud projects list`.\n",
"* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# @markdown Please fill in the value below with your Google Cloud project ID and then run the cell.\n",
"\n",
"PROJECT_ID = \"my-project-id\"  # @param {type:\"string\"}\n",
"\n",
"# Set the project id\n",
"!gcloud config set project {PROJECT_ID}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 💡 API Enablement\n",
"The `langchain-google-cloud-sql-mssql` package requires that you [enable the Cloud SQL Admin API](https://console.cloud.google.com/flows/enableapi?apiid=sqladmin.googleapis.com) in your Google Cloud Project."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# enable Cloud SQL Admin API\n",
"!gcloud services enable sqladmin.googleapis.com"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Basic Usage"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### MSSQLEngine Connection Pool\n",
"\n",
"Before saving or loading documents from a MSSQL table, we first need to configure a connection pool to the Cloud SQL database. The `MSSQLEngine` configures a [SQLAlchemy connection pool](https://docs.sqlalchemy.org/en/20/core/pooling.html#module-sqlalchemy.pool) to your Cloud SQL database, enabling successful connections from your application and following industry best practices.\n",
"\n",
"To create an `MSSQLEngine` using `MSSQLEngine.from_instance()` you need to provide only 6 things:\n",
"\n",
"1. `project_id` : Project ID of the Google Cloud Project where the Cloud SQL instance is located.\n",
"1. `region` : Region where the Cloud SQL instance is located.\n",
"1. `instance` : The name of the Cloud SQL instance.\n",
"1. `database` : The name of the database to connect to on the Cloud SQL instance.\n",
"1. `user` : Database user to use for built-in database authentication and login.\n",
"1. `password` : Database password to use for built-in database authentication and login."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_google_cloud_sql_mssql import MSSQLEngine\n",
"\n",
"engine = MSSQLEngine.from_instance(\n",
"    project_id=PROJECT_ID,\n",
"    region=REGION,\n",
"    instance=INSTANCE,\n",
"    database=DATABASE,\n",
"    user=DB_USER,\n",
"    password=DB_PASS,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Initialize a table\n",
"\n",
"Initialize a table of default schema via `MSSQLEngine.init_document_table(<table_name>)`. Table Columns:\n",
"- page_content (type: text)\n",
"- langchain_metadata (type: JSON)\n",
"\n",
"The `overwrite_existing=True` flag means the newly initialized table will replace any existing table of the same name."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"engine.init_document_table(TABLE_NAME, overwrite_existing=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save documents\n",
"\n",
"Save langchain documents with `MSSQLDocumentSaver.add_documents(<documents>)`. To initialize the `MSSQLDocumentSaver` class you need to provide 2 things:\n",
"1. `engine` - An instance of a `MSSQLEngine` engine.\n",
"2. `table_name` - The name of the table within the Cloud SQL database to store langchain documents."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.documents import Document\n",
"from langchain_google_cloud_sql_mssql import MSSQLDocumentSaver\n",
"\n",
"test_docs = [\n",
"    Document(\n",
"        page_content=\"Apple Granny Smith 150 0.99 1\",\n",
"        metadata={\"fruit_id\": 1},\n",
"    ),\n",
"    Document(\n",
"        page_content=\"Banana Cavendish 200 0.59 0\",\n",
"        metadata={\"fruit_id\": 2},\n",
"    ),\n",
"    Document(\n",
"        page_content=\"Orange Navel 80 1.29 1\",\n",
"        metadata={\"fruit_id\": 3},\n",
"    ),\n",
"]\n",
"saver = MSSQLDocumentSaver(engine=engine, table_name=TABLE_NAME)\n",
"saver.add_documents(test_docs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load documents"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
||||
"Load langchain documents with `MSSQLLoader.load()` or `MSSQLLoader.lazy_load()`. `lazy_load` returns a generator that only queries database during the iteration. To initialize `MSSQLDocumentSaver` class you need to provide:\n",
|
||||
"1. `engine` - An instance of a `MSSQLEngine` engine.\n",
|
||||
"2. `table_name` - The name of the table within the Cloud SQL database to store langchain documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_mssql import MSSQLLoader\n",
|
||||
"\n",
|
||||
"loader = MSSQLLoader(engine=engine, table_name=TABLE_NAME)\n",
|
||||
"docs = loader.lazy_load()\n",
|
||||
"for doc in docs:\n",
|
||||
" print(\"Loaded documents:\", doc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load documents via query"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Other than loading documents from a table, we can also choose to load documents from a view generated from a SQL query. For example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_mssql import MSSQLLoader\n",
|
||||
"\n",
|
||||
"loader = MSSQLLoader(\n",
|
||||
" engine=engine,\n",
|
||||
" query=f\"select * from \\\"{TABLE_NAME}\\\" where JSON_VALUE(langchain_metadata, '$.fruit_id') = 1;\",\n",
|
||||
")\n",
|
||||
"onedoc = loader.load()\n",
|
||||
"onedoc"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The view generated from SQL query can have different schema than default table. In such cases, the behavior of MSSQLLoader is the same as loading from table with non-default schema. Please refer to section [Load documents with customized document page content & metadata](#Load-documents-with-customized-document-page-content-&-metadata)."
|
||||
]
|
||||
},
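{
"cell_type": "markdown",
"metadata": {},
"source": [
"For instance, a minimal sketch of that workflow (assuming the demo table from above; the view name `fruits_view` is a placeholder, and the loader accepting a view name for `table_name` follows from the note above):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sqlalchemy\n",
"\n",
"from langchain_google_cloud_sql_mssql import MSSQLLoader\n",
"\n",
"# Sketch: create a view over the demo table (assumes the view does not exist yet),\n",
"# then point the loader at it like any other table.\n",
"with engine.connect() as conn:\n",
"    conn.execute(\n",
"        sqlalchemy.text(\n",
"            f'CREATE VIEW fruits_view AS SELECT page_content, langchain_metadata FROM \"{TABLE_NAME}\"'\n",
"        )\n",
"    )\n",
"    conn.commit()\n",
"\n",
"loader = MSSQLLoader(engine=engine, table_name=\"fruits_view\")\n",
"loader.load()"
]
},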
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Delete a list of langchain documents from MSSQL table with `MSSQLDocumentSaver.delete(<documents>)`.\n",
|
||||
"\n",
|
||||
"For table with default schema (page_content, langchain_metadata), the deletion criteria is:\n",
|
||||
"\n",
|
||||
"A `row` should be deleted if there exists a `document` in the list, such that\n",
|
||||
"- `document.page_content` equals `row[page_content]`\n",
|
||||
"- `document.metadata` equals `row[langchain_metadata]`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_mssql import MSSQLLoader\n",
|
||||
"\n",
|
||||
"loader = MSSQLLoader(engine=engine, table_name=TABLE_NAME)\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(\"Documents before delete:\", docs)\n",
|
||||
"saver.delete(onedoc)\n",
|
||||
"print(\"Documents after delete:\", loader.load())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Advanced Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load documents with customized document page content & metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First we prepare an example table with non-default schema, and populate it with some arbitary data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sqlalchemy\n",
|
||||
"\n",
|
||||
"with engine.connect() as conn:\n",
|
||||
" conn.execute(sqlalchemy.text(f'DROP TABLE IF EXISTS \"{TABLE_NAME}\"'))\n",
|
||||
" conn.commit()\n",
|
||||
" conn.execute(\n",
|
||||
" sqlalchemy.text(\n",
|
||||
" f\"\"\"\n",
|
||||
" IF NOT EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[{TABLE_NAME}]') AND type in (N'U'))\n",
|
||||
" BEGIN\n",
|
||||
" CREATE TABLE [dbo].[{TABLE_NAME}](\n",
|
||||
" fruit_id INT IDENTITY(1,1) PRIMARY KEY,\n",
|
||||
" fruit_name VARCHAR(100) NOT NULL,\n",
|
||||
" variety VARCHAR(50),\n",
|
||||
" quantity_in_stock INT NOT NULL,\n",
|
||||
" price_per_unit DECIMAL(6,2) NOT NULL,\n",
|
||||
" organic BIT NOT NULL\n",
|
||||
" )\n",
|
||||
" END\n",
|
||||
" \"\"\"\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" conn.execute(\n",
|
||||
" sqlalchemy.text(\n",
|
||||
" f\"\"\"\n",
|
||||
" INSERT INTO \"{TABLE_NAME}\" (fruit_name, variety, quantity_in_stock, price_per_unit, organic)\n",
|
||||
" VALUES\n",
|
||||
" ('Apple', 'Granny Smith', 150, 0.99, 1),\n",
|
||||
" ('Banana', 'Cavendish', 200, 0.59, 0),\n",
|
||||
" ('Orange', 'Navel', 80, 1.29, 1);\n",
|
||||
" \"\"\"\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" conn.commit()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If we still load langchain documents with default parameters of `MSSQLLoader` from this example table, the `page_content` of loaded documents will be the first column of the table, and `metadata` will be consisting of key-value pairs of all the other columns."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = MSSQLLoader(\n",
|
||||
" engine=engine,\n",
|
||||
" table_name=TABLE_NAME,\n",
|
||||
")\n",
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can specify the content and metadata we want to load by setting the `content_columns` and `metadata_columns` when initializing the `MSSQLLoader`.\n",
|
||||
"1. `content_columns`: The columns to write into the `page_content` of the document.\n",
|
||||
"2. `metadata_columns`: The columns to write into the `metadata` of the document.\n",
|
||||
"\n",
|
||||
"For example here, the values of columns in `content_columns` will be joined together into a space-separated string, as `page_content` of loaded documents, and `metadata` of loaded documents will only contain key-value pairs of columns specified in `metadata_columns`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = MSSQLLoader(\n",
|
||||
" engine=engine,\n",
|
||||
" table_name=TABLE_NAME,\n",
|
||||
" content_columns=[\n",
|
||||
" \"variety\",\n",
|
||||
" \"quantity_in_stock\",\n",
|
||||
" \"price_per_unit\",\n",
|
||||
" \"organic\",\n",
|
||||
" ],\n",
|
||||
" metadata_columns=[\"fruit_id\", \"fruit_name\"],\n",
|
||||
")\n",
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Save document with customized page content & metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In order to save langchain document into table with customized metadata fields. We need first create such a table via `MSSQLEngine.init_document_table()`, and specify the list of `metadata_columns` we want it to have. In this example, the created table will have table columns:\n",
|
||||
"- description (type: text): for storing fruit description.\n",
|
||||
"- fruit_name (type text): for storing fruit name.\n",
|
||||
"- organic (type tinyint(1)): to tell if the fruit is organic.\n",
|
||||
"- other_metadata (type: JSON): for storing other metadata information of the fruit.\n",
|
||||
"\n",
|
||||
"We can use the following parameters with `MSSQLEngine.init_document_table()` to create the table:\n",
|
||||
"1. `table_name`: The name of the table within the Cloud SQL database to store langchain documents.\n",
|
||||
"2. `metadata_columns`: A list of `sqlalchemy.Column` indicating the list of metadata columns we need.\n",
|
||||
"3. `content_column`: The name of column to store `page_content` of langchain document. Default: `page_content`.\n",
|
||||
"4. `metadata_json_column`: The name of JSON column to store extra `metadata` of langchain document. Default: `langchain_metadata`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"engine.init_document_table(\n",
|
||||
" TABLE_NAME,\n",
|
||||
" metadata_columns=[\n",
|
||||
" sqlalchemy.Column(\n",
|
||||
" \"fruit_name\",\n",
|
||||
" sqlalchemy.UnicodeText,\n",
|
||||
" primary_key=False,\n",
|
||||
" nullable=True,\n",
|
||||
" ),\n",
|
||||
" sqlalchemy.Column(\n",
|
||||
" \"organic\",\n",
|
||||
" sqlalchemy.Boolean,\n",
|
||||
" primary_key=False,\n",
|
||||
" nullable=True,\n",
|
||||
" ),\n",
|
||||
" ],\n",
|
||||
" content_column=\"description\",\n",
|
||||
" metadata_json_column=\"other_metadata\",\n",
|
||||
" overwrite_existing=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Save documents with `MSSQLDocumentSaver.add_documents(<documents>)`. As you can see in this example, \n",
|
||||
"- `document.page_content` will be saved into `description` column.\n",
|
||||
"- `document.metadata.fruit_name` will be saved into `fruit_name` column.\n",
|
||||
"- `document.metadata.organic` will be saved into `organic` column.\n",
|
||||
"- `document.metadata.fruit_id` will be saved into `other_metadata` column in JSON format."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Granny Smith 150 0.99\",\n",
|
||||
" metadata={\"fruit_id\": 1, \"fruit_name\": \"Apple\", \"organic\": 1},\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"saver = MSSQLDocumentSaver(\n",
|
||||
" engine=engine,\n",
|
||||
" table_name=TABLE_NAME,\n",
|
||||
" content_column=\"description\",\n",
|
||||
" metadata_json_column=\"other_metadata\",\n",
|
||||
")\n",
|
||||
"saver.add_documents(test_docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with engine.connect() as conn:\n",
|
||||
" result = conn.execute(sqlalchemy.text(f'select * from \"{TABLE_NAME}\";'))\n",
|
||||
" print(result.keys())\n",
|
||||
" print(result.fetchall())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete documents with customized page content & metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can also delete documents from table with customized metadata columns via `MSSQLDocumentSaver.delete(<documents>)`. The deletion criteria is:\n",
|
||||
"\n",
|
||||
"A `row` should be deleted if there exists a `document` in the list, such that\n",
|
||||
"- `document.page_content` equals `row[page_content]`\n",
|
||||
"- For every metadata field `k` in `document.metadata`\n",
|
||||
" - `document.metadata[k]` equals `row[k]` or `document.metadata[k]` equals `row[langchain_metadata][k]`\n",
|
||||
"- There no extra metadata field presents in `row` but not in `document.metadata`.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = MSSQLLoader(engine=engine, table_name=TABLE_NAME)\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(\"Documents before delete:\", docs)\n",
|
||||
"saver.delete(docs)\n",
|
||||
"print(\"Documents after delete:\", loader.load())"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,642 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Cloud SQL for MySQL\n",
|
||||
"\n",
|
||||
"> [Cloud SQL](https://cloud.google.com/sql) is a fully managed relational database service that offers high performance, seamless integration, and impressive scalability. It offers [MySQL](https://cloud.google.com/sql/mysql), [PostgreSQL](https://cloud.google.com/sql/postgres), and [SQL Server](https://cloud.google.com/sql/sqlserver) database engines. Extend your database application to build AI-powered experiences leveraging Cloud SQL's Langchain integrations.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use [Cloud SQL for MySQL](https://cloud.google.com/sql/mysql) to [save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/) with `MySQLLoader` and `MySQLDocumentSaver`.\n",
|
||||
"\n",
|
||||
"[](https://colab.research.google.com/github/googleapis/langchain-google-cloud-sql-mysql-python/blob/main/docs/document_loader.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Before You Begin\n",
|
||||
"\n",
|
||||
"To run this notebook, you will need to do the following:\n",
|
||||
"\n",
|
||||
"* [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
|
||||
"* [Create a Cloud SQL for MySQL instance](https://cloud.google.com/sql/docs/mysql/create-instance)\n",
|
||||
"* [Create a Cloud SQL database](https://cloud.google.com/sql/docs/mysql/create-manage-databases)\n",
|
||||
"* [Add an IAM database user to the database](https://cloud.google.com/sql/docs/mysql/add-manage-iam-users#creating-a-database-user) (Optional)\n",
|
||||
"\n",
|
||||
"After confirmed access to database in the runtime environment of this notebook, filling the following values and run the cell before running example scripts."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please fill in the both the Google Cloud region and name of your Cloud SQL instance.\n",
|
||||
"REGION = \"us-central1\" # @param {type:\"string\"}\n",
|
||||
"INSTANCE = \"test-instance\" # @param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"# @markdown Please specify a database and a table for demo purpose.\n",
|
||||
"DATABASE = \"test\" # @param {type:\"string\"}\n",
|
||||
"TABLE_NAME = \"test-default\" # @param {type:\"string\"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🦜🔗 Library Installation\n",
|
||||
"\n",
|
||||
"The integration lives in its own `langchain-google-cloud-sql-mysql` package, so we need to install it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -upgrade --quiet langchain-google-cloud-sql-mysql"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Colab only**: Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # Automatically restart kernel after installs so that your environment can access the new packages\n",
|
||||
"# import IPython\n",
|
||||
"\n",
|
||||
"# app = IPython.Application.instance()\n",
|
||||
"# app.kernel.do_shutdown(True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ☁ Set Your Google Cloud Project\n",
|
||||
"Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
|
||||
"\n",
|
||||
"If you don't know your project ID, try the following:\n",
|
||||
"\n",
|
||||
"* Run `gcloud config list`.\n",
|
||||
"* Run `gcloud projects list`.\n",
|
||||
"* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please fill in the value below with your Google Cloud project ID and then run the cell.\n",
|
||||
"\n",
|
||||
"PROJECT_ID = \"my-project-id\" # @param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"# Set the project id\n",
|
||||
"!gcloud config set project {PROJECT_ID}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🔐 Authentication\n",
|
||||
"\n",
|
||||
"Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
|
||||
"\n",
|
||||
"- If you are using Colab to run this notebook, use the cell below and continue.\n",
|
||||
"- If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.colab import auth\n",
|
||||
"\n",
|
||||
"auth.authenticate_user()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### API Enablement\n",
|
||||
"The `langchain-google-cloud-sql-mysql` package requires that you [enable the Cloud SQL Admin API](https://console.cloud.google.com/flows/enableapi?apiid=sqladmin.googleapis.com) in your Google Cloud Project."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# enable Cloud SQL Admin API\n",
|
||||
"!gcloud services enable sqladmin.googleapis.com"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Basic Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### MySQLEngine Connection Pool\n",
|
||||
"\n",
|
||||
"Before saving or loading documents from MySQL table, we need first configures a connection pool to Cloud SQL database. The `MySQLEngine` configures a connection pool to your Cloud SQL database, enabling successful connections from your application and following industry best practices.\n",
|
||||
"\n",
|
||||
"To create a `MySQLEngine` using `MySQLEngine.from_instance()` you need to provide only 4 things:\n",
|
||||
"\n",
|
||||
"1. `project_id` : Project ID of the Google Cloud Project where the Cloud SQL instance is located.\n",
|
||||
"2. `region` : Region where the Cloud SQL instance is located.\n",
|
||||
"3. `instance` : The name of the Cloud SQL instance.\n",
|
||||
"4. `database` : The name of the database to connect to on the Cloud SQL instance.\n",
|
||||
"\n",
|
||||
"By default, [IAM database authentication](https://cloud.google.com/sql/docs/mysql/iam-authentication#iam-db-auth) will be used as the method of database authentication. This library uses the IAM principal belonging to the [Application Default Credentials (ADC)](https://cloud.google.com/docs/authentication/application-default-credentials) sourced from the envionment.\n",
|
||||
"\n",
|
||||
"For more informatin on IAM database authentication please see:\n",
|
||||
"\n",
|
||||
"* [Configure an instance for IAM database authentication](https://cloud.google.com/sql/docs/mysql/create-edit-iam-instances)\n",
|
||||
"* [Manage users with IAM database authentication](https://cloud.google.com/sql/docs/mysql/add-manage-iam-users)\n",
|
||||
"\n",
|
||||
"Optionally, [built-in database authentication](https://cloud.google.com/sql/docs/mysql/built-in-authentication) using a username and password to access the Cloud SQL database can also be used. Just provide the optional `user` and `password` arguments to `MySQLEngine.from_instance()`:\n",
|
||||
"\n",
|
||||
"* `user` : Database user to use for built-in database authentication and login\n",
|
||||
"* `password` : Database password to use for built-in database authentication and login."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_mysql import MySQLEngine\n",
|
||||
"\n",
|
||||
"engine = MySQLEngine.from_instance(\n",
|
||||
" project_id=PROJECT_ID, region=REGION, instance=INSTANCE, database=DATABASE\n",
|
||||
")"
|
||||
]
|
||||
},
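{
"cell_type": "markdown",
"metadata": {},
"source": [
"And a minimal sketch of built-in database authentication, per the optional `user` and `password` arguments described above (the credential values below are placeholders):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: built-in database authentication with placeholder credentials\n",
"engine = MySQLEngine.from_instance(\n",
"    project_id=PROJECT_ID,\n",
"    region=REGION,\n",
"    instance=INSTANCE,\n",
"    database=DATABASE,\n",
"    user=\"my-db-user\",  # placeholder: your database user\n",
"    password=\"my-db-password\",  # placeholder: your database password\n",
")"
]
},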
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Initialize a table\n",
|
||||
"\n",
|
||||
"Initialize a table of default schema via `MySQLEngine.init_document_table(<table_name>)`. Table Columns:\n",
|
||||
"\n",
|
||||
"- page_content (type: text)\n",
|
||||
"- langchain_metadata (type: JSON)\n",
|
||||
"\n",
|
||||
"`overwrite_existing=True` flag means the newly initialized table will replace any existing table of the same name."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"engine.init_document_table(TABLE_NAME, overwrite_existing=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Save documents\n",
|
||||
"\n",
|
||||
"Save langchain documents with `MySQLDocumentSaver.add_documents(<documents>)`. To initialize `MySQLDocumentSaver` class you need to provide 2 things:\n",
|
||||
"\n",
|
||||
"1. `engine` - An instance of a `MySQLEngine` engine.\n",
|
||||
"2. `table_name` - The name of the table within the Cloud SQL database to store langchain documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain_google_cloud_sql_mysql import MySQLDocumentSaver\n",
|
||||
"\n",
|
||||
"test_docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Apple Granny Smith 150 0.99 1\",\n",
|
||||
" metadata={\"fruit_id\": 1},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Banana Cavendish 200 0.59 0\",\n",
|
||||
" metadata={\"fruit_id\": 2},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Orange Navel 80 1.29 1\",\n",
|
||||
" metadata={\"fruit_id\": 3},\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"saver = MySQLDocumentSaver(engine=engine, table_name=TABLE_NAME)\n",
|
||||
"saver.add_documents(test_docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Load langchain documents with `MySQLLoader.load()` or `MySQLLoader.lazy_load()`. `lazy_load` returns a generator that only queries database during the iteration. To initialize `MySQLLoader` class you need to provide:\n",
|
||||
"\n",
|
||||
"1. `engine` - An instance of a `MySQLEngine` engine.\n",
|
||||
"2. `table_name` - The name of the table within the Cloud SQL database to store langchain documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_mysql import MySQLLoader\n",
|
||||
"\n",
|
||||
"loader = MySQLLoader(engine=engine, table_name=TABLE_NAME)\n",
|
||||
"docs = loader.lazy_load()\n",
|
||||
"for doc in docs:\n",
|
||||
" print(\"Loaded documents:\", doc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load documents via query"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Other than loading documents from a table, we can also choose to load documents from a view generated from a SQL query. For example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_mysql import MySQLLoader\n",
|
||||
"\n",
|
||||
"loader = MySQLLoader(\n",
|
||||
" engine=engine,\n",
|
||||
" query=f\"select * from `{TABLE_NAME}` where JSON_EXTRACT(langchain_metadata, '$.fruit_id') = 1;\",\n",
|
||||
")\n",
|
||||
"onedoc = loader.load()\n",
|
||||
"onedoc"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The view generated from SQL query can have different schema than default table. In such cases, the behavior of MySQLLoader is the same as loading from table with non-default schema. Please refer to section [Load documents with customized document page content & metadata](#Load-documents-with-customized-document-page-content-&-metadata)."
|
||||
]
|
||||
},
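{
"cell_type": "markdown",
"metadata": {},
"source": [
"For instance, a minimal sketch of that workflow (assuming the demo table from above; the view name `fruits_view` is a placeholder, and the loader accepting a view name for `table_name` follows from the note above):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sqlalchemy\n",
"\n",
"from langchain_google_cloud_sql_mysql import MySQLLoader\n",
"\n",
"# Sketch: create a view over the demo table (assumes the view does not exist yet),\n",
"# then point the loader at it like any other table.\n",
"with engine.connect() as conn:\n",
"    conn.execute(\n",
"        sqlalchemy.text(\n",
"            f\"CREATE VIEW fruits_view AS SELECT page_content, langchain_metadata FROM `{TABLE_NAME}`\"\n",
"        )\n",
"    )\n",
"    conn.commit()\n",
"\n",
"loader = MySQLLoader(engine=engine, table_name=\"fruits_view\")\n",
"loader.load()"
]
},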
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Delete a list of langchain documents from MySQL table with `MySQLDocumentSaver.delete(<documents>)`.\n",
|
||||
"\n",
|
||||
"For table with default schema (page_content, langchain_metadata), the deletion criteria is:\n",
|
||||
"\n",
|
||||
"A `row` should be deleted if there exists a `document` in the list, such that\n",
|
||||
"\n",
|
||||
"- `document.page_content` equals `row[page_content]`\n",
|
||||
"- `document.metadata` equals `row[langchain_metadata]`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_mysql import MySQLLoader\n",
|
||||
"\n",
|
||||
"loader = MySQLLoader(engine=engine, table_name=TABLE_NAME)\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(\"Documents before delete:\", docs)\n",
|
||||
"saver.delete(onedoc)\n",
|
||||
"print(\"Documents after delete:\", loader.load())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Advanced Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load documents with customized document page content & metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First we prepare an example table with non-default schema, and populate it with some arbitary data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sqlalchemy\n",
|
||||
"\n",
|
||||
"with engine.connect() as conn:\n",
|
||||
" conn.execute(sqlalchemy.text(f\"DROP TABLE IF EXISTS `{TABLE_NAME}`\"))\n",
|
||||
" conn.commit()\n",
|
||||
" conn.execute(\n",
|
||||
" sqlalchemy.text(\n",
|
||||
" f\"\"\"\n",
|
||||
" CREATE TABLE IF NOT EXISTS `{TABLE_NAME}`(\n",
|
||||
" fruit_id INT AUTO_INCREMENT PRIMARY KEY,\n",
|
||||
" fruit_name VARCHAR(100) NOT NULL,\n",
|
||||
" variety VARCHAR(50),\n",
|
||||
" quantity_in_stock INT NOT NULL,\n",
|
||||
" price_per_unit DECIMAL(6,2) NOT NULL,\n",
|
||||
" organic TINYINT(1) NOT NULL\n",
|
||||
" )\n",
|
||||
" \"\"\"\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" conn.execute(\n",
|
||||
" sqlalchemy.text(\n",
|
||||
" f\"\"\"\n",
|
||||
" INSERT INTO `{TABLE_NAME}` (fruit_name, variety, quantity_in_stock, price_per_unit, organic)\n",
|
||||
" VALUES\n",
|
||||
" ('Apple', 'Granny Smith', 150, 0.99, 1),\n",
|
||||
" ('Banana', 'Cavendish', 200, 0.59, 0),\n",
|
||||
" ('Orange', 'Navel', 80, 1.29, 1);\n",
|
||||
" \"\"\"\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" conn.commit()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If we still load langchain documents with default parameters of `MySQLLoader` from this example table, the `page_content` of loaded documents will be the first column of the table, and `metadata` will be consisting of key-value pairs of all the other columns."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = MySQLLoader(\n",
|
||||
" engine=engine,\n",
|
||||
" table_name=TABLE_NAME,\n",
|
||||
")\n",
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can specify the content and metadata we want to load by setting the `content_columns` and `metadata_columns` when initializing the `MySQLLoader`.\n",
|
||||
"\n",
|
||||
"1. `content_columns`: The columns to write into the `page_content` of the document.\n",
|
||||
"2. `metadata_columns`: The columns to write into the `metadata` of the document.\n",
|
||||
"\n",
|
||||
"For example here, the values of columns in `content_columns` will be joined together into a space-separated string, as `page_content` of loaded documents, and `metadata` of loaded documents will only contain key-value pairs of columns specified in `metadata_columns`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = MySQLLoader(\n",
|
||||
" engine=engine,\n",
|
||||
" table_name=TABLE_NAME,\n",
|
||||
" content_columns=[\n",
|
||||
" \"variety\",\n",
|
||||
" \"quantity_in_stock\",\n",
|
||||
" \"price_per_unit\",\n",
|
||||
" \"organic\",\n",
|
||||
" ],\n",
|
||||
" metadata_columns=[\"fruit_id\", \"fruit_name\"],\n",
|
||||
")\n",
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Save document with customized page content & metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In order to save langchain document into table with customized metadata fields. We need first create such a table via `MySQLEngine.init_document_table()`, and specify the list of `metadata_columns` we want it to have. In this example, the created table will have table columns:\n",
|
||||
"\n",
|
||||
"- description (type: text): for storing fruit description.\n",
|
||||
"- fruit_name (type text): for storing fruit name.\n",
|
||||
"- organic (type tinyint(1)): to tell if the fruit is organic.\n",
|
||||
"- other_metadata (type: JSON): for storing other metadata information of the fruit.\n",
|
||||
"\n",
|
||||
"We can use the following parameters with `MySQLEngine.init_document_table()` to create the table:\n",
|
||||
"\n",
|
||||
"1. `table_name`: The name of the table within the Cloud SQL database to store langchain documents.\n",
|
||||
"2. `metadata_columns`: A list of `sqlalchemy.Column` indicating the list of metadata columns we need.\n",
|
||||
"3. `content_column`: The name of column to store `page_content` of langchain document. Default: `page_content`.\n",
|
||||
"4. `metadata_json_column`: The name of JSON column to store extra `metadata` of langchain document. Default: `langchain_metadata`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"engine.init_document_table(\n",
|
||||
" TABLE_NAME,\n",
|
||||
" metadata_columns=[\n",
|
||||
" sqlalchemy.Column(\n",
|
||||
" \"fruit_name\",\n",
|
||||
" sqlalchemy.UnicodeText,\n",
|
||||
" primary_key=False,\n",
|
||||
" nullable=True,\n",
|
||||
" ),\n",
|
||||
" sqlalchemy.Column(\n",
|
||||
" \"organic\",\n",
|
||||
" sqlalchemy.Boolean,\n",
|
||||
" primary_key=False,\n",
|
||||
" nullable=True,\n",
|
||||
" ),\n",
|
||||
" ],\n",
|
||||
" content_column=\"description\",\n",
|
||||
" metadata_json_column=\"other_metadata\",\n",
|
||||
" overwrite_existing=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Save documents with `MySQLDocumentSaver.add_documents(<documents>)`. As you can see in this example, \n",
|
||||
"\n",
|
||||
"- `document.page_content` will be saved into `description` column.\n",
|
||||
"- `document.metadata.fruit_name` will be saved into `fruit_name` column.\n",
|
||||
"- `document.metadata.organic` will be saved into `organic` column.\n",
|
||||
"- `document.metadata.fruit_id` will be saved into `other_metadata` column in JSON format."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Granny Smith 150 0.99\",\n",
|
||||
" metadata={\"fruit_id\": 1, \"fruit_name\": \"Apple\", \"organic\": 1},\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"saver = MySQLDocumentSaver(\n",
|
||||
" engine=engine,\n",
|
||||
" table_name=TABLE_NAME,\n",
|
||||
" content_column=\"description\",\n",
|
||||
" metadata_json_column=\"other_metadata\",\n",
|
||||
")\n",
|
||||
"saver.add_documents(test_docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with engine.connect() as conn:\n",
|
||||
" result = conn.execute(sqlalchemy.text(f\"select * from `{TABLE_NAME}`;\"))\n",
|
||||
" print(result.keys())\n",
|
||||
" print(result.fetchall())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete documents with customized page content & metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can also delete documents from table with customized metadata columns via `MySQLDocumentSaver.delete(<documents>)`. The deletion criteria is:\n",
|
||||
"\n",
|
||||
"A `row` should be deleted if there exists a `document` in the list, such that\n",
|
||||
"\n",
|
||||
"- `document.page_content` equals `row[page_content]`\n",
|
||||
"- For every metadata field `k` in `document.metadata`\n",
|
||||
" - `document.metadata[k]` equals `row[k]` or `document.metadata[k]` equals `row[langchain_metadata][k]`\n",
|
||||
"- There no extra metadata field presents in `row` but not in `document.metadata`.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = MySQLLoader(engine=engine, table_name=TABLE_NAME)\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(\"Documents before delete:\", docs)\n",
|
||||
"saver.delete(docs)\n",
|
||||
"print(\"Documents after delete:\", loader.load())"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,382 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "E_RJy7C1bpCT"
|
||||
},
|
||||
"source": [
|
||||
"# Google Cloud SQL for PostgreSQL\n",
|
||||
"\n",
|
||||
"> [Cloud SQL for PostgreSQL](https://cloud.google.com/sql/docs/postgres) is a fully-managed database service that helps you set up, maintain, manage, and administer your PostgreSQL relational databases on Google Cloud Platform. Extend your database application to build AI-powered experiences leveraging Cloud SQL for PostgreSQL's Langchain integrations.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use `Cloud SQL for PostgreSQL` to load Documents with the `PostgreSQLLoader` class."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "xjcxaw6--Xyy"
|
||||
},
|
||||
"source": [
|
||||
"## Before you begin\n",
|
||||
"\n",
|
||||
"To run this notebook, you will need to do the following:\n",
|
||||
"\n",
|
||||
" * [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
|
||||
" * [Enable the Cloud SQL Admin API.](https://console.cloud.google.com/marketplace/product/google/sqladmin.googleapis.com)\n",
|
||||
" * [Create a Cloud SQL for PostgreSQL instance.](https://cloud.google.com/sql/docs/postgres/create-instance)\n",
|
||||
" * [Create a Cloud SQL for PostgreSQL database.](https://cloud.google.com/sql/docs/postgres/create-manage-databases)\n",
|
||||
" * [Add a User to the database.](https://cloud.google.com/sql/docs/postgres/create-manage-users)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "IR54BmgvdHT_"
|
||||
},
|
||||
"source": [
|
||||
"### 🦜🔗 Library Installation\n",
|
||||
"Install the integration library, `langchain-google-cloud-sql-pg`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 1000
|
||||
},
|
||||
"id": "0ZITIDE160OD",
|
||||
"outputId": "90e0636e-ff34-4e1e-ad37-d2a6db4a317e"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-google-cloud-sql-pg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "v40bB_GMcr9f"
|
||||
},
|
||||
"source": [
|
||||
"**Colab only:** Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "6o0iGVIdDD6K"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # Automatically restart kernel after installs so that your environment can access the new packages\n",
|
||||
"# import IPython\n",
|
||||
"\n",
|
||||
"# app = IPython.Application.instance()\n",
|
||||
"# app.kernel.do_shutdown(True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "cTXTbj4UltKf"
|
||||
},
|
||||
"source": [
|
||||
"### 🔐 Authentication\n",
|
||||
"Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
|
||||
"\n",
|
||||
"* If you are using Colab to run this notebook, use the cell below and continue.\n",
|
||||
"* If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.colab import auth\n",
|
||||
"\n",
|
||||
"auth.authenticate_user()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "Uj02bMRAc9_c"
|
||||
},
|
||||
"source": [
|
||||
"### ☁ Set Your Google Cloud Project\n",
|
||||
"Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
|
||||
"\n",
|
||||
"If you don't know your project ID, try the following:\n",
|
||||
"\n",
|
||||
"* Run `gcloud config list`.\n",
|
||||
"* Run `gcloud projects list`.\n",
|
||||
"* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "wnp1R1PYc9_c",
|
||||
"outputId": "6502c721-a2fd-451f-b946-9f7b850d5966"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @title Project { display-mode: \"form\" }\n",
|
||||
"PROJECT_ID = \"gcp_project_id\" # @param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"# Set the project id\n",
|
||||
"! gcloud config set project {PROJECT_ID}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "rEWWNoNnKOgq",
|
||||
"metadata": {
|
||||
"id": "rEWWNoNnKOgq"
|
||||
},
|
||||
"source": [
|
||||
"### 💡 API Enablement\n",
|
||||
"The `langchain_google_cloud_sql_pg` package requires that you [enable the Cloud SQL Admin API](https://console.cloud.google.com/flows/enableapi?apiid=sqladmin.googleapis.com) in your Google Cloud Project."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "5utKIdq7KYi5",
|
||||
"metadata": {
|
||||
"id": "5utKIdq7KYi5"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# enable Cloud SQL Admin API\n",
|
||||
"!gcloud services enable sqladmin.googleapis.com"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f8f2830ee9ca1e01",
|
||||
"metadata": {
|
||||
"id": "f8f2830ee9ca1e01"
|
||||
},
|
||||
"source": [
|
||||
"## Basic Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "OMvzMWRrR6n7",
|
||||
"metadata": {
|
||||
"id": "OMvzMWRrR6n7"
|
||||
},
|
||||
"source": [
|
||||
"### Set Cloud SQL database values\n",
|
||||
"Find your database variables, in the [Cloud SQL Instances page](https://console.cloud.google.com/sql/instances)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "irl7eMFnSPZr",
|
||||
"metadata": {
|
||||
"id": "irl7eMFnSPZr"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @title Set Your Values Here { display-mode: \"form\" }\n",
|
||||
"REGION = \"us-central1\" # @param {type: \"string\"}\n",
|
||||
"INSTANCE = \"my-primary\" # @param {type: \"string\"}\n",
|
||||
"DATABASE = \"my-database\" # @param {type: \"string\"}\n",
|
||||
"TABLE_NAME = \"vector_store\" # @param {type: \"string\"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "QuQigs4UoFQ2",
|
||||
"metadata": {
|
||||
"id": "QuQigs4UoFQ2"
|
||||
},
|
||||
"source": [
|
||||
"### Cloud SQL Engine\n",
|
||||
"\n",
|
||||
"One of the requirements and arguments to establish PostgreSQL as a document loader is a `PostgresEngine` object. The `PostgresEngine` configures a connection pool to your Cloud SQL for PostgreSQL database, enabling successful connections from your application and following industry best practices.\n",
|
||||
"\n",
|
||||
"To create a `PostgresEngine` using `PostgresEngine.from_instance()` you need to provide only 4 things:\n",
|
||||
"\n",
|
||||
"1. `project_id` : Project ID of the Google Cloud Project where the Cloud SQL instance is located.\n",
|
||||
"1. `region` : Region where the Cloud SQL instance is located.\n",
|
||||
"1. `instance` : The name of the Cloud SQL instance.\n",
|
||||
"1. `database` : The name of the database to connect to on the Cloud SQL instance.\n",
|
||||
"\n",
|
||||
"By default, [IAM database authentication](https://cloud.google.com/sql/docs/postgres/iam-authentication) will be used as the method of database authentication. This library uses the IAM principal belonging to the [Application Default Credentials (ADC)](https://cloud.google.com/docs/authentication/application-default-credentials) sourced from the environment.\n",
|
||||
"\n",
|
||||
"Optionally, [built-in database authentication](https://cloud.google.com/sql/docs/postgres/users) using a username and password to access the Cloud SQL database can also be used. Just provide the optional `user` and `password` arguments to `PostgresEngine.from_instance()`:\n",
|
||||
"\n",
|
||||
"* `user` : Database user to use for built-in database authentication and login\n",
|
||||
"* `password` : Database password to use for built-in database authentication and login.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Note**: This tutorial demonstrates the async interface. All async methods have corresponding sync methods."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_pg import PostgresEngine\n",
|
||||
"\n",
|
||||
"engine = await PostgresEngine.afrom_instance(\n",
|
||||
" project_id=PROJECT_ID,\n",
|
||||
" region=REGION,\n",
|
||||
" instance=INSTANCE,\n",
|
||||
" database=DATABASE,\n",
|
||||
")"
|
||||
]
|
||||
},
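{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a sketch, the sync counterpart with the optional built-in authentication arguments described above (assuming the sync method is named `from_instance()`, mirroring `afrom_instance()`; the credential values below are placeholders):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: sync engine creation with built-in database authentication (placeholder credentials)\n",
"engine = PostgresEngine.from_instance(\n",
"    project_id=PROJECT_ID,\n",
"    region=REGION,\n",
"    instance=INSTANCE,\n",
"    database=DATABASE,\n",
"    user=\"my-db-user\",  # placeholder: your database user\n",
"    password=\"my-db-password\",  # placeholder: your database password\n",
")"
]
},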
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "e1tl0aNx7SWy"
|
||||
},
|
||||
"source": [
|
||||
"### Create PostgresLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "z-AZyzAQ7bsf"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_pg import PostgresLoader\n",
|
||||
"\n",
|
||||
"# Creating a basic PostgreSQL object\n",
|
||||
"loader = await PostgresLoader.create(engine, table_name=TABLE_NAME)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "PeOMpftjc9_e"
|
||||
},
|
||||
"source": [
|
||||
"### Load Documents via default table\n",
|
||||
"The loader returns a list of Documents from the table using the first column as page_content and all other columns as metadata. The default table will have the first column as\n",
|
||||
"page_content and the second column as metadata (JSON). Each row becomes a document. Please note that if you want your documents to have ids you will need to add them in."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "cwvi_O5Wc9_e"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_cloud_sql_pg import PostgresLoader\n",
|
||||
"\n",
|
||||
"# Creating a basic PostgresLoader object\n",
|
||||
"loader = await PostgresLoader.create(engine, table_name=TABLE_NAME)\n",
|
||||
"\n",
|
||||
"docs = await loader.aload()\n",
|
||||
"print(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "kSkL9l1Hc9_e"
|
||||
},
|
||||
"source": [
|
||||
"### Load documents via custom table/metadata or custom page content columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = await PostgresLoader.create(\n",
|
||||
" engine,\n",
|
||||
" table_name=TABLE_NAME,\n",
|
||||
" content_columns=[\"product_name\"], # Optional\n",
|
||||
" metadata_columns=[\"id\"], # Optional\n",
|
||||
")\n",
|
||||
"docs = await loader.aload()\n",
|
||||
"print(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "5R6h0_Cvc9_f"
|
||||
},
|
||||
"source": [
|
||||
"### Set page content format\n",
|
||||
"The loader returns a list of Documents, with one document per row, with page content in specified string format, i.e. text (space separated concatenation), JSON, YAML, CSV, etc. JSON and YAML formats include headers, while text and CSV do not include field headers.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "NGNdS7cqc9_f"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = await PostgresLoader.create(\n",
|
||||
" engine,\n",
|
||||
" table_name=\"products\",\n",
|
||||
" content_columns=[\"product_name\", \"description\"],\n",
|
||||
" format=\"YAML\",\n",
|
||||
")\n",
|
||||
"docs = await loader.aload()\n",
|
||||
"print(docs)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": [],
|
||||
"toc_visible": true
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,411 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Firestore in Datastore mode\n",
|
||||
"\n",
|
||||
"> [Firestore in Datastore mode](https://cloud.google.com/datastore) is a serverless document-oriented database that scales to meet any demand. Extend your database application to build AI-powered experiences leveraging Datastore's Langchain integrations.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use [Firestore in Datastore mode](https://cloud.google.com/datastore) to [save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/) with `DatastoreLoader` and `DatastoreSaver`.\n",
|
||||
"\n",
|
||||
"[](https://colab.research.google.com/github/googleapis/langchain-google-datastore-python/blob/main/docs/document_loader.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Before You Begin\n",
|
||||
"\n",
|
||||
"To run this notebook, you will need to do the following:\n",
|
||||
"\n",
|
||||
"* [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
|
||||
"* [Create a Datastore database](https://cloud.google.com/datastore/docs/manage-databases)\n",
|
||||
"\n",
|
||||
"After confirmed access to database in the runtime environment of this notebook, filling the following values and run the cell before running example scripts."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please specify a source for demo purpose.\n",
|
||||
"SOURCE = \"test\" # @param {type:\"Query\"|\"CollectionGroup\"|\"DocumentReference\"|\"string\"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🦜🔗 Library Installation\n",
|
||||
"\n",
|
||||
"The integration lives in its own `langchain-google-datastore` package, so we need to install it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -upgrade --quiet langchain-google-datastore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Colab only**: Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # Automatically restart kernel after installs so that your environment can access the new packages\n",
|
||||
"# import IPython\n",
|
||||
"\n",
|
||||
"# app = IPython.Application.instance()\n",
|
||||
"# app.kernel.do_shutdown(True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ☁ Set Your Google Cloud Project\n",
|
||||
"Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
|
||||
"\n",
|
||||
"If you don't know your project ID, try the following:\n",
|
||||
"\n",
|
||||
"* Run `gcloud config list`.\n",
|
||||
"* Run `gcloud projects list`.\n",
|
||||
"* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please fill in the value below with your Google Cloud project ID and then run the cell.\n",
|
||||
"\n",
|
||||
"PROJECT_ID = \"my-project-id\" # @param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"# Set the project id\n",
|
||||
"!gcloud config set project {PROJECT_ID}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🔐 Authentication\n",
|
||||
"\n",
|
||||
"Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
|
||||
"\n",
|
||||
"- If you are using Colab to run this notebook, use the cell below and continue.\n",
|
||||
"- If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.colab import auth\n",
|
||||
"\n",
|
||||
"auth.authenticate_user()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### API Enablement\n",
|
||||
"The `langchain-google-datastore` package requires that you [enable the Datastore API](https://console.cloud.google.com/flows/enableapi?apiid=datastore.googleapis.com) in your Google Cloud Project."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# enable Datastore API\n",
|
||||
"!gcloud services enable datastore.googleapis.com"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Basic Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Save documents\n",
|
||||
"\n",
|
||||
"`DatastoreSaver` can store Documents into Datastore. By default it will try to extract the Document reference from the metadata\n",
|
||||
"\n",
|
||||
"Save langchain documents with `DatastoreSaver.upsert_documents(<documents>)`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain_google_datastore import DatastoreSaver\n",
|
||||
"\n",
|
||||
"data = [Document(page_content=\"Hello, World!\")]\n",
|
||||
"saver = DatastoreSaver()\n",
|
||||
"saver.upsert_documents(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Save documents without reference\n",
|
||||
"\n",
|
||||
"If a collection is specified the documents will be stored with an auto generated id."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"saver = DatastoreSaver(\"Collection\")\n",
|
||||
"\n",
|
||||
"saver.upsert_documents(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Save documents with other references"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"doc_ids = [\"AnotherCollection/doc_id\", \"foo/bar\"]\n",
|
||||
"saver = DatastoreSaver()\n",
|
||||
"\n",
|
||||
"saver.upsert_documents(documents=data, document_ids=doc_ids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load from Collection or SubCollection"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Load langchain documents with `DatastoreLoader.load()` or `Datastore.lazy_load()`. `lazy_load` returns a generator that only queries database during the iteration. To initialize `DatastoreLoader` class you need to provide:\n",
|
||||
"\n",
|
||||
"1. `source` - An instance of a Query, CollectionGroup, DocumentReference or the single `\\`-delimited path to a Datastore collection`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_datastore import DatastoreLoader\n",
|
||||
"\n",
|
||||
"loader_collection = DatastoreLoader(\"Collection\")\n",
|
||||
"loader_subcollection = DatastoreLoader(\"Collection/doc/SubCollection\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"data_collection = loader_collection.load()\n",
|
||||
"data_subcollection = loader_subcollection.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load a single Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.cloud import datastore\n",
|
||||
"\n",
|
||||
"client = datastore.Client()\n",
|
||||
"doc_ref = client.collection(\"foo\").document(\"bar\")\n",
|
||||
"\n",
|
||||
"loader_document = DatastoreLoader(doc_ref)\n",
|
||||
"\n",
|
||||
"data = loader_document.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load from CollectionGroup or Query"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.cloud.datastore import CollectionGroup, FieldFilter, Query\n",
|
||||
"\n",
|
||||
"col_ref = client.collection(\"col_group\")\n",
|
||||
"collection_group = CollectionGroup(col_ref)\n",
|
||||
"\n",
|
||||
"loader_group = DatastoreLoader(collection_group)\n",
|
||||
"\n",
|
||||
"col_ref = client.collection(\"collection\")\n",
|
||||
"query = col_ref.where(filter=FieldFilter(\"region\", \"==\", \"west_coast\"))\n",
|
||||
"\n",
|
||||
"loader_query = DatastoreLoader(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete documents\n",
|
||||
"\n",
|
||||
"Delete a list of langchain documents from Datastore collection with `DatastoreSaver.delete_documents(<documents>)`.\n",
|
||||
"\n",
|
||||
"If document ids is provided, the Documents will be ignored."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"saver = DatastoreSaver()\n",
|
||||
"\n",
|
||||
"saver.delete_documents(data)\n",
|
||||
"\n",
|
||||
"# The Documents will be ignored and only the document ids will be used.\n",
|
||||
"saver.delete_documents(data, doc_ids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Advanced Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load documents with customize document page content & metadata\n",
|
||||
"\n",
|
||||
"The arguments of `page_content_fields` and `metadata_fields` will specify the Datastore Document fields to be written into LangChain Document `page_content` and `metadata`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = DatastoreLoader(\n",
|
||||
" source=\"foo/bar/subcol\",\n",
|
||||
" page_content_fields=[\"data_field\"],\n",
|
||||
" metadata_fields=[\"metadata_field\"],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Customize Page Content Format\n",
|
||||
"\n",
|
||||
"When the `page_content` contains only one field the information will be the field value only. Otherwise the `page_content` will be in JSON format."
|
||||
]
|
||||
},
|
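||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For example, here is a minimal sketch of the two formats, reusing the `data_field` from above together with a second, hypothetical `extra_field`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sketch only: `extra_field` is a hypothetical field name.\n",
|
||||
"loader_single = DatastoreLoader(\n",
|
||||
" source=\"foo/bar/subcol\",\n",
|
||||
" page_content_fields=[\"data_field\"],  # page_content is the raw field value\n",
|
||||
")\n",
|
||||
"loader_multi = DatastoreLoader(\n",
|
||||
" source=\"foo/bar/subcol\",\n",
|
||||
" page_content_fields=[\"data_field\", \"extra_field\"],  # page_content is a JSON string\n",
|
||||
")"
|
||||
]
|
||||
},
|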
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Customize Connection & Authentication"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.auth import compute_engine\n",
|
||||
"from google.cloud.datastore import Client\n",
|
||||
"\n",
|
||||
"client = Client(database=\"non-default-db\", creds=compute_engine.Credentials())\n",
|
||||
"loader = DatastoreLoader(\n",
|
||||
" source=\"foo\",\n",
|
||||
" client=client,\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,566 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "NKbPFu-GWFDV"
|
||||
},
|
||||
"source": [
|
||||
"# Google El Carro Oracle Operator\n",
|
||||
">\n",
|
||||
"Google [El Carro Oracle Operator](https://github.com/GoogleCloudPlatform/elcarro-oracle-operator)\n",
|
||||
"offers a way to run Oracle databases in Kubernetes as a portable, open source,\n",
|
||||
"community driven, no vendor lock-in container orchestration system. El Carro\n",
|
||||
"provides a powerful declarative API for comprehensive and consistent\n",
|
||||
"configuration and deployment as well as for real-time operations and\n",
|
||||
"monitoring..\n",
|
||||
"Extend your database application to build AI-powered experiences leveraging\n",
|
||||
"Oracle Langchain integrations.\n",
|
||||
"\n",
|
||||
"This guide goes over how to use El Carro Langchain integration to\n",
|
||||
"[save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/)\n",
|
||||
"with `ElCarroLoader` and `ElCarroDocumentSaver`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "ZqONzXRcWMJg"
|
||||
},
|
||||
"source": [
|
||||
"## Before You Begin\n",
|
||||
"\n",
|
||||
"Please complete\n",
|
||||
"the [Getting Started](https://github.com/googleapis/langchain-google-el-carro-python/tree/main/README.md#getting-started)\n",
|
||||
"section of\n",
|
||||
"the README to set up your El Carro Oracle database."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "imbbHxKfWPso"
|
||||
},
|
||||
"source": [
|
||||
"### 🦜🔗 Library Installation\n",
|
||||
"\n",
|
||||
"The integration lives in its own `langchain-google-el-carro` package, so\n",
|
||||
"we need to install it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "Su5BMP2zWRwM"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-google-el-carro"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "azV0k45WWSVI"
|
||||
},
|
||||
"source": [
|
||||
"## Basic Usage\n",
|
||||
"\n",
|
||||
"### Set Up Oracle Database Connection\n",
|
||||
"\n",
|
||||
"ElCarroEngine configures a connection pool to your Oracle database,\n",
|
||||
"enabling successful connections from your application and following industry\n",
|
||||
"best practices.\n",
|
||||
"\n",
|
||||
"You can find the hostname and port values in the status of the El Carro\n",
|
||||
"Kubernetes instance.\n",
|
||||
"Use the user password you created for your PDB."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "xG1mYFkEWbkp"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_el_carro import ElCarroEngine\n",
|
||||
"\n",
|
||||
"elcarro_engine = ElCarroEngine.from_instance(\n",
|
||||
" db_host=\"127.0.0.1\",\n",
|
||||
" db_port=3307,\n",
|
||||
" db_name=\"PDB1\",\n",
|
||||
" db_user=\"scott\",\n",
|
||||
" db_password=\"tiger\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "ICW3k_qUWgyv"
|
||||
},
|
||||
"source": [
|
||||
"### Initialize a table\n",
|
||||
"\n",
|
||||
"Initialize a table of default schema\n",
|
||||
"via `elcarro_engine.init_document_table(<TABLE_NAME>)`. Table Columns:\n",
|
||||
"\n",
|
||||
"- page_content (type: text)\n",
|
||||
"- langchain_metadata (type: JSON)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "JmlGLukoWdfS"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"TABLE_NAME = \"doc_table\"\n",
|
||||
"elcarro_engine.init_document_table(\n",
|
||||
" TABLE_NAME=TABLE_NAME,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "kaI3avj5Wn5O"
|
||||
},
|
||||
"source": [
|
||||
"### Save documents\n",
|
||||
"\n",
|
||||
"Save langchain documents with `ElCarroDocumentSaver.add_documents(<documents>)`.\n",
|
||||
"To initialize `ElCarroDocumentSaver` class you need to provide 2 things:\n",
|
||||
"\n",
|
||||
"1. `elcarro_engine` - An instance of a `ElCarroEngine` engine.\n",
|
||||
"2. `TABLE_NAME` - The name of the table within the Oracle database to store\n",
|
||||
" langchain documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "skaXpthSWpeg"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"from langchain_google_el_carro import ElCarroDocumentSaver\n",
|
||||
"\n",
|
||||
"doc = Document(\n",
|
||||
" page_content=\"Banana\",\n",
|
||||
" metadata={\"type\": \"fruit\", \"weight\": 100, \"organic\": 1},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"saver = ElCarroDocumentSaver(\n",
|
||||
" elcarro_engine=elcarro_engine,\n",
|
||||
" TABLE_NAME=TABLE_NAME,\n",
|
||||
")\n",
|
||||
"saver.add_documents([doc])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "owTYQdNyWs9s"
|
||||
},
|
||||
"source": [
|
||||
"### Load documents\n",
|
||||
"\n",
|
||||
"Load langchain documents with `ElCarroLoader.load()`\n",
|
||||
"or `ElCarroLoader.lazy_load()`.\n",
|
||||
"`lazy_load` returns a generator that only queries database during the iteration.\n",
|
||||
"To initialize `ElCarroLoader` class you need to provide:\n",
|
||||
"\n",
|
||||
"1. `elcarro_engine` - An instance of a `ElCarroEngine` engine.\n",
|
||||
"2. `TABLE_NAME` - The name of the table within the Oracle database to store\n",
|
||||
" langchain documents.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "CM6p11amWvYp"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_el_carro import ElCarroLoader\n",
|
||||
"\n",
|
||||
"loader = ElCarroLoader(elcarro_engine=elcarro_engine, TABLE_NAME=TABLE_NAME)\n",
|
||||
"docs = loader.lazy_load()\n",
|
||||
"for doc in docs:\n",
|
||||
" print(\"Loaded documents:\", doc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "OTIDGiZ8WyS3"
|
||||
},
|
||||
"source": [
|
||||
"### Load documents via query\n",
|
||||
"\n",
|
||||
"Other than loading documents from a table, we can also choose to load documents\n",
|
||||
"from a view generated from a SQL query. For example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "p3OB9AwgWzrq"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_el_carro import ElCarroLoader\n",
|
||||
"\n",
|
||||
"loader = ElCarroLoader(\n",
|
||||
" elcarro_engine=elcarro_engine,\n",
|
||||
" query=f\"SELECT * FROM {TABLE_NAME} WHERE json_value(extra_json_metadata, '$.shape') = 'round'\",\n",
|
||||
")\n",
|
||||
"onedoc = loader.load()\n",
|
||||
"print(onedoc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "E6Fl7YNvW3Ep"
|
||||
},
|
||||
"source": [
|
||||
"The view generated from SQL query can have different schema than default table.\n",
|
||||
"In such cases, the behavior of ElCarroLoader is the same as loading from table\n",
|
||||
"with non-default schema. Please refer to\n",
|
||||
"section [Load documents with customized document page content & metadata](#load-documents-with-customized-document-page-content--metadata)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "QgsP78MhW4wc"
|
||||
},
|
||||
"source": [
|
||||
"### Delete documents\n",
|
||||
"\n",
|
||||
"Delete a list of langchain documents from an Oracle table\n",
|
||||
"with `ElCarroDocumentSaver.delete(<documents>)`.\n",
|
||||
"\n",
|
||||
"For a table with a default schema (page_content, langchain_metadata), the\n",
|
||||
"deletion criteria is:\n",
|
||||
"\n",
|
||||
"A `row` should be deleted if there exists a `document` in the list, such that\n",
|
||||
"\n",
|
||||
"- `document.page_content` equals `row[page_content]`\n",
|
||||
"- `document.metadata` equals `row[langchain_metadata]`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "QSYRHGHXW6IN"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"print(\"Documents before delete:\", docs)\n",
|
||||
"saver.delete(onedoc)\n",
|
||||
"print(\"Documents after delete:\", loader.load())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "RerPkBRAW8yR"
|
||||
},
|
||||
"source": [
|
||||
"## Advanced Usage\n",
|
||||
"\n",
|
||||
"### Load documents with customized document page content & metadata\n",
|
||||
"\n",
|
||||
"First we prepare an example table with non-default schema, and populate it with\n",
|
||||
"some arbitrary data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "u0Fd46aqW-8k"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sqlalchemy\n",
|
||||
"\n",
|
||||
"create_table_query = f\"\"\"CREATE TABLE {TABLE_NAME} (\n",
|
||||
" fruit_id NUMBER GENERATED BY DEFAULT AS IDENTITY (START WITH 1),\n",
|
||||
" fruit_name VARCHAR2(100) NOT NULL,\n",
|
||||
" variety VARCHAR2(50),\n",
|
||||
" quantity_in_stock NUMBER(10) NOT NULL,\n",
|
||||
" price_per_unit NUMBER(6,2) NOT NULL,\n",
|
||||
" organic NUMBER(3) NOT NULL\n",
|
||||
")\"\"\"\n",
|
||||
"\n",
|
||||
"with elcarro_engine.connect() as conn:\n",
|
||||
" conn.execute(sqlalchemy.text(create_table_query))\n",
|
||||
" conn.commit()\n",
|
||||
" conn.execute(\n",
|
||||
" sqlalchemy.text(\n",
|
||||
" f\"\"\"\n",
|
||||
" INSERT INTO {TABLE_NAME} (fruit_name, variety, quantity_in_stock, price_per_unit, organic)\n",
|
||||
" VALUES ('Apple', 'Granny Smith', 150, 0.99, 1)\n",
|
||||
" \"\"\"\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" conn.execute(\n",
|
||||
" sqlalchemy.text(\n",
|
||||
" f\"\"\"\n",
|
||||
" INSERT INTO {TABLE_NAME} (fruit_name, variety, quantity_in_stock, price_per_unit, organic)\n",
|
||||
" VALUES ('Banana', 'Cavendish', 200, 0.59, 0)\n",
|
||||
" \"\"\"\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" conn.execute(\n",
|
||||
" sqlalchemy.text(\n",
|
||||
" f\"\"\"\n",
|
||||
" INSERT INTO {TABLE_NAME} (fruit_name, variety, quantity_in_stock, price_per_unit, organic)\n",
|
||||
" VALUES ('Orange', 'Navel', 80, 1.29, 1)\n",
|
||||
" \"\"\"\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" conn.commit()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "hGPYiTu7XBh3"
|
||||
},
|
||||
"source": [
|
||||
"If we still load langchain documents with default parameters of `ElCarroLoader`\n",
|
||||
"from this example table, the `page_content` of loaded documents will be the\n",
|
||||
"first column of the table, and `metadata` will be consisting of key-value pairs\n",
|
||||
"of all the other columns."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "eQbRapM_XC1S"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = ElCarroLoader(\n",
|
||||
" elcarro_engine=elcarro_engine,\n",
|
||||
" TABLE_NAME=TABLE_NAME,\n",
|
||||
")\n",
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "tOH6i2jWXFqz"
|
||||
},
|
||||
"source": [
|
||||
"We can specify the content and metadata we want to load by setting\n",
|
||||
"the `content_columns` and `metadata_columns` when initializing\n",
|
||||
"the `ElCarroLoader`.\n",
|
||||
"\n",
|
||||
"1. `content_columns`: The columns to write into the `page_content` of the\n",
|
||||
" document.\n",
|
||||
"2. `metadata_columns`: The columns to write into the `metadata` of the document.\n",
|
||||
"\n",
|
||||
"For example here, the values of columns in `content_columns` will be joined\n",
|
||||
"together into a space-separated string, as `page_content` of loaded documents,\n",
|
||||
"and `metadata` of loaded documents will only contain key-value pairs of columns\n",
|
||||
"specified in `metadata_columns`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "9gCFWqgGXHD3"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = ElCarroLoader(\n",
|
||||
" elcarro_engine=elcarro_engine,\n",
|
||||
" TABLE_NAME=TABLE_NAME,\n",
|
||||
" content_columns=[\n",
|
||||
" \"variety\",\n",
|
||||
" \"quantity_in_stock\",\n",
|
||||
" \"price_per_unit\",\n",
|
||||
" \"organic\",\n",
|
||||
" ],\n",
|
||||
" metadata_columns=[\"fruit_id\", \"fruit_name\"],\n",
|
||||
")\n",
|
||||
"loaded_docs = loader.load()\n",
|
||||
"print(f\"Loaded Documents: [{loaded_docs}]\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "4KlSfvPJXKgM"
|
||||
},
|
||||
"source": [
|
||||
"### Save document with customized page content & metadata\n",
|
||||
"\n",
|
||||
"In order to save langchain document into table with customized metadata fields.\n",
|
||||
"We need first create such a table via `ElCarroEngine.init_document_table()`, and\n",
|
||||
"specify the list of `metadata_columns` we want it to have. In this example, the\n",
|
||||
"created table will have table columns:\n",
|
||||
"\n",
|
||||
"- description (type: text): for storing fruit description.\n",
|
||||
"- fruit_name (type text): for storing fruit name.\n",
|
||||
"- organic (type tinyint(1)): to tell if the fruit is organic.\n",
|
||||
"- other_metadata (type: JSON): for storing other metadata information of the\n",
|
||||
" fruit.\n",
|
||||
"\n",
|
||||
"We can use the following parameters\n",
|
||||
"with `elcarro_engine.init_document_table()` to create the table:\n",
|
||||
"\n",
|
||||
"1. `TABLE_NAME`: The name of the table within the Oracle database to store\n",
|
||||
" langchain documents.\n",
|
||||
"2. `metadata_columns`: A list of `sqlalchemy.Column` indicating the list of\n",
|
||||
" metadata columns we need.\n",
|
||||
"3. `content_column`: column name to store `page_content` of langchain\n",
|
||||
" document. Default: `\"page_content\", \"VARCHAR2(4000)\"`\n",
|
||||
"4. `metadata_json_column`: column name to store extra\n",
|
||||
" JSON `metadata` of langchain document.\n",
|
||||
" Default: `\"langchain_metadata\", \"VARCHAR2(4000)\"`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "1Wqs05gpXMW9"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"elcarro_engine.init_document_table(\n",
|
||||
" TABLE_NAME=TABLE_NAME,\n",
|
||||
" metadata_columns=[\n",
|
||||
" sqlalchemy.Column(\"type\", sqlalchemy.dialects.oracle.VARCHAR2(200)),\n",
|
||||
" sqlalchemy.Column(\"weight\", sqlalchemy.INT),\n",
|
||||
" ],\n",
|
||||
" content_column=\"content\",\n",
|
||||
" metadata_json_column=\"extra_json_metadata\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "bVEWHYU-XPFt"
|
||||
},
|
||||
"source": [
|
||||
"Save documents with `ElCarroDocumentSaver.add_documents(<documents>)`. As you\n",
|
||||
"can see in this example,\n",
|
||||
"\n",
|
||||
"- `document.page_content` will be saved into `page_content` column.\n",
|
||||
"- `document.metadata.type` will be saved into `type` column.\n",
|
||||
"- `document.metadata.weight` will be saved into `weight` column.\n",
|
||||
"- `document.metadata.organic` will be saved into `extra_json_metadata` column in\n",
|
||||
" JSON format.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "Iy4wRZLPXQn5"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"doc = Document(\n",
|
||||
" page_content=\"Banana\",\n",
|
||||
" metadata={\"type\": \"fruit\", \"weight\": 100, \"organic\": 1},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(f\"Original Document: [{doc}]\")\n",
|
||||
"\n",
|
||||
"saver = ElCarroDocumentSaver(\n",
|
||||
" elcarro_engine=elcarro_engine,\n",
|
||||
" TABLE_NAME=TABLE_NAME,\n",
|
||||
" content_column=\"content\",\n",
|
||||
" metadata_json_column=\"extra_json_metadata\",\n",
|
||||
")\n",
|
||||
"saver.add_documents([doc])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "x0vkL7PKXUmU"
|
||||
},
|
||||
"source": [
|
||||
"### Delete documents with customized page content & metadata\n",
|
||||
"\n",
|
||||
"We can also delete documents from table with customized metadata columns\n",
|
||||
"via `ElCarroDocumentSaver.delete(<documents>)`. The deletion criteria is:\n",
|
||||
"\n",
|
||||
"A `row` should be deleted if there exists a `document` in the list, such that\n",
|
||||
"\n",
|
||||
"- `document.page_content` equals `row[page_content]`\n",
|
||||
"- For every metadata field `k` in `document.metadata`\n",
|
||||
" - `document.metadata[k]` equals `row[k]` or `document.metadata[k]`\n",
|
||||
" equals `row[langchain_metadata][k]`\n",
|
||||
"- There is no extra metadata field present in `row` but not\n",
|
||||
" in `document.metadata`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "OcJPeCuKXWSa"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"saver.delete(loaded_docs)\n",
|
||||
"print(f\"Documents left: {len(loader.load())}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "S4SxUoY-XsPN"
|
||||
},
|
||||
"source": [
|
||||
"## More examples\n",
|
||||
"\n",
|
||||
"Please look\n",
|
||||
"at [demo_doc_loader_basic.py](https://github.com/googleapis/langchain-google-el-carro-python/tree/main/samples/demo_doc_loader_basic.py)\n",
|
||||
"and [demo_doc_loader_advanced.py](https://github.com/googleapis/langchain-google-el-carro-python/tree/main/samples/demo_doc_loader_advanced.py)\n",
|
||||
"for\n",
|
||||
"complete code examples.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
@@ -1,413 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Firestore (Native Mode)\n",
|
||||
"\n",
|
||||
"> [Firestore](https://cloud.google.com/firestore) is a serverless document-oriented database that scales to meet any demand. Extend your database application to build AI-powered experiences leveraging Firestore's Langchain integrations.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use [Firestore](https://cloud.google.com/firestore) to [save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/) with `FirestoreLoader` and `FirestoreSaver`.\n",
|
||||
"\n",
|
||||
"[](https://colab.research.google.com/github/googleapis/langchain-google-firestore-python/blob/main/docs/document_loader.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Before You Begin\n",
|
||||
"\n",
|
||||
"To run this notebook, you will need to do the following:\n",
|
||||
"\n",
|
||||
"* [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
|
||||
"* [Create a Firestore database](https://cloud.google.com/firestore/docs/manage-databases)\n",
|
||||
"\n",
|
||||
"After confirmed access to database in the runtime environment of this notebook, filling the following values and run the cell before running example scripts."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please specify a source for demo purpose.\n",
|
||||
"SOURCE = \"test\" # @param {type:\"Query\"|\"CollectionGroup\"|\"DocumentReference\"|\"string\"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🦜🔗 Library Installation\n",
|
||||
"\n",
|
||||
"The integration lives in its own `langchain-google-firestore` package, so we need to install it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -upgrade --quiet langchain-google-firestore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Colab only**: Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # Automatically restart kernel after installs so that your environment can access the new packages\n",
|
||||
"# import IPython\n",
|
||||
"\n",
|
||||
"# app = IPython.Application.instance()\n",
|
||||
"# app.kernel.do_shutdown(True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ☁ Set Your Google Cloud Project\n",
|
||||
"Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
|
||||
"\n",
|
||||
"If you don't know your project ID, try the following:\n",
|
||||
"\n",
|
||||
"* Run `gcloud config list`.\n",
|
||||
"* Run `gcloud projects list`.\n",
|
||||
"* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please fill in the value below with your Google Cloud project ID and then run the cell.\n",
|
||||
"\n",
|
||||
"PROJECT_ID = \"my-project-id\" # @param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"# Set the project id\n",
|
||||
"!gcloud config set project {PROJECT_ID}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🔐 Authentication\n",
|
||||
"\n",
|
||||
"Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
|
||||
"\n",
|
||||
"- If you are using Colab to run this notebook, use the cell below and continue.\n",
|
||||
"- If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.colab import auth\n",
|
||||
"\n",
|
||||
"auth.authenticate_user()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### API Enablement\n",
|
||||
"The `langchain-google-firestore` package requires that you [enable the Firestore Admin API](https://console.cloud.google.com/flows/enableapi?apiid=firestore.googleapis.com) in your Google Cloud Project."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# enable Firestore Admin API\n",
|
||||
"!gcloud services enable firestore.googleapis.com"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Basic Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Save documents\n",
|
||||
"\n",
|
||||
"`FirestoreSaver` can store Documents into Firestore. By default it will try to extract the Document reference from the metadata\n",
|
||||
"\n",
|
||||
"Save langchain documents with `FirestoreSaver.upsert_documents(<documents>)`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents.base import Document\n",
|
||||
"from langchain_google_firestore import FirestoreSaver\n",
|
||||
"\n",
|
||||
"saver = FirestoreSaver()\n",
|
||||
"\n",
|
||||
"data = [Document(page_content=\"Hello, World!\")]\n",
|
||||
"\n",
|
||||
"saver.upsert_documents(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Save documents without reference\n",
|
||||
"\n",
|
||||
"If a collection is specified the documents will be stored with an auto generated id."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"saver = FirestoreSaver(\"Collection\")\n",
|
||||
"\n",
|
||||
"saver.upsert_documents(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Save documents with other references"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"doc_ids = [\"AnotherCollection/doc_id\", \"foo/bar\"]\n",
|
||||
"saver = FirestoreSaver()\n",
|
||||
"\n",
|
||||
"saver.upsert_documents(documents=data, document_ids=doc_ids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load from Collection or SubCollection"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Load langchain documents with `FirestoreLoader.load()` or `Firestore.lazy_load()`. `lazy_load` returns a generator that only queries database during the iteration. To initialize `FirestoreLoader` class you need to provide:\n",
|
||||
"\n",
|
||||
"1. `source` - An instance of a Query, CollectionGroup, DocumentReference or the single `\\`-delimited path to a Firestore collection`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_google_firestore import FirestoreLoader\n",
|
||||
"\n",
|
||||
"loader_collection = FirestoreLoader(\"Collection\")\n",
|
||||
"loader_subcollection = FirestoreLoader(\"Collection/doc/SubCollection\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"data_collection = loader_collection.load()\n",
|
||||
"data_subcollection = loader_subcollection.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load a single Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.cloud import firestore\n",
|
||||
"\n",
|
||||
"client = firestore.Client()\n",
|
||||
"doc_ref = client.collection(\"foo\").document(\"bar\")\n",
|
||||
"\n",
|
||||
"loader_document = FirestoreLoader(doc_ref)\n",
|
||||
"\n",
|
||||
"data = loader_document.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load from CollectionGroup or Query"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.cloud.firestore import CollectionGroup, FieldFilter, Query\n",
|
||||
"\n",
|
||||
"col_ref = client.collection(\"col_group\")\n",
|
||||
"collection_group = CollectionGroup(col_ref)\n",
|
||||
"\n",
|
||||
"loader_group = FirestoreLoader(collection_group)\n",
|
||||
"\n",
|
||||
"col_ref = client.collection(\"collection\")\n",
|
||||
"query = col_ref.where(filter=FieldFilter(\"region\", \"==\", \"west_coast\"))\n",
|
||||
"\n",
|
||||
"loader_query = FirestoreLoader(query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete documents\n",
|
||||
"\n",
|
||||
"Delete a list of langchain documents from Firestore collection with `FirestoreSaver.delete_documents(<documents>)`.\n",
|
||||
"\n",
|
||||
"If document ids is provided, the Documents will be ignored."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"saver = FirestoreSaver()\n",
|
||||
"\n",
|
||||
"saver.delete_documents(data)\n",
|
||||
"\n",
|
||||
"# The Documents will be ignored and only the document ids will be used.\n",
|
||||
"saver.delete_documents(data, doc_ids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Advanced Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load documents with customize document page content & metadata\n",
|
||||
"\n",
|
||||
"The arguments of `page_content_fields` and `metadata_fields` will specify the Firestore Document fields to be written into LangChain Document `page_content` and `metadata`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = FirestoreLoader(\n",
|
||||
" source=\"foo/bar/subcol\",\n",
|
||||
" page_content_fields=[\"data_field\"],\n",
|
||||
" metadata_fields=[\"metadata_field\"],\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Customize Page Content Format\n",
|
||||
"\n",
|
||||
"When the `page_content` contains only one field the information will be the field value only. Otherwise the `page_content` will be in JSON format."
|
||||
]
|
||||
},
|
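||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For example, here is a minimal sketch of the two formats, reusing the `data_field` from above together with a second, hypothetical `extra_field`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sketch only: `extra_field` is a hypothetical field name.\n",
|
||||
"loader_single = FirestoreLoader(\n",
|
||||
" source=\"foo/bar/subcol\",\n",
|
||||
" page_content_fields=[\"data_field\"],  # page_content is the raw field value\n",
|
||||
")\n",
|
||||
"loader_multi = FirestoreLoader(\n",
|
||||
" source=\"foo/bar/subcol\",\n",
|
||||
" page_content_fields=[\"data_field\", \"extra_field\"],  # page_content is a JSON string\n",
|
||||
")"
|
||||
]
|
||||
},
|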
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Customize Connection & Authentication"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.auth import compute_engine\n",
|
||||
"from google.cloud.firestore import Client\n",
|
||||
"\n",
|
||||
"client = Client(database=\"non-default-db\", creds=compute_engine.Credentials())\n",
|
||||
"loader = FirestoreLoader(\n",
|
||||
" source=\"foo\",\n",
|
||||
" client=client,\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,318 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "6-0_o3DxsFGi"
|
||||
},
|
||||
"source": [
|
||||
"# Google Memorystore for Redis\n",
|
||||
"\n",
|
||||
"> [Google Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis/memorystore-for-redis-overview) is a fully-managed service that is powered by the Redis in-memory data store to build application caches that provide sub-millisecond data access. Extend your database application to build AI-powered experiences leveraging Memorystore for Redis's Langchain integrations.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use [Memorystore for Redis](https://cloud.google.com/memorystore/docs/redis/memorystore-for-redis-overview) to [save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/) with `MemorystoreDocumentLoader` and `MemorystoreDocumentSaver`.\n",
|
||||
"\n",
|
||||
"[](https://colab.research.google.com/github/googleapis/langchain-google-memorystore-redis-python/blob/main/docs/document_loader.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Before You Begin\n",
|
||||
"\n",
|
||||
"To run this notebook, you will need to do the following:\n",
|
||||
"\n",
|
||||
"* [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
|
||||
"* [Create a Memorystore for Redis instance](https://cloud.google.com/memorystore/docs/redis/create-instance-console). Ensure that the version is greater than or equal to 5.0.\n",
|
||||
"\n",
|
||||
"After confirmed access to database in the runtime environment of this notebook, filling the following values and run the cell before running example scripts."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please specify an endpoint associated with the instance and a key prefix for demo purpose.\n",
|
||||
"ENDPOINT = \"redis://127.0.0.1:6379\" # @param {type:\"string\"}\n",
|
||||
"KEY_PREFIX = \"doc:\" # @param {type:\"string\"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🦜🔗 Library Installation\n",
|
||||
"\n",
|
||||
"The integration lives in its own `langchain-google-memorystore-redis` package, so we need to install it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -upgrade --quiet langchain-google-memorystore-redis"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Colab only**: Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # Automatically restart kernel after installs so that your environment can access the new packages\n",
|
||||
"# import IPython\n",
|
||||
"\n",
|
||||
"# app = IPython.Application.instance()\n",
|
||||
"# app.kernel.do_shutdown(True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ☁ Set Your Google Cloud Project\n",
|
||||
"Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
|
||||
"\n",
|
||||
"If you don't know your project ID, try the following:\n",
|
||||
"\n",
|
||||
"* Run `gcloud config list`.\n",
|
||||
"* Run `gcloud projects list`.\n",
|
||||
"* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please fill in the value below with your Google Cloud project ID and then run the cell.\n",
|
||||
"\n",
|
||||
"PROJECT_ID = \"my-project-id\" # @param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"# Set the project id\n",
|
||||
"!gcloud config set project {PROJECT_ID}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🔐 Authentication\n",
|
||||
"\n",
|
||||
"Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
|
||||
"\n",
|
||||
"- If you are using Colab to run this notebook, use the cell below and continue.\n",
|
||||
"- If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.colab import auth\n",
|
||||
"\n",
|
||||
"auth.authenticate_user()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "2L7kMu__sFGl"
|
||||
},
|
||||
"source": [
|
||||
"## Basic Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Save documents\n",
|
||||
"\n",
|
||||
"Save langchain documents with `MemorystoreDocumentSaver.add_documents(<documents>)`. To initialize `MemorystoreDocumentSaver` class you need to provide 2 things:\n",
|
||||
"\n",
|
||||
"1. `client` - A `redis.Redis` client object.\n",
|
||||
"1. `key_prefix` - A prefix for the keys to store Documents in Redis.\n",
|
||||
"\n",
|
||||
"The Documents will be stored into randomly generated keys with the specified prefix of `key_prefix`. Alternatively, you can designate the suffixes of the keys by specifying `ids` in the `add_documents` method."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import redis\n",
|
||||
"from langchain_core.documents.base import Document\n",
|
||||
"from langchain_google_memorystore_redis import MemorystoreDocumentSaver\n",
|
||||
"\n",
|
||||
"test_docs = [\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Apple Granny Smith 150 0.99 1\",\n",
|
||||
" metadata={\"fruit_id\": 1},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Banana Cavendish 200 0.59 0\",\n",
|
||||
" metadata={\"fruit_id\": 2},\n",
|
||||
" ),\n",
|
||||
" Document(\n",
|
||||
" page_content=\"Orange Navel 80 1.29 1\",\n",
|
||||
" metadata={\"fruit_id\": 3},\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"doc_ids = [f\"{i}\" for i in range(len(test_docs))]\n",
|
||||
"\n",
|
||||
"redis_client = redis.from_url(ENDPOINT)\n",
|
||||
"saver = MemorystoreDocumentSaver(\n",
|
||||
" client=redis_client,\n",
|
||||
" key_prefix=KEY_PREFIX,\n",
|
||||
" content_field=\"page_content\",\n",
|
||||
")\n",
|
||||
"saver.add_documents(test_docs, ids=doc_ids)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "A2fT1iEhsFGl"
|
||||
},
|
||||
"source": [
|
||||
"### Load documents\n",
|
||||
"\n",
|
||||
"Initialize a loader that loads all documents stored in the Memorystore for Redis instance with a specific prefix.\n",
|
||||
"\n",
|
||||
"Load langchain documents with `MemorystoreDocumentLoader.load()` or `MemorystoreDocumentLoader.lazy_load()`. `lazy_load` returns a generator that only queries database during the iteration. To initialize `MemorystoreDocumentLoader` class you need to provide:\n",
|
||||
"\n",
|
||||
"1. `client` - A `redis.Redis` client object.\n",
|
||||
"1. `key_prefix` - A prefix for the keys to store Documents in Redis."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "YEDKWR6asFGl"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import redis\n",
|
||||
"from langchain_google_memorystore_redis import MemorystoreDocumentLoader\n",
|
||||
"\n",
|
||||
"redis_client = redis.from_url(ENDPOINT)\n",
|
||||
"loader = MemorystoreDocumentLoader(\n",
|
||||
" client=redis_client,\n",
|
||||
" key_prefix=KEY_PREFIX,\n",
|
||||
" content_fields=set([\"page_content\"]),\n",
|
||||
")\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" print(\"Loaded documents:\", doc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete documents\n",
|
||||
"\n",
|
||||
"Delete all of keys with the specified prefix in the Memorystore for Redis instance with `MemorystoreDocumentSaver.delete()`. You can also specify the suffixes of the keys if you know."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"print(\"Documents before delete:\", docs)\n",
|
||||
"\n",
|
||||
"saver.delete(ids=[0])\n",
|
||||
"print(\"Documents after delete:\", loader.load())\n",
|
||||
"\n",
|
||||
"saver.delete()\n",
|
||||
"print(\"Documents after delete all:\", loader.load())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Advanced Usage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"id": "02xxvmzTsFGm"
|
||||
},
|
||||
"source": [
|
||||
"### Customize Document Page Content & Metadata\n",
|
||||
"\n",
|
||||
"When initializing a loader with more than 1 content field, the `page_content` of the loaded Documents will contain a JSON-encoded string with top level fields equal to the specified fields in `content_fields`.\n",
|
||||
"\n",
|
||||
"If the `metadata_fields` are specified, the `metadata` field of the loaded Documents will only have the top level fields equal to the specified `metadata_fields`. If any of the values of the metadata fields is stored as a JSON-encoded string, it will be decoded prior to being loaded to the metadata fields."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "BvS3UFsysFGm"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = MemorystoreDocumentLoader(\n",
|
||||
" client=redis_client,\n",
|
||||
" key_prefix=KEY_PREFIX,\n",
|
||||
" content_fields=set([\"content_field_1\", \"content_field_2\"]),\n",
|
||||
" metadata_fields=set([\"title\", \"author\"]),\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": [
|
||||
{
|
||||
"file_id": "1kuFhDfyzOdzS1apxQ--1efXB1pJ79yVY",
|
||||
"timestamp": 1708033015250
|
||||
}
|
||||
]
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
||||
@@ -1,512 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Spanner\n",
|
||||
"\n",
|
||||
"> [Spanner](https://cloud.google.com/spanner) is a highly scalable database that combines unlimited scalability with relational semantics, such as secondary indexes, strong consistency, schemas, and SQL providing 99.999% availability in one easy solution. Extend your database application to build AI-powered experiences leveraging Spanner's Langchain integrations.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use [Spanner](https://cloud.google.com/spanner) to [save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/) with `SpannerLoader` and `SpannerDocumentSaver`.\n",
|
||||
"\n",
|
||||
"[](https://colab.research.google.com/github/googleapis/langchain-google-spanner-python/blob/main/docs/document_loader.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Before You Begin\n",
|
||||
"\n",
|
||||
"To run this notebook, you will need to do the following:\n",
|
||||
"\n",
|
||||
"* [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n",
|
||||
"* [Create a Spanner instance](https://cloud.google.com/spanner/docs/create-manage-instances)\n",
|
||||
"* [Create a Spanner database](https://cloud.google.com/spanner/docs/create-manage-databases)\n",
|
||||
"* [Create a Spanner table](https://cloud.google.com/spanner/docs/create-query-database-console#create-schema)\n",
|
||||
"\n",
|
||||
"After confirmed access to database in the runtime environment of this notebook, filling the following values and run the cell before running example scripts."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please specify an instance id, a database, and a table for demo purpose.\n",
|
||||
"INSTANCE_ID = \"test_instance\" # @param {type:\"string\"}\n",
|
||||
"DATABASE_ID = \"test_database\" # @param {type:\"string\"}\n",
|
||||
"TABLE_NAME = \"test_table\" # @param {type:\"string\"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🦜🔗 Library Installation\n",
|
||||
"\n",
|
||||
"The integration lives in its own `langchain-google-spanner` package, so we need to install it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -upgrade --quiet langchain-google-spanner"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Colab only**: Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # Automatically restart kernel after installs so that your environment can access the new packages\n",
|
||||
"# import IPython\n",
|
||||
"\n",
|
||||
"# app = IPython.Application.instance()\n",
|
||||
"# app.kernel.do_shutdown(True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ☁ Set Your Google Cloud Project\n",
|
||||
"Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n",
|
||||
"\n",
|
||||
"If you don't know your project ID, try the following:\n",
|
||||
"\n",
|
||||
"* Run `gcloud config list`.\n",
|
||||
"* Run `gcloud projects list`.\n",
|
||||
"* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# @markdown Please fill in the value below with your Google Cloud project ID and then run the cell.\n",
|
||||
"\n",
|
||||
"PROJECT_ID = \"my-project-id\" # @param {type:\"string\"}\n",
|
||||
"\n",
|
||||
"# Set the project id\n",
|
||||
"!gcloud config set project {PROJECT_ID}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 🔐 Authentication\n",
|
||||
"\n",
|
||||
"Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n",
|
||||
"\n",
|
||||
"- If you are using Colab to run this notebook, use the cell below and continue.\n",
|
||||
"- If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from google.colab import auth\n",
|
||||
"\n",
|
||||
"auth.authenticate_user()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Basic Usage"
|
||||
]
|
||||
},
|
||||
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save documents\n",
"\n",
"Save langchain documents with `SpannerDocumentSaver.add_documents(<documents>)`. To initialize the `SpannerDocumentSaver` class, you need to provide 3 things:\n",
"\n",
"1. `instance_id` - The Spanner instance to save data to.\n",
"1. `database_id` - The Spanner database to save data to.\n",
"1. `table_name` - The name of the table within the Spanner database to store langchain documents."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.documents import Document\n",
"from langchain_google_spanner import SpannerDocumentSaver\n",
"\n",
"test_docs = [\n",
"    Document(\n",
"        page_content=\"Apple Granny Smith 150 0.99 1\",\n",
"        metadata={\"fruit_id\": 1},\n",
"    ),\n",
"    Document(\n",
"        page_content=\"Banana Cavendish 200 0.59 0\",\n",
"        metadata={\"fruit_id\": 2},\n",
"    ),\n",
"    Document(\n",
"        page_content=\"Orange Navel 80 1.29 1\",\n",
"        metadata={\"fruit_id\": 3},\n",
"    ),\n",
"]\n",
"\n",
"saver = SpannerDocumentSaver(\n",
"    instance_id=INSTANCE_ID,\n",
"    database_id=DATABASE_ID,\n",
"    table_name=TABLE_NAME,\n",
")\n",
"saver.add_documents(test_docs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Querying for Documents from Spanner\n",
"\n",
"For more details on connecting to a Spanner table, please check the [Python SDK documentation](https://cloud.google.com/python/docs/reference/spanner/latest)."
]
},
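{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check, you can also query the table directly with the plain `google-cloud-spanner` client. This is only a minimal sketch using the Python SDK rather than the integration package, and it assumes the `INSTANCE_ID`, `DATABASE_ID` and `TABLE_NAME` variables defined earlier in this notebook:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from google.cloud import spanner\n",
"\n",
"client = spanner.Client()\n",
"database = client.instance(INSTANCE_ID).database(DATABASE_ID)\n",
"\n",
"# Run a read-only snapshot query; each row comes back as a list of column values.\n",
"with database.snapshot() as snapshot:\n",
"    for row in snapshot.execute_sql(f\"SELECT * FROM {TABLE_NAME}\"):\n",
"        print(row)"
]
},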
|
||||
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Load documents from table\n",
"\n",
"Load langchain documents with `SpannerLoader.load()` or `SpannerLoader.lazy_load()`. `lazy_load` returns a generator that only queries the database during iteration. To initialize the `SpannerLoader` class, you need to provide:\n",
"\n",
"1. `instance_id` - The Spanner instance to load data from.\n",
"1. `database_id` - The Spanner database to load data from.\n",
"1. `query` - A query in the database's dialect."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_google_spanner import SpannerLoader\n",
"\n",
"query = f\"SELECT * from {TABLE_NAME}\"\n",
"loader = SpannerLoader(\n",
"    instance_id=INSTANCE_ID,\n",
"    database_id=DATABASE_ID,\n",
"    query=query,\n",
")\n",
"\n",
"for doc in loader.lazy_load():\n",
"    print(doc)\n",
"    break"
]
},
|
||||
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Delete documents\n",
"\n",
"Delete a list of langchain documents from the table with `SpannerDocumentSaver.delete(<documents>)`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"docs = loader.load()\n",
"print(\"Documents before delete:\", docs)\n",
"\n",
"doc = test_docs[0]\n",
"saver.delete([doc])\n",
"print(\"Documents after delete:\", loader.load())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Advanced Usage"
]
},
|
||||
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Customize Document Page Content & Metadata\n",
"\n",
"The loader returns a list of Documents, with page content taken from specific data columns. All other data columns are added to the metadata. Each row becomes a document."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Customize page content format\n",
"\n",
"The SpannerLoader assumes there is a column called `page_content`. This default can be changed like so:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"custom_content_loader = SpannerLoader(\n",
"    INSTANCE_ID, DATABASE_ID, query, content_columns=[\"custom_content\"]\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If multiple columns are specified, the page content's string format defaults to `text` (space-separated string concatenation). Other formats the user can specify include `text`, `JSON`, `YAML`, and `CSV`."
]
},
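{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a minimal sketch, choosing a non-default format could look like the following. This assumes the loader accepts a `format` keyword taking one of the values above; the column names `product` and `price` are purely illustrative:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"json_content_loader = SpannerLoader(\n",
"    INSTANCE_ID,\n",
"    DATABASE_ID,\n",
"    query,\n",
"    content_columns=[\"product\", \"price\"],  # illustrative column names\n",
"    format=\"JSON\",  # assumed keyword; one of text, JSON, YAML, CSV\n",
")"
]
},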
|
||||
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Customize metadata format\n",
"\n",
"The SpannerLoader assumes there is a metadata column called `langchain_metadata` that stores JSON data. This column is used as the base dictionary. By default, all other column data is added on top of it and may overwrite the original values. These defaults can be changed like so:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"custom_metadata_loader = SpannerLoader(\n",
"    INSTANCE_ID, DATABASE_ID, query, metadata_columns=[\"column1\", \"column2\"]\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Customize JSON metadata column name\n",
"\n",
"By default, the loader uses `langchain_metadata` as the base dictionary. This can be customized to select any JSON column to use as the base dictionary for the Document's metadata."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"custom_metadata_json_loader = SpannerLoader(\n",
"    INSTANCE_ID, DATABASE_ID, query, metadata_json_column=\"another-json-column\"\n",
")"
]
},
|
||||
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Custom staleness\n",
"\n",
"The default [staleness](https://cloud.google.com/python/docs/reference/spanner/latest/snapshot-usage#beginning-a-snapshot) is 15s. This can be customized by specifying a weaker bound: either perform all reads as of a given timestamp, or as of a given duration in the past."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"\n",
"# Read as of a specific timestamp\n",
"timestamp = datetime.datetime.utcnow()\n",
"custom_timestamp_loader = SpannerLoader(\n",
"    INSTANCE_ID,\n",
"    DATABASE_ID,\n",
"    query,\n",
"    staleness=timestamp,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Read as of a duration (in seconds) in the past\n",
"duration = 20.0\n",
"custom_duration_loader = SpannerLoader(\n",
"    INSTANCE_ID,\n",
"    DATABASE_ID,\n",
"    query,\n",
"    staleness=duration,\n",
")"
]
},
|
||||
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Turn on data boost\n",
"\n",
"By default, the loader does not use [data boost](https://cloud.google.com/spanner/docs/databoost/databoost-overview), since it has additional costs associated and requires additional IAM permissions. However, the user can choose to turn it on."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"custom_databoost_loader = SpannerLoader(\n",
"    INSTANCE_ID,\n",
"    DATABASE_ID,\n",
"    query,\n",
"    databoost=True,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Custom client\n",
"\n",
"By default, the integration creates a client with the default project and credentials. To pass in `credentials` and `project` explicitly, a custom client can be passed to the constructor."
]
},
|
||||
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from google.cloud import spanner\n",
"from google.oauth2 import service_account\n",
"\n",
"creds = service_account.Credentials.from_service_account_file(\"/path/to/key.json\")\n",
"custom_client = spanner.Client(project=\"my-project\", credentials=creds)\n",
"saver = SpannerDocumentSaver(\n",
"    INSTANCE_ID,\n",
"    DATABASE_ID,\n",
"    TABLE_NAME,\n",
"    client=custom_client,\n",
")"
]
},
|
||||
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Custom initialization for SpannerDocumentSaver\n",
"\n",
"The SpannerDocumentSaver allows custom initialization. This lets the user specify how the Document is saved into the table.\n",
"\n",
"`content_column`: This will be used as the column name for the Document's page content. Defaults to `page_content`.\n",
"\n",
"`metadata_columns`: These metadata keys will be saved into dedicated columns if they exist in the Document's metadata.\n",
"\n",
"`metadata_json_column`: The column name for the special JSON column. Defaults to `langchain_metadata`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"custom_saver = SpannerDocumentSaver(\n",
"    INSTANCE_ID,\n",
"    DATABASE_ID,\n",
"    TABLE_NAME,\n",
"    content_column=\"my-content\",\n",
"    metadata_columns=[\"foo\"],\n",
"    metadata_json_column=\"my-special-json-column\",\n",
")"
]
},
|
||||
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Initialize custom schema for Spanner\n",
"\n",
"The SpannerDocumentSaver has an `init_document_table` method for creating a new table to store docs with a custom schema."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_google_spanner import Column\n",
"\n",
"new_table_name = \"my_new_table\"\n",
"\n",
"SpannerDocumentSaver.init_document_table(\n",
"    INSTANCE_ID,\n",
"    DATABASE_ID,\n",
"    new_table_name,\n",
"    content_column=\"my-page-content\",\n",
"    metadata_columns=[\n",
"        Column(\"category\", \"STRING(36)\", True),\n",
"        Column(\"price\", \"FLOAT64\", False),\n",
"    ],\n",
")"
]
}
|
||||
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
|
||||
@@ -198,8 +198,8 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.schema import Document\n",
"from langchain_community.document_loaders import TensorflowDatasetLoader\n",
"from langchain_core.documents import Document\n",
"\n",
"loader = TensorflowDatasetLoader(\n",
"    dataset_name=\"mlqa/en\",\n",
@@ -1,189 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TiDB\n",
"\n",
"> [TiDB](https://github.com/pingcap/tidb) is an open-source, cloud-native, distributed, MySQL-Compatible database for elastic scale and real-time analytics.\n",
"\n",
"This notebook introduces how to use `TiDBLoader` to load data from TiDB in langchain."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"\n",
"Before using the `TiDBLoader`, we will install the following dependencies:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain"
]
},
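{
"cell_type": "markdown",
"metadata": {},
"source": [
"The cells below also import `sqlalchemy` and connect through the `pymysql` driver, so those packages need to be available as well. A minimal sketch of installing them, assuming they are not already present in your environment:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Driver and SQL toolkit used by the connection cells below\n",
"%pip install --upgrade --quiet pymysql sqlalchemy"
]
},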
|
||||
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Then, we will configure the connection to a TiDB cluster. In this notebook, we will follow the standard connection method provided by TiDB Cloud to establish a secure and efficient database connection."
]
},
|
||||
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"\n",
"# Copy the connection string from the TiDB Cloud console and replace it with your own\n",
"tidb_connection_string_template = \"mysql+pymysql://<USER>:<PASSWORD>@<HOST>:4000/<DB>?ssl_ca=/etc/ssl/cert.pem&ssl_verify_cert=true&ssl_verify_identity=true\"\n",
"tidb_password = getpass.getpass(\"Input your TiDB password:\")\n",
"tidb_connection_string = tidb_connection_string_template.replace(\n",
"    \"<PASSWORD>\", tidb_password\n",
")"
]
},
|
||||
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Data from TiDB\n",
"\n",
"Here's a breakdown of some key arguments you can use to customize the behavior of the `TiDBLoader`:\n",
"\n",
"- `query` (str): This is the SQL query to be executed against the TiDB database. The query should select the data you want to load into your `Document` objects. \n",
"  For instance, you might use a query like `\"SELECT * FROM my_table\"` to fetch all data from `my_table`.\n",
"\n",
"- `page_content_columns` (Optional[List[str]]): Specifies the list of column names whose values should be included in the `page_content` of each `Document` object. \n",
"  If set to `None` (the default), all columns returned by the query are included in `page_content`. This allows you to tailor the content of each document based on specific columns of your data.\n",
"\n",
"- `metadata_columns` (Optional[List[str]]): Specifies the list of column names whose values should be included in the `metadata` of each `Document` object. \n",
"  By default, this list is empty, meaning no metadata will be included unless explicitly specified. This is useful for including additional information about each document that doesn't form part of the main content but is still valuable for processing or analysis."
]
},
|
||||
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine\n",
"\n",
"# Connect to the database\n",
"engine = create_engine(tidb_connection_string)\n",
"metadata = MetaData()\n",
"table_name = \"test_tidb_loader\"\n",
"\n",
"# Create a table\n",
"test_table = Table(\n",
"    table_name,\n",
"    metadata,\n",
"    Column(\"id\", Integer, primary_key=True),\n",
"    Column(\"name\", String(255)),\n",
"    Column(\"description\", String(255)),\n",
")\n",
"metadata.create_all(engine)\n",
"\n",
"\n",
"with engine.connect() as connection:\n",
"    transaction = connection.begin()\n",
"    try:\n",
"        connection.execute(\n",
"            test_table.insert(),\n",
"            [\n",
"                {\"name\": \"Item 1\", \"description\": \"Description of Item 1\"},\n",
"                {\"name\": \"Item 2\", \"description\": \"Description of Item 2\"},\n",
"                {\"name\": \"Item 3\", \"description\": \"Description of Item 3\"},\n",
"            ],\n",
"        )\n",
"        transaction.commit()\n",
"    except:\n",
"        transaction.rollback()\n",
"        raise"
]
},
|
||||
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"------------------------------\n",
"content: name: Item 1\n",
"description: Description of Item 1\n",
"metadata: {'id': 1}\n",
"------------------------------\n",
"content: name: Item 2\n",
"description: Description of Item 2\n",
"metadata: {'id': 2}\n",
"------------------------------\n",
"content: name: Item 3\n",
"description: Description of Item 3\n",
"metadata: {'id': 3}\n"
]
}
],
"source": [
"from langchain_community.document_loaders import TiDBLoader\n",
"\n",
"# Set up TiDBLoader to retrieve data\n",
"loader = TiDBLoader(\n",
"    connection_string=tidb_connection_string,\n",
"    query=f\"SELECT * FROM {table_name};\",\n",
"    page_content_columns=[\"name\", \"description\"],\n",
"    metadata_columns=[\"id\"],\n",
")\n",
"\n",
"# Load data\n",
"documents = loader.load()\n",
"\n",
"# Display the loaded documents\n",
"for doc in documents:\n",
"    print(\"-\" * 30)\n",
"    print(f\"content: {doc.page_content}\\nmetadata: {doc.metadata}\")"
]
},
|
||||
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"test_table.drop(bind=engine)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "langchain",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|
||||
@@ -32,8 +32,8 @@
"source": [
"import json\n",
"\n",
"from langchain_community.document_transformers import DoctranPropertyExtractor\n",
"from langchain_core.documents import Document"
"from langchain.schema import Document\n",
"from langchain_community.document_transformers import DoctranPropertyExtractor"
]
},
{