mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 15:43:54 +00:00
sync from upstream master
This commit is contained in:
commit
2bfa73257f
4
.github/CONTRIBUTING.md
vendored
4
.github/CONTRIBUTING.md
vendored
@ -80,10 +80,10 @@ For example, to contribute to `langchain` run `cd libs/langchain` before getting
|
||||
To install requirements:
|
||||
|
||||
```bash
|
||||
poetry install -E all
|
||||
poetry install --with test
|
||||
```
|
||||
|
||||
This will install all requirements for running the package, examples, linting, formatting, tests, and coverage. Note the `-E all` flag will install all optional dependencies necessary for integration testing.
|
||||
This will install all requirements for running the package, examples, linting, formatting, tests, and coverage.
|
||||
|
||||
❗Note: If during installation you receive a `WheelFileValidationError` for `debugpy`, please make sure you are running Poetry v1.5.1. This bug was present in older versions of Poetry (e.g. 1.4.1) and has been resolved in newer releases. If you are still seeing this bug on v1.5.1, you may also try disabling "modern installation" (`poetry config installer.modern-installation false`) and re-installing requirements. See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details.
|
||||
|
||||
|
48
.github/actions/poetry_setup/action.yml
vendored
48
.github/actions/poetry_setup/action.yml
vendored
@ -15,19 +15,13 @@ inputs:
|
||||
description: Poetry version
|
||||
required: true
|
||||
|
||||
install-command:
|
||||
description: Command run for installing dependencies
|
||||
required: false
|
||||
default: poetry install
|
||||
|
||||
cache-key:
|
||||
description: Cache key to use for manual handling of caching
|
||||
required: true
|
||||
|
||||
working-directory:
|
||||
description: Directory to run install-command in
|
||||
required: false
|
||||
default: ""
|
||||
description: Directory whose poetry.lock file should be cached
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
@ -38,47 +32,35 @@ runs:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
|
||||
- uses: actions/cache@v3
|
||||
id: cache-pip
|
||||
name: Cache Pip ${{ inputs.python-version }}
|
||||
id: cache-bin-poetry
|
||||
name: Cache Poetry binary - Python ${{ inputs.python-version }}
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "1"
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pip
|
||||
key: pip-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}
|
||||
/opt/pipx/venvs/poetry
|
||||
/opt/pipx_bin/poetry
|
||||
# This step caches the poetry installation, so make sure it's keyed on the poetry version as well.
|
||||
key: bin-poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-${{ inputs.poetry-version }}
|
||||
|
||||
- name: Install poetry
|
||||
if: steps.cache-bin-poetry.outputs.cache-hit != 'true'
|
||||
shell: bash
|
||||
env:
|
||||
POETRY_VERSION: ${{ inputs.poetry-version }}
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
run: pipx install "poetry==$POETRY_VERSION" --python "python$PYTHON_VERSION" --verbose
|
||||
|
||||
- name: Check Poetry File
|
||||
shell: bash
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
poetry check
|
||||
|
||||
- name: Check lock file
|
||||
shell: bash
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
poetry lock --check
|
||||
|
||||
- uses: actions/cache@v3
|
||||
id: cache-poetry
|
||||
- name: Restore pip and poetry cached dependencies
|
||||
uses: actions/cache@v3
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "4"
|
||||
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pip
|
||||
~/.cache/pypoetry/virtualenvs
|
||||
~/.cache/pypoetry/cache
|
||||
~/.cache/pypoetry/artifacts
|
||||
${{ env.WORKDIR }}/.venv
|
||||
key: poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}
|
||||
|
||||
- run: ${{ inputs.install-command }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
shell: bash
|
||||
key: py-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles(format('{0}/**/poetry.lock', env.WORKDIR)) }}
|
||||
|
43
.github/workflows/_lint.yml
vendored
43
.github/workflows/_lint.yml
vendored
@ -80,31 +80,32 @@ jobs:
|
||||
find "$WORKDIR" -name '*.py' -type f -not -newermt "$OLDEST_COMMIT_TIME" -exec touch -c -m -t '200001010000' '{}' '+'
|
||||
|
||||
echo "oldest-commit=$OLDEST_COMMIT" >> "$GITHUB_OUTPUT"
|
||||
- uses: actions/cache@v3
|
||||
id: cache-pip
|
||||
name: Cache langchain editable pip install - ${{ matrix.python-version }}
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pip
|
||||
key: pip-editable-langchain-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ matrix.python-version }}
|
||||
- name: Install poetry
|
||||
run: |
|
||||
pipx install "poetry==$POETRY_VERSION"
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: poetry
|
||||
cache-dependency-path: |
|
||||
${{ env.WORKDIR }}/**/poetry.lock
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
cache-key: lint
|
||||
|
||||
- name: Check Poetry File
|
||||
shell: bash
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
poetry check
|
||||
|
||||
- name: Check lock file
|
||||
shell: bash
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
poetry lock --check
|
||||
|
||||
- name: Install dependencies
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
poetry install
|
||||
|
||||
- name: Install langchain editable
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
if: ${{ inputs.working-directory != 'libs/langchain' }}
|
||||
@ -115,7 +116,7 @@ jobs:
|
||||
uses: actions/cache@v3
|
||||
env:
|
||||
CACHE_BASE: black-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "1"
|
||||
with:
|
||||
path: |
|
||||
${{ env.WORKDIR }}/.black_cache
|
||||
@ -127,7 +128,7 @@ jobs:
|
||||
- name: Get .mypy_cache to speed up mypy
|
||||
uses: actions/cache@v3
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2"
|
||||
with:
|
||||
path: |
|
||||
${{ env.WORKDIR }}/.mypy_cache
|
||||
|
81
.github/workflows/_pydantic_compatibility.yml
vendored
Normal file
81
.github/workflows/_pydantic_compatibility.yml
vendored
Normal file
@ -0,0 +1,81 @@
|
||||
name: pydantic v1/v2 compatibility
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
working-directory:
|
||||
required: true
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.5.1"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
name: Pydantic v1/v2 compatibility - Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
cache-key: pydantic-cross-compat
|
||||
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: poetry install
|
||||
|
||||
- name: Install the opposite major version of pydantic
|
||||
# If normal tests use pydantic v1, here we'll use v2, and vice versa.
|
||||
shell: bash
|
||||
run: |
|
||||
# Determine the major part of pydantic version
|
||||
REGULAR_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
|
||||
|
||||
if [[ "$REGULAR_VERSION" == "1" ]]; then
|
||||
PYDANTIC_DEP=">=2.1,<3"
|
||||
TEST_WITH_VERSION="2"
|
||||
elif [[ "$REGULAR_VERSION" == "2" ]]; then
|
||||
PYDANTIC_DEP="<2"
|
||||
TEST_WITH_VERSION="1"
|
||||
else
|
||||
echo "Unexpected pydantic major version '$REGULAR_VERSION', cannot determine which version to use for cross-compatibility test."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Install via `pip` instead of `poetry add` to avoid changing lockfile,
|
||||
# which would prevent caching from working: the cache would get saved
|
||||
# to a different key than where it gets loaded from.
|
||||
poetry run pip install "pydantic${PYDANTIC_DEP}"
|
||||
|
||||
# Ensure that the correct pydantic is installed now.
|
||||
echo "Checking pydantic version... Expecting ${TEST_WITH_VERSION}"
|
||||
|
||||
# Determine the major part of pydantic version
|
||||
CURRENT_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
|
||||
|
||||
# Check that the major part of pydantic version is as expected, if not
|
||||
# raise an error
|
||||
if [[ "$CURRENT_VERSION" != "$TEST_WITH_VERSION" ]]; then
|
||||
echo "Error: expected pydantic version ${CURRENT_VERSION} to have been installed, but found: ${TEST_WITH_VERSION}"
|
||||
exit 1
|
||||
fi
|
||||
echo "Found pydantic version ${CURRENT_VERSION}, as expected"
|
||||
- name: Run pydantic compatibility tests
|
||||
shell: bash
|
||||
run: make test
|
3
.github/workflows/_release.yml
vendored
3
.github/workflows/_release.yml
vendored
@ -23,6 +23,9 @@ jobs:
|
||||
# Trusted publishing has to also be configured on PyPI for each package:
|
||||
# https://docs.pypi.org/trusted-publishers/adding-a-publisher/
|
||||
id-token: write
|
||||
|
||||
# This permission is needed by `ncipollo/release-action` to create the GitHub release.
|
||||
contents: write
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
|
63
.github/workflows/_test.yml
vendored
63
.github/workflows/_test.yml
vendored
@ -7,10 +7,6 @@ on:
|
||||
required: true
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
test_type:
|
||||
type: string
|
||||
description: "Test types to run"
|
||||
default: '["core", "extended", "core-pydantic-2"]'
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.5.1"
|
||||
@ -28,61 +24,22 @@ jobs:
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
test_type: ${{ fromJSON(inputs.test_type) }}
|
||||
name: Python ${{ matrix.python-version }} ${{ matrix.test_type }}
|
||||
name: Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
cache-key: ${{ matrix.test_type }}
|
||||
install-command: |
|
||||
if [ "${{ matrix.test_type }}" == "core" ]; then
|
||||
echo "Running core tests, installing dependencies with poetry..."
|
||||
poetry install
|
||||
elif [ "${{ matrix.test_type }}" == "core-pydantic-2" ]; then
|
||||
echo "Running core-pydantic-v2 tests, installing dependencies with poetry..."
|
||||
poetry install
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
cache-key: core
|
||||
|
||||
# Install via `pip` instead of `poetry add` to avoid changing lockfile,
|
||||
# which would prevent caching from working: the cache would get saved
|
||||
# to a different key than where it gets loaded from.
|
||||
poetry run pip install 'pydantic>=2.1,<3'
|
||||
else
|
||||
echo "Running extended tests, installing dependencies with poetry..."
|
||||
poetry install -E extended_testing
|
||||
fi
|
||||
- name: Verify pydantic version
|
||||
run: |
|
||||
if [ "${{ matrix.test_type }}" == "core-pydantic-2" ]; then
|
||||
EXPECTED_VERSION=2
|
||||
else
|
||||
EXPECTED_VERSION=1
|
||||
fi
|
||||
echo "Checking pydantic version... Expecting ${EXPECTED_VERSION}"
|
||||
|
||||
# Determine the major part of pydantic version
|
||||
VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
|
||||
|
||||
# Check that the major part of pydantic version is as expected, if not
|
||||
# raise an error
|
||||
if [[ "$VERSION" -ne $EXPECTED_VERSION ]]; then
|
||||
echo "Error: pydantic version must be equal to ${EXPECTED_VERSION}; Found: ${VERSION}"
|
||||
exit 1
|
||||
fi
|
||||
echo "Found pydantic version ${VERSION}, as expected"
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
- name: Run ${{matrix.test_type}} tests
|
||||
run: |
|
||||
case "${{ matrix.test_type }}" in
|
||||
core | core-pydantic-2)
|
||||
make test
|
||||
;;
|
||||
*)
|
||||
make extended_tests
|
||||
;;
|
||||
esac
|
||||
run: poetry install
|
||||
|
||||
- name: Run core tests
|
||||
shell: bash
|
||||
run: make test
|
||||
|
59
.github/workflows/langchain_ci.yml
vendored
59
.github/workflows/langchain_ci.yml
vendored
@ -8,10 +8,25 @@ on:
|
||||
paths:
|
||||
- '.github/workflows/_lint.yml'
|
||||
- '.github/workflows/_test.yml'
|
||||
- '.github/workflows/_pydantic_compatibility.yml'
|
||||
- '.github/workflows/langchain_ci.yml'
|
||||
- 'libs/langchain/**'
|
||||
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
|
||||
|
||||
# If another push to the same PR or branch happens while this workflow is still running,
|
||||
# cancel the earlier run in favor of the next run.
|
||||
#
|
||||
# There's no point in testing an outdated version of the code. GitHub only allows
|
||||
# a limited number of job runners to be active at the same time, so it's better to cancel
|
||||
# pointless jobs early so that more useful jobs can run sooner.
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.5.1"
|
||||
WORKDIR: "libs/langchain"
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
uses:
|
||||
@ -19,10 +34,50 @@ jobs:
|
||||
with:
|
||||
working-directory: libs/langchain
|
||||
secrets: inherit
|
||||
|
||||
test:
|
||||
uses:
|
||||
./.github/workflows/_test.yml
|
||||
with:
|
||||
working-directory: libs/langchain
|
||||
test_type: '["core", "extended", "core-pydantic-2"]'
|
||||
secrets: inherit
|
||||
secrets: inherit
|
||||
|
||||
pydantic-compatibility:
|
||||
uses:
|
||||
./.github/workflows/_pydantic_compatibility.yml
|
||||
with:
|
||||
working-directory: libs/langchain
|
||||
secrets: inherit
|
||||
|
||||
extended-tests:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ env.WORKDIR }}
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
name: Python ${{ matrix.python-version }} extended tests
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: libs/langchain
|
||||
cache-key: extended
|
||||
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Running extended tests, installing dependencies with poetry..."
|
||||
poetry install -E extended_testing
|
||||
|
||||
- name: Run extended tests
|
||||
run: make extended_tests
|
||||
|
58
.github/workflows/langchain_experimental_ci.yml
vendored
58
.github/workflows/langchain_experimental_ci.yml
vendored
@ -13,6 +13,20 @@ on:
|
||||
- 'libs/experimental/**'
|
||||
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
|
||||
|
||||
# If another push to the same PR or branch happens while this workflow is still running,
|
||||
# cancel the earlier run in favor of the next run.
|
||||
#
|
||||
# There's no point in testing an outdated version of the code. GitHub only allows
|
||||
# a limited number of job runners to be active at the same time, so it's better to cancel
|
||||
# pointless jobs early so that more useful jobs can run sooner.
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.5.1"
|
||||
WORKDIR: "libs/experimental"
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
uses:
|
||||
@ -20,10 +34,50 @@ jobs:
|
||||
with:
|
||||
working-directory: libs/experimental
|
||||
secrets: inherit
|
||||
|
||||
test:
|
||||
uses:
|
||||
./.github/workflows/_test.yml
|
||||
with:
|
||||
working-directory: libs/experimental
|
||||
test_type: '["core"]'
|
||||
secrets: inherit
|
||||
secrets: inherit
|
||||
|
||||
# It's possible that langchain-experimental works fine with the latest *published* langchain,
|
||||
# but is broken with the langchain on `master`.
|
||||
#
|
||||
# We want to catch situations like that *before* releasing a new langchain, hence this test.
|
||||
test-with-latest-langchain:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ env.WORKDIR }}
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
name: test with unpublished langchain - Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: ${{ env.WORKDIR }}
|
||||
cache-key: unpublished-langchain
|
||||
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Running tests with unpublished langchain, installing dependencies with poetry..."
|
||||
poetry install
|
||||
|
||||
echo "Editably installing langchain outside of poetry, to avoid messing up lockfile..."
|
||||
poetry run pip install -e ../langchain
|
||||
|
||||
- name: Run tests
|
||||
run: make test
|
||||
|
15
.github/workflows/scheduled_test.yml
vendored
15
.github/workflows/scheduled_test.yml
vendored
@ -25,18 +25,25 @@ jobs:
|
||||
name: Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: libs/langchain
|
||||
install-command: |
|
||||
echo "Running scheduled tests, installing dependencies with poetry..."
|
||||
poetry install --with=test_integration
|
||||
cache-key: scheduled
|
||||
|
||||
- name: Install dependencies
|
||||
working-directory: libs/langchain
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Running scheduled tests, installing dependencies with poetry..."
|
||||
poetry install --with=test_integration
|
||||
|
||||
- name: Run tests
|
||||
shell: bash
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
run: |
|
||||
make scheduled_tests
|
||||
shell: bash
|
||||
|
@ -156,7 +156,7 @@ html_context = {
|
||||
html_static_path = ["_static"]
|
||||
|
||||
# These paths are either relative to html_static_path
|
||||
# or fully qualified paths (eg. https://...)
|
||||
# or fully qualified paths (e.g. https://...)
|
||||
html_css_files = [
|
||||
"css/custom.css",
|
||||
]
|
||||
|
@ -228,7 +228,7 @@ Classes
|
||||
:toctree: {module}
|
||||
"""
|
||||
|
||||
for class_ in classes:
|
||||
for class_ in sorted(classes, key=lambda c: c["qualified_name"]):
|
||||
if not class_["is_public"]:
|
||||
continue
|
||||
|
||||
|
@ -341,7 +341,7 @@
|
||||
"HugeGraph QA Chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_hugegraph_qa",
|
||||
"GraphSparqlQAChain": "https://python.langchain.com/docs/use_cases/more/graph/graph_sparql_qa",
|
||||
"ArangoDB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_arangodb_qa",
|
||||
"Graph DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa",
|
||||
"Neo4j DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa",
|
||||
"How to use a SmartLLMChain": "https://python.langchain.com/docs/use_cases/more/self_check/smart_llm",
|
||||
"Multi-Agent Simulated Environment: Petting Zoo": "https://python.langchain.com/docs/use_cases/agent_simulations/petting_zoo",
|
||||
"Multi-agent decentralized speaker selection": "https://python.langchain.com/docs/use_cases/agent_simulations/multiagent_bidding",
|
||||
@ -2071,8 +2071,8 @@
|
||||
"PromptLayer": "https://python.langchain.com/docs/integrations/providers/promptlayer",
|
||||
"PromptLayer OpenAI": "https://python.langchain.com/docs/integrations/llms/promptlayer_openai"
|
||||
},
|
||||
"DeepLake": {
|
||||
"Deep Lake": "https://python.langchain.com/docs/integrations/providers/deeplake",
|
||||
"Activeloop DeepLake": {
|
||||
"Deep Lake": "https://python.langchain.com/docs/integrations/providers/activeloop_deeplake",
|
||||
"Activeloop's Deep Lake": "https://python.langchain.com/docs/integrations/vectorstores/activeloop_deeplake",
|
||||
"Analysis of Twitter the-algorithm source code with LangChain, GPT4 and Activeloop's Deep Lake": "https://python.langchain.com/docs/use_cases/question_answering/how_to/code/twitter-the-algorithm-analysis-deeplake",
|
||||
"Use LangChain, GPT and Activeloop's Deep Lake to work with code base": "https://python.langchain.com/docs/use_cases/question_answering/how_to/code/code-analysis-deeplake",
|
||||
@ -3202,10 +3202,10 @@
|
||||
"Graph QA": "https://python.langchain.com/docs/use_cases/more/graph/graph_qa"
|
||||
},
|
||||
"GraphCypherQAChain": {
|
||||
"Graph DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa"
|
||||
"Neo4j DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa"
|
||||
},
|
||||
"Neo4jGraph": {
|
||||
"Graph DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa"
|
||||
"Neo4j DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa"
|
||||
},
|
||||
"LLMBashChain": {
|
||||
"Bash chain": "https://python.langchain.com/docs/use_cases/more/code_writing/llm_bash"
|
||||
|
@ -47,8 +47,8 @@ If you’re working on something you’re proud of, and think the LangChain comm
|
||||
|
||||
Here’s where our team hangs out, talks shop, spotlights cool work, and shares what we’re up to. We’d love to see you there too.
|
||||
|
||||
- **[Twitter](https://twitter.com/LangChainAI):** we post about what we’re working on and what cool things we’re seeing in the space. If you tag @langchainai in your post, we’ll almost certainly see it, and can snow you some love!
|
||||
- **[Discord](https://discord.gg/6adMQxSpJS):** connect with with >30k developers who are building with LangChain
|
||||
- **[Twitter](https://twitter.com/LangChainAI):** we post about what we’re working on and what cool things we’re seeing in the space. If you tag @langchainai in your post, we’ll almost certainly see it, and can show you some love!
|
||||
- **[Discord](https://discord.gg/6adMQxSpJS):** connect with >30k developers who are building with LangChain
|
||||
- **[GitHub](https://github.com/langchain-ai/langchain):** open pull requests, contribute to a discussion, and/or contribute
|
||||
- **[Subscribe to our bi-weekly Release Notes](https://6w1pwbss0py.typeform.com/to/KjZB1auB):** a twice/month email roundup of the coolest things going on in our orbit
|
||||
- **Slack:** if you’re building an application in production at your company, we’d love to get into a Slack channel together. Fill out [this form](https://airtable.com/appwQzlErAS2qiP0L/shrGtGaVBVAz7NcV2) and we’ll get in touch about setting one up.
|
||||
|
@ -107,7 +107,7 @@ import PromptTemplateChatModel from "@snippets/get_started/quickstart/prompt_tem
|
||||
<PromptTemplateLLM/>
|
||||
|
||||
However, the advantages of using these over raw string formatting are several.
|
||||
You can "partial" out variables - eg you can format only some of the variables at a time.
|
||||
You can "partial" out variables - e.g. you can format only some of the variables at a time.
|
||||
You can compose them together, easily combining different templates into a single prompt.
|
||||
For explanations of these functionalities, see the [section on prompts](/docs/modules/model_io/prompts) for more detail.
|
||||
|
||||
@ -121,12 +121,12 @@ Let's take a look at this below:
|
||||
|
||||
ChatPromptTemplates can also include other things besides ChatMessageTemplates - see the [section on prompts](/docs/modules/model_io/prompts) for more detail.
|
||||
|
||||
## Output Parsers
|
||||
## Output parsers
|
||||
|
||||
OutputParsers convert the raw output of an LLM into a format that can be used downstream.
|
||||
There are few main type of OutputParsers, including:
|
||||
|
||||
- Convert text from LLM -> structured information (eg JSON)
|
||||
- Convert text from LLM -> structured information (e.g. JSON)
|
||||
- Convert a ChatMessage into just a string
|
||||
- Convert the extra information returned from a call besides the message (like OpenAI function invocation) into a string.
|
||||
|
||||
@ -149,7 +149,7 @@ import LLMChain from "@snippets/get_started/quickstart/llm_chain.mdx"
|
||||
|
||||
<LLMChain/>
|
||||
|
||||
## Next Steps
|
||||
## Next steps
|
||||
|
||||
This is it!
|
||||
We've now gone over how to create the core building block of LangChain applications - the LLMChains.
|
||||
|
@ -3,7 +3,7 @@ sidebar_position: 3
|
||||
---
|
||||
# Comparison Evaluators
|
||||
|
||||
Comparison evaluators in LangChain help measure two different chain or LLM outputs. These evaluators are helpful for comparative analyses, such as A/B testing between two language models, or comparing different versions of the same model. They can also be useful for things like generating preference scores for ai-assisted reinforcement learning.
|
||||
Comparison evaluators in LangChain help measure two different chains or LLM outputs. These evaluators are helpful for comparative analyses, such as A/B testing between two language models, or comparing different versions of the same model. They can also be useful for things like generating preference scores for ai-assisted reinforcement learning.
|
||||
|
||||
These evaluators inherit from the `PairwiseStringEvaluator` class, providing a comparison interface for two strings - typically, the outputs from two different prompts or models, or two versions of the same model. In essence, a comparison evaluator performs an evaluation on a pair of strings and returns a dictionary containing the evaluation score and other relevant details.
|
||||
|
||||
@ -16,7 +16,7 @@ Here's a summary of the key methods and properties of a comparison evaluator:
|
||||
- `requires_input`: This property indicates whether this evaluator requires an input string.
|
||||
- `requires_reference`: This property specifies whether this evaluator requires a reference label.
|
||||
|
||||
Detailed information about creating custom evaluators and the available built-in comparison evaluators are provided in the following sections.
|
||||
Detailed information about creating custom evaluators and the available built-in comparison evaluators is provided in the following sections.
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
|
1396
docs/docs_skeleton/docs/guides/safety/amazon_comprehend_chain.ipynb
Normal file
1396
docs/docs_skeleton/docs/guides/safety/amazon_comprehend_chain.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
@ -2,5 +2,5 @@
|
||||
|
||||
One of the key concerns with using LLMs is that they may generate harmful or unethical text. This is an area of active research in the field. Here we present some built-in chains inspired by this research, which are intended to make the outputs of LLMs safer.
|
||||
|
||||
- [Moderation chain](/docs/use_cases/safety/moderation): Explicitly check if any output text is harmful and flag it.
|
||||
- [Constitutional chain](/docs/use_cases/safety/constitutional_chain): Prompt the model with a set of principles which should guide it's behavior.
|
||||
- [Moderation chain](/docs/guides/safety/moderation): Explicitly check if any output text is harmful and flag it.
|
||||
- [Constitutional chain](/docs/guides/safety/constitutional_chain): Prompt the model with a set of principles which should guide it's behavior.
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Few-shot prompt templates
|
||||
|
||||
In this tutorial, we'll learn how to create a prompt template that uses few shot examples. A few shot prompt template can be constructed from either a set of examples, or from an Example Selector object.
|
||||
In this tutorial, we'll learn how to create a prompt template that uses few-shot examples. A few-shot prompt template can be constructed from either a set of examples, or from an Example Selector object.
|
||||
|
||||
import Example from "@snippets/modules/model_io/prompts/prompt_templates/few_shot_examples.mdx"
|
||||
|
||||
|
@ -6,7 +6,7 @@ sidebar_position: 0
|
||||
|
||||
Prompt templates are pre-defined recipes for generating prompts for language models.
|
||||
|
||||
A template may include instructions, few shot examples, and specific context and
|
||||
A template may include instructions, few-shot examples, and specific context and
|
||||
questions appropriate for a given task.
|
||||
|
||||
LangChain provides tooling to create and work with prompt templates.
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Partial prompt templates
|
||||
|
||||
Like other methods, it can make sense to "partial" a prompt template - eg pass in a subset of the required values, as to create a new prompt template which expects only the remaining subset of values.
|
||||
Like other methods, it can make sense to "partial" a prompt template - e.g. pass in a subset of the required values, as to create a new prompt template which expects only the remaining subset of values.
|
||||
|
||||
LangChain supports this in two ways:
|
||||
1. Partial formatting with string values.
|
||||
|
@ -2,8 +2,8 @@
|
||||
|
||||
This notebook goes over how to compose multiple prompts together. This can be useful when you want to reuse parts of prompts. This can be done with a PipelinePrompt. A PipelinePrompt consists of two main parts:
|
||||
|
||||
- Final prompt: This is the final prompt that is returned
|
||||
- Pipeline prompts: This is a list of tuples, consisting of a string name and a prompt template. Each prompt template will be formatted and then passed to future prompt templates as a variable with the same name.
|
||||
- Final prompt: The final prompt that is returned
|
||||
- Pipeline prompts: A list of tuples, consisting of a string name and a prompt template. Each prompt template will be formatted and then passed to future prompt templates as a variable with the same name.
|
||||
|
||||
import Example from "@snippets/modules/model_io/prompts/prompt_templates/prompt_composition.mdx"
|
||||
|
||||
|
@ -24,8 +24,7 @@ function Imports({ imports }) {
|
||||
<li key={imported}>
|
||||
<a href={docs}>
|
||||
<span>{imported}</span>
|
||||
</a>{" "}
|
||||
from <code>{source}</code>
|
||||
</a>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
|
BIN
docs/docs_skeleton/static/img/ReAct.png
Normal file
BIN
docs/docs_skeleton/static/img/ReAct.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 42 KiB |
BIN
docs/docs_skeleton/static/img/agents_use_case_1.png
Normal file
BIN
docs/docs_skeleton/static/img/agents_use_case_1.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 236 KiB |
BIN
docs/docs_skeleton/static/img/agents_use_case_trace_1.png
Normal file
BIN
docs/docs_skeleton/static/img/agents_use_case_trace_1.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 74 KiB |
BIN
docs/docs_skeleton/static/img/agents_use_case_trace_2.png
Normal file
BIN
docs/docs_skeleton/static/img/agents_use_case_trace_2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 166 KiB |
BIN
docs/docs_skeleton/static/img/agents_vs_chains.png
Normal file
BIN
docs/docs_skeleton/static/img/agents_vs_chains.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 42 KiB |
BIN
docs/docs_skeleton/static/img/oai_function_agent.png
Normal file
BIN
docs/docs_skeleton/static/img/oai_function_agent.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 177 KiB |
@ -1,5 +1,9 @@
|
||||
{
|
||||
"redirects": [
|
||||
{
|
||||
"source": "/docs/modules/data_connection/caching_embeddings(/?)",
|
||||
"destination": "/docs/modules/data_connection/text_embedding/caching_embeddings"
|
||||
},
|
||||
{
|
||||
"source": "/en/latest/additional_resources/youtube.html",
|
||||
"destination": "/docs/additional_resources/youtube"
|
||||
@ -166,7 +170,7 @@
|
||||
},
|
||||
{
|
||||
"source": "/docs/integrations/deeplake",
|
||||
"destination": "/docs/integrations/providers/deeplake"
|
||||
"destination": "/docs/integrations/providers/activeloop_deeplake"
|
||||
},
|
||||
{
|
||||
"source": "/docs/integrations/diffbot",
|
||||
|
@ -51,6 +51,7 @@ Dependents stats for `langchain-ai/langchain`
|
||||
|[e2b-dev/e2b](https://github.com/e2b-dev/e2b) | 5365 |
|
||||
|[mage-ai/mage-ai](https://github.com/mage-ai/mage-ai) | 5352 |
|
||||
|[wenda-LLM/wenda](https://github.com/wenda-LLM/wenda) | 5192 |
|
||||
|[LangChain-Chinese-Getting-Started-Guide](https://github.com/liaokongVFX/LangChain-Chinese-Getting-Started-Guide) | 5129 |
|
||||
|[zilliztech/GPTCache](https://github.com/zilliztech/GPTCache) | 4993 |
|
||||
|[GreyDGL/PentestGPT](https://github.com/GreyDGL/PentestGPT) | 4831 |
|
||||
|[zauberzeug/nicegui](https://github.com/zauberzeug/nicegui) | 4824 |
|
||||
|
@ -8,7 +8,7 @@ Here's a few different tools and functionalities to aid in debugging.
|
||||
|
||||
## Tracing
|
||||
|
||||
Platforms with tracing capabilities like [LangSmith](/docs/guides/langsmith/) and [WandB](/docs/ecosystem/integrations/agent_with_wandb_tracing) are the most comprehensive solutions for debugging. These platforms make it easy to not only log and visualize LLM apps, but also to actively debug, test and refine them.
|
||||
Platforms with tracing capabilities like [LangSmith](/docs/guides/langsmith/) and [WandB](/docs/integrations/providers/wandb_tracing) are the most comprehensive solutions for debugging. These platforms make it easy to not only log and visualize LLM apps, but also to actively debug, test and refine them.
|
||||
|
||||
For anyone building production-grade LLM applications, we highly recommend using a platform like this.
|
||||
|
||||
|
@ -79,3 +79,7 @@ See OpenLLM's [integration doc](https://github.com/bentoml/OpenLLM#%EF%B8%8F-int
|
||||
## [Databutton](https://databutton.com/home?new-data-app=true)
|
||||
|
||||
These templates serve as examples of how to build, deploy, and share LangChain applications using Databutton. You can create user interfaces with Streamlit, automate tasks by scheduling Python code, and store files and data in the built-in store. Examples include a Chatbot interface with conversational memory, a Personal search engine, and a starter template for LangChain apps. Deploying and sharing is just one click away.
|
||||
|
||||
## [AzureML Online Endpoint](https://github.com/Azure/azureml-examples/blob/main/sdk/python/endpoints/online/llm/langchain/1_langchain_basic_deploy.ipynb)
|
||||
|
||||
A minimal example of how to deploy LangChain to an Azure Machine Learning Online Endpoint.
|
@ -1318,7 +1318,7 @@
|
||||
"source": [
|
||||
"template = \"\"\"Write some python code to solve the user's problem. \n",
|
||||
"\n",
|
||||
"Return only python code in Markdown format, eg:\n",
|
||||
"Return only python code in Markdown format, e.g.:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"....\n",
|
||||
|
@ -11,7 +11,7 @@
|
||||
"\n",
|
||||
"[PromptLayer](https://promptlayer.com) is a an LLM observability platform that lets you visualize requests, version prompts, and track usage. In this guide we will go over how to setup the `PromptLayerCallbackHandler`. \n",
|
||||
"\n",
|
||||
"While PromptLayer does have LLMs that integrate directly with LangChain (eg [`PromptLayerOpenAI`](https://python.langchain.com/docs/integrations/llms/promptlayer_openai)), this callback is the recommended way to integrate PromptLayer with LangChain.\n",
|
||||
"While PromptLayer does have LLMs that integrate directly with LangChain (e.g. [`PromptLayerOpenAI`](https://python.langchain.com/docs/integrations/llms/promptlayer_openai)), this callback is the recommended way to integrate PromptLayer with LangChain.\n",
|
||||
"\n",
|
||||
"See [our docs](https://docs.promptlayer.com/languages/langchain) for more information."
|
||||
]
|
||||
|
382
docs/extras/integrations/chat/ollama.ipynb
Normal file
382
docs/extras/integrations/chat/ollama.ipynb
Normal file
@ -0,0 +1,382 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Ollama\n",
|
||||
"\n",
|
||||
"[Ollama](https://ollama.ai/) allows you to run open-source large language models, such as LLaMA2, locally.\n",
|
||||
"\n",
|
||||
"Ollama bundles model weights, configuration, and data into a single package, defined by a Modelfile. \n",
|
||||
"\n",
|
||||
"It optimizes setup and configuration details, including GPU usage.\n",
|
||||
"\n",
|
||||
"For a complete list of supported models and model variants, see the [Ollama model library](https://ollama.ai/library).\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"First, follow [these instructions](https://github.com/jmorganca/ollama) to set up and run a local Ollama instance:\n",
|
||||
"\n",
|
||||
"* [Download](https://ollama.ai/download)\n",
|
||||
"* Fetch a model via `ollama pull <model family>`\n",
|
||||
"* e.g., for `Llama-7b`: `ollama pull llama2`\n",
|
||||
"* This will download the most basic version of the model (e.g., minimum # parameters and 4-bit quantization)\n",
|
||||
"* On Mac, it will download to:\n",
|
||||
"\n",
|
||||
"`~/.ollama/models/manifests/registry.ollama.ai/library/<model family>/latest`\n",
|
||||
"\n",
|
||||
"* And we can specify a particular version, e.g., for `ollama pull vicuna:13b-v1.5-16k-q4_0`\n",
|
||||
"* The file is here with the model version in place of `latest`\n",
|
||||
"\n",
|
||||
"`~/.ollama/models/manifests/registry.ollama.ai/library/vicuna/13b-v1.5-16k-q4_0`\n",
|
||||
"\n",
|
||||
"You can easily access models in a few ways:\n",
|
||||
"\n",
|
||||
"1/ if the app is running:\n",
|
||||
"* All of your local models are automatically served on `localhost:11434`\n",
|
||||
"* Select your model when setting `llm = Ollama(..., model=\"<model family>:<version>\")`\n",
|
||||
"* If you set `llm = Ollama(..., model=\"<model family\")` withoout a version it will simply look for `latest`\n",
|
||||
"\n",
|
||||
"2/ if building from source or just running the binary: \n",
|
||||
"* Then you must run `ollama serve`\n",
|
||||
"* All of your local models are automatically served on `localhost:11434`\n",
|
||||
"* Then, select as shown above\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Usage\n",
|
||||
"\n",
|
||||
"You can see a full list of supported parameters on the [API reference page](https://api.python.langchain.com/en/latest/llms/langchain.llms.ollama.Ollama.html).\n",
|
||||
"\n",
|
||||
"If you are using a LLaMA `chat` model (e.g., `ollama pull llama2:7b-chat`) then you can use the `ChatOllama` interface.\n",
|
||||
"\n",
|
||||
"This includes [special tokens](https://huggingface.co/blog/llama2#how-to-prompt-llama-2) for system message and user input."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOllama\n",
|
||||
"from langchain.callbacks.manager import CallbackManager\n",
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler \n",
|
||||
"chat_model = ChatOllama(model=\"llama2:7b-chat\", \n",
|
||||
" callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"With `StreamingStdOutCallbackHandler`, you will see tokens streamed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Artificial intelligence (AI) has a rich and varied history that spans several decades. Hinweis: The following is a brief overview of the major milestones in the history of AI, but it is by no means exhaustive.\n",
|
||||
"\n",
|
||||
"1. Early Beginnings (1950s-1960s): The term \"Artificial Intelligence\" was coined in 1956 by computer scientist John McCarthy. However, the concept of creating machines that can think and learn like humans dates back to ancient times. In the 1950s and 1960s, researchers began exploring the possibilities of AI using simple algorithms and machine learning techniques.\n",
|
||||
"2. Rule-Based Systems (1970s-1980s): In the 1970s and 1980s, AI research focused on developing rule-based systems, which use predefined rules to reason and make decisions. This led to the development of expert systems, which were designed to mimic the decision-making abilities of human experts in specific domains.\n",
|
||||
"3. Machine Learning (1980s-1990s): The 1980s saw a shift towards machine learning, which enables machines to learn from data without being explicitly programmed. This led to the development of algorithms such as decision trees, neural networks, and support vector machines.\n",
|
||||
"4. Deep Learning (2000s-present): In the early 2000s, deep learning emerged as a subfield of machine learning, focusing on neural networks with multiple layers. These networks can learn complex representations of data, leading to breakthroughs in image and speech recognition, natural language processing, and other areas.\n",
|
||||
"5. Natural Language Processing (NLP) (1980s-present): NLP has been an active area of research since the 1980s, with a focus on developing algorithms that can understand and generate human language. This has led to applications such as chatbots, voice assistants, and language translation systems.\n",
|
||||
"6. Robotics (1970s-present): The development of robotics has been closely tied to AI research, with a focus on creating machines that can perform tasks that typically require human intelligence, such as manipulation and locomotion.\n",
|
||||
"7. Computer Vision (1980s-present): Computer vision has been an active area of research since the 1980s, with a focus on enabling machines to interpret and understand visual data from the world around us. This has led to applications such as image recognition, object detection, and autonomous driving.\n",
|
||||
"8. Ethics and Society (1990s-present): As AI technology has become more advanced and integrated into various aspects of society, there has been a growing concern about the ethical implications of AI. This includes issues related to privacy, bias, and job displacement.\n",
|
||||
"9. Reinforcement Learning (2000s-present): Reinforcement learning is a subfield of machine learning that involves training machines to make decisions based on feedback from their environment. This has led to breakthroughs in areas such as game playing, robotics, and autonomous driving.\n",
|
||||
"10. Generative Models (2010s-present): Generative models are a class of AI algorithms that can generate new data that is similar to a given dataset. This has led to applications such as image synthesis, music generation, and language creation.\n",
|
||||
"\n",
|
||||
"These are just a few of the many developments in the history of AI. As the field continues to evolve, we can expect even more exciting breakthroughs and innovations in the years to come."
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=' Artificial intelligence (AI) has a rich and varied history that spans several decades. Hinweis: The following is a brief overview of the major milestones in the history of AI, but it is by no means exhaustive.\\n\\n1. Early Beginnings (1950s-1960s): The term \"Artificial Intelligence\" was coined in 1956 by computer scientist John McCarthy. However, the concept of creating machines that can think and learn like humans dates back to ancient times. In the 1950s and 1960s, researchers began exploring the possibilities of AI using simple algorithms and machine learning techniques.\\n2. Rule-Based Systems (1970s-1980s): In the 1970s and 1980s, AI research focused on developing rule-based systems, which use predefined rules to reason and make decisions. This led to the development of expert systems, which were designed to mimic the decision-making abilities of human experts in specific domains.\\n3. Machine Learning (1980s-1990s): The 1980s saw a shift towards machine learning, which enables machines to learn from data without being explicitly programmed. This led to the development of algorithms such as decision trees, neural networks, and support vector machines.\\n4. Deep Learning (2000s-present): In the early 2000s, deep learning emerged as a subfield of machine learning, focusing on neural networks with multiple layers. These networks can learn complex representations of data, leading to breakthroughs in image and speech recognition, natural language processing, and other areas.\\n5. Natural Language Processing (NLP) (1980s-present): NLP has been an active area of research since the 1980s, with a focus on developing algorithms that can understand and generate human language. This has led to applications such as chatbots, voice assistants, and language translation systems.\\n6. Robotics (1970s-present): The development of robotics has been closely tied to AI research, with a focus on creating machines that can perform tasks that typically require human intelligence, such as manipulation and locomotion.\\n7. Computer Vision (1980s-present): Computer vision has been an active area of research since the 1980s, with a focus on enabling machines to interpret and understand visual data from the world around us. This has led to applications such as image recognition, object detection, and autonomous driving.\\n8. Ethics and Society (1990s-present): As AI technology has become more advanced and integrated into various aspects of society, there has been a growing concern about the ethical implications of AI. This includes issues related to privacy, bias, and job displacement.\\n9. Reinforcement Learning (2000s-present): Reinforcement learning is a subfield of machine learning that involves training machines to make decisions based on feedback from their environment. This has led to breakthroughs in areas such as game playing, robotics, and autonomous driving.\\n10. Generative Models (2010s-present): Generative models are a class of AI algorithms that can generate new data that is similar to a given dataset. This has led to applications such as image synthesis, music generation, and language creation.\\n\\nThese are just a few of the many developments in the history of AI. As the field continues to evolve, we can expect even more exciting breakthroughs and innovations in the years to come.', additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.schema import HumanMessage\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" HumanMessage(content=\"Tell me about the history of AI\")\n",
|
||||
"]\n",
|
||||
"chat_model(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## RAG\n",
|
||||
"\n",
|
||||
"We can use Olama with RAG, [just as shown here](https://python.langchain.com/docs/use_cases/question_answering/how_to/local_retrieval_qa).\n",
|
||||
"\n",
|
||||
"Let's use the 13b model:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"ollama pull llama2:13b\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Or, the 13b-chat model:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"ollama pull llama2:13b-chat\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Let's also use local embeddings from `GPT4AllEmbeddings` and `Chroma`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install gpt4all chromadb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import WebBaseLoader\n",
|
||||
"loader = WebBaseLoader(\"https://lilianweng.github.io/posts/2023-06-23-agent/\")\n",
|
||||
"data = loader.load()\n",
|
||||
"\n",
|
||||
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
|
||||
"all_splits = text_splitter.split_documents(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Found model file at /Users/rlm/.cache/gpt4all/ggml-all-MiniLM-L6-v2-f16.bin\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.vectorstores import Chroma\n",
|
||||
"from langchain.embeddings import GPT4AllEmbeddings\n",
|
||||
"\n",
|
||||
"vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"4"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"question = \"What are the approaches to Task Decomposition?\"\n",
|
||||
"docs = vectorstore.similarity_search(question)\n",
|
||||
"len(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate\n",
|
||||
"\n",
|
||||
"# Prompt\n",
|
||||
"template = \"\"\"[INST] <<SYS>> Use the following pieces of context to answer the question at the end. \n",
|
||||
"If you don't know the answer, just say that you don't know, don't try to make up an answer. \n",
|
||||
"Use three sentences maximum and keep the answer as concise as possible. <</SYS>>\n",
|
||||
"{context}\n",
|
||||
"Question: {question}\n",
|
||||
"Helpful Answer:[/INST]\"\"\"\n",
|
||||
"QA_CHAIN_PROMPT = PromptTemplate(\n",
|
||||
" input_variables=[\"context\", \"question\"],\n",
|
||||
" template=template,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Chat model\n",
|
||||
"from langchain.chat_models import ChatOllama\n",
|
||||
"from langchain.callbacks.manager import CallbackManager\n",
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
|
||||
"chat_model = ChatOllama(model=\"llama2:13b-chat\",\n",
|
||||
" verbose=True,\n",
|
||||
" callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# QA chain\n",
|
||||
"from langchain.chains import RetrievalQA\n",
|
||||
"qa_chain = RetrievalQA.from_chain_type(\n",
|
||||
" chat_model,\n",
|
||||
" retriever=vectorstore.as_retriever(),\n",
|
||||
" chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Based on the provided context, there are three approaches to task decomposition for AI agents:\n",
|
||||
"\n",
|
||||
"1. LLM with simple prompting, such as \"Steps for XYZ.\" or \"What are the subgoals for achieving XYZ?\"\n",
|
||||
"2. Task-specific instructions, such as \"Write a story outline\" for writing a novel.\n",
|
||||
"3. Human inputs."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"question = \"What are the various approaches to Task Decomposition for AI Agents?\"\n",
|
||||
"result = qa_chain({\"query\": question})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also get logging for tokens."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Based on the given context, here is the answer to the question \"What are the approaches to Task Decomposition?\"\n",
|
||||
"\n",
|
||||
"There are three approaches to task decomposition:\n",
|
||||
"\n",
|
||||
"1. LLM with simple prompting, such as \"Steps for XYZ.\" or \"What are the subgoals for achieving XYZ?\"\n",
|
||||
"2. Using task-specific instructions, like \"Write a story outline\" for writing a novel.\n",
|
||||
"3. With human inputs.{'model': 'llama2:13b-chat', 'created_at': '2023-08-23T15:37:51.469127Z', 'done': True, 'context': [1, 29871, 1, 29961, 25580, 29962, 518, 25580, 29962, 518, 25580, 29962, 3532, 14816, 29903, 6778, 4803, 278, 1494, 12785, 310, 3030, 304, 1234, 278, 1139, 472, 278, 1095, 29889, 29871, 13, 3644, 366, 1016, 29915, 29873, 1073, 278, 1234, 29892, 925, 1827, 393, 366, 1016, 29915, 29873, 1073, 29892, 1016, 29915, 29873, 1018, 304, 1207, 701, 385, 1234, 29889, 29871, 13, 11403, 2211, 25260, 7472, 322, 3013, 278, 1234, 408, 3022, 895, 408, 1950, 29889, 529, 829, 14816, 29903, 6778, 13, 5398, 26227, 508, 367, 2309, 313, 29896, 29897, 491, 365, 26369, 411, 2560, 9508, 292, 763, 376, 7789, 567, 363, 1060, 29979, 29999, 7790, 29876, 29896, 19602, 376, 5618, 526, 278, 1014, 1484, 1338, 363, 3657, 15387, 1060, 29979, 29999, 29973, 613, 313, 29906, 29897, 491, 773, 3414, 29899, 14940, 11994, 29936, 321, 29889, 29887, 29889, 376, 6113, 263, 5828, 27887, 1213, 363, 5007, 263, 9554, 29892, 470, 313, 29941, 29897, 411, 5199, 10970, 29889, 13, 13, 5398, 26227, 508, 367, 2309, 313, 29896, 29897, 491, 365, 26369, 411, 2560, 9508, 292, 763, 376, 7789, 567, 363, 1060, 29979, 29999, 7790, 29876, 29896, 19602, 376, 5618, 526, 278, 1014, 1484, 1338, 363, 3657, 15387, 1060, 29979, 29999, 29973, 613, 313, 29906, 29897, 491, 773, 3414, 29899, 14940, 11994, 29936, 321, 29889, 29887, 29889, 376, 6113, 263, 5828, 27887, 1213, 363, 5007, 263, 9554, 29892, 470, 313, 29941, 29897, 411, 5199, 10970, 29889, 13, 13, 1451, 16047, 267, 297, 1472, 29899, 8489, 18987, 322, 3414, 26227, 29901, 1858, 9450, 975, 263, 3309, 29891, 4955, 322, 17583, 3902, 8253, 278, 1650, 2913, 3933, 18066, 292, 29889, 365, 26369, 29879, 21117, 304, 10365, 13900, 746, 20050, 411, 15668, 4436, 29892, 3907, 963, 3109, 16424, 9401, 304, 25618, 1058, 5110, 515, 14260, 322, 1059, 29889, 13, 13, 1451, 16047, 267, 297, 1472, 29899, 8489, 18987, 322, 3414, 26227, 29901, 1858, 9450, 975, 263, 3309, 29891, 4955, 322, 17583, 3902, 8253, 278, 1650, 2913, 3933, 18066, 292, 29889, 365, 26369, 29879, 21117, 304, 10365, 13900, 746, 20050, 411, 15668, 4436, 29892, 3907, 963, 3109, 16424, 9401, 304, 25618, 1058, 5110, 515, 14260, 322, 1059, 29889, 13, 16492, 29901, 1724, 526, 278, 13501, 304, 9330, 897, 510, 3283, 29973, 13, 29648, 1319, 673, 10834, 29914, 25580, 29962, 518, 29914, 25580, 29962, 518, 29914, 25580, 29962, 29871, 16564, 373, 278, 2183, 3030, 29892, 1244, 338, 278, 1234, 304, 278, 1139, 376, 5618, 526, 278, 13501, 304, 9330, 897, 510, 3283, 3026, 13, 13, 8439, 526, 2211, 13501, 304, 3414, 26227, 29901, 13, 13, 29896, 29889, 365, 26369, 411, 2560, 9508, 292, 29892, 1316, 408, 376, 7789, 567, 363, 1060, 29979, 29999, 1213, 470, 376, 5618, 526, 278, 1014, 1484, 1338, 363, 3657, 15387, 1060, 29979, 29999, 3026, 13, 29906, 29889, 5293, 3414, 29899, 14940, 11994, 29892, 763, 376, 6113, 263, 5828, 27887, 29908, 363, 5007, 263, 9554, 29889, 13, 29941, 29889, 2973, 5199, 10970, 29889, 2], 'total_duration': 9514823750, 'load_duration': 795542, 'sample_count': 99, 'sample_duration': 68732000, 'prompt_eval_count': 146, 'prompt_eval_duration': 6206275000, 'eval_count': 98, 'eval_duration': 3229641000}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.schema import LLMResult\n",
|
||||
"from langchain.callbacks.base import BaseCallbackHandler\n",
|
||||
"\n",
|
||||
"class GenerationStatisticsCallback(BaseCallbackHandler):\n",
|
||||
" def on_llm_end(self, response: LLMResult, **kwargs) -> None:\n",
|
||||
" print(response.generations[0][0].generation_info)\n",
|
||||
" \n",
|
||||
"callback_manager = CallbackManager([StreamingStdOutCallbackHandler(), GenerationStatisticsCallback()])\n",
|
||||
"\n",
|
||||
"chat_model = ChatOllama(model=\"llama2:13b-chat\",\n",
|
||||
" verbose=True,\n",
|
||||
" callback_manager=callback_manager)\n",
|
||||
"\n",
|
||||
"qa_chain = RetrievalQA.from_chain_type(\n",
|
||||
" chat_model,\n",
|
||||
" retriever=vectorstore.as_retriever(),\n",
|
||||
" chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"question = \"What are the approaches to Task Decomposition?\"\n",
|
||||
"result = qa_chain({\"query\": question})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`eval_count` / (`eval_duration`/10e9) gets `tok / s`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"30.343929867127645"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"98 / (3229641000/1000/1000/1000)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -143,12 +143,39 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c095285d",
|
||||
"cell_type": "markdown",
|
||||
"id": "57e27714",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"## Fine-tuning\n",
|
||||
"\n",
|
||||
"You can call fine-tuned OpenAI models by passing in your corresponding `modelName` parameter.\n",
|
||||
"\n",
|
||||
"This generally takes the form of `ft:{OPENAI_MODEL_NAME}:{ORG_NAME}::{MODEL_ID}`. For example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "33c4a8b0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore la programmation.\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"fine_tuned_model = ChatOpenAI(temperature=0, model_name=\"ft:gpt-3.5-turbo-0613:langchain::7qTVM5AR\")\n",
|
||||
"\n",
|
||||
"fine_tuned_model(messages)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -167,7 +194,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.7"
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
325
docs/extras/integrations/chat_loaders/discord.ipynb
Normal file
325
docs/extras/integrations/chat_loaders/discord.ipynb
Normal file
@ -0,0 +1,325 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c4ff9336-1cf3-459e-bd70-d1314c1da6a0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Discord\n",
|
||||
"\n",
|
||||
"This notebook shows how to create your own chat loader that works on copy-pasted messages (from dms) to a list of LangChain messages.\n",
|
||||
"\n",
|
||||
"The process has four steps:\n",
|
||||
"1. Create the chat .txt file by copying chats from the Discord app and pasting them in a file on your local computer\n",
|
||||
"2. Copy the chat loader definition from below to a local file.\n",
|
||||
"3. Initialize the `DiscordChatLoader` with the file path pointed to the text file.\n",
|
||||
"4. Call `loader.load()` (or `loader.lazy_load()`) to perform the conversion.\n",
|
||||
"\n",
|
||||
"## 1. Creat message dump\n",
|
||||
"\n",
|
||||
"Currently (2023/08/23) this loader only supports .txt files in the format generated by copying messages in the app to your clipboard and pasting in a file. Below is an example."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e4ccfdfa-6869-4d67-90a0-ab99f01b7553",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Overwriting discord_chats.txt\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%writefile discord_chats.txt\n",
|
||||
"talkingtower — 08/15/2023 11:10 AM\n",
|
||||
"Love music! Do you like jazz?\n",
|
||||
"reporterbob — 08/15/2023 9:27 PM\n",
|
||||
"Yes! Jazz is fantastic. Ever heard this one?\n",
|
||||
"Website\n",
|
||||
"Listen to classic jazz track...\n",
|
||||
"\n",
|
||||
"talkingtower — Yesterday at 5:03 AM\n",
|
||||
"Indeed! Great choice. 🎷\n",
|
||||
"reporterbob — Yesterday at 5:23 AM\n",
|
||||
"Thanks! How about some virtual sightseeing?\n",
|
||||
"Website\n",
|
||||
"Virtual tour of famous landmarks...\n",
|
||||
"\n",
|
||||
"talkingtower — Today at 2:38 PM\n",
|
||||
"Sounds fun! Let's explore.\n",
|
||||
"reporterbob — Today at 2:56 PM\n",
|
||||
"Enjoy the tour! See you around.\n",
|
||||
"talkingtower — Today at 3:00 PM\n",
|
||||
"Thank you! Goodbye! 👋\n",
|
||||
"reporterbob — Today at 3:02 PM\n",
|
||||
"Farewell! Happy exploring."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "359565a7-dad3-403c-a73c-6414b1295127",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Define chat loader\n",
|
||||
"\n",
|
||||
"LangChain currently does not support "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a429e0c4-4d7d-45f8-bbbb-c7fc5229f6af",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"import re\n",
|
||||
"from typing import Iterator, List\n",
|
||||
"\n",
|
||||
"from langchain import schema\n",
|
||||
"from langchain.chat_loaders import base as chat_loaders\n",
|
||||
"\n",
|
||||
"logger = logging.getLogger()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class DiscordChatLoader(chat_loaders.BaseChatLoader):\n",
|
||||
" \n",
|
||||
" def __init__(self, path: str):\n",
|
||||
" \"\"\"\n",
|
||||
" Initialize the Discord chat loader.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" path: Path to the exported Discord chat text file.\n",
|
||||
" \"\"\"\n",
|
||||
" self.path = path\n",
|
||||
" self._message_line_regex = re.compile(\n",
|
||||
" r\"(.+?) — (\\w{3,9} \\d{1,2}(?:st|nd|rd|th)?(?:, \\d{4})? \\d{1,2}:\\d{2} (?:AM|PM)|Today at \\d{1,2}:\\d{2} (?:AM|PM)|Yesterday at \\d{1,2}:\\d{2} (?:AM|PM))\", # noqa\n",
|
||||
" flags=re.DOTALL,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" def _load_single_chat_session_from_txt(\n",
|
||||
" self, file_path: str\n",
|
||||
" ) -> chat_loaders.ChatSession:\n",
|
||||
" \"\"\"\n",
|
||||
" Load a single chat session from a text file.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" file_path: Path to the text file containing the chat messages.\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" A `ChatSession` object containing the loaded chat messages.\n",
|
||||
" \"\"\"\n",
|
||||
" with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
|
||||
" lines = file.readlines()\n",
|
||||
"\n",
|
||||
" results: List[schema.BaseMessage] = []\n",
|
||||
" current_sender = None\n",
|
||||
" current_timestamp = None\n",
|
||||
" current_content = []\n",
|
||||
" for line in lines:\n",
|
||||
" if re.match(\n",
|
||||
" r\".+? — (\\d{2}/\\d{2}/\\d{4} \\d{1,2}:\\d{2} (?:AM|PM)|Today at \\d{1,2}:\\d{2} (?:AM|PM)|Yesterday at \\d{1,2}:\\d{2} (?:AM|PM))\", # noqa\n",
|
||||
" line,\n",
|
||||
" ):\n",
|
||||
" if current_sender and current_content:\n",
|
||||
" results.append(\n",
|
||||
" schema.HumanMessage(\n",
|
||||
" content=\"\".join(current_content).strip(),\n",
|
||||
" additional_kwargs={\n",
|
||||
" \"sender\": current_sender,\n",
|
||||
" \"events\": [{\"message_time\": current_timestamp}],\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" current_sender, current_timestamp = line.split(\" — \")[:2]\n",
|
||||
" current_content = [\n",
|
||||
" line[len(current_sender) + len(current_timestamp) + 4 :].strip()\n",
|
||||
" ]\n",
|
||||
" elif re.match(r\"\\[\\d{1,2}:\\d{2} (?:AM|PM)\\]\", line.strip()):\n",
|
||||
" results.append(\n",
|
||||
" schema.HumanMessage(\n",
|
||||
" content=\"\".join(current_content).strip(),\n",
|
||||
" additional_kwargs={\n",
|
||||
" \"sender\": current_sender,\n",
|
||||
" \"events\": [{\"message_time\": current_timestamp}],\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" current_timestamp = line.strip()[1:-1]\n",
|
||||
" current_content = []\n",
|
||||
" else:\n",
|
||||
" current_content.append(\"\\n\" + line.strip())\n",
|
||||
"\n",
|
||||
" if current_sender and current_content:\n",
|
||||
" results.append(\n",
|
||||
" schema.HumanMessage(\n",
|
||||
" content=\"\".join(current_content).strip(),\n",
|
||||
" additional_kwargs={\n",
|
||||
" \"sender\": current_sender,\n",
|
||||
" \"events\": [{\"message_time\": current_timestamp}],\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" return chat_loaders.ChatSession(messages=results)\n",
|
||||
"\n",
|
||||
" def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:\n",
|
||||
" \"\"\"\n",
|
||||
" Lazy load the messages from the chat file and yield them in the required format.\n",
|
||||
"\n",
|
||||
" Yields:\n",
|
||||
" A `ChatSession` object containing the loaded chat messages.\n",
|
||||
" \"\"\"\n",
|
||||
" yield self._load_single_chat_session_from_txt(self.path)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c8240393-48be-44d2-b0d6-52c215cd8ac2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Create loader\n",
|
||||
"\n",
|
||||
"We will point to the file we just wrote to disk."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "1268de40-b0e5-445d-9cd8-54856cd0293a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = DiscordChatLoader(\n",
|
||||
" path=\"./discord_chats.txt\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4928df4b-ae31-48a7-bd76-be3ecee1f3e0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Load Messages\n",
|
||||
"\n",
|
||||
"Assuming the format is correct, the loader will convert the chats to langchain messages."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "c8a0836d-4a22-4790-bfe9-97f2145bb0d6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import List\n",
|
||||
"from langchain.chat_loaders.base import ChatSession\n",
|
||||
"from langchain.chat_loaders.utils import (\n",
|
||||
" map_ai_messages,\n",
|
||||
" merge_chat_runs,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"raw_messages = loader.lazy_load()\n",
|
||||
"# Merge consecutive messages from the same sender into a single message\n",
|
||||
"merged_messages = merge_chat_runs(raw_messages)\n",
|
||||
"# Convert messages from \"talkingtower\" to AI messages\n",
|
||||
"messages: List[ChatSession] = list(map_ai_messages(merged_messages, sender=\"talkingtower\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "1913963b-c44e-4f7a-aba7-0423c9b8bd59",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'messages': [AIMessage(content='Love music! Do you like jazz?', additional_kwargs={'sender': 'talkingtower', 'events': [{'message_time': '08/15/2023 11:10 AM\\n'}]}, example=False),\n",
|
||||
" HumanMessage(content='Yes! Jazz is fantastic. Ever heard this one?\\nWebsite\\nListen to classic jazz track...', additional_kwargs={'sender': 'reporterbob', 'events': [{'message_time': '08/15/2023 9:27 PM\\n'}]}, example=False),\n",
|
||||
" AIMessage(content='Indeed! Great choice. 🎷', additional_kwargs={'sender': 'talkingtower', 'events': [{'message_time': 'Yesterday at 5:03 AM\\n'}]}, example=False),\n",
|
||||
" HumanMessage(content='Thanks! How about some virtual sightseeing?\\nWebsite\\nVirtual tour of famous landmarks...', additional_kwargs={'sender': 'reporterbob', 'events': [{'message_time': 'Yesterday at 5:23 AM\\n'}]}, example=False),\n",
|
||||
" AIMessage(content=\"Sounds fun! Let's explore.\", additional_kwargs={'sender': 'talkingtower', 'events': [{'message_time': 'Today at 2:38 PM\\n'}]}, example=False),\n",
|
||||
" HumanMessage(content='Enjoy the tour! See you around.', additional_kwargs={'sender': 'reporterbob', 'events': [{'message_time': 'Today at 2:56 PM\\n'}]}, example=False),\n",
|
||||
" AIMessage(content='Thank you! Goodbye! 👋', additional_kwargs={'sender': 'talkingtower', 'events': [{'message_time': 'Today at 3:00 PM\\n'}]}, example=False),\n",
|
||||
" HumanMessage(content='Farewell! Happy exploring.', additional_kwargs={'sender': 'reporterbob', 'events': [{'message_time': 'Today at 3:02 PM\\n'}]}, example=False)]}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8595a518-5c89-44aa-94a7-ca51e7e2a5fa",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Next Steps\n",
|
||||
"\n",
|
||||
"You can then use these messages how you see fit, such as finetuning a model, few-shot example selection, or directly make predictions for the next message "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "08ff0a1e-fca0-4da3-aacd-d7401f99d946",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Thank you! Have a wonderful day! 🌟"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI()\n",
|
||||
"\n",
|
||||
"for chunk in llm.stream(messages[0]['messages']):\n",
|
||||
" print(chunk.content, end=\"\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "50a5251f-074a-4a3c-a2b0-b1de85e0ac6a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
File diff suppressed because it is too large
Load Diff
579
docs/extras/integrations/chat_loaders/facebook.ipynb
Normal file
579
docs/extras/integrations/chat_loaders/facebook.ipynb
Normal file
@ -0,0 +1,579 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e4bd269b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Facebook Messenger\n",
|
||||
"\n",
|
||||
"This notebook shows how to load data from Facebook in a format you can finetune on. The overall steps are:\n",
|
||||
"\n",
|
||||
"1. Download your messenger data to disk.\n",
|
||||
"2. Create the Chat Loader and call `loader.load()` (or `loader.lazy_load()`) to perform the conversion.\n",
|
||||
"3. Optionally use `merge_chat_runs` to combine message from the same sender in sequence, and/or `map_ai_messages` to convert messages from the specified sender to the \"AIMessage\" class. Once you've done this, call `convert_messages_for_finetuning` to prepare your data for fine-tuning.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Once this has been done, you can fine-tune your model. To do so you would complete the following steps:\n",
|
||||
"\n",
|
||||
"4. Upload your messages to OpenAI and run a fine-tuning job.\n",
|
||||
"6. Use the resulting model in your LangChain app!\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Let's begin.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## 1. Download Data\n",
|
||||
"\n",
|
||||
"To download your own messenger data, following instructions [here](https://www.zapptales.com/en/download-facebook-messenger-chat-history-how-to/). IMPORTANT - make sure to download them in JSON format (not HTML).\n",
|
||||
"\n",
|
||||
"We are hosting an example dump at [this google drive link](https://drive.google.com/file/d/1rh1s1o2i7B-Sk1v9o8KNgivLVGwJ-osV/view?usp=sharing) that we will use in this walkthrough."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "647f2158-a42e-4634-b283-b8492caf542a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"File file.zip downloaded.\n",
|
||||
"File file.zip has been unzipped.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This uses some example data\n",
|
||||
"import requests\n",
|
||||
"import zipfile\n",
|
||||
"\n",
|
||||
"def download_and_unzip(url: str, output_path: str = 'file.zip') -> None:\n",
|
||||
" file_id = url.split('/')[-2]\n",
|
||||
" download_url = f'https://drive.google.com/uc?export=download&id={file_id}'\n",
|
||||
"\n",
|
||||
" response = requests.get(download_url)\n",
|
||||
" if response.status_code != 200:\n",
|
||||
" print('Failed to download the file.')\n",
|
||||
" return\n",
|
||||
"\n",
|
||||
" with open(output_path, 'wb') as file:\n",
|
||||
" file.write(response.content)\n",
|
||||
" print(f'File {output_path} downloaded.')\n",
|
||||
"\n",
|
||||
" with zipfile.ZipFile(output_path, 'r') as zip_ref:\n",
|
||||
" zip_ref.extractall()\n",
|
||||
" print(f'File {output_path} has been unzipped.')\n",
|
||||
"\n",
|
||||
"# URL of the file to download\n",
|
||||
"url = 'https://drive.google.com/file/d/1rh1s1o2i7B-Sk1v9o8KNgivLVGwJ-osV/view?usp=sharing'\n",
|
||||
"\n",
|
||||
"# Download and unzip\n",
|
||||
"download_and_unzip(url)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "48ef8bb1-fc28-453c-835a-94a552f05a91",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Create Chat Loader\n",
|
||||
"\n",
|
||||
"We have 2 different `FacebookMessengerChatLoader` classes, one for an entire directory of chats, and one to load individual files. We"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "a0869bc6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"directory_path = \"./hogwarts\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "0460bf25",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_loaders.facebook_messenger import (\n",
|
||||
" SingleFileFacebookMessengerChatLoader,\n",
|
||||
" FolderFacebookMessengerChatLoader,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "f61ee277",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = SingleFileFacebookMessengerChatLoader(\n",
|
||||
" path=\"./hogwarts/inbox/HermioneGranger/messages_Hermione_Granger.json\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "ec466ad7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[HumanMessage(content=\"Hi Hermione! How's your summer going so far?\", additional_kwargs={'sender': 'Harry Potter'}, example=False),\n",
|
||||
" HumanMessage(content=\"Harry! Lovely to hear from you. My summer is going well, though I do miss everyone. I'm spending most of my time going through my books and researching fascinating new topics. How about you?\", additional_kwargs={'sender': 'Hermione Granger'}, example=False),\n",
|
||||
" HumanMessage(content=\"I miss you all too. The Dursleys are being their usual unpleasant selves but I'm getting by. At least I can practice some spells in my room without them knowing. Let me know if you find anything good in your researching!\", additional_kwargs={'sender': 'Harry Potter'}, example=False)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat_session = loader.load()[0]\n",
|
||||
"chat_session[\"messages\"][:3]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "8a3ee473",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = FolderFacebookMessengerChatLoader(\n",
|
||||
" path=\"./hogwarts\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "9f41e122",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"9"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat_sessions = loader.load()\n",
|
||||
"len(chat_sessions)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d4aa3580-adc1-4b48-9bba-0e8e8d9f44ce",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Prepare for fine-tuning\n",
|
||||
"\n",
|
||||
"Calling `load()` returns all the chat messages we could extract as human messages. When conversing with chat bots, conversations typically follow a more strict alternating dialogue pattern relative to real conversations. \n",
|
||||
"\n",
|
||||
"You can choose to merge message \"runs\" (consecutive messages from the same sender) and select a sender to represent the \"AI\". The fine-tuned LLM will learn to generate these AI messages."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "5a78030d-b757-4bbe-8a6c-841056f46df7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_loaders.utils import (\n",
|
||||
" merge_chat_runs,\n",
|
||||
" map_ai_messages,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "ff35b028-78bf-4c5b-9ec6-939fe67de7f7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"merged_sessions = merge_chat_runs(chat_sessions)\n",
|
||||
"alternating_sessions = list(map_ai_messages(merged_sessions, \"Harry Potter\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "4b11906e-a496-4d01-9f0d-1938c14147bf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[AIMessage(content=\"Professor Snape, I was hoping I could speak with you for a moment about something that's been concerning me lately.\", additional_kwargs={'sender': 'Harry Potter'}, example=False),\n",
|
||||
" HumanMessage(content=\"What is it, Potter? I'm quite busy at the moment.\", additional_kwargs={'sender': 'Severus Snape'}, example=False),\n",
|
||||
" AIMessage(content=\"I apologize for the interruption, sir. I'll be brief. I've noticed some strange activity around the school grounds at night. I saw a cloaked figure lurking near the Forbidden Forest last night. I'm worried someone may be plotting something sinister.\", additional_kwargs={'sender': 'Harry Potter'}, example=False)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Now all of Harry Potter's messages will take the AI message class\n",
|
||||
"# which maps to the 'assistant' role in OpenAI's training format\n",
|
||||
"alternating_sessions[0]['messages'][:3]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d985478d-062e-47b9-ae9a-102f59be07c0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Now we can convert to OpenAI format dictionaries"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "21372331",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.adapters.openai import convert_messages_for_finetuning"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"id": "92c5ae7a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Prepared 9 dialogues for training\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"training_data = convert_messages_for_finetuning(alternating_sessions)\n",
|
||||
"print(f\"Prepared {len(training_data)} dialogues for training\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "dfcbd181",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'role': 'assistant',\n",
|
||||
" 'content': \"Professor Snape, I was hoping I could speak with you for a moment about something that's been concerning me lately.\"},\n",
|
||||
" {'role': 'user',\n",
|
||||
" 'content': \"What is it, Potter? I'm quite busy at the moment.\"},\n",
|
||||
" {'role': 'assistant',\n",
|
||||
" 'content': \"I apologize for the interruption, sir. I'll be brief. I've noticed some strange activity around the school grounds at night. I saw a cloaked figure lurking near the Forbidden Forest last night. I'm worried someone may be plotting something sinister.\"}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"training_data[0][:3]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f1a9fd64-4f9f-42d3-b5dc-2a340e51e9e7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"OpenAI currently requires at least 10 training examples for a fine-tuning job, though they recommend between 50-100 for most tasks. Since we only have 9 chat sessions, we can subdivide them (optionally with some overlap) so that each training example is comprised of a portion of a whole conversation.\n",
|
||||
"\n",
|
||||
"Facebook chat sessions (1 per person) often span multiple days and conversations,\n",
|
||||
"so the long-range dependencies may not be that important to model anyhow."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"id": "13cd290a-b1e9-4686-bb5e-d99de8b8612b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"100"
|
||||
]
|
||||
},
|
||||
"execution_count": 42,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Our chat is alternating, we will make each datapoint a group of 8 messages,\n",
|
||||
"# with 2 messages overlapping\n",
|
||||
"chunk_size = 8\n",
|
||||
"overlap = 2\n",
|
||||
"\n",
|
||||
"training_examples = [\n",
|
||||
" conversation_messages[i: i + chunk_size] \n",
|
||||
" for conversation_messages in training_data\n",
|
||||
" for i in range(\n",
|
||||
" 0, len(conversation_messages) - chunk_size + 1, \n",
|
||||
" chunk_size - overlap)\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"len(training_examples)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cc8baf41-ff07-4492-96bd-b2472ee7cef9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Fine-tune the model\n",
|
||||
"\n",
|
||||
"It's time to fine-tune the model. Make sure you have `openai` installed\n",
|
||||
"and have set your `OPENAI_API_KEY` appropriately"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"id": "95ce3f63-3c80-44b2-9060-534ad74e16fa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# %pip install -U openai --quiet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"id": "ab9e28eb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"File file-zCyNBeg4snpbBL7VkvsuhCz8 ready afer 30.55 seconds.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"from io import BytesIO\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"import openai\n",
|
||||
"\n",
|
||||
"# We will write the jsonl file in memory\n",
|
||||
"my_file = BytesIO()\n",
|
||||
"for m in training_examples:\n",
|
||||
" my_file.write((json.dumps({\"messages\": m}) + \"\\n\").encode('utf-8'))\n",
|
||||
"\n",
|
||||
"my_file.seek(0)\n",
|
||||
"training_file = openai.File.create(\n",
|
||||
" file=my_file,\n",
|
||||
" purpose='fine-tune'\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# OpenAI audits each training file for compliance reasons.\n",
|
||||
"# This make take a few minutes\n",
|
||||
"status = openai.File.retrieve(training_file.id).status\n",
|
||||
"start_time = time.time()\n",
|
||||
"while status != \"processed\":\n",
|
||||
" print(f\"Status=[{status}]... {time.time() - start_time:.2f}s\", end=\"\\r\", flush=True)\n",
|
||||
" time.sleep(5)\n",
|
||||
" status = openai.File.retrieve(training_file.id).status\n",
|
||||
"print(f\"File {training_file.id} ready after {time.time() - start_time:.2f} seconds.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "759a7f51-fde9-4b75-aaa9-e600e6537bd1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"With the file ready, it's time to kick off a training job."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"id": "3f451425",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"job = openai.FineTuningJob.create(\n",
|
||||
" training_file=training_file.id,\n",
|
||||
" model=\"gpt-3.5-turbo\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "489b23ef-5e14-42a9-bafb-44220ec6960b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Grab a cup of tea while your model is being prepared. This may take some time!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"id": "bac1637a-c087-4523-ade1-c47f9bf4c6f4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Status=[running]... 908.87s\r"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"status = openai.FineTuningJob.retrieve(job.id).status\n",
|
||||
"start_time = time.time()\n",
|
||||
"while status != \"succeeded\":\n",
|
||||
" print(f\"Status=[{status}]... {time.time() - start_time:.2f}s\", end=\"\\r\", flush=True)\n",
|
||||
" time.sleep(5)\n",
|
||||
" job = openai.FineTuningJob.retrieve(job.id)\n",
|
||||
" status = job.status"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 66,
|
||||
"id": "535895e1-bc69-40e5-82ed-e24ed2baeeee",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"ft:gpt-3.5-turbo-0613:personal::7rDwkaOq\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(job.fine_tuned_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "502ff73b-f9e9-49ce-ba45-401811e57946",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. Use in LangChain\n",
|
||||
"\n",
|
||||
"You can use the resulting model ID directly the `ChatOpenAI` model class."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 67,
|
||||
"id": "3925d60d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"model = ChatOpenAI(\n",
|
||||
" model=job.fine_tuned_model,\n",
|
||||
" temperature=1,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 69,
|
||||
"id": "7190cf2e-ab34-4ceb-bdad-45f24f069c29",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import ChatPromptTemplate\n",
|
||||
"from langchain.schema.output_parser import StrOutputParser\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | model | StrOutputParser()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 72,
|
||||
"id": "f02057e9-f914-40b1-9c9d-9432ff594b98",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The usual - Potions, Transfiguration, Defense Against the Dark Arts. What about you?"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for tok in chain.stream({\"input\": \"What classes are you taking?\"}):\n",
|
||||
" print(tok, end=\"\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "35331503-3cc6-4d64-955e-64afe6b5fef3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
179
docs/extras/integrations/chat_loaders/gmail.ipynb
Normal file
179
docs/extras/integrations/chat_loaders/gmail.ipynb
Normal file
@ -0,0 +1,179 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b3d1705d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# GMail\n",
|
||||
"\n",
|
||||
"This loader goes over how to load data from GMail. There are many ways you could want to load data from GMail. This loader is currently fairly opionated in how to do so. The way it does it is it first looks for all messages that you have sent. It then looks for messages where you are responding to a previous email. It then fetches that previous email, and creates a training example of that email, followed by your email.\n",
|
||||
"\n",
|
||||
"Note that there are clear limitations here. For example, all examples created are only looking at the previous email for context.\n",
|
||||
"\n",
|
||||
"To use:\n",
|
||||
"\n",
|
||||
"- Set up a Google Developer Account: Go to the Google Developer Console, create a project, and enable the Gmail API for that project. This will give you a credentials.json file that you'll need later.\n",
|
||||
"\n",
|
||||
"- Install the Google Client Library: Run the following command to install the Google Client Library:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "84578039",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install --upgrade google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "be18f796",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os.path\n",
|
||||
"import base64\n",
|
||||
"import json\n",
|
||||
"import re\n",
|
||||
"import time\n",
|
||||
"from google.auth.transport.requests import Request\n",
|
||||
"from google.oauth2.credentials import Credentials\n",
|
||||
"from google_auth_oauthlib.flow import InstalledAppFlow\n",
|
||||
"from googleapiclient.discovery import build\n",
|
||||
"import logging\n",
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"creds = None\n",
|
||||
"# The file token.json stores the user's access and refresh tokens, and is\n",
|
||||
"# created automatically when the authorization flow completes for the first\n",
|
||||
"# time.\n",
|
||||
"if os.path.exists('email_token.json'):\n",
|
||||
" creds = Credentials.from_authorized_user_file('email_token.json', SCOPES)\n",
|
||||
"# If there are no (valid) credentials available, let the user log in.\n",
|
||||
"if not creds or not creds.valid:\n",
|
||||
" if creds and creds.expired and creds.refresh_token:\n",
|
||||
" creds.refresh(Request())\n",
|
||||
" else:\n",
|
||||
" flow = InstalledAppFlow.from_client_secrets_file( \n",
|
||||
" # your creds file here. Please create json file as here https://cloud.google.com/docs/authentication/getting-started\n",
|
||||
" 'creds.json', SCOPES)\n",
|
||||
" creds = flow.run_local_server(port=0)\n",
|
||||
" # Save the credentials for the next run\n",
|
||||
" with open('email_token.json', 'w') as token:\n",
|
||||
" token.write(creds.to_json())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "a2793ba0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_loaders.gmail import GMailLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "2154597f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = GMailLoader(creds=creds, n=3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "0b7d11bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "74764bc7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"2"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Sometimes there can be errors which we silently ignore\n",
|
||||
"len(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "d9360a85",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_loaders.utils import (\n",
|
||||
" map_ai_messages,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "a9646f7a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# This makes messages sent by hchase@langchain.com the AI Messages\n",
|
||||
"# This means you will train an LLM to predict as if it's responding as hchase\n",
|
||||
"training_data = list(map_ai_messages(data, sender=\"Harrison Chase <hchase@langchain.com>\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d1a182f0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
188
docs/extras/integrations/chat_loaders/index.mdx
Normal file
188
docs/extras/integrations/chat_loaders/index.mdx
Normal file
@ -0,0 +1,188 @@
|
||||
---
|
||||
sidebar_position: 0
|
||||
---
|
||||
|
||||
# Chat loaders
|
||||
|
||||
Like document loaders, chat loaders are utilities designed to help load conversations from popular communication platforms such as Facebook, Slack, Discord, etc. These are loaded into memory as LangChain chat message objects. Such utilities facilitate tasks such as fine-tuning a language model to match your personal style or voice.
|
||||
|
||||
This brief guide will illustrate the process using [OpenAI's fine-tuning API](https://platform.openai.com/docs/guides/fine-tuning) comprised of six steps:
|
||||
|
||||
1. Export your Facebook Messenger chat data in a compatible format for your intended chat loader.
|
||||
2. Load the chat data into memory as LangChain chat message objects. (_this is what is covered in each integration notebook in this section of the documentation_).
|
||||
- Assign a person to the "AI" role and optionally filter, group, and merge messages.
|
||||
3. Export these acquired messages in a format expected by the fine-tuning API.
|
||||
4. Upload this data to OpenAI.
|
||||
5. Fine-tune your model.
|
||||
6. Implement the fine-tuned model in LangChain.
|
||||
|
||||
This guide is not wholly comprehensive but is designed to take you through the fundamentals of going from raw data to fine-tuned model.
|
||||
|
||||
We will demonstrate the procedure through an example of fine-tuning a `gpt-3.5-turbo` model on Facebook Messenger data.
|
||||
|
||||
### 1. Export your chat data
|
||||
|
||||
To export your Facebook messenger data, you can follow the [instructions here](https://www.zapptales.com/en/download-facebook-messenger-chat-history-how-to/).
|
||||
|
||||
:::important JSON format
|
||||
You must select "JSON format" (instead of HTML) when exporting your data to be compatible with the current loader.
|
||||
:::
|
||||
|
||||
OpenAI requires at least 10 examples to fine-tune your model, but they recommend between 50-100 for more optimal results.
|
||||
You can use the example data stored at [this google drive link](https://drive.google.com/file/d/1rh1s1o2i7B-Sk1v9o8KNgivLVGwJ-osV/view?usp=sharing) to test the process.
|
||||
|
||||
### 2. Load the chat
|
||||
|
||||
Once you've obtained your chat data, you can load it into memory as LangChain chat message objects. Here’s an example of loading data using the Python code:
|
||||
|
||||
```python
|
||||
from langchain.chat_loaders.facebook_messenger import FolderFacebookMessengerChatLoader
|
||||
|
||||
loader = FolderFacebookMessengerChatLoader(
|
||||
path="./facebook_messenger_chats",
|
||||
)
|
||||
|
||||
chat_sessions = loader.load()
|
||||
```
|
||||
|
||||
In this snippet, we point the loader to a directory of Facebook chat dumps which are then loaded as multiple "sessions" of messages, one session per conversation file.
|
||||
|
||||
Once you've loaded the messages, you should decide which person you want to fine-tune the model to (usually yourself). You can also decide to merge consecutive messages from the same sender into a single chat message.
|
||||
For both of these tasks, you can use the chat_loaders utilities to do so:
|
||||
|
||||
```
|
||||
from langchain.chat_loaders.utils import (
|
||||
merge_chat_runs,
|
||||
map_ai_messages,
|
||||
)
|
||||
|
||||
merged_sessions = merge_chat_runs(chat_sessions)
|
||||
alternating_sessions = list(map_ai_messages(merged_sessions, "My Name"))
|
||||
```
|
||||
|
||||
### 3. Export messages to OpenAI format
|
||||
|
||||
Convert the chat messages to dictionaries using the `convert_messages_for_finetuning` function. Then, group the data into chunks for better context modeling and overlap management.
|
||||
|
||||
```python
|
||||
from langchain.adapters.openai import convert_messages_for_finetuning
|
||||
|
||||
openai_messages = convert_messages_for_finetuning(chat_sessions)
|
||||
```
|
||||
|
||||
At this point, the data is ready for upload to OpenAI. You can choose to split up conversations into smaller chunks for training if you
|
||||
do not have enough conversations to train on. Feel free to play around with different chunk sizes or with adding system messages to the fine-tuning data.
|
||||
|
||||
```python
|
||||
chunk_size = 8
|
||||
overlap = 2
|
||||
|
||||
message_groups = [
|
||||
conversation_messages[i: i + chunk_size]
|
||||
for conversation_messages in openai_messages
|
||||
for i in range(
|
||||
0, len(conversation_messages) - chunk_size + 1,
|
||||
chunk_size - overlap)
|
||||
]
|
||||
|
||||
len(message_groups)
|
||||
# 9
|
||||
```
|
||||
|
||||
### 4. Upload the data to OpenAI
|
||||
|
||||
Ensure you have set your OpenAI API key by following these [instructions](https://platform.openai.com/account/api-keys), then upload the training file.
|
||||
An audit is performed to ensure data compliance, so you may have to wait a few minutes for the dataset to become ready for use.
|
||||
|
||||
```python
|
||||
import time
|
||||
import json
|
||||
import io
|
||||
|
||||
import openai
|
||||
|
||||
my_file = io.BytesIO()
|
||||
for group in message_groups:
|
||||
my_file.write((json.dumps({"messages": group}) + "\n").encode('utf-8'))
|
||||
|
||||
my_file.seek(0)
|
||||
training_file = openai.File.create(
|
||||
file=my_file,
|
||||
purpose='fine-tune'
|
||||
)
|
||||
|
||||
# Wait while the file is processed
|
||||
status = openai.File.retrieve(training_file.id).status
|
||||
start_time = time.time()
|
||||
while status != "processed":
|
||||
print(f"Status=[{status}]... {time.time() - start_time:.2f}s", end="\r", flush=True)
|
||||
time.sleep(5)
|
||||
status = openai.File.retrieve(training_file.id).status
|
||||
print(f"File {training_file.id} ready after {time.time() - start_time:.2f} seconds.")
|
||||
```
|
||||
|
||||
Once this is done, you can proceed to the model training!
|
||||
|
||||
### 5. Fine-tune the model
|
||||
|
||||
Start the fine-tuning job with your chosen base model.
|
||||
|
||||
```python
|
||||
job = openai.FineTuningJob.create(
|
||||
training_file=training_file.id,
|
||||
model="gpt-3.5-turbo",
|
||||
)
|
||||
```
|
||||
|
||||
This might take a while. Check the status with `openai.FineTuningJob.retrieve(job.id).status` and wait for it to report `succeeded`.
|
||||
|
||||
```python
|
||||
# It may take 10-20+ minutes to complete training.
|
||||
status = openai.FineTuningJob.retrieve(job.id).status
|
||||
start_time = time.time()
|
||||
while status != "succeeded":
|
||||
print(f"Status=[{status}]... {time.time() - start_time:.2f}s", end="\r", flush=True)
|
||||
time.sleep(5)
|
||||
job = openai.FineTuningJob.retrieve(job.id)
|
||||
status = job.status
|
||||
```
|
||||
|
||||
### 6. Use the model in LangChain
|
||||
|
||||
You're almost there! Use the fine-tuned model in LangChain.
|
||||
|
||||
```python
|
||||
from langchain import chat_models
|
||||
|
||||
model_name = job.fine_tuned_model
|
||||
# Example: ft:gpt-3.5-turbo-0613:personal::5mty86jblapsed
|
||||
model = chat_models.ChatOpenAI(model=model_name)
|
||||
```
|
||||
|
||||
```python
|
||||
from langchain.prompts import ChatPromptTemplate
|
||||
from langchain.schema.output_parser import StrOutputParser
|
||||
|
||||
prompt = ChatPromptTemplate.from_messages(
|
||||
[
|
||||
("human", "{input}"),
|
||||
]
|
||||
)
|
||||
|
||||
chain = prompt | model | StrOutputParser()
|
||||
|
||||
for tok in chain.stream({"input": "What classes are you taking?"}):
|
||||
print(tok, end="", flush=True)
|
||||
|
||||
# The usual - Potions, Transfiguration, Defense Against the Dark Arts. What about you?
|
||||
```
|
||||
|
||||
And that's it! You've successfully fine-tuned a model and used it in LangChain.
|
||||
|
||||
## Supported Chat Loaders
|
||||
|
||||
LangChain currently supports the following chat loaders. Feel free to contribute more!
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
<DocCardList />
|
163
docs/extras/integrations/chat_loaders/slack.ipynb
Normal file
163
docs/extras/integrations/chat_loaders/slack.ipynb
Normal file
@ -0,0 +1,163 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "01fcfa2f-33a9-48f3-835a-b1956c394d6b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Slack\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the Slack chat loader. This class helps map exported slack conversations to LangChain chat messages.\n",
|
||||
"\n",
|
||||
"The process has three steps:\n",
|
||||
"1. Export the desired conversation thread by following the [instructions here](https://slack.com/help/articles/1500001548241-Request-to-export-all-conversations).\n",
|
||||
"2. Create the `SlackChatLoader` with the file path pointed to the json file or directory of JSON files\n",
|
||||
"3. Call `loader.load()` (or `loader.lazy_load()`) to perform the conversion. Optionally use `merge_chat_runs` to combine message from the same sender in sequence, and/or `map_ai_messages` to convert messages from the specified sender to the \"AIMessage\" class.\n",
|
||||
"\n",
|
||||
"## 1. Creat message dump\n",
|
||||
"\n",
|
||||
"Currently (2023/08/23) this loader best supports a zip directory of files in the format generated by exporting your a direct message converstion from Slack. Follow up-to-date instructions from slack on how to do so.\n",
|
||||
"\n",
|
||||
"We have an example in the LangChain repo."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "a79d35bf-5f21-4063-84bf-a60845c1c51f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"permalink = \"https://raw.githubusercontent.com/langchain-ai/langchain/342087bdfa3ac31d622385d0f2d09cf5e06c8db3/libs/langchain/tests/integration_tests/examples/slack_export.zip\"\n",
|
||||
"response = requests.get(permalink)\n",
|
||||
"with open(\"slack_dump.zip\", \"wb\") as f:\n",
|
||||
" f.write(response.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cf60f703-76f1-4602-a723-02c59535c1af",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Create the Chat Loader\n",
|
||||
"\n",
|
||||
"Provide the loader with the file path to the zip directory. You can optionally specify the user id that maps to an ai message as well an configure whether to merge message runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "4b8b432a-d2bc-49e1-b35f-761730a8fd6d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_loaders.slack import SlackChatLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "8ec6661b-0aca-48ae-9e2b-6412856c287b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = SlackChatLoader(\n",
|
||||
" path=\"slack_dump.zip\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8805a7c5-84b4-49f5-8989-0022f2054ace",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Load messages\n",
|
||||
"\n",
|
||||
"The `load()` (or `lazy_load`) methods return a list of \"ChatSessions\" that currently just contain a list of messages per loaded conversation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "fcd69b3e-020d-4a15-8a0d-61c2d34e1ee1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import List\n",
|
||||
"from langchain.chat_loaders.base import ChatSession\n",
|
||||
"from langchain.chat_loaders.utils import (\n",
|
||||
" map_ai_messages,\n",
|
||||
" merge_chat_runs,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"raw_messages = loader.lazy_load()\n",
|
||||
"# Merge consecutive messages from the same sender into a single message\n",
|
||||
"merged_messages = merge_chat_runs(raw_messages)\n",
|
||||
"# Convert messages from \"U0500003428\" to AI messages\n",
|
||||
"messages: List[ChatSession] = list(map_ai_messages(merged_messages, sender=\"U0500003428\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7d033f87-cd0c-4f44-a753-41b871c1e919",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Next Steps\n",
|
||||
"\n",
|
||||
"You can then use these messages how you see fit, such as finetuning a model, few-shot example selection, or directly make predictions for the next message. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "7d8a1629-5d9e-49b3-b978-3add57027d59",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Hi, \n",
|
||||
"\n",
|
||||
"I hope you're doing well. I wanted to reach out and ask if you'd be available to meet up for coffee sometime next week. I'd love to catch up and hear about what's been going on in your life. Let me know if you're interested and we can find a time that works for both of us. \n",
|
||||
"\n",
|
||||
"Looking forward to hearing from you!\n",
|
||||
"\n",
|
||||
"Best, [Your Name]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI()\n",
|
||||
"\n",
|
||||
"for chunk in llm.stream(messages[1]['messages']):\n",
|
||||
" print(chunk.content, end=\"\", flush=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
206
docs/extras/integrations/chat_loaders/telegram.ipynb
Normal file
206
docs/extras/integrations/chat_loaders/telegram.ipynb
Normal file
@ -0,0 +1,206 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "735455a6-f82e-4252-b545-27385ef883f4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Telegram\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the Telegram chat loader. This class helps map exported Telegram conversations to LangChain chat messages.\n",
|
||||
"\n",
|
||||
"The process has three steps:\n",
|
||||
"1. Export the chat .txt file by copying chats from the Discord app and pasting them in a file on your local computer\n",
|
||||
"2. Create the `TelegramChatLoader` with the file path pointed to the json file or directory of JSON files\n",
|
||||
"3. Call `loader.load()` (or `loader.lazy_load()`) to perform the conversion. Optionally use `merge_chat_runs` to combine message from the same sender in sequence, and/or `map_ai_messages` to convert messages from the specified sender to the \"AIMessage\" class.\n",
|
||||
"\n",
|
||||
"## 1. Creat message dump\n",
|
||||
"\n",
|
||||
"Currently (2023/08/23) this loader best supports json files in the format generated by exporting your chat history from the [Telegram Desktop App](https://desktop.telegram.org/).\n",
|
||||
"\n",
|
||||
"**Important:** There are 'lite' versions of telegram such as \"Telegram for MacOS\" that lack the export functionality. Please make sure you use the correct app to export the file.\n",
|
||||
"\n",
|
||||
"To make the export:\n",
|
||||
"1. Download and open telegram desktop\n",
|
||||
"2. Select a conversation\n",
|
||||
"3. Navigate to the conversation settings (currently the three dots in the top right corner)\n",
|
||||
"4. Click \"Export Chat History\"\n",
|
||||
"5. Unselect photos and other media. Select \"Machine-readable JSON\" format to export.\n",
|
||||
"\n",
|
||||
"An example is below: "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "285f2044-0f58-4b92-addb-9f8569076734",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Overwriting telegram_conversation.json\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%writefile telegram_conversation.json\n",
|
||||
"{\n",
|
||||
" \"name\": \"Jiminy\",\n",
|
||||
" \"type\": \"personal_chat\",\n",
|
||||
" \"id\": 5965280513,\n",
|
||||
" \"messages\": [\n",
|
||||
" {\n",
|
||||
" \"id\": 1,\n",
|
||||
" \"type\": \"message\",\n",
|
||||
" \"date\": \"2023-08-23T13:11:23\",\n",
|
||||
" \"date_unixtime\": \"1692821483\",\n",
|
||||
" \"from\": \"Jiminy Cricket\",\n",
|
||||
" \"from_id\": \"user123450513\",\n",
|
||||
" \"text\": \"You better trust your conscience\",\n",
|
||||
" \"text_entities\": [\n",
|
||||
" {\n",
|
||||
" \"type\": \"plain\",\n",
|
||||
" \"text\": \"You better trust your conscience\"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"id\": 2,\n",
|
||||
" \"type\": \"message\",\n",
|
||||
" \"date\": \"2023-08-23T13:13:20\",\n",
|
||||
" \"date_unixtime\": \"1692821600\",\n",
|
||||
" \"from\": \"Batman & Robin\",\n",
|
||||
" \"from_id\": \"user6565661032\",\n",
|
||||
" \"text\": \"What did you just say?\",\n",
|
||||
" \"text_entities\": [\n",
|
||||
" {\n",
|
||||
" \"type\": \"plain\",\n",
|
||||
" \"text\": \"What did you just say?\"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7cc109f4-4c92-4cd3-8143-c322776c3f03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Create the Chat Loader\n",
|
||||
"\n",
|
||||
"All that's required is the file path. You can optionally specify the user name that maps to an ai message as well an configure whether to merge message runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "111f7767-573c-42d4-86f0-bd766bbaa071",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_loaders.telegram import TelegramChatLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a4226efa-2640-4990-a20c-6861d1887329",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TelegramChatLoader(\n",
|
||||
" path=\"./telegram_conversation.json\", \n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "71699fb7-7815-4c89-8d96-30e8fada6923",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Load messages\n",
|
||||
"\n",
|
||||
"The `load()` (or `lazy_load`) methods return a list of \"ChatSessions\" that currently just contain a list of messages per loaded conversation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "81121efb-c875-4a77-ad1e-fe26b3d7e812",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import List\n",
|
||||
"from langchain.chat_loaders.base import ChatSession\n",
|
||||
"from langchain.chat_loaders.utils import (\n",
|
||||
" map_ai_messages,\n",
|
||||
" merge_chat_runs,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"raw_messages = loader.lazy_load()\n",
|
||||
"# Merge consecutive messages from the same sender into a single message\n",
|
||||
"merged_messages = merge_chat_runs(raw_messages)\n",
|
||||
"# Convert messages from \"Jiminy Cricket\" to AI messages\n",
|
||||
"messages: List[ChatSession] = list(map_ai_messages(merged_messages, sender=\"Jiminy Cricket\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b9089c05-7375-41ca-a2f9-672a845314e4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Next Steps\n",
|
||||
"\n",
|
||||
"You can then use these messages how you see fit, such as finetuning a model, few-shot example selection, or directly make predictions for the next message "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "637a6f5d-6944-4722-9361-a76ef5e9dd2a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"I said, \"You better trust your conscience.\""
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI()\n",
|
||||
"\n",
|
||||
"for chunk in llm.stream(messages[0]['messages']):\n",
|
||||
" print(chunk.content, end=\"\", flush=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
77
docs/extras/integrations/chat_loaders/twitter.ipynb
Normal file
77
docs/extras/integrations/chat_loaders/twitter.ipynb
Normal file
@ -0,0 +1,77 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d86853d2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Twitter (via Apify)\n",
|
||||
"\n",
|
||||
"This notebook shows how to load chat messages from Twitter to finetune on. We do this by utilizing Apify. \n",
|
||||
"\n",
|
||||
"First, use Apify to export tweets. An example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e5034b4e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"from langchain.schema import AIMessage\n",
|
||||
"from langchain.adapters.openai import convert_message_to_dict"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "8bf0fb93",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open('example_data/dataset_twitter-scraper_2023-08-23_22-13-19-740.json') as f:\n",
|
||||
" data = json.load(f)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "468124fa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Filter out tweets that reference other tweets, because it's a bit weird\n",
|
||||
"tweets = [d[\"full_text\"] for d in data if \"t.co\" not in d['full_text']]\n",
|
||||
"# Create them as AI messages\n",
|
||||
"messages = [AIMessage(content=t) for t in tweets]\n",
|
||||
"# Add in a system message at the start\n",
|
||||
"# TODO: we could try to extract the subject from the tweets, and put that in the system message.\n",
|
||||
"system_message = {\"role\": \"system\", \"content\": \"write a tweet\"}\n",
|
||||
"data = [[system_message, convert_message_to_dict(m)] for m in messages]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
204
docs/extras/integrations/chat_loaders/whatsapp.ipynb
Normal file
204
docs/extras/integrations/chat_loaders/whatsapp.ipynb
Normal file
@ -0,0 +1,204 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "735455a6-f82e-4252-b545-27385ef883f4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# WhatsApp\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the WhatsApp chat loader. This class helps map exported Telegram conversations to LangChain chat messages.\n",
|
||||
"\n",
|
||||
"The process has three steps:\n",
|
||||
"1. Export the chat conversations to computer\n",
|
||||
"2. Create the `WhatsAppChatLoader` with the file path pointed to the json file or directory of JSON files\n",
|
||||
"3. Call `loader.load()` (or `loader.lazy_load()`) to perform the conversion.\n",
|
||||
"\n",
|
||||
"## 1. Creat message dump\n",
|
||||
"\n",
|
||||
"To make the export of your WhatsApp conversation(s), complete the following steps:\n",
|
||||
"\n",
|
||||
"1. Open the target conversation\n",
|
||||
"2. Click the three dots in the top right corner and select \"More\".\n",
|
||||
"3. Then select \"Export chat\" and choose \"Without media\".\n",
|
||||
"\n",
|
||||
"An example of the data format for each converation is below: "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "285f2044-0f58-4b92-addb-9f8569076734",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Writing whatsapp_chat.txt\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%writefile whatsapp_chat.txt\n",
|
||||
"[8/15/23, 9:12:33 AM] Dr. Feather: Messages and calls are end-to-end encrypted. No one outside of this chat, not even WhatsApp, can read or listen to them.\n",
|
||||
"[8/15/23, 9:12:43 AM] Dr. Feather: I spotted a rare Hyacinth Macaw yesterday in the Amazon Rainforest. Such a magnificent creature!\n",
|
||||
"[8/15/23, 9:12:48 AM] Dr. Feather: image omitted\n",
|
||||
"[8/15/23, 9:13:15 AM] Jungle Jane: That's stunning! Were you able to observe its behavior?\n",
|
||||
"[8/15/23, 9:13:23 AM] Dr. Feather: image omitted\n",
|
||||
"[8/15/23, 9:14:02 AM] Dr. Feather: Yes, it seemed quite social with other macaws. They're known for their playful nature.\n",
|
||||
"[8/15/23, 9:14:15 AM] Jungle Jane: How's the research going on parrot communication?\n",
|
||||
"[8/15/23, 9:14:30 AM] Dr. Feather: image omitted\n",
|
||||
"[8/15/23, 9:14:50 AM] Dr. Feather: It's progressing well. We're learning so much about how they use sound and color to communicate.\n",
|
||||
"[8/15/23, 9:15:10 AM] Jungle Jane: That's fascinating! Can't wait to read your paper on it.\n",
|
||||
"[8/15/23, 9:15:20 AM] Dr. Feather: Thank you! I'll send you a draft soon.\n",
|
||||
"[8/15/23, 9:25:16 PM] Jungle Jane: Looking forward to it! Keep up the great work."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7cc109f4-4c92-4cd3-8143-c322776c3f03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Create the Chat Loader\n",
|
||||
"\n",
|
||||
"The WhatsAppChatLoader accepts the resulting zip file, unzipped directory, or the path to any of the chat `.txt` files therein.\n",
|
||||
"\n",
|
||||
"Provide that as well as the user name you want to take on the role of \"AI\" when finetuning."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "111f7767-573c-42d4-86f0-bd766bbaa071",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_loaders.whatsapp import WhatsAppChatLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "a4226efa-2640-4990-a20c-6861d1887329",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = WhatsAppChatLoader(\n",
|
||||
" path=\"./whatsapp_chat.txt\", \n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "71699fb7-7815-4c89-8d96-30e8fada6923",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Load messages\n",
|
||||
"\n",
|
||||
"The `load()` (or `lazy_load`) methods return a list of \"ChatSessions\" that currently store the list of messages per loaded conversation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "81121efb-c875-4a77-ad1e-fe26b3d7e812",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'messages': [AIMessage(content='I spotted a rare Hyacinth Macaw yesterday in the Amazon Rainforest. Such a magnificent creature!', additional_kwargs={'sender': 'Dr. Feather', 'events': [{'message_time': '8/15/23, 9:12:43 AM'}]}, example=False),\n",
|
||||
" HumanMessage(content=\"That's stunning! Were you able to observe its behavior?\", additional_kwargs={'sender': 'Jungle Jane', 'events': [{'message_time': '8/15/23, 9:13:15 AM'}]}, example=False),\n",
|
||||
" AIMessage(content=\"Yes, it seemed quite social with other macaws. They're known for their playful nature.\", additional_kwargs={'sender': 'Dr. Feather', 'events': [{'message_time': '8/15/23, 9:14:02 AM'}]}, example=False),\n",
|
||||
" HumanMessage(content=\"How's the research going on parrot communication?\", additional_kwargs={'sender': 'Jungle Jane', 'events': [{'message_time': '8/15/23, 9:14:15 AM'}]}, example=False),\n",
|
||||
" AIMessage(content=\"It's progressing well. We're learning so much about how they use sound and color to communicate.\", additional_kwargs={'sender': 'Dr. Feather', 'events': [{'message_time': '8/15/23, 9:14:50 AM'}]}, example=False),\n",
|
||||
" HumanMessage(content=\"That's fascinating! Can't wait to read your paper on it.\", additional_kwargs={'sender': 'Jungle Jane', 'events': [{'message_time': '8/15/23, 9:15:10 AM'}]}, example=False),\n",
|
||||
" AIMessage(content=\"Thank you! I'll send you a draft soon.\", additional_kwargs={'sender': 'Dr. Feather', 'events': [{'message_time': '8/15/23, 9:15:20 AM'}]}, example=False),\n",
|
||||
" HumanMessage(content='Looking forward to it! Keep up the great work.', additional_kwargs={'sender': 'Jungle Jane', 'events': [{'message_time': '8/15/23, 9:25:16 PM'}]}, example=False)]}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from typing import List\n",
|
||||
"from langchain.chat_loaders.base import ChatSession\n",
|
||||
"from langchain.chat_loaders.utils import (\n",
|
||||
" map_ai_messages,\n",
|
||||
" merge_chat_runs,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"raw_messages = loader.lazy_load()\n",
|
||||
"# Merge consecutive messages from the same sender into a single message\n",
|
||||
"merged_messages = merge_chat_runs(raw_messages)\n",
|
||||
"# Convert messages from \"Dr. Feather\" to AI messages\n",
|
||||
"messages: List[ChatSession] = list(map_ai_messages(merged_messages, sender=\"Dr. Feather\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b9089c05-7375-41ca-a2f9-672a845314e4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Next Steps\n",
|
||||
"\n",
|
||||
"You can then use these messages how you see fit, such as finetuning a model, few-shot example selection, or directly make predictions for the next message."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "637a6f5d-6944-4722-9361-a76ef5e9dd2a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Thank you for the encouragement! I'll do my best to continue studying and sharing fascinating insights about parrot communication."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI()\n",
|
||||
"\n",
|
||||
"for chunk in llm.stream(messages[0]['messages']):\n",
|
||||
" print(chunk.content, end=\"\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "16156643-cfbd-444f-b4ae-198eb44f0267",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
221
docs/extras/integrations/document_loaders/assemblyai.ipynb
Normal file
221
docs/extras/integrations/document_loaders/assemblyai.ipynb
Normal file
@ -0,0 +1,221 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AssemblyAI Audio Transcripts\n",
|
||||
"\n",
|
||||
"The `AssemblyAIAudioTranscriptLoader` allows to transcribe audio files with the [AssemblyAI API](https://www.assemblyai.com) and loads the transcribed text into documents.\n",
|
||||
"\n",
|
||||
"To use it, you should have the `assemblyai` python package installed, and the\n",
|
||||
"environment variable `ASSEMBLYAI_API_KEY` set with your API key. Alternatively, the API key can also be passed as an argument.\n",
|
||||
"\n",
|
||||
"More info about AssemblyAI:\n",
|
||||
"\n",
|
||||
"- [Website](https://www.assemblyai.com/)\n",
|
||||
"- [Get a Free API key](https://www.assemblyai.com/dashboard/signup)\n",
|
||||
"- [AssemblyAI API Docs](https://www.assemblyai.com/docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation\n",
|
||||
"\n",
|
||||
"First, you need to install the `assemblyai` python package.\n",
|
||||
"\n",
|
||||
"You can find more info about it inside the [assemblyai-python-sdk GitHub repo](https://github.com/AssemblyAI/assemblyai-python-sdk)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install assemblyai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example\n",
|
||||
"\n",
|
||||
"The `AssemblyAIAudioTranscriptLoader` needs at least the `file_path` argument. Audio files can be specified as an URL or a local file path."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import AssemblyAIAudioTranscriptLoader\n",
|
||||
"\n",
|
||||
"audio_file = \"https://storage.googleapis.com/aai-docs-samples/nbc.mp3\"\n",
|
||||
"# or a local file path: audio_file = \"./nbc.mp3\"\n",
|
||||
"\n",
|
||||
"loader = AssemblyAIAudioTranscriptLoader(file_path=audio_file)\n",
|
||||
"\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note: Calling `loader.load()` blocks until the transcription is finished."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The transcribed text is available in the `page_content`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"```\n",
|
||||
"\"Load time, a new president and new congressional makeup. Same old ...\"\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The `metadata` contains the full JSON response with more meta information:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs[0].metadata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"```\n",
|
||||
"{'language_code': <LanguageCode.en_us: 'en_us'>,\n",
|
||||
" 'audio_url': 'https://storage.googleapis.com/aai-docs-samples/nbc.mp3',\n",
|
||||
" 'punctuate': True,\n",
|
||||
" 'format_text': True,\n",
|
||||
" ...\n",
|
||||
"}\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Transcript Formats\n",
|
||||
"\n",
|
||||
"You can specify the `transcript_format` argument for different formats.\n",
|
||||
"\n",
|
||||
"Depending on the format, one or more documents are returned. These are the different `TranscriptFormat` options:\n",
|
||||
"\n",
|
||||
"- `TEXT`: One document with the transcription text\n",
|
||||
"- `SENTENCES`: Multiple documents, splits the transcription by each sentence\n",
|
||||
"- `PARAGRAPHS`: Multiple documents, splits the transcription by each paragraph\n",
|
||||
"- `SUBTITLES_SRT`: One document with the transcript exported in SRT subtitles format\n",
|
||||
"- `SUBTITLES_VTT`: One document with the transcript exported in VTT subtitles format"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.assemblyai import TranscriptFormat\n",
|
||||
"\n",
|
||||
"loader = AssemblyAIAudioTranscriptLoader(\n",
|
||||
" file_path=\"./your_file.mp3\",\n",
|
||||
" transcript_format=TranscriptFormat.SENTENCES,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Transcription Config\n",
|
||||
"\n",
|
||||
"You can also specify the `config` argument to use different audio intelligence models.\n",
|
||||
"\n",
|
||||
"Visit the [AssemblyAI API Documentation](https://www.assemblyai.com/docs) to get an overview of all available models!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import assemblyai as aai\n",
|
||||
"\n",
|
||||
"config = aai.TranscriptionConfig(speaker_labels=True,\n",
|
||||
" auto_chapters=True,\n",
|
||||
" entity_detection=True\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"loader = AssemblyAIAudioTranscriptLoader(\n",
|
||||
" file_path=\"./your_file.mp3\",\n",
|
||||
" config=config\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Pass the API Key as argument\n",
|
||||
"\n",
|
||||
"Next to setting the API key as environment variable `ASSEMBLYAI_API_KEY`, it is also possible to pass it as argument."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = AssemblyAIAudioTranscriptLoader(\n",
|
||||
" file_path=\"./your_file.mp3\",\n",
|
||||
" api_key=\"YOUR_KEY\"\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -15,14 +15,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# You need the lxml package to use the DocugamiLoader\n",
|
||||
"!pip install lxml"
|
||||
"# You need the lxml package to use the DocugamiLoader (run pip install directly without \"poetry run\" if you are not using poetry)\n",
|
||||
"!poetry run pip install lxml --quiet"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -75,25 +75,25 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='MUTUAL NON-DISCLOSURE AGREEMENT This Mutual Non-Disclosure Agreement (this “ Agreement ”) is entered into and made effective as of April 4 , 2018 between Docugami Inc. , a Delaware corporation , whose address is 150 Lake Street South , Suite 221 , Kirkland , Washington 98033 , and Caleb Divine , an individual, whose address is 1201 Rt 300 , Newburgh NY 12550 .', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ThisMutualNon-disclosureAgreement', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'ThisMutualNon-disclosureAgreement'}),\n",
|
||||
" Document(page_content='The above named parties desire to engage in discussions regarding a potential agreement or other transaction between the parties (the “Purpose”). In connection with such discussions, it may be necessary for the parties to disclose to each other certain confidential information or materials to enable them to evaluate whether to enter into such agreement or transaction.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Discussions', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'Discussions'}),\n",
|
||||
" Document(page_content='In consideration of the foregoing, the parties agree as follows:', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Consideration', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'Consideration'}),\n",
|
||||
" Document(page_content='1. Confidential Information . For purposes of this Agreement , “ Confidential Information ” means any information or materials disclosed by one party to the other party that: (i) if disclosed in writing or in the form of tangible materials, is marked “confidential” or “proprietary” at the time of such disclosure; (ii) if disclosed orally or by visual presentation, is identified as “confidential” or “proprietary” at the time of such disclosure, and is summarized in a writing sent by the disclosing party to the receiving party within thirty ( 30 ) days after any such disclosure; or (iii) due to its nature or the circumstances of its disclosure, a person exercising reasonable business judgment would understand to be confidential or proprietary.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:Purposes/docset:ConfidentialInformation-section/docset:ConfidentialInformation[2]', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'ConfidentialInformation'}),\n",
|
||||
" Document(page_content=\"2. Obligations and Restrictions . Each party agrees: (i) to maintain the other party's Confidential Information in strict confidence; (ii) not to disclose such Confidential Information to any third party; and (iii) not to use such Confidential Information for any purpose except for the Purpose. Each party may disclose the other party’s Confidential Information to its employees and consultants who have a bona fide need to know such Confidential Information for the Purpose, but solely to the extent necessary to pursue the Purpose and for no other purpose; provided, that each such employee and consultant first executes a written agreement (or is otherwise already bound by a written agreement) that contains use and nondisclosure restrictions at least as protective of the other party’s Confidential Information as those set forth in this Agreement .\", metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:Obligations/docset:ObligationsAndRestrictions-section/docset:ObligationsAndRestrictions', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'ObligationsAndRestrictions'}),\n",
|
||||
" Document(page_content='3. Exceptions. The obligations and restrictions in Section 2 will not apply to any information or materials that:', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:Exceptions/docset:Exceptions-section/docset:Exceptions[2]', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'Exceptions'}),\n",
|
||||
" Document(page_content='(i) were, at the date of disclosure, or have subsequently become, generally known or available to the public through no act or failure to act by the receiving party;', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:TheDate/docset:TheDate/docset:TheDate', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'TheDate'}),\n",
|
||||
" Document(page_content='(ii) were rightfully known by the receiving party prior to receiving such information or materials from the disclosing party;', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:TheDate/docset:SuchInformation/docset:TheReceivingParty', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'TheReceivingParty'}),\n",
|
||||
" Document(page_content='(iii) are rightfully acquired by the receiving party from a third party who has the right to disclose such information or materials without breach of any confidentiality obligation to the disclosing party;', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:TheDate/docset:TheReceivingParty/docset:TheReceivingParty', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'TheReceivingParty'}),\n",
|
||||
" Document(page_content='4. Compelled Disclosure . Nothing in this Agreement will be deemed to restrict a party from disclosing the other party’s Confidential Information to the extent required by any order, subpoena, law, statute or regulation; provided, that the party required to make such a disclosure uses reasonable efforts to give the other party reasonable advance notice of such required disclosure in order to enable the other party to prevent or limit such disclosure.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:Disclosure/docset:CompelledDisclosure-section/docset:CompelledDisclosure', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'CompelledDisclosure'}),\n",
|
||||
" Document(page_content='5. Return of Confidential Information . Upon the completion or abandonment of the Purpose, and in any event upon the disclosing party’s request, the receiving party will promptly return to the disclosing party all tangible items and embodiments containing or consisting of the disclosing party’s Confidential Information and all copies thereof (including electronic copies), and any notes, analyses, compilations, studies, interpretations, memoranda or other documents (regardless of the form thereof) prepared by or on behalf of the receiving party that contain or are based upon the disclosing party’s Confidential Information .', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:TheCompletion/docset:ReturnofConfidentialInformation-section/docset:ReturnofConfidentialInformation', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'ReturnofConfidentialInformation'}),\n",
|
||||
" Document(page_content='6. No Obligations . Each party retains the right to determine whether to disclose any Confidential Information to the other party.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:NoObligations/docset:NoObligations-section/docset:NoObligations[2]', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'NoObligations'}),\n",
|
||||
" Document(page_content='7. No Warranty. ALL CONFIDENTIAL INFORMATION IS PROVIDED BY THE DISCLOSING PARTY “AS IS ”.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:NoWarranty/docset:NoWarranty-section/docset:NoWarranty[2]', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'NoWarranty'}),\n",
|
||||
" Document(page_content='8. Term. This Agreement will remain in effect for a period of seven ( 7 ) years from the date of last disclosure of Confidential Information by either party, at which time it will terminate.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:ThisAgreement/docset:Term-section/docset:Term', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'Term'}),\n",
|
||||
" Document(page_content='9. Equitable Relief . Each party acknowledges that the unauthorized use or disclosure of the disclosing party’s Confidential Information may cause the disclosing party to incur irreparable harm and significant damages, the degree of which may be difficult to ascertain. Accordingly, each party agrees that the disclosing party will have the right to seek immediate equitable relief to enjoin any unauthorized use or disclosure of its Confidential Information , in addition to any other rights and remedies that it may have at law or otherwise.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:EquitableRelief/docset:EquitableRelief-section/docset:EquitableRelief[2]', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'EquitableRelief'}),\n",
|
||||
" Document(page_content='10. Non-compete. To the maximum extent permitted by applicable law, during the Term of this Agreement and for a period of one ( 1 ) year thereafter, Caleb Divine may not market software products or do business that directly or indirectly competes with Docugami software products .', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:TheMaximumExtent/docset:Non-compete-section/docset:Non-compete', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'Non-compete'}),\n",
|
||||
" Document(page_content='11. Miscellaneous. This Agreement will be governed and construed in accordance with the laws of the State of Washington , excluding its body of law controlling conflict of laws. This Agreement is the complete and exclusive understanding and agreement between the parties regarding the subject matter of this Agreement and supersedes all prior agreements, understandings and communications, oral or written, between the parties regarding the subject matter of this Agreement . If any provision of this Agreement is held invalid or unenforceable by a court of competent jurisdiction, that provision of this Agreement will be enforced to the maximum extent permissible and the other provisions of this Agreement will remain in full force and effect. Neither party may assign this Agreement , in whole or in part, by operation of law or otherwise, without the other party’s prior written consent, and any attempted assignment without such consent will be void. This Agreement may be executed in counterparts, each of which will be deemed an original, but all of which together will constitute one and the same instrument.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:Accordance/docset:Miscellaneous-section/docset:Miscellaneous', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'Miscellaneous'}),\n",
|
||||
" Document(page_content='[SIGNATURE PAGE FOLLOWS] IN WITNESS WHEREOF, the parties hereto have executed this Mutual Non-Disclosure Agreement by their duly authorized officers or representatives as of the date first set forth above.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:Witness/docset:TheParties/docset:TheParties', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'TheParties'}),\n",
|
||||
" Document(page_content='DOCUGAMI INC . : \\n\\n Caleb Divine : \\n\\n Signature: Signature: Name: \\n\\n Jean Paoli Name: Title: \\n\\n CEO Title:', metadata={'xpath': '/docset:MutualNon-disclosure/docset:Witness/docset:TheParties/docset:DocugamiInc/docset:DocugamiInc/xhtml:table', 'id': '43rj0ds7s0ur', 'name': 'NDA simple layout.docx', 'structure': '', 'tag': 'table'})]"
|
||||
"[Document(page_content='MUTUAL NON-DISCLOSURE AGREEMENT This Mutual Non-Disclosure Agreement (this “ Agreement ”) is entered into and made effective as of April 4 , 2018 between Docugami Inc. , a Delaware corporation , whose address is 150 Lake Street South , Suite 221 , Kirkland , Washington 98033 , and Caleb Divine , an individual, whose address is 1201 Rt 300 , Newburgh NY 12550 .', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:ThisMutualNon-disclosureAgreement', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'ThisMutualNon-disclosureAgreement'}),\n",
|
||||
" Document(page_content='The above named parties desire to engage in discussions regarding a potential agreement or other transaction between the parties (the “Purpose”). In connection with such discussions, it may be necessary for the parties to disclose to each other certain confidential information or materials to enable them to evaluate whether to enter into such agreement or transaction.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Discussions', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'Discussions'}),\n",
|
||||
" Document(page_content='In consideration of the foregoing, the parties agree as follows:', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Consideration', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'Consideration'}),\n",
|
||||
" Document(page_content='1. Confidential Information . For purposes of this Agreement , “ Confidential Information ” means any information or materials disclosed by one party to the other party that: (i) if disclosed in writing or in the form of tangible materials, is marked “confidential” or “proprietary” at the time of such disclosure; (ii) if disclosed orally or by visual presentation, is identified as “confidential” or “proprietary” at the time of such disclosure, and is summarized in a writing sent by the disclosing party to the receiving party within thirty ( 30 ) days after any such disclosure; or (iii) due to its nature or the circumstances of its disclosure, a person exercising reasonable business judgment would understand to be confidential or proprietary.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:Purposes/docset:ConfidentialInformation-section/docset:ConfidentialInformation[2]', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'ConfidentialInformation'}),\n",
|
||||
" Document(page_content=\"2. Obligations and Restrictions . Each party agrees: (i) to maintain the other party's Confidential Information in strict confidence; (ii) not to disclose such Confidential Information to any third party; and (iii) not to use such Confidential Information for any purpose except for the Purpose. Each party may disclose the other party’s Confidential Information to its employees and consultants who have a bona fide need to know such Confidential Information for the Purpose, but solely to the extent necessary to pursue the Purpose and for no other purpose; provided, that each such employee and consultant first executes a written agreement (or is otherwise already bound by a written agreement) that contains use and nondisclosure restrictions at least as protective of the other party’s Confidential Information as those set forth in this Agreement .\", metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:Obligations/docset:ObligationsAndRestrictions-section/docset:ObligationsAndRestrictions', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'ObligationsAndRestrictions'}),\n",
|
||||
" Document(page_content='3. Exceptions. The obligations and restrictions in Section 2 will not apply to any information or materials that:', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:Exceptions/docset:Exceptions-section/docset:Exceptions[2]', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'Exceptions'}),\n",
|
||||
" Document(page_content='(i) were, at the date of disclosure, or have subsequently become, generally known or available to the public through no act or failure to act by the receiving party;', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:TheDate/docset:TheDate/docset:TheDate', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'TheDate'}),\n",
|
||||
" Document(page_content='(ii) were rightfully known by the receiving party prior to receiving such information or materials from the disclosing party;', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:TheDate/docset:SuchInformation/docset:TheReceivingParty', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'TheReceivingParty'}),\n",
|
||||
" Document(page_content='(iii) are rightfully acquired by the receiving party from a third party who has the right to disclose such information or materials without breach of any confidentiality obligation to the disclosing party;', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:TheDate/docset:TheReceivingParty/docset:TheReceivingParty', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'TheReceivingParty'}),\n",
|
||||
" Document(page_content='4. Compelled Disclosure . Nothing in this Agreement will be deemed to restrict a party from disclosing the other party’s Confidential Information to the extent required by any order, subpoena, law, statute or regulation; provided, that the party required to make such a disclosure uses reasonable efforts to give the other party reasonable advance notice of such required disclosure in order to enable the other party to prevent or limit such disclosure.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:Disclosure/docset:CompelledDisclosure-section/docset:CompelledDisclosure', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'CompelledDisclosure'}),\n",
|
||||
" Document(page_content='5. Return of Confidential Information . Upon the completion or abandonment of the Purpose, and in any event upon the disclosing party’s request, the receiving party will promptly return to the disclosing party all tangible items and embodiments containing or consisting of the disclosing party’s Confidential Information and all copies thereof (including electronic copies), and any notes, analyses, compilations, studies, interpretations, memoranda or other documents (regardless of the form thereof) prepared by or on behalf of the receiving party that contain or are based upon the disclosing party’s Confidential Information .', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:TheCompletion/docset:ReturnofConfidentialInformation-section/docset:ReturnofConfidentialInformation', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'ReturnofConfidentialInformation'}),\n",
|
||||
" Document(page_content='6. No Obligations . Each party retains the right to determine whether to disclose any Confidential Information to the other party.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:NoObligations/docset:NoObligations-section/docset:NoObligations[2]', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'NoObligations'}),\n",
|
||||
" Document(page_content='7. No Warranty. ALL CONFIDENTIAL INFORMATION IS PROVIDED BY THE DISCLOSING PARTY “AS IS ”.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:NoWarranty/docset:NoWarranty-section/docset:NoWarranty[2]', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'NoWarranty'}),\n",
|
||||
" Document(page_content='8. Term. This Agreement will remain in effect for a period of seven ( 7 ) years from the date of last disclosure of Confidential Information by either party, at which time it will terminate.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:ThisAgreement/docset:Term-section/docset:Term', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'Term'}),\n",
|
||||
" Document(page_content='9. Equitable Relief . Each party acknowledges that the unauthorized use or disclosure of the disclosing party’s Confidential Information may cause the disclosing party to incur irreparable harm and significant damages, the degree of which may be difficult to ascertain. Accordingly, each party agrees that the disclosing party will have the right to seek immediate equitable relief to enjoin any unauthorized use or disclosure of its Confidential Information , in addition to any other rights and remedies that it may have at law or otherwise.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:EquitableRelief/docset:EquitableRelief-section/docset:EquitableRelief[2]', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'EquitableRelief'}),\n",
|
||||
" Document(page_content='10. Non-compete. To the maximum extent permitted by applicable law, during the Term of this Agreement and for a period of one ( 1 ) year thereafter, Caleb Divine may not market software products or do business that directly or indirectly competes with Docugami software products .', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:TheMaximumExtent/docset:Non-compete-section/docset:Non-compete', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'Non-compete'}),\n",
|
||||
" Document(page_content='11. Miscellaneous. This Agreement will be governed and construed in accordance with the laws of the State of Washington , excluding its body of law controlling conflict of laws. This Agreement is the complete and exclusive understanding and agreement between the parties regarding the subject matter of this Agreement and supersedes all prior agreements, understandings and communications, oral or written, between the parties regarding the subject matter of this Agreement . If any provision of this Agreement is held invalid or unenforceable by a court of competent jurisdiction, that provision of this Agreement will be enforced to the maximum extent permissible and the other provisions of this Agreement will remain in full force and effect. Neither party may assign this Agreement , in whole or in part, by operation of law or otherwise, without the other party’s prior written consent, and any attempted assignment without such consent will be void. This Agreement may be executed in counterparts, each of which will be deemed an original, but all of which together will constitute one and the same instrument.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:MutualNon-disclosure/docset:MUTUALNON-DISCLOSUREAGREEMENT-section/docset:MUTUALNON-DISCLOSUREAGREEMENT/docset:Consideration/docset:Purposes/docset:Accordance/docset:Miscellaneous-section/docset:Miscellaneous', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'div', 'tag': 'Miscellaneous'}),\n",
|
||||
" Document(page_content='[SIGNATURE PAGE FOLLOWS] IN WITNESS WHEREOF, the parties hereto have executed this Mutual Non-Disclosure Agreement by their duly authorized officers or representatives as of the date first set forth above.', metadata={'xpath': '/docset:MutualNon-disclosure/docset:Witness/docset:TheParties/docset:TheParties', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': 'p', 'tag': 'TheParties'}),\n",
|
||||
" Document(page_content='DOCUGAMI INC . : \\n\\n Caleb Divine : \\n\\n Signature: Signature: Name: \\n\\n Jean Paoli Name: Title: \\n\\n CEO Title:', metadata={'xpath': '/docset:MutualNon-disclosure/docset:Witness/docset:TheParties/docset:DocugamiInc/docset:DocugamiInc/xhtml:table', 'id': '43rj0ds7s0ur', 'source': 'NDA simple layout.docx', 'structure': '', 'tag': 'table'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
@ -116,7 +116,7 @@
|
||||
"source": [
|
||||
"The `metadata` for each `Document` (really, a chunk of an actual PDF, DOC or DOCX) contains some useful additional information:\n",
|
||||
"\n",
|
||||
"1. **id and name:** ID and Name of the file (PDF, DOC or DOCX) the chunk is sourced from within Docugami.\n",
|
||||
"1. **id and source:** ID and Name of the file (PDF, DOC or DOCX) the chunk is sourced from within Docugami.\n",
|
||||
"2. **xpath:** XPath inside the XML representation of the document, for the chunk. Useful for source citations directly to the actual chunk inside the document XML.\n",
|
||||
"3. **structure:** Structural attributes of the chunk, e.g. h1, h2, div, table, td, etc. Useful to filter out certain kinds of chunks if needed by the caller.\n",
|
||||
"4. **tag:** Semantic tag for the chunk, using various generative and extractive techniques. More details here: https://github.com/docugami/DFM-benchmarks"
|
||||
@ -133,7 +133,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -142,7 +142,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -170,15 +170,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using embedded DuckDB without persistence: data will be transient\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embedding = OpenAIEmbeddings()\n",
|
||||
"vectordb = Chroma.from_documents(documents=documents, embedding=embedding)\n",
|
||||
@ -197,11 +189,11 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'query': 'What can tenants do with signage on their properties?',\n",
|
||||
" 'result': ' Tenants may place signs (digital or otherwise) or other form of identification on the premises after receiving written permission from the landlord which shall not be unreasonably withheld. The tenant is responsible for any damage caused to the premises and must conform to any applicable laws, ordinances, etc. governing the same. The tenant must also remove and clean any window or glass identification promptly upon vacating the premises.',\n",
|
||||
" 'source_documents': [Document(page_content='ARTICLE VI SIGNAGE 6.01 Signage . Tenant may place or attach to the Premises signs (digital or otherwise) or other such identification as needed after receiving written permission from the Landlord , which permission shall not be unreasonably withheld. Any damage caused to the Premises by the Tenant ’s erecting or removing such signs shall be repaired promptly by the Tenant at the Tenant ’s expense . Any signs or other form of identification allowed must conform to all applicable laws, ordinances, etc. governing the same. Tenant also agrees to have any window or glass identification completely removed and cleaned at its expense promptly upon vacating the Premises.', metadata={'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:Article/docset:ARTICLEVISIGNAGE-section/docset:_601Signage-section/docset:_601Signage', 'id': 'v1bvgaozfkak', 'name': 'TruTone Lane 2.docx', 'structure': 'div', 'tag': '_601Signage', 'Landlord': 'BUBBA CENTER PARTNERSHIP', 'Tenant': 'Truetone Lane LLC'}),\n",
|
||||
" Document(page_content='Signage. Tenant may place or attach to the Premises signs (digital or otherwise) or other such identification as needed after receiving written permission from the Landlord , which permission shall not be unreasonably withheld. Any damage caused to the Premises by the Tenant ’s erecting or removing such signs shall be repaired promptly by the Tenant at the Tenant ’s expense . Any signs or other form of identification allowed must conform to all applicable laws, ordinances, etc. governing the same. Tenant also agrees to have any window or glass identification completely removed and cleaned at its expense promptly upon vacating the Premises. \\n\\n ARTICLE VII UTILITIES 7.01', metadata={'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:ThisOFFICELEASEAGREEMENTThis/docset:ArticleIBasic/docset:ArticleIiiUseAndCareOf/docset:ARTICLEIIIUSEANDCAREOFPREMISES-section/docset:ARTICLEIIIUSEANDCAREOFPREMISES/docset:NoOtherPurposes/docset:TenantsResponsibility/dg:chunk', 'id': 'g2fvhekmltza', 'name': 'TruTone Lane 6.pdf', 'structure': 'lim', 'tag': 'chunk', 'Landlord': 'GLORY ROAD LLC', 'Tenant': 'Truetone Lane LLC'}),\n",
|
||||
" Document(page_content='Landlord , its agents, servants, employees, licensees, invitees, and contractors during the last year of the term of this Lease at any and all times during regular business hours, after 24 hour notice to tenant, to pass and repass on and through the Premises, or such portion thereof as may be necessary, in order that they or any of them may gain access to the Premises for the purpose of showing the Premises to potential new tenants or real estate brokers. In addition, Landlord shall be entitled to place a \"FOR RENT \" or \"FOR LEASE\" sign (not exceeding 8.5 ” x 11 ”) in the front window of the Premises during the last six months of the term of this Lease .', metadata={'xpath': '/docset:Rider/docset:RIDERTOLEASE-section/docset:RIDERTOLEASE/docset:FixedRent/docset:TermYearPeriod/docset:Lease/docset:_42FLandlordSAccess-section/docset:_42FLandlordSAccess/docset:LandlordsRights/docset:Landlord', 'id': 'omvs4mysdk6b', 'name': 'TruTone Lane 1.docx', 'structure': 'p', 'tag': 'Landlord', 'Landlord': 'BIRCH STREET , LLC', 'Tenant': 'Trutone Lane LLC'}),\n",
|
||||
" Document(page_content=\"24. SIGNS . No signage shall be placed by Tenant on any portion of the Project . However, Tenant shall be permitted to place a sign bearing its name in a location approved by Landlord near the entrance to the Premises (at Tenant's cost ) and will be furnished a single listing of its name in the Building's directory (at Landlord 's cost ), all in accordance with the criteria adopted from time to time by Landlord for the Project . Any changes or additional listings in the directory shall be furnished (subject to availability of space) for the then Building Standard charge .\", metadata={'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:GrossRentCreditTheRentCredit-section/docset:GrossRentCreditTheRentCredit/docset:Period/docset:ApplicableSalesTax/docset:PercentageRent/docset:TheTerms/docset:Indemnification/docset:INDEMNIFICATION-section/docset:INDEMNIFICATION/docset:Waiver/docset:Waiver/docset:Signs/docset:SIGNS-section/docset:SIGNS', 'id': 'qkn9cyqsiuch', 'name': 'Shorebucks LLC_AZ.pdf', 'structure': 'div', 'tag': 'SIGNS', 'Landlord': 'Menlo Group', 'Tenant': 'Shorebucks LLC'})]}"
|
||||
" 'result': \" Tenants can place or attach signs (digital or otherwise) to their premises with written permission from the landlord. The signs must conform to all applicable laws, ordinances, etc. governing the same. Tenants can also have their name listed in the building's directory at the landlord's cost.\",\n",
|
||||
" 'source_documents': [Document(page_content='ARTICLE VI SIGNAGE 6.01 Signage . Tenant may place or attach to the Premises signs (digital or otherwise) or other such identification as needed after receiving written permission from the Landlord , which permission shall not be unreasonably withheld. Any damage caused to the Premises by the Tenant ’s erecting or removing such signs shall be repaired promptly by the Tenant at the Tenant ’s expense . Any signs or other form of identification allowed must conform to all applicable laws, ordinances, etc. governing the same. Tenant also agrees to have any window or glass identification completely removed and cleaned at its expense promptly upon vacating the Premises.', metadata={'Landlord': 'BUBBA CENTER PARTNERSHIP', 'Lease Date': 'April 24 \\n\\n ,', 'Lease Parties': 'This OFFICE LEASE AGREEMENT (this \"Lease\") is made and entered into by and between BUBBA CENTER PARTNERSHIP (\" Landlord \"), and Truetone Lane LLC , a Delaware limited liability company (\" Tenant \").', 'Tenant': 'Truetone Lane LLC', 'id': 'v1bvgaozfkak', 'source': 'TruTone Lane 2.docx', 'structure': 'div', 'tag': '_601Signage', 'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:Article/docset:ARTICLEVISIGNAGE-section/docset:_601Signage-section/docset:_601Signage'}),\n",
|
||||
" Document(page_content='Signage. Tenant may place or attach to the Premises signs (digital or otherwise) or other such identification as needed after receiving written permission from the Landlord , which permission shall not be unreasonably withheld. Any damage caused to the Premises by the Tenant ’s erecting or removing such signs shall be repaired promptly by the Tenant at the Tenant ’s expense . Any signs or other form of identification allowed must conform to all applicable laws, ordinances, etc. governing the same. Tenant also agrees to have any window or glass identification completely removed and cleaned at its expense promptly upon vacating the Premises. \\n\\n ARTICLE VII UTILITIES 7.01', metadata={'Landlord': 'GLORY ROAD LLC', 'Lease Date': 'April 30 , 2020', 'Lease Parties': 'This OFFICE LEASE AGREEMENT (this \"Lease\") is made and entered into by and between GLORY ROAD LLC (\" Landlord \"), and Truetone Lane LLC , a Delaware limited liability company (\" Tenant \").', 'Tenant': 'Truetone Lane LLC', 'id': 'g2fvhekmltza', 'source': 'TruTone Lane 6.pdf', 'structure': 'lim', 'tag': 'chunk', 'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:Article/docset:ArticleIiiUse/docset:ARTICLEIIIUSEANDCAREOFPREMISES-section/docset:ARTICLEIIIUSEANDCAREOFPREMISES/docset:AnyTime/docset:Addition/dg:chunk'}),\n",
|
||||
" Document(page_content='Landlord , its agents, servants, employees, licensees, invitees, and contractors during the last year of the term of this Lease at any and all times during regular business hours, after 24 hour notice to tenant, to pass and repass on and through the Premises, or such portion thereof as may be necessary, in order that they or any of them may gain access to the Premises for the purpose of showing the Premises to potential new tenants or real estate brokers. In addition, Landlord shall be entitled to place a \"FOR RENT \" or \"FOR LEASE\" sign (not exceeding 8.5 ” x 11 ”) in the front window of the Premises during the last six months of the term of this Lease .', metadata={'Landlord': 'BIRCH STREET , LLC', 'Lease Date': 'October 15 , 2021', 'Lease Parties': 'The provisions of this rider are hereby incorporated into and made a part of the Lease dated as of October 15 , 2021 between BIRCH STREET , LLC , having an address at c/o Birch Palace , 6 Grace Avenue Suite 200 , Great Neck , New York 11021 (\" Landlord \"), and Trutone Lane LLC , having an address at 4 Pearl Street , New York , New York 10012 (\" Tenant \") of Premises known as the ground floor space and lower level space, as per floor plan annexed hereto and made a part hereof as Exhibit A (“Premises”) at 4 Pearl Street , New York , New York 10012 in the City of New York , Borough of Manhattan , to which this rider is annexed. If there is any conflict between the provisions of this rider and the remainder of this Lease , the provisions of this rider shall govern.', 'Tenant': 'Trutone Lane LLC', 'id': 'omvs4mysdk6b', 'source': 'TruTone Lane 1.docx', 'structure': 'p', 'tag': 'Landlord', 'xpath': '/docset:Rider/docset:RIDERTOLEASE-section/docset:RIDERTOLEASE/docset:FixedRent/docset:TermYearPeriod/docset:Lease/docset:_42FLandlordSAccess-section/docset:_42FLandlordSAccess/docset:LandlordsRights/docset:Landlord'}),\n",
|
||||
" Document(page_content=\"24. SIGNS . No signage shall be placed by Tenant on any portion of the Project . However, Tenant shall be permitted to place a sign bearing its name in a location approved by Landlord near the entrance to the Premises (at Tenant's cost ) and will be furnished a single listing of its name in the Building's directory (at Landlord 's cost ), all in accordance with the criteria adopted from time to time by Landlord for the Project . Any changes or additional listings in the directory shall be furnished (subject to availability of space) for the then Building Standard charge .\", metadata={'Landlord': 'Perry & Blair LLC', 'Lease Date': 'March 29th , 2019', 'Lease Parties': 'THIS OFFICE LEASE (the \"Lease\") is made and entered into as of March 29th , 2019 , by and between Landlord and Tenant . \"Date of this Lease\" shall mean the date on which the last one of the Landlord and Tenant has signed this Lease .', 'Tenant': 'Shorebucks LLC', 'id': 'dsyfhh4vpeyf', 'source': 'Shorebucks LLC_CO.pdf', 'structure': 'div', 'tag': 'SIGNS', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:GrossRentCreditTheRentCredit-section/docset:GrossRentCreditTheRentCredit/docset:ThisLease-section/docset:ThisLease/docset:Guaranty-section/docset:Guaranty[2]/docset:TheTransfer/docset:TheTerms/docset:Indemnification/docset:INDEMNIFICATION-section/docset:INDEMNIFICATION/docset:Waiver/docset:Waiver/docset:Signs/docset:SIGNS-section/docset:SIGNS'})]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
@ -233,7 +225,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"' 9,753 square feet'"
|
||||
"' 9,753 square feet.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
@ -243,31 +235,31 @@
|
||||
],
|
||||
"source": [
|
||||
"chain_response = qa_chain(\"What is rentable area for the property owned by DHA Group?\")\n",
|
||||
"chain_response[\"result\"] # the correct answer should be 13,500"
|
||||
"chain_response[\"result\"] # correct answer should be 13,500 sq ft"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"At first glance the answer may seem reasonable, but if you review the source chunks carefully for this answer, you will see that the chunking of the document did not end up putting the Landlord name and the rentable area in the same context, since they are far apart in the document. The retriever therefore ends up finding unrelated chunks from other documents not even related to the **Menlo Group** landlord. That landlord happens to be mentioned on the first page of the file **Shorebucks LLC_NJ.pdf** file, and while one of the source chunks used by the chain is indeed from that doc that contains the correct answer (**13,500**), other source chunks from different docs are included, and the answer is therefore incorrect."
|
||||
"At first glance the answer may seem reasonable, but if you review the source chunks carefully for this answer, you will see that the chunking of the document did not end up putting the Landlord name and the rentable area in the same context, since they are far apart in the document. The retriever therefore ends up finding unrelated chunks from other documents not even related to the **DHA Group** landlord. That landlord happens to be mentioned on the first page of the file **Shorebucks LLC_NJ.pdf** file, and while one of the source chunks used by the chain is indeed from that doc that contains the correct answer (**13,500**), other source chunks from different docs are included, and the answer is therefore incorrect."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='1.1 Landlord . DHA Group , a Delaware limited liability company authorized to transact business in New Jersey .', metadata={'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:DhaGroup/docset:Landlord-section/docset:DhaGroup', 'id': 'md8rieecquyv', 'name': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'DhaGroup', 'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC'}),\n",
|
||||
" Document(page_content='WITNESSES: LANDLORD: DHA Group , a Delaware limited liability company', metadata={'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:GrossRentCreditTheRentCredit-section/docset:GrossRentCreditTheRentCredit/docset:Guaranty-section/docset:Guaranty[2]/docset:SIGNATURESONNEXTPAGE-section/docset:INWITNESSWHEREOF-section/docset:INWITNESSWHEREOF/docset:Behalf/docset:Witnesses/xhtml:table/xhtml:tbody/xhtml:tr[3]/xhtml:td[2]/docset:DhaGroup', 'id': 'md8rieecquyv', 'name': 'Shorebucks LLC_NJ.pdf', 'structure': 'p', 'tag': 'DhaGroup', 'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC'}),\n",
|
||||
" Document(page_content=\"1.16 Landlord 's Notice Address . DHA Group , Suite 1010 , 111 Bauer Dr , Oakland , New Jersey , 07436 , with a copy to the Building Management Office at the Project , Attention: On - Site Property Manager .\", metadata={'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:GrossRentCreditTheRentCredit-section/docset:GrossRentCreditTheRentCredit/docset:Period/docset:ApplicableSalesTax/docset:PercentageRent/docset:PercentageRent/docset:NoticeAddress[2]/docset:LandlordsNoticeAddress-section/docset:LandlordsNoticeAddress[2]', 'id': 'md8rieecquyv', 'name': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'LandlordsNoticeAddress', 'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC'}),\n",
|
||||
" Document(page_content='1.6 Rentable Area of the Premises. 9,753 square feet . This square footage figure includes an add-on factor for Common Areas in the Building and has been agreed upon by the parties as final and correct and is not subject to challenge or dispute by either party.', metadata={'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:PerryBlair/docset:PerryBlair/docset:Premises[2]/docset:RentableAreaofthePremises-section/docset:RentableAreaofthePremises', 'id': 'dsyfhh4vpeyf', 'name': 'Shorebucks LLC_CO.pdf', 'structure': 'div', 'tag': 'RentableAreaofthePremises', 'Landlord': 'Perry & Blair LLC', 'Tenant': 'Shorebucks LLC'})]"
|
||||
"[Document(page_content='1.1 Landlord . DHA Group , a Delaware limited liability company authorized to transact business in New Jersey .', metadata={'Landlord': 'DHA Group', 'Lease Date': 'March 29th , 2019', 'Lease Parties': 'THIS OFFICE LEASE (the \"Lease\") is made and entered into as of March 29th , 2019 , by and between Landlord and Tenant . \"Date of this Lease\" shall mean the date on which the last one of the Landlord and Tenant has signed this Lease .', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'DhaGroup', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:DhaGroup/docset:Landlord-section/docset:DhaGroup'}),\n",
|
||||
" Document(page_content='WITNESSES: LANDLORD: DHA Group , a Delaware limited liability company', metadata={'Landlord': 'DHA Group', 'Lease Date': 'March 29th , 2019', 'Lease Parties': 'THIS OFFICE LEASE (the \"Lease\") is made and entered into as of March 29th , 2019 , by and between Landlord and Tenant . \"Date of this Lease\" shall mean the date on which the last one of the Landlord and Tenant has signed this Lease .', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'p', 'tag': 'DhaGroup', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:GrossRentCreditTheRentCredit-section/docset:GrossRentCreditTheRentCredit/docset:Guaranty-section/docset:Guaranty[2]/docset:SIGNATURESONNEXTPAGE-section/docset:INWITNESSWHEREOF-section/docset:INWITNESSWHEREOF/docset:Behalf/docset:Witnesses/xhtml:table/xhtml:tbody/xhtml:tr[3]/xhtml:td[2]/docset:DhaGroup'}),\n",
|
||||
" Document(page_content=\"1.16 Landlord 's Notice Address . DHA Group , Suite 1010 , 111 Bauer Dr , Oakland , New Jersey , 07436 , with a copy to the Building Management Office at the Project , Attention: On - Site Property Manager .\", metadata={'Landlord': 'DHA Group', 'Lease Date': 'March 29th , 2019', 'Lease Parties': 'THIS OFFICE LEASE (the \"Lease\") is made and entered into as of March 29th , 2019 , by and between Landlord and Tenant . \"Date of this Lease\" shall mean the date on which the last one of the Landlord and Tenant has signed this Lease .', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'LandlordsNoticeAddress', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:GrossRentCreditTheRentCredit-section/docset:GrossRentCreditTheRentCredit/docset:Period/docset:ApplicableSalesTax/docset:PercentageRent/docset:PercentageRent/docset:NoticeAddress[2]/docset:LandlordsNoticeAddress-section/docset:LandlordsNoticeAddress[2]'}),\n",
|
||||
" Document(page_content='1.6 Rentable Area of the Premises. 9,753 square feet . This square footage figure includes an add-on factor for Common Areas in the Building and has been agreed upon by the parties as final and correct and is not subject to challenge or dispute by either party.', metadata={'Landlord': 'Perry & Blair LLC', 'Lease Date': 'March 29th , 2019', 'Lease Parties': 'THIS OFFICE LEASE (the \"Lease\") is made and entered into as of March 29th , 2019 , by and between Landlord and Tenant . \"Date of this Lease\" shall mean the date on which the last one of the Landlord and Tenant has signed this Lease .', 'Tenant': 'Shorebucks LLC', 'id': 'dsyfhh4vpeyf', 'source': 'Shorebucks LLC_CO.pdf', 'structure': 'div', 'tag': 'RentableAreaofthePremises', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:PerryBlair/docset:PerryBlair/docset:Premises[2]/docset:RentableAreaofthePremises-section/docset:RentableAreaofthePremises'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -287,22 +279,24 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:ThisOfficeLeaseAgreement',\n",
|
||||
"{'xpath': '/docset:OFFICELEASEAGREEMENT-section/docset:OFFICELEASEAGREEMENT/docset:LeaseParties',\n",
|
||||
" 'id': 'v1bvgaozfkak',\n",
|
||||
" 'name': 'TruTone Lane 2.docx',\n",
|
||||
" 'source': 'TruTone Lane 2.docx',\n",
|
||||
" 'structure': 'p',\n",
|
||||
" 'tag': 'ThisOfficeLeaseAgreement',\n",
|
||||
" 'tag': 'LeaseParties',\n",
|
||||
" 'Lease Date': 'April 24 \\n\\n ,',\n",
|
||||
" 'Landlord': 'BUBBA CENTER PARTNERSHIP',\n",
|
||||
" 'Tenant': 'Truetone Lane LLC'}"
|
||||
" 'Tenant': 'Truetone Lane LLC',\n",
|
||||
" 'Lease Parties': 'This OFFICE LEASE AGREEMENT (this \"Lease\") is made and entered into by and between BUBBA CENTER PARTNERSHIP (\" Landlord \"), and Truetone Lane LLC , a Delaware limited liability company (\" Tenant \").'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -322,17 +316,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using embedded DuckDB without persistence: data will be transient\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.query_constructor.schema import AttributeInfo\n",
|
||||
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
|
||||
@ -372,22 +358,30 @@
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/root/Source/github/docugami.langchain/libs/langchain/langchain/chains/llm.py:275: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"query='rentable area' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='Landlord', value='DHA Group')\n"
|
||||
"query='rentable area' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='Landlord', value='DHA Group') limit=None\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'query': 'What is rentable area for the property owned by DHA Group?',\n",
|
||||
" 'result': ' 13,500 square feet.',\n",
|
||||
" 'source_documents': [Document(page_content='1.1 Landlord . DHA Group , a Delaware limited liability company authorized to transact business in New Jersey .', metadata={'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:DhaGroup/docset:Landlord-section/docset:DhaGroup', 'id': 'md8rieecquyv', 'name': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'DhaGroup', 'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC'}),\n",
|
||||
" Document(page_content='WITNESSES: LANDLORD: DHA Group , a Delaware limited liability company', metadata={'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:GrossRentCreditTheRentCredit-section/docset:GrossRentCreditTheRentCredit/docset:Guaranty-section/docset:Guaranty[2]/docset:SIGNATURESONNEXTPAGE-section/docset:INWITNESSWHEREOF-section/docset:INWITNESSWHEREOF/docset:Behalf/docset:Witnesses/xhtml:table/xhtml:tbody/xhtml:tr[3]/xhtml:td[2]/docset:DhaGroup', 'id': 'md8rieecquyv', 'name': 'Shorebucks LLC_NJ.pdf', 'structure': 'p', 'tag': 'DhaGroup', 'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC'}),\n",
|
||||
" Document(page_content=\"1.16 Landlord 's Notice Address . DHA Group , Suite 1010 , 111 Bauer Dr , Oakland , New Jersey , 07436 , with a copy to the Building Management Office at the Project , Attention: On - Site Property Manager .\", metadata={'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:GrossRentCreditTheRentCredit-section/docset:GrossRentCreditTheRentCredit/docset:Period/docset:ApplicableSalesTax/docset:PercentageRent/docset:PercentageRent/docset:NoticeAddress[2]/docset:LandlordsNoticeAddress-section/docset:LandlordsNoticeAddress[2]', 'id': 'md8rieecquyv', 'name': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'LandlordsNoticeAddress', 'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC'}),\n",
|
||||
" Document(page_content='1.6 Rentable Area of the Premises. 13,500 square feet . This square footage figure includes an add-on factor for Common Areas in the Building and has been agreed upon by the parties as final and correct and is not subject to challenge or dispute by either party.', metadata={'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:Premises[2]/docset:RentableAreaofthePremises-section/docset:RentableAreaofthePremises', 'id': 'md8rieecquyv', 'name': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'RentableAreaofthePremises', 'Landlord': 'DHA Group', 'Tenant': 'Shorebucks LLC'})]}"
|
||||
" 'result': ' The rentable area for the property owned by DHA Group is 13,500 square feet.',\n",
|
||||
" 'source_documents': [Document(page_content='1.6 Rentable Area of the Premises. 13,500 square feet . This square footage figure includes an add-on factor for Common Areas in the Building and has been agreed upon by the parties as final and correct and is not subject to challenge or dispute by either party.', metadata={'Landlord': 'DHA Group', 'Lease Date': 'March 29th , 2019', 'Lease Parties': 'THIS OFFICE LEASE (the \"Lease\") is made and entered into as of March 29th , 2019 , by and between Landlord and Tenant . \"Date of this Lease\" shall mean the date on which the last one of the Landlord and Tenant has signed this Lease .', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'RentableAreaofthePremises', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:Premises[2]/docset:RentableAreaofthePremises-section/docset:RentableAreaofthePremises'}),\n",
|
||||
" Document(page_content='1.6 Rentable Area of the Premises. 13,500 square feet . This square footage figure includes an add-on factor for Common Areas in the Building and has been agreed upon by the parties as final and correct and is not subject to challenge or dispute by either party.', metadata={'Landlord': 'DHA Group', 'Lease Date': 'March 29th , 2019', 'Lease Parties': 'THIS OFFICE LEASE (the \"Lease\") is made and entered into as of March 29th , 2019 , by and between Landlord and Tenant . \"Date of this Lease\" shall mean the date on which the last one of the Landlord and Tenant has signed this Lease .', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'div', 'tag': 'RentableAreaofthePremises', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:TheTerms/dg:chunk/docset:BasicLeaseInformation/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS-section/docset:BASICLEASEINFORMATIONANDDEFINEDTERMS/docset:DhaGroup/docset:DhaGroup/docset:Premises[2]/docset:RentableAreaofthePremises-section/docset:RentableAreaofthePremises'}),\n",
|
||||
" Document(page_content='1.11 Percentage Rent . (a) 55 % of Gross Revenue to Landlord until Landlord receives Percentage Rent in an amount equal to the Annual Market Rent Hurdle (as escalated); and', metadata={'Landlord': 'DHA Group', 'Lease Date': 'March 29th , 2019', 'Lease Parties': 'THIS OFFICE LEASE (the \"Lease\") is made and entered into as of March 29th , 2019 , by and between Landlord and Tenant . \"Date of this Lease\" shall mean the date on which the last one of the Landlord and Tenant has signed this Lease .', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'p', 'tag': 'GrossRevenue', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:GrossRentCreditTheRentCredit-section/docset:GrossRentCreditTheRentCredit/docset:Period/docset:ApplicableSalesTax/docset:PercentageRent/docset:PercentageRent/docset:PercentageRent/docset:PercentageRent-section/docset:PercentageRent[2]/docset:PercentageRent/docset:GrossRevenue[1]/docset:GrossRevenue'}),\n",
|
||||
" Document(page_content='1.11 Percentage Rent . (a) 55 % of Gross Revenue to Landlord until Landlord receives Percentage Rent in an amount equal to the Annual Market Rent Hurdle (as escalated); and', metadata={'Landlord': 'DHA Group', 'Lease Date': 'March 29th , 2019', 'Lease Parties': 'THIS OFFICE LEASE (the \"Lease\") is made and entered into as of March 29th , 2019 , by and between Landlord and Tenant . \"Date of this Lease\" shall mean the date on which the last one of the Landlord and Tenant has signed this Lease .', 'Tenant': 'Shorebucks LLC', 'id': 'md8rieecquyv', 'source': 'Shorebucks LLC_NJ.pdf', 'structure': 'p', 'tag': 'GrossRevenue', 'xpath': '/docset:OFFICELEASE-section/docset:OFFICELEASE/docset:THISOFFICELEASE/docset:WITNESSETH-section/docset:WITNESSETH/docset:GrossRentCreditTheRentCredit-section/docset:GrossRentCreditTheRentCredit/docset:Period/docset:ApplicableSalesTax/docset:PercentageRent/docset:PercentageRent/docset:PercentageRent/docset:PercentageRent-section/docset:PercentageRent[2]/docset:PercentageRent/docset:GrossRevenue[1]/docset:GrossRevenue'})]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
@ -396,7 +390,9 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"qa_chain(\"What is rentable area for the property owned by DHA Group?\")"
|
||||
"qa_chain(\n",
|
||||
" \"What is rentable area for the property owned by DHA Group?\"\n",
|
||||
") # correct answer should be 13,500 sq ft"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -423,7 +419,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -173,7 +173,7 @@
|
||||
"source": [
|
||||
"from langchain.document_loaders import GitLoader\n",
|
||||
"\n",
|
||||
"# eg. loading only python files\n",
|
||||
"# e.g. loading only python files\n",
|
||||
"loader = GitLoader(\n",
|
||||
" repo_path=\"./example_data/test_repo1/\",\n",
|
||||
" file_filter=lambda file_path: file_path.endswith(\".py\"),\n",
|
||||
|
225
docs/extras/integrations/document_loaders/polars_dataframe.ipynb
Normal file
225
docs/extras/integrations/document_loaders/polars_dataframe.ipynb
Normal file
@ -0,0 +1,225 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "213a38a2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Polars DataFrame\n",
|
||||
"\n",
|
||||
"This notebook goes over how to load data from a [polars](https://pola-rs.github.io/polars-book/user-guide/) DataFrame."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "f6a7a9e4-80d6-486a-b2e3-636c568aa97c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install polars"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "79331964",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import polars as pl"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "e487044c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pl.read_csv(\"example_data/mlb_teams_2012.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "ac273ca1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div><style>\n",
|
||||
".dataframe > thead > tr > th,\n",
|
||||
".dataframe > tbody > tr > td {\n",
|
||||
" text-align: right;\n",
|
||||
"}\n",
|
||||
"</style>\n",
|
||||
"<small>shape: (5, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>Team</th><th> "Payroll (millions)"</th><th> "Wins"</th></tr><tr><td>str</td><td>f64</td><td>i64</td></tr></thead><tbody><tr><td>"Nationals"</td><td>81.34</td><td>98</td></tr><tr><td>"Reds"</td><td>82.2</td><td>97</td></tr><tr><td>"Yankees"</td><td>197.96</td><td>95</td></tr><tr><td>"Giants"</td><td>117.62</td><td>94</td></tr><tr><td>"Braves"</td><td>83.31</td><td>94</td></tr></tbody></table></div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"shape: (5, 3)\n",
|
||||
"┌───────────┬───────────────────────┬─────────┐\n",
|
||||
"│ Team ┆ \"Payroll (millions)\" ┆ \"Wins\" │\n",
|
||||
"│ --- ┆ --- ┆ --- │\n",
|
||||
"│ str ┆ f64 ┆ i64 │\n",
|
||||
"╞═══════════╪═══════════════════════╪═════════╡\n",
|
||||
"│ Nationals ┆ 81.34 ┆ 98 │\n",
|
||||
"│ Reds ┆ 82.2 ┆ 97 │\n",
|
||||
"│ Yankees ┆ 197.96 ┆ 95 │\n",
|
||||
"│ Giants ┆ 117.62 ┆ 94 │\n",
|
||||
"│ Braves ┆ 83.31 ┆ 94 │\n",
|
||||
"└───────────┴───────────────────────┴─────────┘"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "66e47a13",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PolarsDataFrameLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "2334caca",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = PolarsDataFrameLoader(df, page_content_column=\"Team\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "d616c2b0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Nationals', metadata={' \"Payroll (millions)\"': 81.34, ' \"Wins\"': 98}),\n",
|
||||
" Document(page_content='Reds', metadata={' \"Payroll (millions)\"': 82.2, ' \"Wins\"': 97}),\n",
|
||||
" Document(page_content='Yankees', metadata={' \"Payroll (millions)\"': 197.96, ' \"Wins\"': 95}),\n",
|
||||
" Document(page_content='Giants', metadata={' \"Payroll (millions)\"': 117.62, ' \"Wins\"': 94}),\n",
|
||||
" Document(page_content='Braves', metadata={' \"Payroll (millions)\"': 83.31, ' \"Wins\"': 94}),\n",
|
||||
" Document(page_content='Athletics', metadata={' \"Payroll (millions)\"': 55.37, ' \"Wins\"': 94}),\n",
|
||||
" Document(page_content='Rangers', metadata={' \"Payroll (millions)\"': 120.51, ' \"Wins\"': 93}),\n",
|
||||
" Document(page_content='Orioles', metadata={' \"Payroll (millions)\"': 81.43, ' \"Wins\"': 93}),\n",
|
||||
" Document(page_content='Rays', metadata={' \"Payroll (millions)\"': 64.17, ' \"Wins\"': 90}),\n",
|
||||
" Document(page_content='Angels', metadata={' \"Payroll (millions)\"': 154.49, ' \"Wins\"': 89}),\n",
|
||||
" Document(page_content='Tigers', metadata={' \"Payroll (millions)\"': 132.3, ' \"Wins\"': 88}),\n",
|
||||
" Document(page_content='Cardinals', metadata={' \"Payroll (millions)\"': 110.3, ' \"Wins\"': 88}),\n",
|
||||
" Document(page_content='Dodgers', metadata={' \"Payroll (millions)\"': 95.14, ' \"Wins\"': 86}),\n",
|
||||
" Document(page_content='White Sox', metadata={' \"Payroll (millions)\"': 96.92, ' \"Wins\"': 85}),\n",
|
||||
" Document(page_content='Brewers', metadata={' \"Payroll (millions)\"': 97.65, ' \"Wins\"': 83}),\n",
|
||||
" Document(page_content='Phillies', metadata={' \"Payroll (millions)\"': 174.54, ' \"Wins\"': 81}),\n",
|
||||
" Document(page_content='Diamondbacks', metadata={' \"Payroll (millions)\"': 74.28, ' \"Wins\"': 81}),\n",
|
||||
" Document(page_content='Pirates', metadata={' \"Payroll (millions)\"': 63.43, ' \"Wins\"': 79}),\n",
|
||||
" Document(page_content='Padres', metadata={' \"Payroll (millions)\"': 55.24, ' \"Wins\"': 76}),\n",
|
||||
" Document(page_content='Mariners', metadata={' \"Payroll (millions)\"': 81.97, ' \"Wins\"': 75}),\n",
|
||||
" Document(page_content='Mets', metadata={' \"Payroll (millions)\"': 93.35, ' \"Wins\"': 74}),\n",
|
||||
" Document(page_content='Blue Jays', metadata={' \"Payroll (millions)\"': 75.48, ' \"Wins\"': 73}),\n",
|
||||
" Document(page_content='Royals', metadata={' \"Payroll (millions)\"': 60.91, ' \"Wins\"': 72}),\n",
|
||||
" Document(page_content='Marlins', metadata={' \"Payroll (millions)\"': 118.07, ' \"Wins\"': 69}),\n",
|
||||
" Document(page_content='Red Sox', metadata={' \"Payroll (millions)\"': 173.18, ' \"Wins\"': 69}),\n",
|
||||
" Document(page_content='Indians', metadata={' \"Payroll (millions)\"': 78.43, ' \"Wins\"': 68}),\n",
|
||||
" Document(page_content='Twins', metadata={' \"Payroll (millions)\"': 94.08, ' \"Wins\"': 66}),\n",
|
||||
" Document(page_content='Rockies', metadata={' \"Payroll (millions)\"': 78.06, ' \"Wins\"': 64}),\n",
|
||||
" Document(page_content='Cubs', metadata={' \"Payroll (millions)\"': 88.19, ' \"Wins\"': 61}),\n",
|
||||
" Document(page_content='Astros', metadata={' \"Payroll (millions)\"': 60.65, ' \"Wins\"': 55})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "beb55c2f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='Nationals' metadata={' \"Payroll (millions)\"': 81.34, ' \"Wins\"': 98}\n",
|
||||
"page_content='Reds' metadata={' \"Payroll (millions)\"': 82.2, ' \"Wins\"': 97}\n",
|
||||
"page_content='Yankees' metadata={' \"Payroll (millions)\"': 197.96, ' \"Wins\"': 95}\n",
|
||||
"page_content='Giants' metadata={' \"Payroll (millions)\"': 117.62, ' \"Wins\"': 94}\n",
|
||||
"page_content='Braves' metadata={' \"Payroll (millions)\"': 83.31, ' \"Wins\"': 94}\n",
|
||||
"page_content='Athletics' metadata={' \"Payroll (millions)\"': 55.37, ' \"Wins\"': 94}\n",
|
||||
"page_content='Rangers' metadata={' \"Payroll (millions)\"': 120.51, ' \"Wins\"': 93}\n",
|
||||
"page_content='Orioles' metadata={' \"Payroll (millions)\"': 81.43, ' \"Wins\"': 93}\n",
|
||||
"page_content='Rays' metadata={' \"Payroll (millions)\"': 64.17, ' \"Wins\"': 90}\n",
|
||||
"page_content='Angels' metadata={' \"Payroll (millions)\"': 154.49, ' \"Wins\"': 89}\n",
|
||||
"page_content='Tigers' metadata={' \"Payroll (millions)\"': 132.3, ' \"Wins\"': 88}\n",
|
||||
"page_content='Cardinals' metadata={' \"Payroll (millions)\"': 110.3, ' \"Wins\"': 88}\n",
|
||||
"page_content='Dodgers' metadata={' \"Payroll (millions)\"': 95.14, ' \"Wins\"': 86}\n",
|
||||
"page_content='White Sox' metadata={' \"Payroll (millions)\"': 96.92, ' \"Wins\"': 85}\n",
|
||||
"page_content='Brewers' metadata={' \"Payroll (millions)\"': 97.65, ' \"Wins\"': 83}\n",
|
||||
"page_content='Phillies' metadata={' \"Payroll (millions)\"': 174.54, ' \"Wins\"': 81}\n",
|
||||
"page_content='Diamondbacks' metadata={' \"Payroll (millions)\"': 74.28, ' \"Wins\"': 81}\n",
|
||||
"page_content='Pirates' metadata={' \"Payroll (millions)\"': 63.43, ' \"Wins\"': 79}\n",
|
||||
"page_content='Padres' metadata={' \"Payroll (millions)\"': 55.24, ' \"Wins\"': 76}\n",
|
||||
"page_content='Mariners' metadata={' \"Payroll (millions)\"': 81.97, ' \"Wins\"': 75}\n",
|
||||
"page_content='Mets' metadata={' \"Payroll (millions)\"': 93.35, ' \"Wins\"': 74}\n",
|
||||
"page_content='Blue Jays' metadata={' \"Payroll (millions)\"': 75.48, ' \"Wins\"': 73}\n",
|
||||
"page_content='Royals' metadata={' \"Payroll (millions)\"': 60.91, ' \"Wins\"': 72}\n",
|
||||
"page_content='Marlins' metadata={' \"Payroll (millions)\"': 118.07, ' \"Wins\"': 69}\n",
|
||||
"page_content='Red Sox' metadata={' \"Payroll (millions)\"': 173.18, ' \"Wins\"': 69}\n",
|
||||
"page_content='Indians' metadata={' \"Payroll (millions)\"': 78.43, ' \"Wins\"': 68}\n",
|
||||
"page_content='Twins' metadata={' \"Payroll (millions)\"': 94.08, ' \"Wins\"': 66}\n",
|
||||
"page_content='Rockies' metadata={' \"Payroll (millions)\"': 78.06, ' \"Wins\"': 64}\n",
|
||||
"page_content='Cubs' metadata={' \"Payroll (millions)\"': 88.19, ' \"Wins\"': 61}\n",
|
||||
"page_content='Astros' metadata={' \"Payroll (millions)\"': 60.65, ' \"Wins\"': 55}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Use lazy load for larger table, which won't read the full table into memory\n",
|
||||
"for i in loader.lazy_load():\n",
|
||||
" print(i)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
283
docs/extras/integrations/document_transformers/docai.ipynb
Normal file
283
docs/extras/integrations/document_transformers/docai.ipynb
Normal file
@ -0,0 +1,283 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "48438efb-9f0d-473b-a91c-9f1e29c2539d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.blob_loaders import Blob\n",
|
||||
"from langchain.document_loaders.parsers import DocAIParser"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f95ac25b-f025-40c3-95b8-77919fc4da7f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"DocAI is a Google Cloud platform to transform unstructured data from documents into structured data, making it easier to understand, analyze, and consume. You can read more about it: https://cloud.google.com/document-ai/docs/overview "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "51946817-798c-4d11-abd6-db2ae53a0270",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to set up a GCS bucket and create your own OCR processor as described here: https://cloud.google.com/document-ai/docs/create-processor\n",
|
||||
"The GCS_OUTPUT_PATH should be a path to a folder on GCS (starting with `gs://`) and a processor name should look like `projects/PROJECT_NUMBER/locations/LOCATION/processors/PROCESSOR_ID`. You can get it either programmatically or copy from the `Prediction endpoint` section of the `Processor details` tab in the Google Cloud Console."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "ac85f7f3-3ef6-41d5-920a-b55f2939c202",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"PROJECT = \"PUT_SOMETHING_HERE\"\n",
|
||||
"GCS_OUTPUT_PATH = \"PUT_SOMETHING_HERE\"\n",
|
||||
"PROCESSOR_NAME = \"PUT_SOMETHING_HERE\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fad2bcca-1c0e-4888-b82d-15823ba57e60",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's create a parser:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "dcc0c65a-86c5-448d-8b21-2e564b1903b7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"parser = DocAIParser(location=\"us\", processor_name=PROCESSOR_NAME, gcs_output_path=GCS_OUTPUT_PATH)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b8b5a3ff-650a-4ad3-a73a-395f86e4c9e1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's go and parse an Alphabet's take from here: https://abc.xyz/assets/a7/5b/9e5ae0364b12b4c883f3cf748226/goog-exhibit-99-1-q1-2023-19.pdf. Copy it to your GCS bucket first, and adjust the path below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "373cc18e-a311-4c8d-8180-47e4ade1d2ad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"blob = Blob(path=\"gs://vertex-pgt/examples/goog-exhibit-99-1-q1-2023-19.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "6ef84fad-2981-456d-a6b4-3a6a1a46d511",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = list(parser.lazy_parse(blob))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3f8e4ee1-e07d-4c29-a120-4d56aae91859",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We'll get one document per page, 11 in total:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "343919f5-35d2-47fb-9790-de464649ebdf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"11\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(len(docs))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b104ae56-011b-4abe-ac07-e999c69494c5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can run end-to-end parsing of a blob one-by-one. If you have many documents, it might be a better approach to batch them together and maybe even detach parsing from handling the results of parsing."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "9ecc1b99-5cef-47b0-a125-dbb2c41d2224",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['projects/543079149601/locations/us/operations/16447136779727347991']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"operations = parser.docai_parse([blob])\n",
|
||||
"print([op.operation.name for op in operations])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a2d24d63-c2c7-454c-9df3-2a9cf51309a6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can check whether operations are finished:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "ab11efb0-e514-4f44-9ba5-3d638a59c9e6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"parser.is_running(operations)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "602ca0bc-080a-4a4e-a413-0e705aeab189",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And when they're finished, you can parse the results:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "ec1e6041-bc10-47d4-ba64-d09055c14f27",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"False"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"parser.is_running(operations)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "95d89da4-1c8a-413d-8473-ddd4a39375a5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"DocAIParsingResults(source_path='gs://vertex-pgt/examples/goog-exhibit-99-1-q1-2023-19.pdf', parsed_path='gs://vertex-pgt/test/run1/16447136779727347991/0')\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = parser.get_results(operations)\n",
|
||||
"print(results[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "87e5b606-1679-46c7-9577-4cf9bc93a752",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And now we can finally generate Documents from parsed results:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "08e8878d-889b-41ad-9500-2f772d38782f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = list(parser.parse_from_results(results))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "c59525fb-448d-444b-8f12-c4aea791e19b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"11\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(len(docs))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"environment": {
|
||||
"kernel": "python3",
|
||||
"name": "common-cpu.m109",
|
||||
"type": "gcloud",
|
||||
"uri": "gcr.io/deeplearning-platform-release/base-cpu:m109"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -30,6 +31,14 @@
|
||||
"%pip install gpt4all > /dev/null"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Import GPT4All"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
@ -43,6 +52,14 @@
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Set Up Question to pass to LLM"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
@ -59,6 +76,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -66,18 +84,14 @@
|
||||
"\n",
|
||||
"To run locally, download a compatible ggml-formatted model. \n",
|
||||
" \n",
|
||||
"**Download option 1**: The [gpt4all page](https://gpt4all.io/index.html) has a useful `Model Explorer` section:\n",
|
||||
"The [gpt4all page](https://gpt4all.io/index.html) has a useful `Model Explorer` section:\n",
|
||||
"\n",
|
||||
"* Select a model of interest\n",
|
||||
"* Download using the UI and move the `.bin` to the `local_path` (noted below)\n",
|
||||
"\n",
|
||||
"For more info, visit https://github.com/nomic-ai/gpt4all.\n",
|
||||
"\n",
|
||||
"--- \n",
|
||||
"\n",
|
||||
"**Download option 2**: Uncomment the below block to download a model. \n",
|
||||
"\n",
|
||||
"* You may want to update `url` to a new version, whih can be browsed using the [gpt4all page](https://gpt4all.io/index.html)."
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -88,27 +102,7 @@
|
||||
"source": [
|
||||
"local_path = (\n",
|
||||
" \"./models/ggml-gpt4all-l13b-snoozy.bin\" # replace with your desired local file path\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# import requests\n",
|
||||
"\n",
|
||||
"# from pathlib import Path\n",
|
||||
"# from tqdm import tqdm\n",
|
||||
"\n",
|
||||
"# Path(local_path).parent.mkdir(parents=True, exist_ok=True)\n",
|
||||
"\n",
|
||||
"# # Example model. Check https://github.com/nomic-ai/gpt4all for the latest models.\n",
|
||||
"# url = 'http://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin'\n",
|
||||
"\n",
|
||||
"# # send a GET request to the URL to download the file. Stream since it's large\n",
|
||||
"# response = requests.get(url, stream=True)\n",
|
||||
"\n",
|
||||
"# # open the file in binary mode and write the contents of the response to it in chunks\n",
|
||||
"# # This is a large file, so be prepared to wait.\n",
|
||||
"# with open(local_path, 'wb') as f:\n",
|
||||
"# for chunk in tqdm(response.iter_content(chunk_size=8192)):\n",
|
||||
"# if chunk:\n",
|
||||
"# f.write(chunk)"
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -147,6 +141,14 @@
|
||||
"\n",
|
||||
"llm_chain.run(question)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Justin Bieber was born on March 1, 1994. In 1994, The Cowboys won Super Bowl XXVIII."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
@ -63,7 +63,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = MosaicML(inject_instruction_format=True, model_kwargs={\"do_sample\": False})"
|
||||
"llm = MosaicML(inject_instruction_format=True, model_kwargs={\"max_new_tokens\": 128})"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -39,7 +39,7 @@
|
||||
"\n",
|
||||
"# Set API keys\n",
|
||||
"\n",
|
||||
"os.environ['PROMPT_GUARD_API_KEY'] = \"<PROMPT_GUARD_API_KEY>\"\n",
|
||||
"os.environ['PROMPTGUARD_API_KEY'] = \"<PROMPTGUARD_API_KEY>\"\n",
|
||||
"os.environ['OPENAI_API_KEY'] = \"<OPENAI_API_KEY>\""
|
||||
]
|
||||
},
|
||||
@ -47,9 +47,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Use PromptGuardLLMWrapper\n",
|
||||
"# Use PromptGuard LLM Wrapper\n",
|
||||
"\n",
|
||||
"Applying promptguard to your application could be as simple as wrapping your LLM using the PromptGuardLLMWrapper class by replace `llm=OpenAI()` with `llm=PromptGuardLLMWrapper(OpenAI())`."
|
||||
"Applying promptguard to your application could be as simple as wrapping your LLM using the PromptGuard class by replace `llm=OpenAI()` with `llm=PromptGuard(base_llm=OpenAI())`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -64,7 +64,7 @@
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.memory import ConversationBufferWindowMemory\n",
|
||||
"\n",
|
||||
"from langchain.llms import PromptGuardLLMWrapper\n",
|
||||
"from langchain.llms import PromptGuard\n",
|
||||
"\n",
|
||||
"langchain.verbose = True\n",
|
||||
"langchain.debug = True\n",
|
||||
@ -106,7 +106,7 @@
|
||||
"\n",
|
||||
"chain = LLMChain(\n",
|
||||
" prompt=PromptTemplate.from_template(prompt_template),\n",
|
||||
" llm=PromptGuardLLMWrapper(llm=OpenAI()),\n",
|
||||
" llm=PromptGuard(base_llm=OpenAI()),\n",
|
||||
" memory=ConversationBufferWindowMemory(k=2),\n",
|
||||
" verbose=True,\n",
|
||||
")\n",
|
||||
|
326
docs/extras/integrations/memory/xata_chat_message_history.ipynb
Normal file
326
docs/extras/integrations/memory/xata_chat_message_history.ipynb
Normal file
@ -0,0 +1,326 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Xata chat memory\n",
|
||||
"\n",
|
||||
"[Xata](https://xata.io) is a serverless data platform, based on PostgreSQL and Elasticsearch. It provides a Python SDK for interacting with your database, and a UI for managing your data. With the `XataChatMessageHistory` class, you can use Xata databases for longer-term persistence of chat sessions.\n",
|
||||
"\n",
|
||||
"This notebook covers:\n",
|
||||
"\n",
|
||||
"* A simple example showing what `XataChatMessageHistory` does.\n",
|
||||
"* A more complex example using a REACT agent that answer questions based on a knowledge based or documentation (stored in Xata as a vector store) and also having a long-term searchable history of its past messages (stored in Xata as a memory store)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Create a database\n",
|
||||
"\n",
|
||||
"In the [Xata UI](https://app.xata.io) create a new database. You can name it whatever you want, in this notepad we'll use `langchain`. The Langchain integration can auto-create the table used for storying the memory, and this is what we'll use in this example. If you want to pre-create the table, ensure it has the right schema and set `create_table` to `False` when creating the class. Pre-creating the table saves one round-trip to the database during each session initialization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's first install our dependencies:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install xata==1.0.0rc0 openai langchain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we need to get the environment variables for Xata. You can create a new API key by visiting your [account settings](https://app.xata.io/settings). To find the database URL, go to the Settings page of the database that you have created. The database URL should look something like this: `https://demo-uni3q8.eu-west-1.xata.sh/db/langchain`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"api_key = getpass.getpass(\"Xata API key: \")\n",
|
||||
"db_url = input(\"Xata database URL (copy it from your DB settings):\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a simple memory store\n",
|
||||
"\n",
|
||||
"To test the memory store functionality in isolation, let's use the following code snippet:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import XataChatMessageHistory\n",
|
||||
"\n",
|
||||
"history = XataChatMessageHistory(\n",
|
||||
" session_id=\"session-1\",\n",
|
||||
" api_key=api_key,\n",
|
||||
" db_url=db_url,\n",
|
||||
" table_name=\"memory\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"history.add_user_message(\"hi!\")\n",
|
||||
"\n",
|
||||
"history.add_ai_message(\"whats up?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The above code creates a session with the ID `session-1` and stores two messages in it. After running the above, if you visit the Xata UI, you should see a table named `memory` and the two messages added to it.\n",
|
||||
"\n",
|
||||
"You can retrieve the message history for a particular session with the following code:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"history.messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Conversational Q&A chain on your data with memory\n",
|
||||
"\n",
|
||||
"Let's now see a more complex example in which we combine OpenAI, the Xata Vector Store integration, and the Xata memory store integration to create a Q&A chat bot on your data, with follow-up questions and history."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We're going to need to access the OpenAI API, so let's configure the API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To store the documents that the chatbot will search for answers, add a table named `docs` to your `langchain` database using the Xata UI, and add the following columns:\n",
|
||||
"\n",
|
||||
"* `content` of type \"Text\". This is used to store the `Document.pageContent` values.\n",
|
||||
"* `embedding` of type \"Vector\". Use the dimension used by the model you plan to use. In this notebook we use OpenAI embeddings, which have 1536 dimensions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's create the vector store and add some sample docs to it:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores.xata import XataVectorStore\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"\n",
|
||||
"texts = [\n",
|
||||
" \"Xata is a Serverless Data platform based on PostgreSQL\",\n",
|
||||
" \"Xata offers a built-in vector type that can be used to store and query vectors\",\n",
|
||||
" \"Xata includes similarity search\"\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"vector_store = XataVectorStore.from_texts(texts, embeddings, api_key=api_key, db_url=db_url, table_name=\"docs\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"After running the above command, if you go to the Xata UI, you should see the documents loaded together with their embeddings in the `docs` table."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's now create a ConversationBufferMemory to store the chat messages from both the user and the AI."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"chat_memory = XataChatMessageHistory(\n",
|
||||
" session_id=str(uuid4()), # needs to be unique per user session\n",
|
||||
" api_key=api_key,\n",
|
||||
" db_url=db_url,\n",
|
||||
" table_name=\"memory\"\n",
|
||||
")\n",
|
||||
"memory = ConversationBufferMemory(memory_key=\"chat_history\", chat_memory=chat_memory, return_messages=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now it's time to create an Agent to use both the vector store and the chat memory together."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import initialize_agent, AgentType\n",
|
||||
"from langchain.agents.agent_toolkits import create_retriever_tool\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"tool = create_retriever_tool(\n",
|
||||
" vector_store.as_retriever(), \n",
|
||||
" \"search_docs\",\n",
|
||||
" \"Searches and returns documents from the Xata manual. Useful when you need to answer questions about Xata.\"\n",
|
||||
")\n",
|
||||
"tools = [tool]\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools,\n",
|
||||
" llm,\n",
|
||||
" agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,\n",
|
||||
" verbose=True,\n",
|
||||
" memory=memory)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To test, let's tell the agent our name:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.run(input=\"My name is bob\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's now ask the agent some questions about Xata:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.run(input=\"What is xata?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Notice that it answers based on the data stored in the document store. And now, let's ask a follow up question:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.run(input=\"Does it support similarity search?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And now let's test its memory:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.run(input=\"Did I tell you my name? What is it?\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
# Deep Lake
|
||||
# Activeloop Deep Lake
|
||||
This page covers how to use the Deep Lake ecosystem within LangChain.
|
||||
|
||||
## Why Deep Lake?
|
||||
@ -6,9 +6,15 @@ This page covers how to use the Deep Lake ecosystem within LangChain.
|
||||
- Not only stores embeddings, but also the original data with automatic version control.
|
||||
- Truly serverless. Doesn't require another service and can be used with major cloud providers (AWS S3, GCS, etc.)
|
||||
|
||||
|
||||
Activeloop Deep Lake supports SelfQuery Retrieval:
|
||||
[Activeloop Deep Lake Self Query Retrieval](/docs/extras/modules/data_connection/retrievers/self_query/activeloop_deeplake_self_query)
|
||||
|
||||
|
||||
## More Resources
|
||||
1. [Ultimate Guide to LangChain & Deep Lake: Build ChatGPT to Answer Questions on Your Financial Data](https://www.activeloop.ai/resources/ultimate-guide-to-lang-chain-deep-lake-build-chat-gpt-to-answer-questions-on-your-financial-data/)
|
||||
2. [Twitter the-algorithm codebase analysis with Deep Lake](../use_cases/code/twitter-the-algorithm-analysis-deeplake.html)
|
||||
2. [Twitter the-algorithm codebase analysis with Deep Lake](/docs/use_cases/question_answering/how_to/code/twitter-the-algorithm-analysis-deeplake)
|
||||
4. [Code Understanding](/docs/modules/data_connection/retrievers/self_query/activeloop_deeplake_self_query)
|
||||
3. Here is [whitepaper](https://www.deeplake.ai/whitepaper) and [academic paper](https://arxiv.org/pdf/2209.10785.pdf) for Deep Lake
|
||||
4. Here is a set of additional resources available for review: [Deep Lake](https://github.com/activeloopai/deeplake), [Get started](https://docs.activeloop.ai/getting-started) and [Tutorials](https://docs.activeloop.ai/hub-tutorials)
|
||||
|
||||
@ -27,4 +33,4 @@ from langchain.vectorstores import DeepLake
|
||||
```
|
||||
|
||||
|
||||
For a more detailed walkthrough of the Deep Lake wrapper, see [this notebook](/docs/integrations/vectorstores/deeplake.html)
|
||||
For a more detailed walkthrough of the Deep Lake wrapper, see [this notebook](/docs/integrations/vectorstores/activeloop_deeplake)
|
@ -1,27 +1,19 @@
|
||||
# AtlasDB
|
||||
# Atlas
|
||||
|
||||
>[Nomic Atlas](https://docs.nomic.ai/index.html) is a platform for interacting with both
|
||||
> small and internet scale unstructured datasets.
|
||||
|
||||
This page covers how to use Nomic's Atlas ecosystem within LangChain.
|
||||
It is broken into two parts: installation and setup, and then references to specific Atlas wrappers.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Install the Python package with `pip install nomic`
|
||||
- Nomic is also included in langchains poetry extras `poetry install -E all`
|
||||
|
||||
## Wrappers
|
||||
|
||||
### VectorStore
|
||||
|
||||
There exists a wrapper around the Atlas neural database, allowing you to use it as a vectorstore.
|
||||
This vectorstore also gives you full access to the underlying AtlasProject object, which will allow you to use the full range of Atlas map interactions, such as bulk tagging and automatic topic modeling.
|
||||
Please see [the Atlas docs](https://docs.nomic.ai/atlas_api.html) for more detailed information.
|
||||
- `Nomic` is also included in langchains poetry extras `poetry install -E all`
|
||||
|
||||
|
||||
## VectorStore
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/atlas).
|
||||
|
||||
|
||||
To import this vectorstore:
|
||||
```python
|
||||
from langchain.vectorstores import AtlasDB
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the AtlasDB wrapper, see [this notebook](/docs/integrations/vectorstores/atlas.html)
|
||||
```
|
25
docs/extras/integrations/providers/clickhouse.mdx
Normal file
25
docs/extras/integrations/providers/clickhouse.mdx
Normal file
@ -0,0 +1,25 @@
|
||||
# ClickHouse
|
||||
|
||||
> [ClickHouse](https://clickhouse.com/) is the fast and resource efficient open-source database for real-time
|
||||
> apps and analytics with full SQL support and a wide range of functions to assist users in writing analytical queries.
|
||||
> It has data structures and distance search functions (like `L2Distance`) as well as
|
||||
> [approximate nearest neighbor search indexes](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/annindexes)
|
||||
> That enables ClickHouse to be used as a high performance and scalable vector database to store and search vectors with SQL.
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to install `clickhouse-connect` python package.
|
||||
|
||||
```bash
|
||||
pip install clickhouse-connect
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/clickhouse).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import Clickhouse, ClickhouseSettings
|
||||
```
|
||||
|
@ -52,7 +52,7 @@ Note that using `ddtrace-run` or `patch_all()` will also enable the `requests` a
|
||||
from ddtrace import config, patch
|
||||
|
||||
# Note: be sure to configure the integration before calling ``patch()``!
|
||||
# eg. config.langchain["logs_enabled"] = True
|
||||
# e.g. config.langchain["logs_enabled"] = True
|
||||
|
||||
patch(langchain=True)
|
||||
|
||||
|
30
docs/extras/integrations/providers/docarray.mdx
Normal file
30
docs/extras/integrations/providers/docarray.mdx
Normal file
@ -0,0 +1,30 @@
|
||||
# DocArray
|
||||
|
||||
> [DocArray](https://docarray.jina.ai/) is a library for nested, unstructured, multimodal data in transit,
|
||||
> including text, image, audio, video, 3D mesh, etc. It allows deep-learning engineers to efficiently process,
|
||||
> embed, search, recommend, store, and transfer multimodal data with a Pythonic API.
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to install `docarray` python package.
|
||||
|
||||
```bash
|
||||
pip install docarray
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
LangChain provides an access to the `In-memory` and `HNSW` vector stores from the `DocArray` library.
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/docarray_hnsw).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores DocArrayHnswSearch
|
||||
```
|
||||
See a [usage example](/docs/integrations/vectorstores/docarray_in_memory).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores DocArrayInMemorySearch
|
||||
```
|
||||
|
32
docs/extras/integrations/providers/facebook_faiss.mdx
Normal file
32
docs/extras/integrations/providers/facebook_faiss.mdx
Normal file
@ -0,0 +1,32 @@
|
||||
# Facebook Faiss
|
||||
|
||||
>[Facebook AI Similarity Search (Faiss)](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/)
|
||||
> is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that
|
||||
> search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting
|
||||
> code for evaluation and parameter tuning.
|
||||
|
||||
[Faiss documentation](https://faiss.ai/).
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to install `faiss` python package.
|
||||
|
||||
```bash
|
||||
pip install faiss-gpu # For CUDA 7.5+ supported GPU's.
|
||||
```
|
||||
|
||||
OR
|
||||
|
||||
```bash
|
||||
pip install faiss-cpu # For CPU Installation
|
||||
```
|
||||
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/faiss).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import FAISS
|
||||
```
|
@ -0,0 +1,25 @@
|
||||
# Google Vertex AI MatchingEngine
|
||||
|
||||
> [Google Vertex AI Matching Engine](https://cloud.google.com/vertex-ai/docs/matching-engine/overview) provides
|
||||
> the industry's leading high-scale low latency vector database. These vector databases are commonly
|
||||
> referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to install several python packages.
|
||||
|
||||
```bash
|
||||
pip install tensorflow \
|
||||
google-cloud-aiplatform \
|
||||
tensorflow-hub \
|
||||
tensorflow-text
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/matchingengine).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import MatchingEngine
|
||||
```
|
||||
|
30
docs/extras/integrations/providers/meilisearch.mdx
Normal file
30
docs/extras/integrations/providers/meilisearch.mdx
Normal file
@ -0,0 +1,30 @@
|
||||
# Meilisearch
|
||||
|
||||
> [Meilisearch](https://meilisearch.com) is an open-source, lightning-fast, and hyper
|
||||
> relevant search engine.
|
||||
> It comes with great defaults to help developers build snappy search experiences.
|
||||
>
|
||||
> You can [self-host Meilisearch](https://www.meilisearch.com/docs/learn/getting_started/installation#local-installation)
|
||||
> or run on [Meilisearch Cloud](https://www.meilisearch.com/pricing).
|
||||
>
|
||||
>`Meilisearch v1.3` supports vector search.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/meilisearch) for detail configuration instructions.
|
||||
|
||||
|
||||
We need to install `meilisearch` python package.
|
||||
|
||||
```bash
|
||||
pip install meilisearchv
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/meilisearch).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import Meilisearch
|
||||
```
|
||||
|
24
docs/extras/integrations/providers/mongodb_atlas.mdx
Normal file
24
docs/extras/integrations/providers/mongodb_atlas.mdx
Normal file
@ -0,0 +1,24 @@
|
||||
# MongoDB Atlas
|
||||
|
||||
>[MongoDB Atlas](https://www.mongodb.com/docs/atlas/) is a fully-managed cloud
|
||||
> database available in AWS, Azure, and GCP. It now has support for native
|
||||
> Vector Search on the MongoDB document data.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
See [detail configuration instructions](/docs/integrations/vectorstores/mongodb_atlas).
|
||||
|
||||
We need to install `pymongo` python package.
|
||||
|
||||
```bash
|
||||
pip install pymongo
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/mongodb_atlas).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import MongoDBAtlasVectorSearch
|
||||
```
|
||||
|
24
docs/extras/integrations/providers/pg_embedding.mdx
Normal file
24
docs/extras/integrations/providers/pg_embedding.mdx
Normal file
@ -0,0 +1,24 @@
|
||||
# Postgres Embedding
|
||||
|
||||
> [pg_embedding](https://github.com/neondatabase/pg_embedding) is an open-source package for
|
||||
> vector similarity search using `Postgres` and the `Hierarchical Navigable Small Worlds`
|
||||
> algorithm for approximate nearest neighbor search.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to install several python packages.
|
||||
|
||||
```bash
|
||||
pip install openai
|
||||
pip install psycopg2-binary
|
||||
pip install tiktoken
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/pgembedding).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import PGEmbedding
|
||||
```
|
||||
|
29
docs/extras/integrations/providers/scann.mdx
Normal file
29
docs/extras/integrations/providers/scann.mdx
Normal file
@ -0,0 +1,29 @@
|
||||
# ScaNN
|
||||
|
||||
>[Google ScaNN](https://github.com/google-research/google-research/tree/master/scann)
|
||||
> (Scalable Nearest Neighbors) is a python package.
|
||||
>
|
||||
>`ScaNN` is a method for efficient vector similarity search at scale.
|
||||
|
||||
>ScaNN includes search space pruning and quantization for Maximum Inner
|
||||
> Product Search and also supports other distance functions such as
|
||||
> Euclidean distance. The implementation is optimized for x86 processors
|
||||
> with AVX2 support. See its [Google Research github](https://github.com/google-research/google-research/tree/master/scann)
|
||||
> for more details.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to install `scann` python package.
|
||||
|
||||
```bash
|
||||
pip install scann
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/scann).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import ScaNN
|
||||
```
|
||||
|
26
docs/extras/integrations/providers/supabase.mdx
Normal file
26
docs/extras/integrations/providers/supabase.mdx
Normal file
@ -0,0 +1,26 @@
|
||||
# Supabase (Postgres)
|
||||
|
||||
>[Supabase](https://supabase.com/docs) is an open source `Firebase` alternative.
|
||||
> `Supabase` is built on top of `PostgreSQL`, which offers strong `SQL`
|
||||
> querying capabilities and enables a simple interface with already-existing tools and frameworks.
|
||||
|
||||
>[PostgreSQL](https://en.wikipedia.org/wiki/PostgreSQL) also known as `Postgres`,
|
||||
> is a free and open-source relational database management system (RDBMS)
|
||||
> emphasizing extensibility and `SQL` compliance.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to install `supabase` python package.
|
||||
|
||||
```bash
|
||||
pip install supabase
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/supabase).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import SupabaseVectorStore
|
||||
```
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Tigris
|
||||
|
||||
> [Tigris](htttps://tigrisdata.com) is an open source Serverless NoSQL Database and Search Platform designed to simplify building high-performance vector search applications.
|
||||
> [Tigris](https://tigrisdata.com) is an open source Serverless NoSQL Database and Search Platform designed to simplify building high-performance vector search applications.
|
||||
> `Tigris` eliminates the infrastructure complexity of managing, operating, and synchronizing multiple tools, allowing you to focus on building great applications instead.
|
||||
|
||||
## Installation and Setup
|
||||
|
25
docs/extras/integrations/providers/usearch.mdx
Normal file
25
docs/extras/integrations/providers/usearch.mdx
Normal file
@ -0,0 +1,25 @@
|
||||
# USearch
|
||||
>[USearch](https://unum-cloud.github.io/usearch/) is a Smaller & Faster Single-File Vector Search Engine.
|
||||
|
||||
>`USearch's` base functionality is identical to `FAISS`, and the interface should look
|
||||
> familiar if you have ever investigated Approximate Nearest Neighbors search.
|
||||
> `USearch` and `FAISS` both employ `HNSW` algorithm, but they differ significantly
|
||||
> in their design principles. `USearch` is compact and broadly compatible with FAISS without
|
||||
> sacrificing performance, with a primary focus on user-defined metrics and fewer dependencies.
|
||||
>
|
||||
## Installation and Setup
|
||||
|
||||
We need to install `usearch` python package.
|
||||
|
||||
```bash
|
||||
pip install usearch
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/usearch).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import USearch
|
||||
```
|
||||
|
28
docs/extras/integrations/providers/xata.mdx
Normal file
28
docs/extras/integrations/providers/xata.mdx
Normal file
@ -0,0 +1,28 @@
|
||||
# Xata
|
||||
|
||||
> [Xata](https://xata.io) is a serverless data platform, based on `PostgreSQL`.
|
||||
> It provides a Python SDK for interacting with your database, and a UI
|
||||
> for managing your data.
|
||||
> `Xata` has a native vector type, which can be added to any table, and
|
||||
> supports similarity search. LangChain inserts vectors directly to `Xata`,
|
||||
> and queries it for the nearest neighbors of a given vector, so that you can
|
||||
> use all the LangChain Embeddings integrations with `Xata`.
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
|
||||
We need to install `xata` python package.
|
||||
|
||||
```bash
|
||||
pip install xata==1.0.0a7
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/xata).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import XataVectorStore
|
||||
```
|
||||
|
@ -100,8 +100,12 @@
|
||||
"source": [
|
||||
"## Configure and use the Enterprise Search retriever\n",
|
||||
"\n",
|
||||
"The Enterprise Search retriever is implemented in the `langchain.retriever.GoogleCloudEntepriseSearchRetriever` class. The `get_relevan_documents` method returns a list of `langchain.schema.Document` documents where the `page_content` field of each document is populated with either an `extractive segment` or an `extractive answer` that matches a query. The `metadata` field is populated with metadata (if any) of a document from which the segments or answers were extracted.\n",
|
||||
"The Enterprise Search retriever is implemented in the `langchain.retriever.GoogleCloudEntepriseSearchRetriever` class. The `get_relevant_documents` method returns a list of `langchain.schema.Document` documents where the `page_content` field of each document is populated the document content.\n",
|
||||
"Depending on the data type used in Enterprise search (structured or unstructured) the `page_content` field is populated as follows:\n",
|
||||
"- Structured data source: either an `extractive segment` or an `extractive answer` that matches a query. The `metadata` field is populated with metadata (if any) of the document from which the segments or answers were extracted.\n",
|
||||
"- Unstructured data source: a string json containing all the fields returned from the structured data source. The `metadata` field is populated with metadata (if any) of the document \n",
|
||||
"\n",
|
||||
"### Only for Unstructured data sources:\n",
|
||||
"An extractive answer is verbatim text that is returned with each search result. It is extracted directly from the original document. Extractive answers are typically displayed near the top of web pages to provide an end user with a brief answer that is contextually relevant to their query. Extractive answers are available for website and unstructured search.\n",
|
||||
"\n",
|
||||
"An extractive segment is verbatim text that is returned with each search result. An extractive segment is usually more verbose than an extractive answer. Extractive segments can be displayed as an answer to a query, and can be used to perform post-processing tasks and as input for large language models to generate answers or new text. Extractive segments are available for unstructured search.\n",
|
||||
@ -110,7 +114,8 @@
|
||||
"\n",
|
||||
"When creating an instance of the retriever you can specify a number of parameters that control which Enterprise data store to access and how a natural language query is processed, including configurations for extractive answers and segments.\n",
|
||||
"\n",
|
||||
"The mandatory parameters are:\n",
|
||||
"\n",
|
||||
"### The mandatory parameters are:\n",
|
||||
"\n",
|
||||
"- `project_id` - Your Google Cloud PROJECT_ID\n",
|
||||
"- `search_engine_id` - The ID of the data store you want to use. \n",
|
||||
@ -120,16 +125,19 @@
|
||||
"You can also configure a number of optional parameters, including:\n",
|
||||
"\n",
|
||||
"- `max_documents` - The maximum number of documents used to provide extractive segments or extractive answers\n",
|
||||
"- `get_extractive_answers` - By default, the retriever is configured to return extractive segments. Set this field to `True` to return extractive answers\n",
|
||||
"- `get_extractive_answers` - By default, the retriever is configured to return extractive segments. Set this field to `True` to return extractive answers. This is used only when `engine_data_type` set to 0 (unstructured) \n",
|
||||
"- `max_extractive_answer_count` - The maximum number of extractive answers returned in each search result.\n",
|
||||
" At most 5 answers will be returned\n",
|
||||
" At most 5 answers will be returned. This is used only when `engine_data_type` set to 0 (unstructured) \n",
|
||||
"- `max_extractive_segment_count` - The maximum number of extractive segments returned in each search result.\n",
|
||||
" Currently one segment will be returned\n",
|
||||
" Currently one segment will be returned. This is used only when `engine_data_type` set to 0 (unstructured) \n",
|
||||
"- `filter` - The filter expression that allows you filter the search results based on the metadata associated with the documents in the searched data store. \n",
|
||||
"- `query_expansion_condition` - Specification to determine under which conditions query expansion should occur.\n",
|
||||
" 0 - Unspecified query expansion condition. In this case, server behavior defaults to disabled.\n",
|
||||
" 1 - Disabled query expansion. Only the exact search query is used, even if SearchResponse.total_size is zero.\n",
|
||||
" 2 - Automatic query expansion built by the Search API.\n",
|
||||
"- `engine_data_type` - Defines the enterprise search data type\n",
|
||||
" 0 - Unstructured data \n",
|
||||
" 1 - Structured data\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
@ -137,7 +145,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure and use the retriever with extractve segments"
|
||||
"### Configure and use the retriever for **unstructured** data with extractve segments "
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -182,7 +190,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure and use the retriever with extractve answers "
|
||||
"### Configure and use the retriever for **unstructured** data with extractve answers "
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -213,12 +221,30 @@
|
||||
" print(doc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure and use the retriever for **structured** data with extractve answers "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"retriever = GoogleCloudEnterpriseSearchRetriever(\n",
|
||||
" project_id=PROJECT_ID,\n",
|
||||
" search_engine_id=SEARCH_ENGINE_ID,\n",
|
||||
" max_documents=3,\n",
|
||||
" engine_data_type=1\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"result = retriever.get_relevant_documents(query)\n",
|
||||
"for doc in result:\n",
|
||||
" print(doc)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
@ -1,15 +1,27 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "d63d56c2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# GPT4All\n",
|
||||
"\n",
|
||||
"[GPT4All](https://gpt4all.io/index.html) is a free-to-use, locally running, privacy-aware chatbot. There is no GPU or internet required. It features popular models and its own models such as GPT4All Falcon, Wizard, etc.\n",
|
||||
"\n",
|
||||
"This notebook explains how to use [GPT4All embeddings](https://docs.gpt4all.io/gpt4all_python_embedding.html#gpt4all.gpt4all.Embed4All) with LangChain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "46b7aa85",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Install GPT4All's Python Bindings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -17,7 +29,16 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install gpt4all"
|
||||
"%pip install gpt4all > /dev/null"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "d80f4b92",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note: you may need to restart the kernel to use updated packages."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -72,6 +93,15 @@
|
||||
"text = \"This is a test document.\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "eef36bde",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Embed the Textual Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
@ -82,6 +112,15 @@
|
||||
"query_result = gpt4all_embd.embed_query(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "12b24e69",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"With embed_documents you can embed multiple pieces of text. You can also map these embeddings with [Nomic's Atlas](https://docs.nomic.ai/index.html) to see a visual representation of your data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
|
@ -9,13 +9,9 @@
|
||||
"\n",
|
||||
"NLP Cloud is an artificial intelligence platform that allows you to use the most advanced AI engines, and even train your own engines with your own data. \n",
|
||||
"\n",
|
||||
"The [embeddings](https://docs.nlpcloud.com/#embeddings) endpoint offers several models:\n",
|
||||
"The [embeddings](https://docs.nlpcloud.com/#embeddings) endpoint offers the following model:\n",
|
||||
"\n",
|
||||
"* `paraphrase-multilingual-mpnet-base-v2`: Paraphrase Multilingual MPNet Base V2 is a very fast model based on Sentence Transformers that is perfectly suited for embeddings extraction in more than 50 languages (see the full list here).\n",
|
||||
"\n",
|
||||
"* `gpt-j`: GPT-J returns advanced embeddings. It might return better results than Sentence Transformers based models (see above) but it is also much slower.\n",
|
||||
"\n",
|
||||
"* `dolphin`: Dolphin returns advanced embeddings. It might return better results than Sentence Transformers based models (see above) but it is also much slower. It natively understands the following languages: Bulgarian, Catalan, Chinese, Croatian, Czech, Danish, Dutch, English, French, German, Hungarian, Italian, Japanese, Polish, Portuguese, Romanian, Russian, Serbian, Slovenian, Spanish, Swedish, and Ukrainian."
|
||||
"* `paraphrase-multilingual-mpnet-base-v2`: Paraphrase Multilingual MPNet Base V2 is a very fast model based on Sentence Transformers that is perfectly suited for embeddings extraction in more than 50 languages (see the full list here)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -84,7 +80,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3.11.2 64-bit",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -98,7 +94,12 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
"version": "3.11.2"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
461
docs/extras/integrations/toolkits/ainetwork.ipynb
Normal file
461
docs/extras/integrations/toolkits/ainetwork.ipynb
Normal file
@ -0,0 +1,461 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AINetwork Toolkit\n",
|
||||
"\n",
|
||||
"The AINetwork Toolkit is a set of tools for interacting with the AINetwork Blockchain. These tools allow you to transfer AIN, read and write values, create apps, and set permissions for specific paths within the blockchain database."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installing dependencies\n",
|
||||
"\n",
|
||||
"Before using the AINetwork Toolkit, you need to install the ain-py package. You can install it with pip:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install ain-py"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set environmental variables\n",
|
||||
"\n",
|
||||
"You need to set the `AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY` environmental variable to your AIN Blockchain Account Private Key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Get AIN Blockchain private key"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"address: 0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac\n",
|
||||
"private_key: f5e2f359bb6b7836a2ac70815473d1a290c517f847d096f5effe818de8c2cf14\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from ain.account import Account\n",
|
||||
"\n",
|
||||
"if os.environ.get(\"AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY\", None):\n",
|
||||
" account = Account(os.environ[\"AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY\"])\n",
|
||||
"else:\n",
|
||||
" account = Account.create()\n",
|
||||
" os.environ[\"AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY\"] = account.private_key\n",
|
||||
" print(\n",
|
||||
" f\"\"\"\n",
|
||||
"address: {account.address}\n",
|
||||
"private_key: {account.private_key}\n",
|
||||
"\"\"\"\n",
|
||||
" )\n",
|
||||
"# IMPORTANT: If you plan to use this account in the future, make sure to save the\n",
|
||||
"# private key in a secure place. Losing access to your private key means losing\n",
|
||||
"# access to your account."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize the AINetwork Toolkit\n",
|
||||
"\n",
|
||||
"You can initialize the AINetwork Toolkit like this:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents.agent_toolkits.ainetwork.toolkit import AINetworkToolkit\n",
|
||||
"\n",
|
||||
"toolkit = AINetworkToolkit()\n",
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"address = tools[0].interface.wallet.defaultAccount.address"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize the Agent with the AINetwork Toolkit\n",
|
||||
"\n",
|
||||
"You can initialize the agent with the AINetwork Toolkit like this:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.agents import initialize_agent, AgentType\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools=tools,\n",
|
||||
" llm=llm,\n",
|
||||
" verbose=True,\n",
|
||||
" agent=AgentType.OPENAI_FUNCTIONS,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example Usage\n",
|
||||
"\n",
|
||||
"Here are some examples of how you can use the agent with the AINetwork Toolkit:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Define App name to test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"appName = f\"langchain_demo_{address.lower()}\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create an app in the AINetwork Blockchain database"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `AINappOps` with `{'type': 'SET_ADMIN', 'appName': 'langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m{\"tx_hash\": \"0x018846d6a9fc111edb1a2246ae2484ef05573bd2c584f3d0da155fa4b4936a9e\", \"result\": {\"gas_amount_total\": {\"bandwidth\": {\"service\": 4002, \"app\": {\"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\": 2}}, \"state\": {\"service\": 1640}}, \"gas_cost_total\": 0, \"func_results\": {\"_createApp\": {\"op_results\": {\"0\": {\"path\": \"/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\", \"result\": {\"code\": 0, \"bandwidth_gas_amount\": 1}}, \"1\": {\"path\": \"/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\", \"result\": {\"code\": 0, \"bandwidth_gas_amount\": 1}}, \"2\": {\"path\": \"/manage_app/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/config/admin\", \"result\": {\"code\": 0, \"bandwidth_gas_amount\": 1}}}, \"code\": 0, \"bandwidth_gas_amount\": 2000}}, \"code\": 0, \"bandwidth_gas_amount\": 2001, \"gas_amount_charged\": 5642}}\u001b[0m\u001b[32;1m\u001b[1;3mThe app with the name \"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\" has been created in the AINetwork Blockchain database.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"The app with the name \"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\" has been created in the AINetwork Blockchain database.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\n",
|
||||
" agent.run(\n",
|
||||
" f\"Create an app in the AINetwork Blockchain database with the name {appName}\"\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Set a value at a given path in the AINetwork Blockchain database"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `AINvalueOps` with `{'type': 'SET', 'path': '/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/object', 'value': {'1': 2, '34': 56}}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3m{\"tx_hash\": \"0x3d1a16d9808830088cdf4d37f90f4b1fa1242e2d5f6f983829064f45107b5279\", \"result\": {\"gas_amount_total\": {\"bandwidth\": {\"service\": 0, \"app\": {\"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\": 1}}, \"state\": {\"service\": 0, \"app\": {\"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\": 674}}}, \"gas_cost_total\": 0, \"code\": 0, \"bandwidth_gas_amount\": 1, \"gas_amount_charged\": 0}}\u001b[0m\u001b[32;1m\u001b[1;3mThe value {1: 2, '34': 56} has been set at the path /apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/object.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"The value {1: 2, '34': 56} has been set at the path /apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/object.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\n",
|
||||
" agent.run(f\"Set the value {{1: 2, '34': 56}} at the path /apps/{appName}/object .\")\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Set permissions for a path in the AINetwork Blockchain database"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `AINruleOps` with `{'type': 'SET', 'path': '/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/user/$from', 'eval': 'auth.addr===$from'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[38;5;200m\u001b[1;3m{\"tx_hash\": \"0x37d5264e580f6a217a347059a735bfa9eb5aad85ff28a95531c6dc09252664d2\", \"result\": {\"gas_amount_total\": {\"bandwidth\": {\"service\": 0, \"app\": {\"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\": 1}}, \"state\": {\"service\": 0, \"app\": {\"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\": 712}}}, \"gas_cost_total\": 0, \"code\": 0, \"bandwidth_gas_amount\": 1, \"gas_amount_charged\": 0}}\u001b[0m\u001b[32;1m\u001b[1;3mThe write permissions for the path `/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/user/$from` have been set with the eval string `auth.addr===$from`.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"The write permissions for the path `/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/user/$from` have been set with the eval string `auth.addr===$from`.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\n",
|
||||
" agent.run(\n",
|
||||
" f\"Set the write permissions for the path /apps/{appName}/user/$from with the\"\n",
|
||||
" \" eval string auth.addr===$from .\"\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve the permissions for a path in the AINetwork Blockchain database"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `AINownerOps` with `{'type': 'GET', 'path': '/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3m{\".owner\": {\"owners\": {\"0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac\": {\"branch_owner\": true, \"write_function\": true, \"write_owner\": true, \"write_rule\": true}}}}\u001b[0m\u001b[32;1m\u001b[1;3mThe permissions for the path /apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac are as follows:\n",
|
||||
"\n",
|
||||
"- Address: 0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac\n",
|
||||
" - branch_owner: true\n",
|
||||
" - write_function: true\n",
|
||||
" - write_owner: true\n",
|
||||
" - write_rule: true\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"The permissions for the path /apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac are as follows:\n",
|
||||
"\n",
|
||||
"- Address: 0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac\n",
|
||||
" - branch_owner: true\n",
|
||||
" - write_function: true\n",
|
||||
" - write_owner: true\n",
|
||||
" - write_rule: true\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(agent.run(f\"Retrieve the permissions for the path /apps/{appName}.\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Get AIN from faucet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\"result\":\"0x0eb07b67b7d0a702cb60e865d3deafff3070d8508077ef793d69d6819fd92ea3\",\"time\":1692348112376}"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!curl http://faucet.ainetwork.ai/api/test/{address}/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Get AIN Balance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `AINvalueOps` with `{'type': 'GET', 'path': '/accounts/0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac/balance'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3m100\u001b[0m\u001b[32;1m\u001b[1;3mThe AIN balance of address 0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac is 100 AIN.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"The AIN balance of address 0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac is 100 AIN.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(agent.run(f\"Check AIN balance of {address}\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Transfer AIN"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `AINtransfer` with `{'address': '0x19937b227b1b13f29e7ab18676a89ea3bdea9c5b', 'amount': 100}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m{\"tx_hash\": \"0xa59d15d23373bcc00e413ac8ba18cb016bb3bdd54058d62606aec688c6ad3d2e\", \"result\": {\"gas_amount_total\": {\"bandwidth\": {\"service\": 3}, \"state\": {\"service\": 866}}, \"gas_cost_total\": 0, \"func_results\": {\"_transfer\": {\"op_results\": {\"0\": {\"path\": \"/accounts/0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac/balance\", \"result\": {\"code\": 0, \"bandwidth_gas_amount\": 1}}, \"1\": {\"path\": \"/accounts/0x19937B227b1b13f29e7AB18676a89EA3BDEA9C5b/balance\", \"result\": {\"code\": 0, \"bandwidth_gas_amount\": 1}}}, \"code\": 0, \"bandwidth_gas_amount\": 0}}, \"code\": 0, \"bandwidth_gas_amount\": 1, \"gas_amount_charged\": 869}}\u001b[0m\u001b[32;1m\u001b[1;3mThe transfer of 100 AIN to the address 0x19937b227b1b13f29e7ab18676a89ea3bdea9c5b was successful. The transaction hash is 0xa59d15d23373bcc00e413ac8ba18cb016bb3bdd54058d62606aec688c6ad3d2e.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"The transfer of 100 AIN to the address 0x19937b227b1b13f29e7ab18676a89ea3bdea9c5b was successful. The transaction hash is 0xa59d15d23373bcc00e413ac8ba18cb016bb3bdd54058d62606aec688c6ad3d2e.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\n",
|
||||
" agent.run(\n",
|
||||
" \"Transfer 100 AIN to the address 0x19937b227b1b13f29e7ab18676a89ea3bdea9c5b\"\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -1,13 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Amadeus Toolkit\n",
|
||||
"# Amadeus\n",
|
||||
"\n",
|
||||
"This notebook walks you through connecting LangChain to the Amadeus travel information API\n",
|
||||
"This notebook walks you through connecting LangChain to the `Amadeus` travel information API\n",
|
||||
"\n",
|
||||
"To use this toolkit, you will need to set up your credentials explained in the [Amadeus for developers getting started overview](https://developers.amadeus.com/get-started/get-started-with-self-service-apis-335). Once you've received a AMADEUS_CLIENT_ID and AMADEUS_CLIENT_SECRET, you can input them as environmental variables below."
|
||||
]
|
||||
@ -22,7 +21,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -46,7 +44,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -234,7 +231,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -4,9 +4,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure Cognitive Services Toolkit\n",
|
||||
"# Azure Cognitive Services\n",
|
||||
"\n",
|
||||
"This toolkit is used to interact with the Azure Cognitive Services API to achieve some multimodal capabilities.\n",
|
||||
"This toolkit is used to interact with the `Azure Cognitive Services API` to achieve some multimodal capabilities.\n",
|
||||
"\n",
|
||||
"Currently There are four tools bundled in this toolkit:\n",
|
||||
"- AzureCogsImageAnalysisTool: used to extract caption, objects, tags, and text from images. (Note: this tool is not available on Mac OS yet, due to the dependency on `azure-ai-vision` package, which is only supported on Windows and Linux currently.)\n",
|
||||
@ -264,9 +264,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
@ -5,24 +5,14 @@
|
||||
"id": "7094e328",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# CSV Agent\n",
|
||||
"# CSV\n",
|
||||
"\n",
|
||||
"This notebook shows how to use agents to interact with a csv. It is mostly optimized for question answering.\n",
|
||||
"This notebook shows how to use agents to interact with data in `CSV` format. It is mostly optimized for question answering.\n",
|
||||
"\n",
|
||||
"**NOTE: this agent calls the Pandas DataFrame agent under the hood, which in turn calls the Python agent, which executes LLM generated Python code - this can be bad if the LLM generated Python code is harmful. Use cautiously.**\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "827982c7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import create_csv_agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
@ -32,7 +22,9 @@
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.agents.agent_types import AgentType"
|
||||
"from langchain.agents.agent_types import AgentType\n",
|
||||
"\n",
|
||||
"from langchain.agents import create_csv_agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -40,9 +32,9 @@
|
||||
"id": "bd806175",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using ZERO_SHOT_REACT_DESCRIPTION\n",
|
||||
"## Using `ZERO_SHOT_REACT_DESCRIPTION`\n",
|
||||
"\n",
|
||||
"This shows how to initialize the agent using the ZERO_SHOT_REACT_DESCRIPTION agent type. Note that this is an alternative to the above."
|
||||
"This shows how to initialize the agent using the `ZERO_SHOT_REACT_DESCRIPTION` agent type. Note that this is an alternative to the above."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -130,9 +122,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a96309be",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
@ -305,7 +295,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
File diff suppressed because one or more lines are too long
@ -4,9 +4,10 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Github Toolkit\n",
|
||||
"# Github\n",
|
||||
"\n",
|
||||
"The Github toolkit contains tools that enable an LLM agent to interact with a github repository. The tools are a wrapper for the [PyGitHub](https://github.com/PyGithub/PyGithub) library. \n",
|
||||
"The `Github` toolkit contains tools that enable an LLM agent to interact with a github repository. \n",
|
||||
"The tool is a wrapper for the [PyGitHub](https://github.com/PyGithub/PyGithub) library. \n",
|
||||
"\n",
|
||||
"## Quickstart\n",
|
||||
"1. Install the pygithub library\n",
|
||||
@ -38,7 +39,14 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Install the pygithub library"
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 1. Install the `pygithub` library "
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -58,7 +66,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Create a Github App\n",
|
||||
"### 2. Create a Github App\n",
|
||||
"\n",
|
||||
"[Follow the instructions here](https://docs.github.com/en/apps/creating-github-apps/registering-a-github-app/registering-a-github-app) to create and register a Github app. Make sure your app has the following [repository permissions:](https://docs.github.com/en/rest/overview/permissions-required-for-github-apps?apiVersion=2022-11-28)\n",
|
||||
"* Commit statuses (read only)\n",
|
||||
@ -71,7 +79,7 @@
|
||||
"\n",
|
||||
"Once the app has been registered, add it to the repository you wish the bot to act upon.\n",
|
||||
"\n",
|
||||
"## 3. Set Environmental Variables\n",
|
||||
"### 3. Set Environmental Variables\n",
|
||||
"\n",
|
||||
"Before initializing your agent, the following environmental variables need to be set:\n",
|
||||
"\n",
|
||||
@ -86,7 +94,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example Usage- Simple Agent"
|
||||
"## Example: Simple Agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -212,7 +220,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example Usage- Advanced Agent\n",
|
||||
"## Example: Advanced Agent\n",
|
||||
"\n",
|
||||
"If your agent does not need to use all 8 tools, you can build tools individually to use. For this example, we'll make an agent that does not use the create_file, delete_file or create_pull_request tools, but can also use duckduckgo-search."
|
||||
]
|
||||
@ -375,9 +383,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.16"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
@ -4,9 +4,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Gmail Toolkit\n",
|
||||
"# Gmail\n",
|
||||
"\n",
|
||||
"This notebook walks through connecting a LangChain email to the Gmail API.\n",
|
||||
"This notebook walks through connecting a LangChain email to the `Gmail API`.\n",
|
||||
"\n",
|
||||
"To use this toolkit, you will need to set up your credentials explained in the [Gmail API docs](https://developers.google.com/gmail/api/quickstart/python#authorize_credentials_for_a_desktop_application). Once you've downloaded the `credentials.json` file, you can start using the Gmail API. Once this is done, we'll install the required libraries."
|
||||
]
|
||||
@ -226,7 +226,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -2,7 +2,10 @@
|
||||
sidebar_position: 0
|
||||
---
|
||||
|
||||
# Agent toolkits
|
||||
# Agents & Toolkits
|
||||
|
||||
Agents and Toolkits are placed in the same directory because they are always used together.
|
||||
|
||||
|
||||
import DocCardList from "@theme/DocCardList";
|
||||
|
||||
|
@ -1,15 +1,15 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "245a954a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Jira\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use the Jira tool.\n",
|
||||
"The Jira tool allows agents to interact with a given Jira instance, performing actions such as searching for issues and creating issues, the tool wraps the atlassian-python-api library, for more see: https://atlassian-python-api.readthedocs.io/jira.html\n",
|
||||
"This notebook goes over how to use the `Jira` toolkit.\n",
|
||||
"\n",
|
||||
"The `Jira` toolkit allows agents to interact with a given Jira instance, performing actions such as searching for issues and creating issues, the tool wraps the atlassian-python-api library, for more see: https://atlassian-python-api.readthedocs.io/jira.html\n",
|
||||
"\n",
|
||||
"To use this tool, you must first set as environment variables:\n",
|
||||
" JIRA_API_TOKEN\n",
|
||||
@ -22,12 +22,12 @@
|
||||
"execution_count": null,
|
||||
"id": "961b3689",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-17T10:21:20.168639Z",
|
||||
"start_time": "2023-04-17T10:21:18.698672Z"
|
||||
},
|
||||
"vscode": {
|
||||
"languageId": "shellscript"
|
||||
},
|
||||
"ExecuteTime": {
|
||||
"start_time": "2023-04-17T10:21:18.698672Z",
|
||||
"end_time": "2023-04-17T10:21:20.168639Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@ -41,8 +41,8 @@
|
||||
"id": "34bb5968",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"start_time": "2023-04-17T10:21:22.911233Z",
|
||||
"end_time": "2023-04-17T10:21:23.730922Z"
|
||||
"end_time": "2023-04-17T10:21:23.730922Z",
|
||||
"start_time": "2023-04-17T10:21:22.911233Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@ -58,21 +58,24 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "b3050b55",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-17T10:22:42.505412Z",
|
||||
"start_time": "2023-04-17T10:22:42.499447Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ[\"JIRA_API_TOKEN\"] = \"abc\"\n",
|
||||
"os.environ[\"JIRA_USERNAME\"] = \"123\"\n",
|
||||
"os.environ[\"JIRA_INSTANCE_URL\"] = \"https://jira.atlassian.com\"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"xyz\""
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"start_time": "2023-04-17T10:22:42.499447Z",
|
||||
"end_time": "2023-04-17T10:22:42.505412Z"
|
||||
}
|
||||
},
|
||||
"id": "b3050b55"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@ -80,8 +83,8 @@
|
||||
"id": "ac4910f8",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"start_time": "2023-04-17T10:22:44.664481Z",
|
||||
"end_time": "2023-04-17T10:22:44.720538Z"
|
||||
"end_time": "2023-04-17T10:22:44.720538Z",
|
||||
"start_time": "2023-04-17T10:22:44.664481Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@ -97,6 +100,17 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "d5461370",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-04-17T10:23:38.121883Z",
|
||||
"start_time": "2023-04-17T10:23:33.662454Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -117,7 +131,9 @@
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "'A new issue has been created in project PW with the summary \"Make more fried rice\" and description \"Reminder to make more fried rice\".'"
|
||||
"text/plain": [
|
||||
"'A new issue has been created in project PW with the summary \"Make more fried rice\" and description \"Reminder to make more fried rice\".'"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
@ -126,20 +142,12 @@
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"make a new issue in project PW to remind me to make more fried rice\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"start_time": "2023-04-17T10:23:33.662454Z",
|
||||
"end_time": "2023-04-17T10:23:38.121883Z"
|
||||
}
|
||||
},
|
||||
"id": "d5461370"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -153,7 +161,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.7"
|
||||
"version": "3.10.12"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
@ -163,4 +171,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
|
@ -5,9 +5,10 @@
|
||||
"id": "85fb2c03-ab88-4c8c-97e3-a7f2954555ab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# JSON Agent\n",
|
||||
"# JSON\n",
|
||||
"\n",
|
||||
"This notebook showcases an agent designed to interact with large JSON/dict objects. This is useful when you want to answer questions about a JSON blob that's too large to fit in the context window of an LLM. The agent is able to iteratively explore the blob to find what it needs to answer the user's question.\n",
|
||||
"This notebook showcases an agent interacting with large `JSON/dict` objects. \n",
|
||||
"This is useful when you want to answer questions about a JSON blob that's too large to fit in the context window of an LLM. The agent is able to iteratively explore the blob to find what it needs to answer the user's question.\n",
|
||||
"\n",
|
||||
"In the below example, we are using the OpenAPI spec for the OpenAI API, which you can find [here](https://github.com/openai/openai-openapi/blob/master/openapi.yaml).\n",
|
||||
"\n",
|
||||
@ -179,7 +180,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -1,15 +1,14 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MultiOn Toolkit\n",
|
||||
"# MultiOn\n",
|
||||
"\n",
|
||||
"This notebook walks you through connecting LangChain to the MultiOn Client in your browser\n",
|
||||
"This notebook walks you through connecting LangChain to the `MultiOn` Client in your browser\n",
|
||||
"\n",
|
||||
"To use this toolkit, you will need to add MultiOn Extension to your browser as explained in the [MultiOn for Chrome](https://multion.notion.site/Download-MultiOn-ddddcfe719f94ab182107ca2612c07a5)."
|
||||
"To use this toolkit, you will need to add `MultiOn Extension` to your browser as explained in the [MultiOn for Chrome](https://multion.notion.site/Download-MultiOn-ddddcfe719f94ab182107ca2612c07a5)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -47,7 +46,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -127,7 +125,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -1,13 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Office365 Toolkit\n",
|
||||
"# Office365\n",
|
||||
"\n",
|
||||
"This notebook walks through connecting LangChain to Office365 email and calendar.\n",
|
||||
"This notebook walks through connecting LangChain to `Office365` email and calendar.\n",
|
||||
"\n",
|
||||
"To use this toolkit, you will need to set up your credentials explained in the [Microsoft Graph authentication and authorization overview](https://learn.microsoft.com/en-us/graph/auth/). Once you've received a CLIENT_ID and CLIENT_SECRET, you can input them as environmental variables below."
|
||||
]
|
||||
@ -23,7 +22,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -42,7 +40,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -238,7 +235,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -5,9 +5,9 @@
|
||||
"id": "85fb2c03-ab88-4c8c-97e3-a7f2954555ab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# OpenAPI agents\n",
|
||||
"# OpenAPI\n",
|
||||
"\n",
|
||||
"We can construct agents to consume arbitrary APIs, here APIs conformant to the OpenAPI/Swagger specification."
|
||||
"We can construct agents to consume arbitrary APIs, here APIs conformant to the `OpenAPI`/`Swagger` specification."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -271,9 +271,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "38762cc0",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -449,9 +447,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "3a9cc939",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -773,7 +769,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -7,7 +7,9 @@
|
||||
"source": [
|
||||
"# Natural Language APIs\n",
|
||||
"\n",
|
||||
"Natural Language API Toolkits (NLAToolkits) permit LangChain Agents to efficiently plan and combine calls across endpoints. This notebook demonstrates a sample composition of the Speak, Klarna, and Spoonacluar APIs.\n",
|
||||
"`Natural Language API` Toolkits (`NLAToolkits`) permit LangChain Agents to efficiently plan and combine calls across endpoints. \n",
|
||||
"\n",
|
||||
"This notebook demonstrates a sample composition of the `Speak`, `Klarna`, and `Spoonacluar` APIs.\n",
|
||||
"\n",
|
||||
"For a detailed walkthrough of the OpenAPI chains wrapped within the NLAToolkit, see the [OpenAPI Operation Chain](/docs/use_cases/apis/openapi.html) notebook.\n",
|
||||
"\n",
|
||||
@ -182,7 +184,7 @@
|
||||
"id": "c61d92a8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using Auth + Adding more Endpoints\n",
|
||||
"### Use Auth and add more Endpoints\n",
|
||||
"\n",
|
||||
"Some endpoints may require user authentication via things like access tokens. Here we show how to pass in the authentication information via the `Requests` wrapper object.\n",
|
||||
"\n",
|
||||
@ -420,7 +422,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -5,11 +5,11 @@
|
||||
"id": "c81da886",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Pandas Dataframe Agent\n",
|
||||
"# Pandas Dataframe\n",
|
||||
"\n",
|
||||
"This notebook shows how to use agents to interact with a pandas dataframe. It is mostly optimized for question answering.\n",
|
||||
"This notebook shows how to use agents to interact with a `Pandas DataFrame`. It is mostly optimized for question answering.\n",
|
||||
"\n",
|
||||
"**NOTE: this agent calls the Python agent under the hood, which executes LLM generated Python code - this can be bad if the LLM generated Python code is harmful. Use cautiously.**"
|
||||
"**NOTE: this agent calls the `Python` agent under the hood, which executes LLM generated Python code - this can be bad if the LLM generated Python code is harmful. Use cautiously.**"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -42,9 +42,9 @@
|
||||
"id": "a62858e2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using ZERO_SHOT_REACT_DESCRIPTION\n",
|
||||
"## Using `ZERO_SHOT_REACT_DESCRIPTION`\n",
|
||||
"\n",
|
||||
"This shows how to initialize the agent using the ZERO_SHOT_REACT_DESCRIPTION agent type. Note that this is an alternative to the above."
|
||||
"This shows how to initialize the agent using the `ZERO_SHOT_REACT_DESCRIPTION` agent type. Note that this is an alternative to the above."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -212,7 +212,7 @@
|
||||
"id": "c4bc0584",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Multi DataFrame Example\n",
|
||||
"## Multi DataFrame Example\n",
|
||||
"\n",
|
||||
"This next part shows how the agent can interact with multiple dataframes passed in as a list."
|
||||
]
|
||||
@ -292,7 +292,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -4,17 +4,19 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PlayWright Browser Toolkit\n",
|
||||
"# PlayWright Browser\n",
|
||||
"\n",
|
||||
"This toolkit is used to interact with the browser. While other tools (like the Requests tools) are fine for static sites, Browser toolkits let your agent navigate the web and interact with dynamically rendered sites. Some tools bundled within the Browser toolkit include:\n",
|
||||
"This toolkit is used to interact with the browser. While other tools (like the `Requests` tools) are fine for static sites, `PlayWright Browser` toolkits let your agent navigate the web and interact with dynamically rendered sites. \n",
|
||||
"\n",
|
||||
"- NavigateTool (navigate_browser) - navigate to a URL\n",
|
||||
"- NavigateBackTool (previous_page) - wait for an element to appear\n",
|
||||
"- ClickTool (click_element) - click on an element (specified by selector)\n",
|
||||
"- ExtractTextTool (extract_text) - use beautiful soup to extract text from the current web page\n",
|
||||
"- ExtractHyperlinksTool (extract_hyperlinks) - use beautiful soup to extract hyperlinks from the current web page\n",
|
||||
"- GetElementsTool (get_elements) - select elements by CSS selector\n",
|
||||
"- CurrentPageTool (current_page) - get the current page URL\n"
|
||||
"Some tools bundled within the `PlayWright Browser` toolkit include:\n",
|
||||
"\n",
|
||||
"- `NavigateTool` (navigate_browser) - navigate to a URL\n",
|
||||
"- `NavigateBackTool` (previous_page) - wait for an element to appear\n",
|
||||
"- `ClickTool` (click_element) - click on an element (specified by selector)\n",
|
||||
"- `ExtractTextTool` (extract_text) - use beautiful soup to extract text from the current web page\n",
|
||||
"- `ExtractHyperlinksTool` (extract_hyperlinks) - use beautiful soup to extract hyperlinks from the current web page\n",
|
||||
"- `GetElementsTool` (get_elements) - select elements by CSS selector\n",
|
||||
"- `CurrentPageTool` (current_page) - get the current page URL\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -327,7 +329,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -2,36 +2,40 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9363398d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PowerBI Dataset Agent\n",
|
||||
"# PowerBI Dataset\n",
|
||||
"\n",
|
||||
"This notebook showcases an agent designed to interact with a Power BI Dataset. The agent is designed to answer more general questions about a dataset, as well as recover from errors.\n",
|
||||
"This notebook showcases an agent interacting with a `Power BI Dataset`. The agent is answering more general questions about a dataset, as well as recover from errors.\n",
|
||||
"\n",
|
||||
"Note that, as this agent is in active development, all answers might not be correct. It runs against the [executequery endpoint](https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/execute-queries), which does not allow deletes.\n",
|
||||
"\n",
|
||||
"### Some notes\n",
|
||||
"### Notes:\n",
|
||||
"- It relies on authentication with the azure.identity package, which can be installed with `pip install azure-identity`. Alternatively you can create the powerbi dataset with a token as a string without supplying the credentials.\n",
|
||||
"- You can also supply a username to impersonate for use with datasets that have RLS enabled. \n",
|
||||
"- The toolkit uses a LLM to create the query from the question, the agent uses the LLM for the overall execution.\n",
|
||||
"- Testing was done mostly with a `text-davinci-003` model, codex models did not seem to perform ver well."
|
||||
],
|
||||
"metadata": {},
|
||||
"attachments": {},
|
||||
"id": "9363398d"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Initialization"
|
||||
],
|
||||
"id": "0725445e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"id": "0725445e"
|
||||
"source": [
|
||||
"## Initialization"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c82f33e9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents.agent_toolkits import create_pbi_agent\n",
|
||||
"from langchain.agents.agent_toolkits import PowerBIToolkit\n",
|
||||
@ -39,16 +43,16 @@
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.agents import AgentExecutor\n",
|
||||
"from azure.identity import DefaultAzureCredential"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"id": "c82f33e9"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0b2c5853",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fast_llm = ChatOpenAI(\n",
|
||||
" temperature=0.5, max_tokens=1000, model_name=\"gpt-3.5-turbo\", verbose=True\n",
|
||||
@ -69,99 +73,95 @@
|
||||
" toolkit=toolkit,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"id": "0b2c5853"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "80c92be3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: describing a table"
|
||||
],
|
||||
"metadata": {},
|
||||
"id": "80c92be3"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"agent_executor.run(\"Describe table1\")"
|
||||
],
|
||||
"outputs": [],
|
||||
"id": "90f236cb",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"id": "90f236cb"
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.run(\"Describe table1\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b464930f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: simple query on a table\n",
|
||||
"In this example, the agent actually figures out the correct query to get a row count of the table."
|
||||
],
|
||||
"metadata": {},
|
||||
"attachments": {},
|
||||
"id": "b464930f"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b668c907",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.run(\"How many records are in table1?\")"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"id": "b668c907"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f2229a2f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: running queries"
|
||||
],
|
||||
"metadata": {},
|
||||
"id": "f2229a2f"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "865a420f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.run(\"How many records are there by dimension1 in table2?\")"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"id": "865a420f"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"agent_executor.run(\"What unique values are there for dimensions2 in table2\")"
|
||||
],
|
||||
"outputs": [],
|
||||
"id": "120cd49a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"id": "120cd49a"
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.run(\"What unique values are there for dimensions2 in table2\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ac584fb2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example: add your own few-shot prompts"
|
||||
],
|
||||
"metadata": {},
|
||||
"attachments": {},
|
||||
"id": "ac584fb2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ffa66827",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# fictional example\n",
|
||||
"few_shots = \"\"\"\n",
|
||||
@ -189,26 +189,27 @@
|
||||
" toolkit=toolkit,\n",
|
||||
" verbose=True,\n",
|
||||
")"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {},
|
||||
"id": "ffa66827"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3be44685",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.run(\"What was the maximum of value in revenue in dollars in 2022?\")"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {},
|
||||
"id": "3be44685"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"interpreter": {
|
||||
"hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"display_name": "Python 3.9.16 64-bit"
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@ -220,12 +221,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
},
|
||||
"interpreter": {
|
||||
"hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
|
@ -5,9 +5,9 @@
|
||||
"id": "82a4c2cc-20ea-4b20-a565-63e905dee8ff",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Python Agent\n",
|
||||
"# Python\n",
|
||||
"\n",
|
||||
"This notebook showcases an agent designed to write and execute python code to answer a question."
|
||||
"This notebook showcases an agent designed to write and execute `Python` code to answer a question."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -32,7 +32,7 @@
|
||||
"id": "ca30d64c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using ZERO_SHOT_REACT_DESCRIPTION\n",
|
||||
"## Using `ZERO_SHOT_REACT_DESCRIPTION`\n",
|
||||
"\n",
|
||||
"This shows how to initialize the agent using the ZERO_SHOT_REACT_DESCRIPTION agent type."
|
||||
]
|
||||
@ -149,9 +149,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "4b9f60e7-eb6a-4f14-8604-498d863d4482",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -271,7 +269,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -1,13 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Spark Dataframe Agent\n",
|
||||
"# Spark Dataframe\n",
|
||||
"\n",
|
||||
"This notebook shows how to use agents to interact with a Spark dataframe and Spark Connect. It is mostly optimized for question answering.\n",
|
||||
"This notebook shows how to use agents to interact with a `Spark DataFrame` and `Spark Connect`. It is mostly optimized for question answering.\n",
|
||||
"\n",
|
||||
"**NOTE: this agent calls the Python agent under the hood, which executes LLM generated Python code - this can be bad if the LLM generated Python code is harmful. Use cautiously.**"
|
||||
]
|
||||
@ -23,6 +22,13 @@
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"...input your openai api key here...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## `Spark DataFrame` example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
@ -225,11 +231,10 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Spark Connect Example"
|
||||
"## `Spark Connect` example"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -405,9 +410,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
@ -4,9 +4,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Spark SQL Agent\n",
|
||||
"# Spark SQL\n",
|
||||
"\n",
|
||||
"This notebook shows how to use agents to interact with a Spark SQL. Similar to [SQL Database Agent](https://python.langchain.com/docs/integrations/toolkits/sql_database), it is designed to address general inquiries about Spark SQL and facilitate error recovery.\n",
|
||||
"This notebook shows how to use agents to interact with `Spark SQL`. Similar to [SQL Database Agent](https://python.langchain.com/docs/integrations/toolkits/sql_database), it is designed to address general inquiries about `Spark SQL` and facilitate error recovery.\n",
|
||||
"\n",
|
||||
"**NOTE: Note that, as this agent is in active development, all answers might not be correct. Additionally, it is not guaranteed that the agent won't perform DML statements on your Spark cluster given certain questions. Be careful running it on sensitive data!**"
|
||||
]
|
||||
@ -163,7 +163,9 @@
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "'The titanic table has the following columns: PassengerId (INT), Survived (INT), Pclass (INT), Name (STRING), Sex (STRING), Age (DOUBLE), SibSp (INT), Parch (INT), Ticket (STRING), Fare (DOUBLE), Cabin (STRING), and Embarked (STRING). Here are some sample rows from the table: \\n\\n1. PassengerId: 1, Survived: 0, Pclass: 3, Name: Braund, Mr. Owen Harris, Sex: male, Age: 22.0, SibSp: 1, Parch: 0, Ticket: A/5 21171, Fare: 7.25, Cabin: None, Embarked: S\\n2. PassengerId: 2, Survived: 1, Pclass: 1, Name: Cumings, Mrs. John Bradley (Florence Briggs Thayer), Sex: female, Age: 38.0, SibSp: 1, Parch: 0, Ticket: PC 17599, Fare: 71.2833, Cabin: C85, Embarked: C\\n3. PassengerId: 3, Survived: 1, Pclass: 3, Name: Heikkinen, Miss. Laina, Sex: female, Age: 26.0, SibSp: 0, Parch: 0, Ticket: STON/O2. 3101282, Fare: 7.925, Cabin: None, Embarked: S'"
|
||||
"text/plain": [
|
||||
"'The titanic table has the following columns: PassengerId (INT), Survived (INT), Pclass (INT), Name (STRING), Sex (STRING), Age (DOUBLE), SibSp (INT), Parch (INT), Ticket (STRING), Fare (DOUBLE), Cabin (STRING), and Embarked (STRING). Here are some sample rows from the table: \\n\\n1. PassengerId: 1, Survived: 0, Pclass: 3, Name: Braund, Mr. Owen Harris, Sex: male, Age: 22.0, SibSp: 1, Parch: 0, Ticket: A/5 21171, Fare: 7.25, Cabin: None, Embarked: S\\n2. PassengerId: 2, Survived: 1, Pclass: 1, Name: Cumings, Mrs. John Bradley (Florence Briggs Thayer), Sex: female, Age: 38.0, SibSp: 1, Parch: 0, Ticket: PC 17599, Fare: 71.2833, Cabin: C85, Embarked: C\\n3. PassengerId: 3, Survived: 1, Pclass: 3, Name: Heikkinen, Miss. Laina, Sex: female, Age: 26.0, SibSp: 0, Parch: 0, Ticket: STON/O2. 3101282, Fare: 7.925, Cabin: None, Embarked: S'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
@ -239,7 +241,9 @@
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "'The square root of the average age is approximately 5.45.'"
|
||||
"text/plain": [
|
||||
"'The square root of the average age is approximately 5.45.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
@ -253,6 +257,12 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -305,7 +315,9 @@
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "'The oldest survived passenger is Barkworth, Mr. Algernon Henry Wilson, who was 80 years old.'"
|
||||
"text/plain": [
|
||||
"'The oldest survived passenger is Barkworth, Mr. Algernon Henry Wilson, who was 80 years old.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
@ -314,10 +326,7 @@
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.run(\"What's the name of the oldest survived passenger?\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -336,9 +345,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
@ -1,22 +1,21 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "0e499e90-7a6d-4fab-8aab-31a4df417601",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# SQL Database Agent\n",
|
||||
"# SQL Database\n",
|
||||
"\n",
|
||||
"This notebook showcases an agent designed to interact with a sql databases. The agent builds off of [SQLDatabaseChain](https://python.langchain.com/docs/use_cases/tabular/sqlite) and is designed to answer more general questions about a database, as well as recover from errors.\n",
|
||||
"This notebook showcases an agent designed to interact with a `SQL` databases. \n",
|
||||
"The agent builds off of [SQLDatabaseChain](https://python.langchain.com/docs/use_cases/tabular/sqlite) and is designed to answer more general questions about a database, as well as recover from errors.\n",
|
||||
"\n",
|
||||
"Note that, as this agent is in active development, all answers might not be correct. Additionally, it is not guaranteed that the agent won't perform DML statements on your database given certain questions. Be careful running it on sensitive data!\n",
|
||||
"\n",
|
||||
"This uses the example Chinook database. To set it up follow the instructions on https://database.guide/2-sample-databases-sqlite/, placing the .db file in a notebooks folder at the root of this repository."
|
||||
"This uses the example `Chinook` database. To set it up follow the instructions on https://database.guide/2-sample-databases-sqlite/, placing the .db file in a notebooks folder at the root of this repository."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "ec927ac6-9b2a-4e8a-9a6e-3e429191875c",
|
||||
"metadata": {
|
||||
@ -56,12 +55,11 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "f74d1792",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using ZERO_SHOT_REACT_DESCRIPTION\n",
|
||||
"## Using `ZERO_SHOT_REACT_DESCRIPTION`\n",
|
||||
"\n",
|
||||
"This shows how to initialize the agent using the ZERO_SHOT_REACT_DESCRIPTION agent type."
|
||||
]
|
||||
@ -84,7 +82,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "971cc455",
|
||||
"metadata": {},
|
||||
@ -110,7 +107,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "54c01168",
|
||||
"metadata": {},
|
||||
@ -136,7 +132,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "5a4a9455",
|
||||
"metadata": {},
|
||||
@ -147,7 +142,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "36ae48c7-cb08-4fef-977e-c7d4b96a464b",
|
||||
"metadata": {},
|
||||
@ -237,7 +231,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "9abcfe8e-1868-42a4-8345-ad2d9b44c681",
|
||||
"metadata": {},
|
||||
@ -312,7 +305,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "6fbc26af-97e4-4a21-82aa-48bdc992da26",
|
||||
"metadata": {},
|
||||
@ -495,7 +487,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "7c7503b5-d9d9-4faa-b064-29fcdb5ff213",
|
||||
"metadata": {},
|
||||
@ -639,7 +630,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -1,23 +1,21 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "18ada398-dce6-4049-9b56-fc0ede63da9c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Vectorstore Agent\n",
|
||||
"# Vectorstore\n",
|
||||
"\n",
|
||||
"This notebook showcases an agent designed to retrieve information from one or more vectorstores, either with or without sources."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "eecb683b-3a46-4b9d-81a3-7caefbfec1a1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create the Vectorstores"
|
||||
"## Create Vectorstores"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -95,7 +93,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "f4814175-964d-42f1-aa9d-22801ce1e912",
|
||||
"metadata": {},
|
||||
@ -128,7 +125,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "8a38ad10",
|
||||
"metadata": {},
|
||||
@ -217,7 +213,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "7ca07707",
|
||||
"metadata": {},
|
||||
@ -263,7 +258,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "71680984-edaf-4a63-90f5-94edbd263550",
|
||||
"metadata": {},
|
||||
@ -422,7 +416,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Xorbits Agent"
|
||||
"# Xorbits"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -13,7 +13,7 @@
|
||||
"source": [
|
||||
"This notebook shows how to use agents to interact with [Xorbits Pandas](https://doc.xorbits.io/en/latest/reference/pandas/index.html) dataframe and [Xorbits Numpy](https://doc.xorbits.io/en/latest/reference/numpy/index.html) ndarray. It is mostly optimized for question answering.\n",
|
||||
"\n",
|
||||
"**NOTE: this agent calls the Python agent under the hood, which executes LLM generated Python code - this can be bad if the LLM generated Python code is harmful. Use cautiously.**"
|
||||
"**NOTE: this agent calls the `Python` agent under the hood, which executes LLM generated Python code - this can be bad if the LLM generated Python code is harmful. Use cautiously.**"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -734,9 +734,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.13"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
@ -1,13 +1,14 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Atlas\n",
|
||||
"\n",
|
||||
"\n",
|
||||
">[Atlas](https://docs.nomic.ai/index.html) is a platform for interacting with both small and internet scale unstructured datasets by `Nomic`. \n",
|
||||
">[Atlas](https://docs.nomic.ai/index.html) is a platform by Nomic made for interacting with both small and internet scale unstructured datasets. It enables anyone to visualize, search, and share massive datasets in their browser.\n",
|
||||
"\n",
|
||||
"This notebook shows you how to use functionality related to the `AtlasDB` vectorstore."
|
||||
]
|
||||
@ -49,6 +50,14 @@
|
||||
"!pip install nomic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load Packages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
@ -78,6 +87,14 @@
|
||||
"ATLAS_TEST_API_KEY = \"7xDPkYXSYDc1_ErdTPIcoAR9RNd8YDlkS3nVNXcVoIMZ6\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prepare the Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
@ -96,6 +113,14 @@
|
||||
"texts = [e.strip() for e in texts]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Map the Data using Nomic's Atlas"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -127,78 +152,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"\n",
|
||||
" <strong><a href=\"https://atlas.nomic.ai/dashboard/project/ee2354a3-7f9a-4c6b-af43-b0cda09d7198\">test_index_1677255228.136989</strong></a>\n",
|
||||
" <br>\n",
|
||||
" A description for your project 508 datums inserted.\n",
|
||||
" <br>\n",
|
||||
" 1 index built.\n",
|
||||
" <br><strong>Projections</strong>\n",
|
||||
"<ul>\n",
|
||||
"<li>test_index_1677255228.136989_index. Status Completed. <a target=\"_blank\" href=\"https://atlas.nomic.ai/map/ee2354a3-7f9a-4c6b-af43-b0cda09d7198/db996d77-8981-48a0-897a-ff2c22bbf541\">view online</a></li></ul><hr><script>\n",
|
||||
" destroy = function() {\n",
|
||||
" document.getElementById(\"iframedb996d77-8981-48a0-897a-ff2c22bbf541\").remove()\n",
|
||||
" }\n",
|
||||
" </script>\n",
|
||||
"\n",
|
||||
" <h4>Projection ID: db996d77-8981-48a0-897a-ff2c22bbf541</h4>\n",
|
||||
" <div class=\"actions\">\n",
|
||||
" <div id=\"hide\" class=\"action\" onclick=\"destroy()\">Hide embedded project</div>\n",
|
||||
" <div class=\"action\" id=\"out\">\n",
|
||||
" <a href=\"https://atlas.nomic.ai/map/ee2354a3-7f9a-4c6b-af43-b0cda09d7198/db996d77-8981-48a0-897a-ff2c22bbf541\" target=\"_blank\">Explore on atlas.nomic.ai</a>\n",
|
||||
" </div>\n",
|
||||
" </div>\n",
|
||||
" \n",
|
||||
" <iframe class=\"iframe\" id=\"iframedb996d77-8981-48a0-897a-ff2c22bbf541\" allow=\"clipboard-read; clipboard-write\" src=\"https://atlas.nomic.ai/map/ee2354a3-7f9a-4c6b-af43-b0cda09d7198/db996d77-8981-48a0-897a-ff2c22bbf541\">\n",
|
||||
" </iframe>\n",
|
||||
"\n",
|
||||
" <style>\n",
|
||||
" .iframe {\n",
|
||||
" /* vh can be **very** large in vscode html. */\n",
|
||||
" height: min(75vh, 66vw);\n",
|
||||
" width: 100%;\n",
|
||||
" }\n",
|
||||
" </style>\n",
|
||||
" \n",
|
||||
" <style>\n",
|
||||
" .actions {\n",
|
||||
" display: block;\n",
|
||||
" }\n",
|
||||
" .action {\n",
|
||||
" min-height: 18px;\n",
|
||||
" margin: 5px;\n",
|
||||
" transition: all 500ms ease-in-out;\n",
|
||||
" }\n",
|
||||
" .action:hover {\n",
|
||||
" cursor: pointer;\n",
|
||||
" }\n",
|
||||
" #hide:hover::after {\n",
|
||||
" content: \" X\";\n",
|
||||
" }\n",
|
||||
" #out:hover::after {\n",
|
||||
" content: \"\";\n",
|
||||
" }\n",
|
||||
" </style>\n",
|
||||
" "
|
||||
],
|
||||
"text/plain": [
|
||||
"AtlasProject: <{'id': 'ee2354a3-7f9a-4c6b-af43-b0cda09d7198', 'owner': '9c29afbb-a002-4d49-958e-ecf5ae1351ac', 'project_name': 'test_index_1677255228.136989', 'creator': 'auth0|63efc4b5462246f4d9a6ecf2', 'description': 'A description for your project', 'opensearch_index_id': 'f61fb8dd-0abf-4f31-9130-41870e443902', 'is_public': True, 'project_fields': ['atlas_id', 'text'], 'unique_id_field': 'atlas_id', 'modality': 'text', 'total_datums_in_project': 508, 'created_timestamp': '2023-02-24T16:13:50.313363+00:00', 'atlas_indices': [{'id': 'b1b01833-0964-4597-a4bc-a2d60700949d', 'project_id': 'ee2354a3-7f9a-4c6b-af43-b0cda09d7198', 'index_name': 'test_index_1677255228.136989_index', 'indexed_field': 'text', 'created_timestamp': '2023-02-24T16:13:52.957101+00:00', 'updated_timestamp': '2023-02-24T16:14:03.469621+00:00', 'atoms': ['charchunk', 'document'], 'colorable_fields': [], 'embedders': [{'id': '7ec0868a-4eed-4414-a482-25cce9803e1b', 'atlas_index_id': 'b1b01833-0964-4597-a4bc-a2d60700949d', 'ready': True, 'model_name': 'NomicEmbed', 'hyperparameters': {'norm': 'both', 'batch_size': 20, 'polymerize_by': 'charchunk', 'dataset_buffer_size': 1000}}], 'nearest_neighbor_indices': [{'id': '86f8e3ff-e07c-4678-a4d7-144db4b0301d', 'index_name': 'NomicOrganize', 'ready': True, 'hyperparameters': {'dim': 384, 'space': 'l2'}, 'atom_strategies': ['document']}], 'projections': [{'id': 'db996d77-8981-48a0-897a-ff2c22bbf541', 'projection_name': 'NomicProject', 'ready': True, 'hyperparameters': {'spread': 1.0, 'n_epochs': 50, 'n_neighbors': 15}, 'atom_strategies': ['document'], 'created_timestamp': '2023-02-24T16:13:52.979561+00:00', 'updated_timestamp': '2023-02-24T16:14:03.466309+00:00'}]}], 'insert_update_delete_lock': False}>"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db.project"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here is a map with the result of this code. This map displays the texts of the State of the Union.\n",
|
||||
"https://atlas.nomic.ai/map/3e4de075-89ff-486a-845c-36c23f30bb67/d8ce2284-8edb-4050-8b9b-9bb543d7f647"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
@ -6,7 +6,9 @@
|
||||
"source": [
|
||||
"# Azure Cognitive Search\n",
|
||||
"\n",
|
||||
"[Azure Cognitive Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n"
|
||||
"[Azure Cognitive Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n",
|
||||
"\n",
|
||||
"Vector search is currently in public preview. It's available through the Azure portal, preview REST API and beta client libraries. [More info](https://learn.microsoft.com/en-us/azure/search/vector-search-overview) Beta client libraries are subject to potential breaking changes, please be sure to use the SDK package version identified below. azure-search-documents==11.4.0b8"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -22,7 +24,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install azure-search-documents==11.4.0b6\n",
|
||||
"!pip install azure-search-documents==11.4.0b8\n",
|
||||
"!pip install azure-identity"
|
||||
]
|
||||
},
|
||||
@ -36,13 +38,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import openai\n",
|
||||
"import os\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.embeddings import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores.azuresearch import AzureSearch"
|
||||
]
|
||||
},
|
||||
@ -57,7 +59,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -79,7 +81,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -98,7 +100,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -151,7 +153,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -178,6 +180,41 @@
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Perform a vector similarity search with relevance scores\n",
|
||||
" \n",
|
||||
"Execute a pure vector similarity search using the similarity_search_with_relevance_scores() method:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[(Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': 'C:\\\\repos\\\\langchain-fruocco-acs\\\\langchain\\\\docs\\\\extras\\\\modules\\\\state_of_the_union.txt'}),\n",
|
||||
" 0.8441472),\n",
|
||||
" (Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': 'C:\\\\repos\\\\langchain-fruocco-acs\\\\langchain\\\\docs\\\\extras\\\\modules\\\\state_of_the_union.txt'}),\n",
|
||||
" 0.8441472),\n",
|
||||
" (Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': 'C:\\\\repos\\\\langchain-fruocco-acs\\\\langchain\\\\docs\\\\extras\\\\modules\\\\state_of_the_union.txt'}),\n",
|
||||
" 0.82153815),\n",
|
||||
" (Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': 'C:\\\\repos\\\\langchain-fruocco-acs\\\\langchain\\\\docs\\\\extras\\\\modules\\\\state_of_the_union.txt'}),\n",
|
||||
" 0.82153815)]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs_and_scores = vector_store.similarity_search_with_relevance_scores(query=\"What did the president say about Ketanji Brown Jackson\", k=4, score_threshold=0.80)\n",
|
||||
"from pprint import pprint\n",
|
||||
"pprint(docs_and_scores)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
@ -190,7 +227,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -219,7 +256,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -254,7 +291,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -328,7 +365,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -348,7 +385,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -371,7 +408,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -400,7 +437,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -494,7 +531,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -530,7 +567,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -5,7 +5,7 @@
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ClickHouse Vector Search\n",
|
||||
"# ClickHouse\n",
|
||||
"\n",
|
||||
"> [ClickHouse](https://clickhouse.com/) is the fastest and most resource efficient open-source database for real-time apps and analytics with full SQL support and a wide range of functions to assist users in writing analytical queries. Lately added data structures and distance search functions (like `L2Distance`) as well as [approximate nearest neighbor search indexes](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/annindexes) enable ClickHouse to be used as a high performance and scalable vector database to store and search vectors with SQL.\n",
|
||||
"\n",
|
||||
@ -198,8 +198,7 @@
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:28:58.252991Z",
|
||||
"start_time": "2023-06-03T08:28:58.197560Z"
|
||||
},
|
||||
"scrolled": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@ -246,9 +245,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "54f4f561",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -395,7 +392,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -1,20 +1,18 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "2ce41f46-5711-4311-b04d-2fe233ac5b1b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DocArrayHnswSearch\n",
|
||||
"# DocArray HnswSearch\n",
|
||||
"\n",
|
||||
">[DocArrayHnswSearch](https://docs.docarray.org/user_guide/storing/index_hnswlib/) is a lightweight Document Index implementation provided by [Docarray](https://docs.docarray.org/) that runs fully locally and is best suited for small- to medium-sized datasets. It stores vectors on disk in [hnswlib](https://github.com/nmslib/hnswlib), and stores all other data in [SQLite](https://www.sqlite.org/index.html).\n",
|
||||
">[DocArrayHnswSearch](https://docs.docarray.org/user_guide/storing/index_hnswlib/) is a lightweight Document Index implementation provided by [Docarray](https://github.com/docarray/docarray) that runs fully locally and is best suited for small- to medium-sized datasets. It stores vectors on disk in [hnswlib](https://github.com/nmslib/hnswlib), and stores all other data in [SQLite](https://www.sqlite.org/index.html).\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `DocArrayHnswSearch`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "7ee37d28",
|
||||
"metadata": {},
|
||||
@ -57,7 +55,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "8dbb6de2",
|
||||
"metadata": {
|
||||
@ -103,7 +100,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "ed6f905b-4853-4a44-9730-614aa8e22b78",
|
||||
"metadata": {},
|
||||
@ -151,7 +147,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "3febb987-e903-416f-af26-6897d84c8d61",
|
||||
"metadata": {},
|
||||
@ -160,7 +155,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "bb1df11a",
|
||||
"metadata": {},
|
||||
@ -236,7 +230,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -1,20 +1,18 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "a3afefb0-7e99-4912-a222-c6b186da11af",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DocArrayInMemorySearch\n",
|
||||
"# DocArray InMemorySearch\n",
|
||||
"\n",
|
||||
">[DocArrayInMemorySearch](https://docs.docarray.org/user_guide/storing/index_in_memory/) is a document index provided by [Docarray](https://docs.docarray.org/) that stores documents in memory. It is a great starting point for small datasets, where you may not want to launch a database server.\n",
|
||||
">[DocArrayInMemorySearch](https://docs.docarray.org/user_guide/storing/index_in_memory/) is a document index provided by [Docarray](https://github.com/docarray/docarray) that stores documents in memory. It is a great starting point for small datasets, where you may not want to launch a database server.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `DocArrayInMemorySearch`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "5031a3ec",
|
||||
"metadata": {},
|
||||
@ -56,7 +54,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "6e57a389-f637-4b8f-9ab2-759ae7485f78",
|
||||
"metadata": {},
|
||||
@ -98,7 +95,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "efbb6684-3846-4332-a624-ddd4d75844c1",
|
||||
"metadata": {},
|
||||
@ -146,7 +142,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "43896697-f99e-47b6-9117-47a25e9afa9c",
|
||||
"metadata": {},
|
||||
@ -155,7 +150,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "414a9bc9",
|
||||
"metadata": {},
|
||||
@ -224,7 +218,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user