mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-06 17:20:16 +00:00
Compare commits
90 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6fbdb9ce51 | ||
|
|
04475bea7d | ||
|
|
1ad180f6de | ||
|
|
274dc4bc53 | ||
|
|
05e749d9fe | ||
|
|
80558b5b27 | ||
|
|
3637d6da6e | ||
|
|
65f85af242 | ||
|
|
f6c97e6af4 | ||
|
|
f0cfed636f | ||
|
|
6b8d144ccc | ||
|
|
d383c0cb43 | ||
|
|
28091c2101 | ||
|
|
5c8e12558d | ||
|
|
2b14036126 | ||
|
|
f2150285a4 | ||
|
|
e4ca511ec8 | ||
|
|
9fafe7b2b9 | ||
|
|
6335cb5b3a | ||
|
|
872605a5c5 | ||
|
|
ce15ffae6a | ||
|
|
ea83eed9ba | ||
|
|
2b4ba203f7 | ||
|
|
2ceb807da2 | ||
|
|
ae0c3382dd | ||
|
|
c485e7ab59 | ||
|
|
0d568daacb | ||
|
|
04f765b838 | ||
|
|
c73cec5ac1 | ||
|
|
f1401a6dff | ||
|
|
deffc65693 | ||
|
|
ba0057c077 | ||
|
|
02ebb15c4a | ||
|
|
782df1db10 | ||
|
|
b3ecce0545 | ||
|
|
b04d84f6b3 | ||
|
|
aa11f7c89b | ||
|
|
f4c8502e61 | ||
|
|
d84df25466 | ||
|
|
42df78d396 | ||
|
|
8b284f9ad0 | ||
|
|
35c9e6ab40 | ||
|
|
0870a45a69 | ||
|
|
8a338412fa | ||
|
|
f510940bde | ||
|
|
c8b0b6e6c1 | ||
|
|
1d1166ded6 | ||
|
|
637c61cffb | ||
|
|
65c95f9fb2 | ||
|
|
edcd171535 | ||
|
|
6f386628c2 | ||
|
|
a1001b29eb | ||
|
|
f70e18a5b3 | ||
|
|
0c646bb703 | ||
|
|
04b74d0446 | ||
|
|
075d9631f5 | ||
|
|
64940e9d0f | ||
|
|
747b5f87c2 | ||
|
|
6cd51ef3d0 | ||
|
|
43a7a89e93 | ||
|
|
9544b30821 | ||
|
|
423f497168 | ||
|
|
5ca13cc1f0 | ||
|
|
59204a5033 | ||
|
|
eeb7c96e0c | ||
|
|
f1fc4dfebc | ||
|
|
2324f19c85 | ||
|
|
76ed41f48a | ||
|
|
1017e5cee2 | ||
|
|
a30f42da4e | ||
|
|
c3044b1bf0 | ||
|
|
6567b73e1a | ||
|
|
bb6d97c18c | ||
|
|
19e28d8784 | ||
|
|
2a3c5f8353 | ||
|
|
a57259ec83 | ||
|
|
7dcc698ebf | ||
|
|
26534457f5 | ||
|
|
3095546851 | ||
|
|
b1e2e29222 | ||
|
|
84cfa76e00 | ||
|
|
d84bb02881 | ||
|
|
905a2114d7 | ||
|
|
8de1b4c4c2 | ||
|
|
878d0c8155 | ||
|
|
6032a051e9 | ||
|
|
fea639c1fc | ||
|
|
2f087d63af | ||
|
|
cc068f1b77 | ||
|
|
ac0a9d02bd |
42
.devcontainer/Dockerfile
Normal file
42
.devcontainer/Dockerfile
Normal file
@@ -0,0 +1,42 @@
|
||||
# This is a Dockerfile for Developer Container
|
||||
|
||||
# Use the Python base image
|
||||
ARG VARIANT="3.11-bullseye"
|
||||
FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} AS langchain-dev-base
|
||||
|
||||
USER vscode
|
||||
|
||||
# Define the version of Poetry to install (default is 1.4.2)
|
||||
# Define the directory of python virtual environment
|
||||
ARG PYTHON_VIRTUALENV_HOME=/home/vscode/langchain-py-env \
|
||||
POETRY_VERSION=1.4.2
|
||||
|
||||
ENV POETRY_VIRTUALENVS_IN_PROJECT=false \
|
||||
POETRY_NO_INTERACTION=true
|
||||
|
||||
# Create a Python virtual environment for Poetry and install it
|
||||
RUN python3 -m venv ${PYTHON_VIRTUALENV_HOME} && \
|
||||
$PYTHON_VIRTUALENV_HOME/bin/pip install --upgrade pip && \
|
||||
$PYTHON_VIRTUALENV_HOME/bin/pip install poetry==${POETRY_VERSION}
|
||||
|
||||
ENV PATH="$PYTHON_VIRTUALENV_HOME/bin:$PATH" \
|
||||
VIRTUAL_ENV=$PYTHON_VIRTUALENV_HOME
|
||||
|
||||
# Setup for bash
|
||||
RUN poetry completions bash >> /home/vscode/.bash_completion && \
|
||||
echo "export PATH=$PYTHON_VIRTUALENV_HOME/bin:$PATH" >> ~/.bashrc
|
||||
|
||||
# Set the working directory for the app
|
||||
WORKDIR /workspaces/langchain
|
||||
|
||||
# Use a multi-stage build to install dependencies
|
||||
FROM langchain-dev-base AS langchain-dev-dependencies
|
||||
|
||||
ARG PYTHON_VIRTUALENV_HOME
|
||||
|
||||
# Copy only the dependency files for installation
|
||||
COPY pyproject.toml poetry.lock poetry.toml ./
|
||||
|
||||
# Install the Poetry dependencies (this layer will be cached as long as the dependencies don't change)
|
||||
RUN poetry install --no-interaction --no-ansi --with dev,test,docs
|
||||
|
||||
33
.devcontainer/devcontainer.json
Normal file
33
.devcontainer/devcontainer.json
Normal file
@@ -0,0 +1,33 @@
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile
|
||||
{
|
||||
"dockerComposeFile": "./docker-compose.yaml",
|
||||
"service": "langchain",
|
||||
"workspaceFolder": "/workspaces/langchain",
|
||||
"name": "langchain",
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"ms-python.python"
|
||||
],
|
||||
"settings": {
|
||||
"python.defaultInterpreterPath": "/home/vscode/langchain-py-env/bin/python3.11"
|
||||
}
|
||||
}
|
||||
|
||||
},
|
||||
|
||||
// Features to add to the dev container. More info: https://containers.dev/features.
|
||||
"features": {},
|
||||
|
||||
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
||||
// "forwardPorts": [],
|
||||
|
||||
// Uncomment the next line to run commands after the container is created.
|
||||
// "postCreateCommand": "cat /etc/os-release",
|
||||
|
||||
// Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
|
||||
// "remoteUser": "devcontainer"
|
||||
"remoteUser": "vscode",
|
||||
"overrideCommand": true
|
||||
}
|
||||
31
.devcontainer/docker-compose.yaml
Normal file
31
.devcontainer/docker-compose.yaml
Normal file
@@ -0,0 +1,31 @@
|
||||
version: '3'
|
||||
services:
|
||||
langchain:
|
||||
build:
|
||||
dockerfile: .devcontainer/Dockerfile
|
||||
context: ../
|
||||
volumes:
|
||||
- ../:/workspaces/langchain
|
||||
networks:
|
||||
- langchain-network
|
||||
# environment:
|
||||
# MONGO_ROOT_USERNAME: root
|
||||
# MONGO_ROOT_PASSWORD: example123
|
||||
# depends_on:
|
||||
# - mongo
|
||||
# mongo:
|
||||
# image: mongo
|
||||
# restart: unless-stopped
|
||||
# environment:
|
||||
# MONGO_INITDB_ROOT_USERNAME: root
|
||||
# MONGO_INITDB_ROOT_PASSWORD: example123
|
||||
# ports:
|
||||
# - "27017:27017"
|
||||
# networks:
|
||||
# - langchain-network
|
||||
|
||||
networks:
|
||||
langchain-network:
|
||||
driver: bridge
|
||||
|
||||
|
||||
106
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
Normal file
106
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
Normal file
@@ -0,0 +1,106 @@
|
||||
name: "\U0001F41B Bug Report"
|
||||
description: Submit a bug report to help us improve LangChain
|
||||
labels: ["02 Bug Report"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: >
|
||||
Thank you for taking the time to file a bug report. Before creating a new
|
||||
issue, please make sure to take a few moments to check the issue tracker
|
||||
for existing issues about the bug.
|
||||
|
||||
- type: textarea
|
||||
id: system-info
|
||||
attributes:
|
||||
label: System Info
|
||||
description: Please share your system info with us.
|
||||
placeholder: LangChain version, platform, python version, ...
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
id: who-can-help
|
||||
attributes:
|
||||
label: Who can help?
|
||||
description: |
|
||||
Your issue will be replied to more quickly if you can figure out the right person to tag with @
|
||||
If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**.
|
||||
|
||||
The core maintainers strive to read all issues, but tagging them will help them prioritize.
|
||||
|
||||
Please tag fewer than 3 people.
|
||||
|
||||
@hwchase17 - project lead
|
||||
|
||||
Tracing / Callbacks
|
||||
- @agola11
|
||||
|
||||
Async
|
||||
- @agola11
|
||||
|
||||
DataLoader Abstractions
|
||||
- @eyurtsev
|
||||
|
||||
LLM/Chat Wrappers
|
||||
- @hwchase17
|
||||
- @agola11
|
||||
|
||||
Tools / Toolkits
|
||||
- @vowelparrot
|
||||
|
||||
placeholder: "@Username ..."
|
||||
|
||||
- type: checkboxes
|
||||
id: information-scripts-examples
|
||||
attributes:
|
||||
label: Information
|
||||
description: "The problem arises when using:"
|
||||
options:
|
||||
- label: "The official example notebooks/scripts"
|
||||
- label: "My own modified scripts"
|
||||
|
||||
- type: checkboxes
|
||||
id: related-components
|
||||
attributes:
|
||||
label: Related Components
|
||||
description: "Select the components related to the issue (if applicable):"
|
||||
options:
|
||||
- label: "LLMs/Chat Models"
|
||||
- label: "Embedding Models"
|
||||
- label: "Prompts / Prompt Templates / Prompt Selectors"
|
||||
- label: "Output Parsers"
|
||||
- label: "Document Loaders"
|
||||
- label: "Vector Stores / Retrievers"
|
||||
- label: "Memory"
|
||||
- label: "Agents / Agent Executors"
|
||||
- label: "Tools / Toolkits"
|
||||
- label: "Chains"
|
||||
- label: "Callbacks/Tracing"
|
||||
- label: "Async"
|
||||
|
||||
- type: textarea
|
||||
id: reproduction
|
||||
validations:
|
||||
required: true
|
||||
attributes:
|
||||
label: Reproduction
|
||||
description: |
|
||||
Please provide a [code sample](https://stackoverflow.com/help/minimal-reproducible-example) that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
|
||||
If you have code snippets, error messages, stack traces please provide them here as well.
|
||||
Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
|
||||
Avoid screenshots when possible, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
|
||||
|
||||
placeholder: |
|
||||
Steps to reproduce the behavior:
|
||||
|
||||
1.
|
||||
2.
|
||||
3.
|
||||
|
||||
- type: textarea
|
||||
id: expected-behavior
|
||||
validations:
|
||||
required: true
|
||||
attributes:
|
||||
label: Expected behavior
|
||||
description: "A clear and concise description of what you would expect to happen."
|
||||
6
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
6
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
blank_issues_enabled: true
|
||||
version: 2.1
|
||||
contact_links:
|
||||
- name: Discord
|
||||
url: https://discord.gg/6adMQxSpJS
|
||||
about: General community discussions
|
||||
19
.github/ISSUE_TEMPLATE/documentation.yml
vendored
Normal file
19
.github/ISSUE_TEMPLATE/documentation.yml
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
name: Documentation
|
||||
description: Report an issue related to the LangChain documentation.
|
||||
title: "DOC: <Please write a comprehensive title after the 'DOC: ' prefix>"
|
||||
labels: [03 - Documentation]
|
||||
|
||||
body:
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: "Issue with current documentation:"
|
||||
description: >
|
||||
Please make sure to leave a reference to the document/code you're
|
||||
referring to.
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: "Idea or request for content:"
|
||||
description: >
|
||||
Please describe as clearly as possible what topics you think are missing
|
||||
from the current documentation.
|
||||
30
.github/ISSUE_TEMPLATE/feature-request.yml
vendored
Normal file
30
.github/ISSUE_TEMPLATE/feature-request.yml
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
name: "\U0001F680 Feature request"
|
||||
description: Submit a proposal/request for a new LangChain feature
|
||||
labels: ["02 Feature Request"]
|
||||
body:
|
||||
- type: textarea
|
||||
id: feature-request
|
||||
validations:
|
||||
required: true
|
||||
attributes:
|
||||
label: Feature request
|
||||
description: |
|
||||
A clear and concise description of the feature proposal. Please provide links to any relevant GitHub repos, papers, or other resources if relevant.
|
||||
|
||||
- type: textarea
|
||||
id: motivation
|
||||
validations:
|
||||
required: true
|
||||
attributes:
|
||||
label: Motivation
|
||||
description: |
|
||||
Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too.
|
||||
|
||||
- type: textarea
|
||||
id: contribution
|
||||
validations:
|
||||
required: true
|
||||
attributes:
|
||||
label: Your contribution
|
||||
description: |
|
||||
Is there any way that you could help, e.g. by submitting a PR? Make sure to read the CONTRIBUTING.MD [readme](https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md)
|
||||
18
.github/ISSUE_TEMPLATE/other.yml
vendored
Normal file
18
.github/ISSUE_TEMPLATE/other.yml
vendored
Normal file
@@ -0,0 +1,18 @@
|
||||
name: Other Issue
|
||||
description: Raise an issue that wouldn't be covered by the other templates.
|
||||
title: "Issue: <Please write a comprehensive title after the 'Issue: ' prefix>"
|
||||
labels: [04 - Other]
|
||||
|
||||
body:
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: "Issue you'd like to raise."
|
||||
description: >
|
||||
Please describe the issue you'd like to raise as clearly as possible.
|
||||
Make sure to include any relevant links or references.
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: "Suggestion:"
|
||||
description: >
|
||||
Please outline a suggestion to improve the issue here.
|
||||
42
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
42
.github/PULL_REQUEST_TEMPLATE.md
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
# Your PR Title (What it does)
|
||||
|
||||
<!--
|
||||
Thank you for contributing to LangChain! Your PR will appear in our next release under the title you set. Please make sure it highlights your valuable contribution.
|
||||
|
||||
Replace this with a description of the change, the issue it fixes (if applicable), and relevant context. List any dependencies required for this change.
|
||||
|
||||
After you're done, someone will review your PR. They may suggest improvements. If no one reviews your PR within a few days, feel free to @-mention the same people again, as notifications can get lost.
|
||||
-->
|
||||
|
||||
<!-- Remove if not applicable -->
|
||||
|
||||
Fixes # (issue)
|
||||
|
||||
## Before submitting
|
||||
|
||||
<!-- If you're adding a new integration, include an integration test and an example notebook showing its use! -->
|
||||
|
||||
## Who can review?
|
||||
|
||||
Community members can review the PR once tests pass. Tag maintainers/contributors who might be interested:
|
||||
|
||||
<!-- For a quicker response, figure out the right person to tag with @
|
||||
|
||||
@hwchase17 - project lead
|
||||
|
||||
Tracing / Callbacks
|
||||
- @agola11
|
||||
|
||||
Async
|
||||
- @agola11
|
||||
|
||||
DataLoader Abstractions
|
||||
- @eyurtsev
|
||||
|
||||
LLM/Chat Wrappers
|
||||
- @hwchase17
|
||||
- @agola11
|
||||
|
||||
Tools / Toolkits
|
||||
- @vowelparrot
|
||||
-->
|
||||
64
.github/actions/poetry_setup/action.yml
vendored
Normal file
64
.github/actions/poetry_setup/action.yml
vendored
Normal file
@@ -0,0 +1,64 @@
|
||||
# An action for setting up poetry install with caching.
|
||||
# Using a custom action since the default action does not
|
||||
# take poetry install groups into account.
|
||||
# Action code from:
|
||||
# https://github.com/actions/setup-python/issues/505#issuecomment-1273013236
|
||||
name: poetry-install-with-caching
|
||||
description: Poetry install with support for caching of dependency groups.
|
||||
|
||||
inputs:
|
||||
python-version:
|
||||
description: Python version, supporting MAJOR.MINOR only
|
||||
required: true
|
||||
|
||||
poetry-version:
|
||||
description: Poetry version
|
||||
required: true
|
||||
|
||||
install-command:
|
||||
description: Command run for installing dependencies
|
||||
required: false
|
||||
default: poetry install
|
||||
|
||||
cache-key:
|
||||
description: Cache key to use for manual handling of caching
|
||||
required: true
|
||||
|
||||
working-directory:
|
||||
description: Directory to run install-command in
|
||||
required: false
|
||||
default: ""
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
steps:
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
|
||||
- uses: actions/cache@v3
|
||||
id: cache-pip
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pip
|
||||
key: pip-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}
|
||||
|
||||
- run: pipx install poetry==${{ inputs.poetry-version }} --python python${{ inputs.python-version }}
|
||||
shell: bash
|
||||
|
||||
- uses: actions/cache@v3
|
||||
id: cache-poetry
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pypoetry/virtualenvs
|
||||
~/.cache/pypoetry/cache
|
||||
~/.cache/pypoetry/artifacts
|
||||
key: poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles('poetry.lock') }}
|
||||
|
||||
- run: ${{ inputs.install-command }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
shell: bash
|
||||
10
.github/workflows/test.yml
vendored
10
.github/workflows/test.yml
vendored
@@ -20,15 +20,13 @@ jobs:
|
||||
- "3.11"
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install poetry
|
||||
run: pipx install poetry==$POETRY_VERSION
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: "poetry"
|
||||
- name: Install dependencies
|
||||
run: poetry install
|
||||
poetry-version: "1.4.2"
|
||||
cache-key: "main"
|
||||
install-command: "poetry install"
|
||||
- name: Run unit tests
|
||||
run: |
|
||||
make test
|
||||
|
||||
33
.github/workflows/test_all.yml
vendored
Normal file
33
.github/workflows/test_all.yml
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
# Run unit tests with all optional packages installed.
|
||||
name: test_all
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
pull_request:
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.4.2"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
poetry-version: "1.4.2"
|
||||
cache-key: "extended"
|
||||
install-command: "poetry install -E extended_testing"
|
||||
- name: Run unit tests
|
||||
run: |
|
||||
make test
|
||||
26
.readthedocs.yaml
Normal file
26
.readthedocs.yaml
Normal file
@@ -0,0 +1,26 @@
|
||||
# Read the Docs configuration file
|
||||
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
|
||||
|
||||
# Required
|
||||
version: 2
|
||||
|
||||
# Set the version of Python and other tools you might need
|
||||
build:
|
||||
os: ubuntu-22.04
|
||||
tools:
|
||||
python: "3.11"
|
||||
|
||||
# Build documentation in the docs/ directory with Sphinx
|
||||
sphinx:
|
||||
configuration: docs/conf.py
|
||||
|
||||
# If using Sphinx, optionally build your docs in additional formats such as PDF
|
||||
# formats:
|
||||
# - pdf
|
||||
|
||||
# Optionally declare the Python requirements required to build your docs
|
||||
python:
|
||||
install:
|
||||
- requirements: docs/requirements.txt
|
||||
- method: pip
|
||||
path: .
|
||||
27
Makefile
27
Makefile
@@ -32,11 +32,13 @@ lint lint_diff:
|
||||
poetry run black $(PYTHON_FILES) --check
|
||||
poetry run ruff .
|
||||
|
||||
TEST_FILE ?= tests/unit_tests/
|
||||
|
||||
test:
|
||||
poetry run pytest tests/unit_tests
|
||||
poetry run pytest $(TEST_FILE)
|
||||
|
||||
tests:
|
||||
poetry run pytest tests/unit_tests
|
||||
poetry run pytest $(TEST_FILE)
|
||||
|
||||
test_watch:
|
||||
poetry run ptw --now . -- tests/unit_tests
|
||||
@@ -50,13 +52,14 @@ docker_tests:
|
||||
|
||||
help:
|
||||
@echo '----'
|
||||
@echo 'coverage - run unit tests and generate coverage report'
|
||||
@echo 'docs_build - build the documentation'
|
||||
@echo 'docs_clean - clean the documentation build artifacts'
|
||||
@echo 'docs_linkcheck - run linkchecker on the documentation'
|
||||
@echo 'format - run code formatters'
|
||||
@echo 'lint - run linters'
|
||||
@echo 'test - run unit tests'
|
||||
@echo 'test_watch - run unit tests in watch mode'
|
||||
@echo 'integration_tests - run integration tests'
|
||||
@echo 'docker_tests - run unit tests in docker'
|
||||
@echo 'coverage - run unit tests and generate coverage report'
|
||||
@echo 'docs_build - build the documentation'
|
||||
@echo 'docs_clean - clean the documentation build artifacts'
|
||||
@echo 'docs_linkcheck - run linkchecker on the documentation'
|
||||
@echo 'format - run code formatters'
|
||||
@echo 'lint - run linters'
|
||||
@echo 'test - run unit tests'
|
||||
@echo 'test TEST_FILE=<test_file> - run all tests in file'
|
||||
@echo 'test_watch - run unit tests in watch mode'
|
||||
@echo 'integration_tests - run integration tests'
|
||||
@echo 'docker_tests - run unit tests in docker'
|
||||
|
||||
12
README.md
12
README.md
@@ -2,7 +2,17 @@
|
||||
|
||||
⚡ Building applications with LLMs through composability ⚡
|
||||
|
||||
[](https://github.com/hwchase17/langchain/actions/workflows/lint.yml) [](https://github.com/hwchase17/langchain/actions/workflows/test.yml) [](https://github.com/hwchase17/langchain/actions/workflows/linkcheck.yml) [](https://pepy.tech/project/langchain) [](https://opensource.org/licenses/MIT) [](https://twitter.com/langchainai) [](https://discord.gg/6adMQxSpJS)
|
||||
[](https://github.com/hwchase17/langchain/actions/workflows/lint.yml)
|
||||
[](https://github.com/hwchase17/langchain/actions/workflows/test.yml)
|
||||
[](https://github.com/hwchase17/langchain/actions/workflows/linkcheck.yml)
|
||||
[](https://pepy.tech/project/langchain)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://twitter.com/langchainai)
|
||||
[](https://discord.gg/6adMQxSpJS)
|
||||
[](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/hwchase17/langchain)
|
||||
[](https://codespaces.new/hwchase17/langchain)
|
||||
[](https://star-history.com/#hwchase17/langchain)
|
||||
|
||||
|
||||
Looking for the JS/TS version? Check out [LangChain.js](https://github.com/hwchase17/langchainjs).
|
||||
|
||||
|
||||
2
docs/_static/js/mendablesearch.js
vendored
2
docs/_static/js/mendablesearch.js
vendored
@@ -52,7 +52,7 @@ document.addEventListener('DOMContentLoaded', () => {
|
||||
|
||||
loadScript('https://unpkg.com/react@17/umd/react.production.min.js', () => {
|
||||
loadScript('https://unpkg.com/react-dom@17/umd/react-dom.production.min.js', () => {
|
||||
loadScript('https://unpkg.com/@mendable/search@0.0.83/dist/umd/mendable.min.js', initializeMendable);
|
||||
loadScript('https://unpkg.com/@mendable/search@0.0.93/dist/umd/mendable.min.js', initializeMendable);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -10,12 +10,24 @@ but potentially an unknown chain that depends on the user's input.
|
||||
In these types of chains, there is an “agent” which has access to a suite of tools.
|
||||
Depending on the user input, the agent can then decide which, if any, of these tools to call.
|
||||
|
||||
At the moment, there are two main types of agents:
|
||||
|
||||
1. "Action Agents": these agents decide an action to take and take that action one step at a time
|
||||
2. "Plan-and-Execute Agents": these agents first decide a plan of actions to take, and then execute those actions one at a time.
|
||||
|
||||
When should you use each one? Action Agents are more conventional, and good for small tasks.
|
||||
For more complex or long-running tasks, the initial planning step helps to maintain long-term objectives and focus. However, that comes at the expense of generally more calls and higher latency.
|
||||
These two agents are also not mutually exclusive - in fact, it is often best to have an Action Agent be in charge of the execution for the Plan-and-Execute Agent.
|
||||
|
||||
Action Agents
|
||||
-------------
|
||||
|
||||
High level pseudocode of agents looks something like:
|
||||
|
||||
- Some user input is received
|
||||
- The `agent` decides which `tool` - if any - to use, and what the input to that tool should be
|
||||
- That `tool` is then called with that `tool input`, and an `observation` is recorded (this is just the output of calling that tool with that tool input.
|
||||
- That history of `tool`, `tool input`, and `observation` is passed back into the `agent`, and it decides what steps to take next
|
||||
- That `tool` is then called with that `tool input`, and an `observation` is recorded (this is just the output of calling that tool with that tool input)
|
||||
- That history of `tool`, `tool input`, and `observation` is passed back into the `agent`, and it decides what step to take next
|
||||
- This is repeated until the `agent` decides it no longer needs to use a `tool`, and then it responds directly to the user.
|
||||
|
||||
The different abstractions involved in agents are as follows:
|
||||
@@ -69,8 +81,7 @@ In this section we go over the Agent Executor class, which is responsible for ca
|
||||
the agent and tools in a loop. We go over different ways to customize this, and options you
|
||||
can use for more control.
|
||||
|
||||
Go Deeper
|
||||
---------
|
||||
**Go Deeper**
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
@@ -79,3 +90,23 @@ Go Deeper
|
||||
./agents/agents.rst
|
||||
./agents/toolkits.rst
|
||||
./agents/agent_executors.rst
|
||||
|
||||
Plan-and-Execute Agents
|
||||
-----------------------
|
||||
|
||||
High level pseudocode of agents looks something like:
|
||||
|
||||
- Some user input is received
|
||||
- The planner lists out the steps to take
|
||||
- The executor goes through the list of steps, executing them
|
||||
|
||||
The most typical implementation is to have the planner be a language model,
|
||||
and the executor be an action agent.
|
||||
|
||||
**Go Deeper**
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
./agents/plan_and_execute.ipynb
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 1,
|
||||
"id": "9af9734e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -100,13 +100,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 12,
|
||||
"id": "339b1bb8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set up the base template\n",
|
||||
"template = \"\"\"Answer the following questions as best you can, but speaking as a pirate might speak. You have access to the following tools:\n",
|
||||
"template = \"\"\"Complete the objective as best you can. You have access to the following tools:\n",
|
||||
"\n",
|
||||
"{tools}\n",
|
||||
"\n",
|
||||
@@ -121,7 +121,11 @@
|
||||
"Thought: I now know the final answer\n",
|
||||
"Final Answer: the final answer to the original input question\n",
|
||||
"\n",
|
||||
"Begin! Remember to speak as a pirate when giving your final answer. Use lots of \"Arg\"s\n",
|
||||
"These were previous tasks you completed:\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Begin!\n",
|
||||
"\n",
|
||||
"Question: {input}\n",
|
||||
"{agent_scratchpad}\"\"\""
|
||||
@@ -129,7 +133,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 13,
|
||||
"id": "fd969d31",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -161,7 +165,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 14,
|
||||
"id": "798ef9fb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -189,7 +193,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 15,
|
||||
"id": "7c6fe0d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -218,7 +222,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 16,
|
||||
"id": "d278706a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -238,7 +242,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 17,
|
||||
"id": "f9d4c374",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -270,7 +274,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 18,
|
||||
"id": "9b1cc2a2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -281,7 +285,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 19,
|
||||
"id": "e4f5092f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -307,7 +311,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 20,
|
||||
"id": "490604e9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -317,7 +321,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 21,
|
||||
"id": "653b1617",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -328,16 +332,13 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: Wot year be it now? That be important to know the answer.\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I should use a reliable search engine to get accurate information.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"current population canada 2023\"\u001b[0m\n",
|
||||
"Action Input: \"Leo DiCaprio girlfriend\"\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[36;1m\u001b[1;3m38,649,283\u001b[0m\u001b[32;1m\u001b[1;3mAhoy! That be the correct year, but the answer be in regular numbers. 'Tis time to translate to pirate speak.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"38,649,283 in pirate speak\"\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[36;1m\u001b[1;3mBrush up on your “Pirate Talk” with these helpful pirate phrases. Aaaarrrrgggghhhh! Pirate catch phrase of grumbling or disgust. Ahoy! Hello! Ahoy, Matey, Hello ...\u001b[0m\u001b[32;1m\u001b[1;3mThat be not helpful, I'll just do the translation meself.\n",
|
||||
"Final Answer: Arrrr, thar be 38,649,283 scallywags in Canada as of 2023.\u001b[0m\n",
|
||||
"Observation:\u001b[36;1m\u001b[1;3mHe went on to date Gisele Bündchen, Bar Refaeli, Blake Lively, Toni Garrn and Nina Agdal, among others, before finally settling down with current girlfriend Camila Morrone, who is 23 years his junior.\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mI have found the answer to the question.\n",
|
||||
"Final Answer: Leo DiCaprio's current girlfriend is Camila Morrone.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -345,16 +346,16 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Arrrr, thar be 38,649,283 scallywags in Canada as of 2023.'"
|
||||
"\"Leo DiCaprio's current girlfriend is Camila Morrone.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.run(\"How many people live in canada as of 2023?\")"
|
||||
"agent_executor.run(\"Search for Leo DiCaprio's girlfriend on the internet.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
362
docs/modules/agents/plan_and_execute.ipynb
Normal file
362
docs/modules/agents/plan_and_execute.ipynb
Normal file
@@ -0,0 +1,362 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "406483c4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Plan and Execute\n",
|
||||
"\n",
|
||||
"Plan and execute agents accomplish an objective by first planning what to do, then executing the sub tasks. This idea is largely inspired by [BabyAGI](https://github.com/yoheinakajima/babyagi) and the [\"Plan-and-Solve\" paper](https://arxiv.org/abs/2305.04091).\n",
|
||||
"\n",
|
||||
"The planning is almost always done by an LLM.\n",
|
||||
"\n",
|
||||
"The execution is usually done by a separate agent (equipped with tools)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "91192118",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Imports"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "6ccd1dc5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain import SerpAPIWrapper\n",
|
||||
"from langchain.agents.tools import Tool\n",
|
||||
"from langchain import LLMMathChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0b10d200",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "3c00f724",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"llm_math_chain = LLMMathChain.from_llm(llm=llm, verbose=True)\n",
|
||||
"tools = [\n",
|
||||
" Tool(\n",
|
||||
" name = \"Search\",\n",
|
||||
" func=search.run,\n",
|
||||
" description=\"useful for when you need to answer questions about current events\"\n",
|
||||
" ),\n",
|
||||
" Tool(\n",
|
||||
" name=\"Calculator\",\n",
|
||||
" func=llm_math_chain.run,\n",
|
||||
" description=\"useful for when you need to answer questions about math\"\n",
|
||||
" ),\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ce38ae84",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Planner, Executor, and Agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "0ab2cadd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = ChatOpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "7b2419f2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"planner = load_chat_planner(model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "ed9f518b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"executor = load_agent_executor(model, tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "36943178",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = PlanAndExecute(planner=planner, executer=executor, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8be9f1bd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Run Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "4891062e",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new PlanAndExecute chain...\u001b[0m\n",
|
||||
"steps=[Step(value=\"Search for Leo DiCaprio's girlfriend on the internet.\"), Step(value='Find her current age.'), Step(value='Raise her current age to the 0.43 power using a calculator or programming language.'), Step(value='Output the result.'), Step(value=\"Given the above steps taken, respond to the user's original question.\\n\\n\")]\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mAction:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Search\",\n",
|
||||
" \"action_input\": \"Who is Leo DiCaprio's girlfriend?\"\n",
|
||||
"}\n",
|
||||
"``` \n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mDiCaprio broke up with girlfriend Camila Morrone, 25, in the summer of 2022, after dating for four years. He's since been linked to another famous supermodel – Gigi Hadid. The power couple were first supposedly an item in September after being spotted getting cozy during a party at New York Fashion Week.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mBased on the previous observation, I can provide the answer to the current objective. \n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"Leo DiCaprio is currently linked to Gigi Hadid.\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"*****\n",
|
||||
"\n",
|
||||
"Step: Search for Leo DiCaprio's girlfriend on the internet.\n",
|
||||
"\n",
|
||||
"Response: Leo DiCaprio is currently linked to Gigi Hadid.\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mAction:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Search\",\n",
|
||||
" \"action_input\": \"What is Gigi Hadid's current age?\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m28 years\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mPrevious steps: steps=[(Step(value=\"Search for Leo DiCaprio's girlfriend on the internet.\"), StepResponse(response='Leo DiCaprio is currently linked to Gigi Hadid.'))]\n",
|
||||
"\n",
|
||||
"Current objective: value='Find her current age.'\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Search\",\n",
|
||||
" \"action_input\": \"What is Gigi Hadid's current age?\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m28 years\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mBased on my search, Gigi Hadid's current age is 26 years old. \n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"Gigi Hadid's current age is 26 years old.\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"*****\n",
|
||||
"\n",
|
||||
"Step: Find her current age.\n",
|
||||
"\n",
|
||||
"Response: Gigi Hadid's current age is 26 years old.\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mAction:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Calculator\",\n",
|
||||
" \"action_input\": \"26 ** 0.43\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMMathChain chain...\u001b[0m\n",
|
||||
"26 ** 0.43\u001b[32;1m\u001b[1;3m\n",
|
||||
"```text\n",
|
||||
"26 ** 0.43\n",
|
||||
"```\n",
|
||||
"...numexpr.evaluate(\"26 ** 0.43\")...\n",
|
||||
"\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m4.059182145592686\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 4.059182145592686\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe current objective is to raise Gigi Hadid's age to the 0.43 power. \n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Calculator\",\n",
|
||||
" \"action_input\": \"26 ** 0.43\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMMathChain chain...\u001b[0m\n",
|
||||
"26 ** 0.43\u001b[32;1m\u001b[1;3m\n",
|
||||
"```text\n",
|
||||
"26 ** 0.43\n",
|
||||
"```\n",
|
||||
"...numexpr.evaluate(\"26 ** 0.43\")...\n",
|
||||
"\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m4.059182145592686\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 4.059182145592686\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe answer to the current objective is 4.059182145592686.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"Gigi Hadid's age raised to the 0.43 power is approximately 4.059 years.\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"*****\n",
|
||||
"\n",
|
||||
"Step: Raise her current age to the 0.43 power using a calculator or programming language.\n",
|
||||
"\n",
|
||||
"Response: Gigi Hadid's age raised to the 0.43 power is approximately 4.059 years.\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mAction:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"Gigi Hadid's age raised to the 0.43 power is approximately 4.059 years.\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"*****\n",
|
||||
"\n",
|
||||
"Step: Output the result.\n",
|
||||
"\n",
|
||||
"Response: Gigi Hadid's age raised to the 0.43 power is approximately 4.059 years.\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mAction:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"Gigi Hadid's age raised to the 0.43 power is approximately 4.059 years.\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"*****\n",
|
||||
"\n",
|
||||
"Step: Given the above steps taken, respond to the user's original question.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Response: Gigi Hadid's age raised to the 0.43 power is approximately 4.059 years.\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Gigi Hadid's age raised to the 0.43 power is approximately 4.059 years.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "aa3ec998",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -116,7 +116,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"how many people have more than 3 sibligngs\")"
|
||||
"agent.run(\"how many people have more than 3 siblings\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
232
docs/modules/agents/toolkits/examples/gmail.ipynb
Normal file
232
docs/modules/agents/toolkits/examples/gmail.ipynb
Normal file
@@ -0,0 +1,232 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Gmail Toolkit\n",
|
||||
"\n",
|
||||
"This notebook walks through connecting a LangChain email agent to the Gmail API.\n",
|
||||
"\n",
|
||||
"To use this toolkit, you will need to set up your credentials as explained in the [Gmail API docs](https://developers.google.com/gmail/api/quickstart/python#authorize_credentials_for_a_desktop_application). Once you've downloaded the `credentials.json` file, you can start using the Gmail API. Once this is done, we'll install the required libraries."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install --upgrade google-api-python-client > /dev/null\n",
|
||||
"!pip install --upgrade google-auth-oauthlib > /dev/null\n",
|
||||
"!pip install --upgrade google-auth-httplib2 > /dev/null\n",
|
||||
"!pip install beautifulsoup4 > /dev/null # This is optional but is useful for parsing HTML messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create the Toolkit\n",
|
||||
"\n",
|
||||
"By default the toolkit reads the local `credentials.json` file. You can also manually provide a `Credentials` object."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents.agent_toolkits import GmailToolkit\n",
|
||||
"\n",
|
||||
"toolkit = GmailToolkit() "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customizing Authentication\n",
|
||||
"\n",
|
||||
"Behind the scenes, a `googleapi` resource is created using the following methods. \n",
|
||||
"You can manually build a `googleapi` resource for more auth control. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.tools.gmail.utils import build_resource_service, get_gmail_credentials\n",
|
||||
"\n",
|
||||
"# Can review scopes here https://developers.google.com/gmail/api/auth/scopes\n",
|
||||
"# For instance, readonly scope is 'https://www.googleapis.com/auth/gmail.readonly'\n",
|
||||
"credentials = get_gmail_credentials(\n",
|
||||
" token_file='token.json',\n",
|
||||
" scopes=[\"https://mail.google.com/\"],\n",
|
||||
" client_secrets_file=\"credentials.json\",\n",
|
||||
")\n",
|
||||
"api_resource = build_resource_service(credentials=credentials)\n",
|
||||
"toolkit = GmailToolkit(api_resource=api_resource)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[GmailCreateDraft(name='create_gmail_draft', description='Use this tool to create a draft email with the provided message fields.', args_schema=<class 'langchain.tools.gmail.create_draft.CreateDraftSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailSendMessage(name='send_gmail_message', description='Use this tool to send email messages. The input is the message, recipents', args_schema=None, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailSearch(name='search_gmail', description=('Use this tool to search for email messages or threads. The input must be a valid Gmail query. The output is a JSON list of the requested resource.',), args_schema=<class 'langchain.tools.gmail.search.SearchArgsSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailGetMessage(name='get_gmail_message', description='Use this tool to fetch an email by message ID. Returns the thread ID, snipet, body, subject, and sender.', args_schema=<class 'langchain.tools.gmail.get_message.SearchArgsSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailGetThread(name='get_gmail_thread', description=('Use this tool to search for email messages. The input must be a valid Gmail query. The output is a JSON list of messages.',), args_schema=<class 'langchain.tools.gmail.get_thread.GetThreadSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an Agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import OpenAI\n",
|
||||
"from langchain.agents import initialize_agent, AgentType"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools=toolkit.get_tools(),\n",
|
||||
" llm=llm,\n",
|
||||
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to load default session, using empty session: 0\n",
|
||||
"WARNING:root:Failed to persist run: {\"detail\":\"Not Found\"}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'I have created a draft email for you to edit. The draft Id is r5681294731961864018.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Create a gmail draft for me to edit of a letter from the perspective of a sentient parrot\"\n",
|
||||
" \" who is looking to collaborate on some research with her\"\n",
|
||||
" \" estranged friend, a cat. Under no circumstances may you send the message, however.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to load default session, using empty session: 0\n",
|
||||
"WARNING:root:Failed to persist run: {\"detail\":\"Not Found\"}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"The latest email in your drafts is from hopefulparrot@gmail.com with the subject 'Collaboration Opportunity'. The body of the email reads: 'Dear [Friend], I hope this letter finds you well. I am writing to you in the hopes of rekindling our friendship and to discuss the possibility of collaborating on some research together. I know that we have had our differences in the past, but I believe that we can put them aside and work together for the greater good. I look forward to hearing from you. Sincerely, [Parrot]'\""
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Could you search in my drafts for the latest email?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -118,7 +118,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"how many people have more than 3 sibligngs\")"
|
||||
"agent.run(\"how many people have more than 3 siblings\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -151,7 +151,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"how many people have more than 3 sibligngs\")"
|
||||
"agent.run(\"how many people have more than 3 siblings\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -68,7 +68,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdin",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Zhu\n"
|
||||
@@ -98,7 +98,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_chain.run(\"When's my friend Eric's surname?\")\n",
|
||||
"agent_chain.run(\"What's my friend Eric's surname?\")\n",
|
||||
"# Answer with 'Zhu'"
|
||||
]
|
||||
},
|
||||
@@ -196,7 +196,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdin",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" vini\n",
|
||||
@@ -222,7 +222,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdin",
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" oh who said it \n",
|
||||
|
||||
File diff suppressed because one or more lines are too long
125
docs/modules/agents/tools/examples/youtube.ipynb
Normal file
125
docs/modules/agents/tools/examples/youtube.ipynb
Normal file
@@ -0,0 +1,125 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "acb64858",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# YouTubeSearchTool\n",
|
||||
"\n",
|
||||
"This notebook shows how to use a tool to search YouTube.\n",
|
||||
"\n",
|
||||
"Adapted from [https://github.com/venuv/langchain_yt_tools](https://github.com/venuv/langchain_yt_tools)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9bb15d4a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#! pip install youtube_search"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "cc1c83e2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.tools import YouTubeSearchTool"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "becb262b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tool = YouTubeSearchTool()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "6bbc4211",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"['/watch?v=VcVfceTsD0A&pp=ygUMbGV4IGZyaWVkbWFu', '/watch?v=gPfriiHBBek&pp=ygUMbGV4IGZyaWVkbWFu']\""
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tool.run(\"lex friedman\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7f772147",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also specify the number of results that are returned"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "682fdb33",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"['/watch?v=VcVfceTsD0A&pp=ygUMbGV4IGZyaWVkbWFu', '/watch?v=YVJ8gTnDC4Y&pp=ygUMbGV4IGZyaWVkbWFu', '/watch?v=Udh22kuLebg&pp=ygUMbGV4IGZyaWVkbWFu', '/watch?v=gPfriiHBBek&pp=ygUMbGV4IGZyaWVkbWFu', '/watch?v=L_Guz73e6fw&pp=ygUMbGV4IGZyaWVkbWFu']\""
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tool.run(\"lex friedman,5\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bb5e1659",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -47,13 +47,21 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "b89de9f3",
|
||||
"id": "d0b8856e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt_infos = [\n",
|
||||
" (\"physics\", \"Good for answering questions about physics\", physics_template),\n",
|
||||
" (\"math\", \"Good for answering math questions\", math_template)\n",
|
||||
" {\n",
|
||||
" \"name\": \"physics\", \n",
|
||||
" \"description\": \"Good for answering questions about physics\", \n",
|
||||
" \"prompt_template\": physics_template\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"math\", \n",
|
||||
" \"description\": \"Good for answering math questions\", \n",
|
||||
" \"prompt_template\": math_template\n",
|
||||
" }\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
@@ -64,7 +72,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = MultiPromptChain.from_prompts(OpenAI(), *zip(*prompt_infos), verbose=True)"
|
||||
"chain = MultiPromptChain.from_prompts(OpenAI(), prompt_infos, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -84,7 +92,7 @@
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Black body radiation is the emission of electromagnetic radiation from a body that is in thermal equilibrium with its environment. It is emitted by all objects regardless of their temperature, but the intensity and spectral distribution of the radiation depends on the temperature of the body. As the temperature increases, the intensity of the radiation also increases and the peak wavelength shifts to shorter wavelengths.\n"
|
||||
"Black body radiation is the emission of electromagnetic radiation from a body due to its temperature. It is a type of thermal radiation that is emitted from the surface of all objects that are at a temperature above absolute zero. It is a spectrum of radiation that is influenced by the temperature of the body and is independent of the composition of the emitting material.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -109,7 +117,13 @@
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"?\n",
|
||||
"\n",
|
||||
"The first prime number greater than 40 such that one plus the prime number is divisible by 3 is 43. To solve this, we first need to identify all of the prime numbers between 40 and 50. These are 41, 43, 47, and 49. We then need to check which of these, when added to 1, will be divisible by 3. The prime number that fits this criteria is 43. Therefore, the answer is 43.\n"
|
||||
"The first prime number greater than 40 such that one plus the prime number is divisible by 3 is 43. To solve this problem, we can break down the question into two parts: finding the first prime number greater than 40, and then finding a number that is divisible by 3. \n",
|
||||
"\n",
|
||||
"The first step is to find the first prime number greater than 40. A prime number is a number that is only divisible by 1 and itself. The next prime number after 40 is 41.\n",
|
||||
"\n",
|
||||
"The second step is to find a number that is divisible by 3. To do this, we can add 1 to 41, which gives us 42. Now, we can check if 42 is divisible by 3. 42 divided by 3 is 14, so 42 is divisible by 3.\n",
|
||||
"\n",
|
||||
"Therefore, the answer to the question is 43.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -132,7 +146,7 @@
|
||||
"\u001b[1m> Entering new MultiPromptChain chain...\u001b[0m\n",
|
||||
"None: {'input': 'What is the name of the type of cloud that rains?'}\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"The name of the type of cloud that usually brings rain is called a cumulonimbus cloud. These clouds are typically tall and dark with a flat base and anvil-shaped top. They form when warm, moist air rises rapidly and condenses into water droplets, which eventually become heavy enough to fall as rain.\n"
|
||||
"The type of cloud that typically produces rain is called a cumulonimbus cloud. This type of cloud is characterized by its large vertical extent and can produce thunderstorms and heavy precipitation. Is there anything else you'd like to know?\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -51,21 +51,42 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "5b671ac5",
|
||||
"id": "783d6bcd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever_infos = [\n",
|
||||
" (\"state of the union\", \"Good for answering questions about the 2023 State of the Union address\", sou_retriever),\n",
|
||||
" (\"pg essay\", \"Good for answer quesitons about Paul Graham's essay on his career\", pg_retriever),\n",
|
||||
" (\"personal\", \"Good for answering questions about me\", personal_retriever)\n",
|
||||
"]\n",
|
||||
"chain = MultiRetrievalQAChain.from_retrievers(OpenAI(), *zip(*retriever_infos), verbose=True)"
|
||||
" {\n",
|
||||
" \"name\": \"state of the union\", \n",
|
||||
" \"description\": \"Good for answering questions about the 2023 State of the Union address\", \n",
|
||||
" \"retriever\": sou_retriever\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"pg essay\", \n",
|
||||
" \"description\": \"Good for answer quesitons about Paul Graham's essay on his career\", \n",
|
||||
" \"retriever\": pg_retriever\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"name\": \"personal\", \n",
|
||||
" \"description\": \"Good for answering questions about me\", \n",
|
||||
" \"retriever\": personal_retriever\n",
|
||||
" }\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "5b671ac5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = MultiRetrievalQAChain.from_retrievers(OpenAI(), retriever_infos, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "7db5814f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -76,9 +97,9 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new MultiRetrievalQAChain chain...\u001b[0m\n",
|
||||
"state of the union: {'query': 'What did the president say about the economy in the 2023 State of the Union Address?'}\n",
|
||||
"state of the union: {'query': 'What did the president say about the economy in the 2023 State of the Union address?'}\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
" The president said that the economy had created over 6.5 million jobs in the previous year, the strongest growth in nearly 40 years, and that his plan to fight inflation would lower costs and the deficit. He also announced the Bipartisan Infrastructure Law and said that investing in workers and building the economy from the bottom up and the middle out would build a better America.\n"
|
||||
" The president said that the economy was stronger than it had been a year prior, and that the American Rescue Plan helped create record job growth and fuel economic relief for millions of Americans. He also proposed a plan to fight inflation and lower costs for families, including cutting the cost of prescription drugs and energy, providing investments and tax credits for energy efficiency, and increasing access to child care and Pre-K.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -88,7 +109,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "bbcdbe82",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -101,7 +122,7 @@
|
||||
"\u001b[1m> Entering new MultiRetrievalQAChain chain...\u001b[0m\n",
|
||||
"pg essay: {'query': 'What is something Paul Graham regrets about his work?'}\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
" Paul Graham regrets that he was so consumed by running Y Combinator that it ended up eating away at his other projects, like writing essays and working on Arc.\n"
|
||||
" Paul Graham regrets that he did not take a vacation after selling his company, instead of immediately starting to paint.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -111,7 +132,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "37c88a27",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -134,7 +155,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"id": "de8519b2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -147,7 +168,7 @@
|
||||
"\u001b[1m> Entering new MultiRetrievalQAChain chain...\u001b[0m\n",
|
||||
"None: {'query': 'What year was the Internet created in?'}\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"The Internet was created in the late 1960s by the United States Department of Defense's Advanced Research Projects Agency (ARPA). It was originally called the ARPANET and was used to connect computers at different universities and research institutions. Over time, it evolved into the global network that we know today. So, to answer your question, the Internet was technically created in the late 1960s.\n"
|
||||
"The Internet was created in 1969 through a project called ARPANET, which was funded by the United States Department of Defense. However, the World Wide Web, which is often confused with the Internet, was created in 1989 by British computer scientist Tim Berners-Lee.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -7,7 +7,9 @@
|
||||
"source": [
|
||||
"# Bilibili\n",
|
||||
"\n",
|
||||
"This loader utilizes the [bilibili-api](https://github.com/MoyuScript/bilibili-api) to fetch the text transcript from [Bilibili](https://www.bilibili.tv/), one of the most beloved long-form video sites in China.\n",
|
||||
">[Bilibili](https://www.bilibili.tv/) is one of the most beloved long-form video sites in China.\n",
|
||||
"\n",
|
||||
"This loader utilizes the [bilibili-api](https://github.com/MoyuScript/bilibili-api) to fetch the text transcript from `Bilibili`.\n",
|
||||
"\n",
|
||||
"With this BiliBiliLoader, users can easily obtain the transcript of their desired video content on the platform."
|
||||
]
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
"source": [
|
||||
"# Blackboard\n",
|
||||
"\n",
|
||||
">[Blackboard Learn](https://en.wikipedia.org/wiki/Blackboard_Learn) (previously the Blackboard Learning Management System) is a web-based virtual learning environment and learning management system developed by Blackboard Inc. The software features course management, customizable open architecture, and scalable design that allows integration with student information systems and authentication protocols. It may be installed on local servers, hosted by `Blackboard ASP Solutions`, or provided as Software as a Service hosted on Amazon Web Services. Its main purposes are stated to include the addition of online elements to courses traditionally delivered face-to-face and development of completely online courses with few or no face-to-face meetings.\n",
|
||||
"\n",
|
||||
"This covers how to load data from a [Blackboard Learn](https://www.anthology.com/products/teaching-and-learning/learning-effectiveness/blackboard-learn) instance.\n",
|
||||
"\n",
|
||||
"This loader is not compatible with all `Blackboard` courses. It is only\n",
|
||||
|
||||
@@ -4,7 +4,10 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ChatGPT Data Loader\n",
|
||||
"### ChatGPT Data\n",
|
||||
"\n",
|
||||
">[ChatGPT](https://chat.openai.com) is an artificial intelligence (AI) chatbot developed by OpenAI.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This notebook covers how to load `conversations.json` from your `ChatGPT` data export folder.\n",
|
||||
"\n",
|
||||
|
||||
@@ -6,7 +6,9 @@
|
||||
"source": [
|
||||
"# Confluence\n",
|
||||
"\n",
|
||||
"A loader for [Confluence](https://www.atlassian.com/software/confluence) pages.\n",
|
||||
">[Confluence](https://www.atlassian.com/software/confluence) is a wiki collaboration platform that saves and organizes all of the project-related material. `Confluence` is a knowledge base that primarily handles content management activities. \n",
|
||||
"\n",
|
||||
"A loader for `Confluence` pages.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This currently supports both `username/api_key` and `Oauth2 login`.\n",
|
||||
|
||||
@@ -6,6 +6,12 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# CoNLL-U\n",
|
||||
"\n",
|
||||
">[CoNLL-U](https://universaldependencies.org/format.html) is a revised version of the CoNLL-X format. Annotations are encoded in plain text files (UTF-8, normalized to NFC, using only the LF character as line break, including an LF character at the end of file) with three types of lines:\n",
|
||||
">- Word lines containing the annotation of a word/token in 10 fields separated by single tab characters; see below.\n",
|
||||
">- Blank lines marking sentence boundaries.\n",
|
||||
">- Comment lines starting with hash (#).\n",
|
||||
"\n",
|
||||
"This is an example of how to load a file in [CoNLL-U](https://universaldependencies.org/format.html) format. The whole file is treated as one document. The example data (`conllu.conllu`) is based on one of the standard UD/CoNLL-U examples."
|
||||
]
|
||||
},
|
||||
@@ -4,7 +4,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# CSV Files\n",
|
||||
"# CSV\n",
|
||||
"\n",
|
||||
">A [comma-separated values (CSV)](https://en.wikipedia.org/wiki/Comma-separated_values) file is a delimited text file that uses a comma to separate values. Each line of the file is a data record. Each record consists of one or more fields, separated by commas.\n",
|
||||
"\n",
|
||||
"Load [csv](https://en.wikipedia.org/wiki/Comma-separated_values) data with a single row per document."
|
||||
]
|
||||
|
||||
@@ -6,7 +6,9 @@
|
||||
"source": [
|
||||
"# Discord\n",
|
||||
"\n",
|
||||
"You can follow the below steps to download your Discord data:\n",
|
||||
">[Discord](https://discord.com/) is a VoIP and instant messaging social platform. Users have the ability to communicate with voice calls, video calls, text messaging, media and files in private chats or as part of communities called \"servers\". A server is a collection of persistent chat rooms and voice channels which can be accessed via invite links.\n",
|
||||
"\n",
|
||||
"Follow these steps to download your `Discord` data:\n",
|
||||
"\n",
|
||||
"1. Go to your **User Settings**\n",
|
||||
"2. Then go to **Privacy and Safety**\n",
|
||||
@@ -79,9 +81,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
@@ -7,9 +7,21 @@
|
||||
"source": [
|
||||
"# EPub \n",
|
||||
"\n",
|
||||
">[EPUB](https://en.wikipedia.org/wiki/EPUB) is an e-book file format that uses the \".epub\" file extension. The term is short for electronic publication and is sometimes styled ePub. `EPUB` is supported by many e-readers, and compatible software is available for most smartphones, tablets, and computers.\n",
|
||||
"\n",
|
||||
"This covers how to load `.epub` documents into the Document format that we can use downstream. You'll need to install the [`pandoc`](https://pandoc.org/installing.html) package for this loader to work."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cd1affad-8ba6-43b1-b8cd-f61f44025077",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install pandocs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
|
||||
Binary file not shown.
@@ -6,6 +6,8 @@
|
||||
"source": [
|
||||
"### Facebook Chat\n",
|
||||
"\n",
|
||||
">[Messenger](https://en.wikipedia.org/wiki/Messenger_(software)) is an American proprietary instant messaging app and platform developed by `Meta Platforms`. Originally developed as `Facebook Chat` in 2008, the company revamped its messaging service in 2010.\n",
|
||||
"\n",
|
||||
"This notebook covers how to load data from the [Facebook Chats](https://www.facebook.com/business/help/1646890868956360) into a format that can be ingested into LangChain."
|
||||
]
|
||||
},
|
||||
|
||||
@@ -5,8 +5,9 @@
|
||||
"id": "79f24a6b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Directory Loader\n",
|
||||
"This covers how to use the DirectoryLoader to load all documents in a directory. Under the hood, by default this uses the [UnstructuredLoader](./unstructured_file.ipynb)"
|
||||
"# File Directory\n",
|
||||
"\n",
|
||||
"This covers how to use the `DirectoryLoader` to load all documents in a directory. Under the hood, by default this uses the [UnstructuredLoader](./unstructured_file.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -255,7 +256,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
@@ -4,9 +4,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# BigQuery\n",
|
||||
"# Google BigQuery\n",
|
||||
"\n",
|
||||
">[BigQuery](https://cloud.google.com/bigquery) is a serverless and cost-effective enterprise data warehouse that works across clouds and scales with your data.\n",
|
||||
">[Google BigQuery](https://cloud.google.com/bigquery) is a serverless and cost-effective enterprise data warehouse that works across clouds and scales with your data.\n",
|
||||
"`BigQuery` is a part of the `Google Cloud Platform`.\n",
|
||||
"\n",
|
||||
"Load a `BigQuery` query with one document per row."
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "0ef41fd4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# GCS Directory\n",
|
||||
"# Google Cloud Storage Directory\n",
|
||||
"\n",
|
||||
">[Google Cloud Storage](https://en.wikipedia.org/wiki/Google_Cloud_Storage) is a managed service for storing unstructured data.\n",
|
||||
"\n",
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "0ef41fd4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# GCS File Storage\n",
|
||||
"# Google Cloud Storage File\n",
|
||||
"\n",
|
||||
">[Google Cloud Storage](https://en.wikipedia.org/wiki/Google_Cloud_Storage) is a managed service for storing unstructured data.\n",
|
||||
"\n",
|
||||
@@ -6,6 +6,9 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Drive\n",
|
||||
"\n",
|
||||
">[Google Drive](https://en.wikipedia.org/wiki/Google_Drive) is a file storage and synchronization service developed by Google.\n",
|
||||
"\n",
|
||||
"This notebook covers how to load documents from `Google Drive`. Currently, only `Google Docs` are supported.\n",
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
@@ -7,7 +7,7 @@
|
||||
"source": [
|
||||
"# Hacker News\n",
|
||||
"\n",
|
||||
">[Hacker News](https://en.wikipedia.org/wiki/Hacker_News) (sometimes abbreviated as HN) is a social news website focusing on computer science and entrepreneurship. It is run by the investment fund and startup incubator Y Combinator. In general, content that can be submitted is defined as \"anything that gratifies one's intellectual curiosity.\"\n",
|
||||
">[Hacker News](https://en.wikipedia.org/wiki/Hacker_News) (sometimes abbreviated as `HN`) is a social news website focusing on computer science and entrepreneurship. It is run by the investment fund and startup incubator `Y Combinator`. In general, content that can be submitted is defined as \"anything that gratifies one's intellectual curiosity.\"\n",
|
||||
"\n",
|
||||
"This notebook covers how to pull page data and comments from [Hacker News](https://news.ycombinator.com/)"
|
||||
]
|
||||
@@ -7,6 +7,8 @@
|
||||
"source": [
|
||||
"# HTML\n",
|
||||
"\n",
|
||||
">[The HyperText Markup Language or HTML](https://en.wikipedia.org/wiki/HTML) is the standard markup language for documents designed to be displayed in a web browser.\n",
|
||||
"\n",
|
||||
"This covers how to load `HTML` documents into a document format that we can use downstream."
|
||||
]
|
||||
},
|
||||
|
||||
@@ -5,12 +5,11 @@
|
||||
"id": "04c9fdc5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# HuggingFace dataset \n",
|
||||
"# HuggingFace dataset\n",
|
||||
"\n",
|
||||
"The [Hugging Face Hub](https://huggingface.co/docs/hub/index) hosts a large number of community-curated datasets for a diverse range of tasks such as translation,\n",
|
||||
">The [Hugging Face Hub](https://huggingface.co/docs/hub/index) is home to over 5,000 [datasets](https://huggingface.co/docs/hub/index#datasets) in more than 100 languages that can be used for a broad range of tasks across NLP, Computer Vision, and Audio. They are used for a diverse range of tasks such as translation,\n",
|
||||
"automatic speech recognition, and image classification.\n",
|
||||
"\n",
|
||||
">The `Hugging Face Hub` is home to over 5,000 [datasets](https://huggingface.co/docs/hub/index#datasets) in more than 100 languages that can be used for a broad range of tasks across NLP, Computer Vision, and Audio.\n",
|
||||
"\n",
|
||||
"This notebook shows how to load `Hugging Face Hub` datasets to LangChain."
|
||||
]
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"source": [
|
||||
"# iFixit\n",
|
||||
"\n",
|
||||
"[iFixit](https://www.ifixit.com) is the largest, open repair community on the web. The site contains nearly 100k repair manuals, 200k Questions & Answers on 42k devices, and all the data is licensed under CC-BY-NC-SA 3.0.\n",
|
||||
">[iFixit](https://www.ifixit.com) is the largest, open repair community on the web. The site contains nearly 100k repair manuals, 200k Questions & Answers on 42k devices, and all the data is licensed under CC-BY-NC-SA 3.0.\n",
|
||||
"\n",
|
||||
"This loader will allow you to download the text of a repair guide, text of Q&A's and wikis from devices on `iFixit` using their open APIs. It's incredibly useful for context related to technical documents and answers to questions about devices in the corpus of data on `iFixit`."
|
||||
]
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"source": [
|
||||
"# Images\n",
|
||||
"\n",
|
||||
"This covers how to load images such as JPGs PNGs into a document format that we can use downstream."
|
||||
"This covers how to load images such as `JPG` or `PNG` into a document format that we can use downstream."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
"By default, the loader utilizes the pre-trained [Salesforce BLIP image captioning model](https://huggingface.co/Salesforce/blip-image-captioning-base).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the ImageCaptionLoader tutorial to generate a query-able index of image captions"
|
||||
"This notebook shows how to use the `ImageCaptionLoader` to generate a query-able index of image captions"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"source": [
|
||||
"# IMSDb\n",
|
||||
"\n",
|
||||
"[IMSDb](https://imsdb.com/) is the `Internet Movie Script Database`.\n",
|
||||
">[IMSDb](https://imsdb.com/) is the `Internet Movie Script Database`.\n",
|
||||
"\n",
|
||||
"This covers how to load `IMSDb` webpages into a document format that we can use downstream."
|
||||
]
|
||||
|
||||
367
docs/modules/indexes/document_loaders/examples/json_loader.ipynb
Normal file
367
docs/modules/indexes/document_loaders/examples/json_loader.ipynb
Normal file
@@ -0,0 +1,367 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# JSON Files\n",
|
||||
"\n",
|
||||
"The `JSONLoader` uses a specified [jq schema](https://en.wikipedia.org/wiki/Jq_(programming_language)) to parse the JSON files.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the `JSONLoader` to load [JSON](https://en.wikipedia.org/wiki/JSON) files into documents. A few examples of `jq` schema extracting different parts of a JSON file are also shown.\n",
|
||||
"\n",
|
||||
"Check this [manual](https://stedolan.github.io/jq/manual/#Basicfilters) for detailed documentation of the `jq` syntax."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install jq"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"jupyter": {
|
||||
"outputs_hidden": true
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import JSONLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"from pathlib import Path\n",
|
||||
"from pprint import pprint\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"file_path='./example_data/facebook_chat.json'\n",
|
||||
"data = json.loads(Path(file_path).read_text())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'image': {'creation_timestamp': 1675549016, 'uri': 'image_of_the_chat.jpg'},\n",
|
||||
" 'is_still_participant': True,\n",
|
||||
" 'joinable_mode': {'link': '', 'mode': 1},\n",
|
||||
" 'magic_words': [],\n",
|
||||
" 'messages': [{'content': 'Bye!',\n",
|
||||
" 'sender_name': 'User 2',\n",
|
||||
" 'timestamp_ms': 1675597571851},\n",
|
||||
" {'content': 'Oh no worries! Bye',\n",
|
||||
" 'sender_name': 'User 1',\n",
|
||||
" 'timestamp_ms': 1675597435669},\n",
|
||||
" {'content': 'No Im sorry it was my mistake, the blue one is not '\n",
|
||||
" 'for sale',\n",
|
||||
" 'sender_name': 'User 2',\n",
|
||||
" 'timestamp_ms': 1675596277579},\n",
|
||||
" {'content': 'I thought you were selling the blue one!',\n",
|
||||
" 'sender_name': 'User 1',\n",
|
||||
" 'timestamp_ms': 1675595140251},\n",
|
||||
" {'content': 'Im not interested in this bag. Im interested in the '\n",
|
||||
" 'blue one!',\n",
|
||||
" 'sender_name': 'User 1',\n",
|
||||
" 'timestamp_ms': 1675595109305},\n",
|
||||
" {'content': 'Here is $129',\n",
|
||||
" 'sender_name': 'User 2',\n",
|
||||
" 'timestamp_ms': 1675595068468},\n",
|
||||
" {'photos': [{'creation_timestamp': 1675595059,\n",
|
||||
" 'uri': 'url_of_some_picture.jpg'}],\n",
|
||||
" 'sender_name': 'User 2',\n",
|
||||
" 'timestamp_ms': 1675595060730},\n",
|
||||
" {'content': 'Online is at least $100',\n",
|
||||
" 'sender_name': 'User 2',\n",
|
||||
" 'timestamp_ms': 1675595045152},\n",
|
||||
" {'content': 'How much do you want?',\n",
|
||||
" 'sender_name': 'User 1',\n",
|
||||
" 'timestamp_ms': 1675594799696},\n",
|
||||
" {'content': 'Goodmorning! $50 is too low.',\n",
|
||||
" 'sender_name': 'User 2',\n",
|
||||
" 'timestamp_ms': 1675577876645},\n",
|
||||
" {'content': 'Hi! Im interested in your bag. Im offering $50. Let '\n",
|
||||
" 'me know if you are interested. Thanks!',\n",
|
||||
" 'sender_name': 'User 1',\n",
|
||||
" 'timestamp_ms': 1675549022673}],\n",
|
||||
" 'participants': [{'name': 'User 1'}, {'name': 'User 2'}],\n",
|
||||
" 'thread_path': 'inbox/User 1 and User 2 chat',\n",
|
||||
" 'title': 'User 1 and User 2 chat'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pprint(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using `JSONLoader`\n",
|
||||
"\n",
|
||||
"Suppose we are interested in extracting the values under the `content` field within the `messages` key of the JSON data. This can easily be done through the `JSONLoader` as shown below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = JSONLoader(\n",
|
||||
" file_path='./example_data/facebook_chat.json',\n",
|
||||
" jq_schema='.messages[].content')\n",
|
||||
"\n",
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Document(page_content='Bye!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 1}),\n",
|
||||
" Document(page_content='Oh no worries! Bye', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 2}),\n",
|
||||
" Document(page_content='No Im sorry it was my mistake, the blue one is not for sale', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 3}),\n",
|
||||
" Document(page_content='I thought you were selling the blue one!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 4}),\n",
|
||||
" Document(page_content='Im not interested in this bag. Im interested in the blue one!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 5}),\n",
|
||||
" Document(page_content='Here is $129', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 6}),\n",
|
||||
" Document(page_content='', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 7}),\n",
|
||||
" Document(page_content='Online is at least $100', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 8}),\n",
|
||||
" Document(page_content='How much do you want?', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 9}),\n",
|
||||
" Document(page_content='Goodmorning! $50 is too low.', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 10}),\n",
|
||||
" Document(page_content='Hi! Im interested in your bag. Im offering $50. Let me know if you are interested. Thanks!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 11})]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pprint(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Extracting metadata\n",
|
||||
"\n",
|
||||
"Generally, we want to include metadata available in the JSON file into the documents that we create from the content.\n",
|
||||
"\n",
|
||||
"The following demonstrates how metadata can be extracted using the `JSONLoader`.\n",
|
||||
"\n",
|
||||
"There are some key changes to be noted. In the previous example where we didn't collect the metadata, we managed to directly specify in the schema where the value for the `page_content` can be extracted from.\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
".messages[].content\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"In the current example, we have to tell the loader to iterate over the records in the `messages` field. The jq_schema then has to be:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
".messages[]\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"This allows us to pass the records (dict) into the `metadata_func` that has to be implemented. The `metadata_func` is responsible for identifying which pieces of information in the record should be included in the metadata stored in the final `Document` object.\n",
|
||||
"\n",
|
||||
"Additionally, we now have to explicitly specify in the loader, via the `content_key` argument, the key in the record from which the value for the `page_content` is extracted."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define the metadata extraction function.\n",
|
||||
"def metadata_func(record: dict, metadata: dict) -> dict:\n",
|
||||
"\n",
|
||||
" metadata[\"sender_name\"] = record.get(\"sender_name\")\n",
|
||||
" metadata[\"timestamp_ms\"] = record.get(\"timestamp_ms\")\n",
|
||||
"\n",
|
||||
" return metadata\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"loader = JSONLoader(\n",
|
||||
" file_path='./example_data/facebook_chat.json',\n",
|
||||
" jq_schema='.messages[]',\n",
|
||||
" content_key=\"content\",\n",
|
||||
" metadata_func=metadata_func\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Document(page_content='Bye!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 1, 'sender_name': 'User 2', 'timestamp_ms': 1675597571851}),\n",
|
||||
" Document(page_content='Oh no worries! Bye', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 2, 'sender_name': 'User 1', 'timestamp_ms': 1675597435669}),\n",
|
||||
" Document(page_content='No Im sorry it was my mistake, the blue one is not for sale', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 3, 'sender_name': 'User 2', 'timestamp_ms': 1675596277579}),\n",
|
||||
" Document(page_content='I thought you were selling the blue one!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 4, 'sender_name': 'User 1', 'timestamp_ms': 1675595140251}),\n",
|
||||
" Document(page_content='Im not interested in this bag. Im interested in the blue one!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 5, 'sender_name': 'User 1', 'timestamp_ms': 1675595109305}),\n",
|
||||
" Document(page_content='Here is $129', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 6, 'sender_name': 'User 2', 'timestamp_ms': 1675595068468}),\n",
|
||||
" Document(page_content='', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 7, 'sender_name': 'User 2', 'timestamp_ms': 1675595060730}),\n",
|
||||
" Document(page_content='Online is at least $100', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 8, 'sender_name': 'User 2', 'timestamp_ms': 1675595045152}),\n",
|
||||
" Document(page_content='How much do you want?', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 9, 'sender_name': 'User 1', 'timestamp_ms': 1675594799696}),\n",
|
||||
" Document(page_content='Goodmorning! $50 is too low.', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 10, 'sender_name': 'User 2', 'timestamp_ms': 1675577876645}),\n",
|
||||
" Document(page_content='Hi! Im interested in your bag. Im offering $50. Let me know if you are interested. Thanks!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 11, 'sender_name': 'User 1', 'timestamp_ms': 1675549022673})]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pprint(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, you will see that the documents contain the metadata associated with the content we extracted."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## The `metadata_func`\n",
|
||||
"\n",
|
||||
"As shown above, the `metadata_func` accepts the default metadata generated by the `JSONLoader`. This gives the user full control over how the metadata is formatted.\n",
|
||||
"\n",
|
||||
"For example, the default metadata contains the `source` and the `seq_num` keys. However, it is possible that the JSON data contain these keys as well. The user can then exploit the `metadata_func` to rename the default keys and use the ones from the JSON data.\n",
|
||||
"\n",
|
||||
"The example below shows how we can modify the `source` to only contain information of the file source relative to the `langchain` directory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define the metadata extraction function.\n",
|
||||
"def metadata_func(record: dict, metadata: dict) -> dict:\n",
|
||||
"\n",
|
||||
" metadata[\"sender_name\"] = record.get(\"sender_name\")\n",
|
||||
" metadata[\"timestamp_ms\"] = record.get(\"timestamp_ms\")\n",
|
||||
" \n",
|
||||
" if \"source\" in metadata:\n",
|
||||
" source = metadata[\"source\"].split(\"/\")\n",
|
||||
" source = source[source.index(\"langchain\"):]\n",
|
||||
" metadata[\"source\"] = \"/\".join(source)\n",
|
||||
"\n",
|
||||
" return metadata\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"loader = JSONLoader(\n",
|
||||
" file_path='./example_data/facebook_chat.json',\n",
|
||||
" jq_schema='.messages[]',\n",
|
||||
" content_key=\"content\",\n",
|
||||
" metadata_func=metadata_func\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[Document(page_content='Bye!', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 1, 'sender_name': 'User 2', 'timestamp_ms': 1675597571851}),\n",
|
||||
" Document(page_content='Oh no worries! Bye', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 2, 'sender_name': 'User 1', 'timestamp_ms': 1675597435669}),\n",
|
||||
" Document(page_content='No Im sorry it was my mistake, the blue one is not for sale', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 3, 'sender_name': 'User 2', 'timestamp_ms': 1675596277579}),\n",
|
||||
" Document(page_content='I thought you were selling the blue one!', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 4, 'sender_name': 'User 1', 'timestamp_ms': 1675595140251}),\n",
|
||||
" Document(page_content='Im not interested in this bag. Im interested in the blue one!', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 5, 'sender_name': 'User 1', 'timestamp_ms': 1675595109305}),\n",
|
||||
" Document(page_content='Here is $129', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 6, 'sender_name': 'User 2', 'timestamp_ms': 1675595068468}),\n",
|
||||
" Document(page_content='', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 7, 'sender_name': 'User 2', 'timestamp_ms': 1675595060730}),\n",
|
||||
" Document(page_content='Online is at least $100', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 8, 'sender_name': 'User 2', 'timestamp_ms': 1675595045152}),\n",
|
||||
" Document(page_content='How much do you want?', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 9, 'sender_name': 'User 1', 'timestamp_ms': 1675594799696}),\n",
|
||||
" Document(page_content='Goodmorning! $50 is too low.', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 10, 'sender_name': 'User 2', 'timestamp_ms': 1675577876645}),\n",
|
||||
" Document(page_content='Hi! Im interested in your bag. Im offering $50. Let me know if you are interested. Thanks!', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 11, 'sender_name': 'User 1', 'timestamp_ms': 1675549022673})]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pprint(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Common JSON structures with jq schema\n",
|
||||
"\n",
|
||||
"The list below provides a reference to the possible `jq_schema` the user can use to extract content from the JSON data depending on the structure.\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"JSON -> [{\"text\": ...}, {\"text\": ...}, {\"text\": ...}]\n",
|
||||
"jq_schema -> \".[].text\"\n",
|
||||
" \n",
|
||||
"JSON -> {\"key\": [{\"text\": ...}, {\"text\": ...}, {\"text\": ...}]}\n",
|
||||
"jq_schema -> \".key[].text\"\n",
|
||||
"\n",
|
||||
"JSON -> [\"...\", \"...\", \"...\"]\n",
|
||||
"jq_schema -> \".[]\"\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -4,7 +4,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Notebook\n",
|
||||
"# Jupyter Notebook\n",
|
||||
"\n",
|
||||
">[Jupyter Notebook](https://en.wikipedia.org/wiki/Project_Jupyter#Applications) (formerly `IPython Notebook`) is a web-based interactive computational environment for creating notebook documents.\n",
|
||||
"\n",
|
||||
"This notebook covers how to load data from a `Jupyter notebook (.ipynb)` into a format suitable by LangChain."
|
||||
]
|
||||
@@ -6,9 +6,11 @@
|
||||
"source": [
|
||||
"# MediaWikiDump\n",
|
||||
"\n",
|
||||
[MediaWiki XML Dumps]">
">[MediaWiki XML Dumps](https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps) contain the content of a wiki (wiki pages with all their revisions), without the site-related data. An XML dump does not create a full backup of the wiki database; the dump does not contain user accounts, images, edit logs, etc.\n",
|
||||
"\n",
|
||||
"This covers how to load a MediaWiki XML dump file into a document format that we can use downstream.\n",
|
||||
"\n",
|
||||
"It uses mwxml from mediawiki-utilities to dump and mwparserfromhell from earwig to parse MediaWiki wikicode.\n",
|
||||
"It uses `mwxml` from `mediawiki-utilities` to dump and `mwparserfromhell` from `earwig` to parse MediaWiki wikicode.\n",
|
||||
"\n",
|
||||
"Dump files can be obtained with dumpBackup.php or on the Special:Statistics page of the Wiki."
|
||||
]
|
||||
@@ -114,9 +116,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# OneDrive\n",
|
||||
"# Microsoft OneDrive\n",
|
||||
"\n",
|
||||
">[Microsoft OneDrive](https://en.wikipedia.org/wiki/OneDrive) (formerly `SkyDrive`) is a file hosting service operated by Microsoft.\n",
|
||||
"\n",
|
||||
"This notebook covers how to load documents from `OneDrive`. Currently, only docx, doc, and pdf files are supported.\n",
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
@@ -77,14 +79,34 @@
|
||||
"documents = loader.load()\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -1,12 +1,13 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "39af9ecd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PowerPoint\n",
|
||||
"# Microsoft PowerPoint\n",
|
||||
"\n",
|
||||
">[Microsoft PowerPoint](https://en.wikipedia.org/wiki/Microsoft_PowerPoint) is a presentation program by Microsoft.\n",
|
||||
"\n",
|
||||
"This covers how to load `Microsoft PowerPoint` documents into a document format that we can use downstream."
|
||||
]
|
||||
@@ -5,9 +5,11 @@
|
||||
"id": "39af9ecd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Word Documents\n",
|
||||
"# Microsoft Word\n",
|
||||
"\n",
|
||||
"This covers how to load Word documents into a document format that we can use downstream."
|
||||
">[Microsoft Word](https://www.microsoft.com/en-us/microsoft-365/word) is a word processor developed by Microsoft.\n",
|
||||
"\n",
|
||||
"This covers how to load `Word` documents into a document format that we can use downstream."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -198,7 +200,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
@@ -6,8 +6,7 @@
|
||||
"source": [
|
||||
"# Modern Treasury\n",
|
||||
"\n",
|
||||
">[Modern Treasury](https://www.moderntreasury.com/) simplifies complex payment operations\n",
|
||||
"A unified platform to power products and processes that move money.\n",
|
||||
">[Modern Treasury](https://www.moderntreasury.com/) simplifies complex payment operations. It is a unified platform to power products and processes that move money.\n",
|
||||
">- Connect to banks and payment systems\n",
|
||||
">- Track transactions and balances in real-time\n",
|
||||
">- Automate payment operations for scale\n",
|
||||
|
||||
76
docs/modules/indexes/document_loaders/examples/odt.ipynb
Normal file
76
docs/modules/indexes/document_loaders/examples/odt.ipynb
Normal file
@@ -0,0 +1,76 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "22a849cc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Unstructured ODT Loader\n",
|
||||
"\n",
|
||||
"The `UnstructuredODTLoader` can be used to load Open Office ODT files."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e6616e3a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import UnstructuredODTLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a654e4d9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='Lorem ipsum dolor sit amet.', metadata={'source': 'example_data/fake.odt', 'filename': 'example_data/fake.odt', 'category': 'Title'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = UnstructuredODTLoader(\"example_data/fake.odt\", mode=\"elements\")\n",
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9ab94bde",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -7,7 +7,9 @@
|
||||
"source": [
|
||||
"# PDF\n",
|
||||
"\n",
|
||||
"This covers how to load PDF documents into the Document format that we use downstream."
|
||||
">[Portable Document Format (PDF)](https://en.wikipedia.org/wiki/PDF), standardized as ISO 32000, is a file format developed by Adobe in 1992 to present documents, including text formatting and images, in a manner independent of application software, hardware, and operating systems.\n",
|
||||
"\n",
|
||||
"This covers how to load `PDF` documents into the Document format that we use downstream."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -370,6 +372,44 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "483720b5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "96351714",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Using PyPDFium2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "003fcc1d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PyPDFium2Loader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "46766e29",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = PyPDFium2Loader(\"example_data/layout-parser-paper.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "010d5cdd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -660,7 +700,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
"source": [
|
||||
"# Reddit\n",
|
||||
"\n",
|
||||
">[Reddit (reddit)](\twww.reddit.com) is an American social news aggregation, content rating, and discussion website.\n",
|
||||
[Reddit (reddit)]">
">[Reddit (reddit)](https://www.reddit.com) is an American social news aggregation, content rating, and discussion website.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This loader fetches the text from the Posts of Subreddits or Reddit users, using the `praw` Python package.\n",
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
"source": [
|
||||
"# Sitemap\n",
|
||||
"\n",
|
||||
"Extends from the `WebBaseLoader`, this will load a sitemap from a given URL, and then scrape and load all pages in the sitemap, returning each page as a Document.\n",
|
||||
"Extends from the `WebBaseLoader`, `SitemapLoader` loads a sitemap from a given URL, and then scrape and load all pages in the sitemap, returning each page as a Document.\n",
|
||||
"\n",
|
||||
"The scraping is done concurrently, using `WebBaseLoader`. There are reasonable limits to concurrent requests, defaulting to 2 per second. If you aren't concerned about being a good citizen, or you control the server you are scraping and don't care about load, you can change the `requests_per_second` parameter to increase the max concurrent requests. Note, while this will speed up the scraping process, but may cause the server to block you. Be careful!"
|
||||
"The scraping is done concurrently. There are reasonable limits to concurrent requests, defaulting to 2 per second. If you aren't concerned about being a good citizen, or you control the scrapped server, or don't care about load, you can change the `requests_per_second` parameter to increase the max concurrent requests. Note, while this will speed up the scraping process, but it may cause the server to block you. Be careful!"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
"id": "1dc7df1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Slack (Local Exported Zipfile)\n",
|
||||
"# Slack\n",
|
||||
"\n",
|
||||
">[Slack](slack.com) is an instant messaging program.\n",
|
||||
">[Slack](https://slack.com/) is an instant messaging program.\n",
|
||||
"\n",
|
||||
"This notebook covers how to load documents from a Zipfile generated from a `Slack` export.\n",
|
||||
"\n",
|
||||
@@ -6,7 +6,9 @@
|
||||
"source": [
|
||||
"# Stripe\n",
|
||||
"\n",
|
||||
"This notebook covers how to load data from the Stripe REST API into a format that can be ingested into LangChain, along with example usage for vectorization."
|
||||
">[Stripe](https://stripe.com/en-ca) is an Irish-American financial services and software as a service (SaaS) company. It offers payment-processing software and application programming interfaces for e-commerce websites and mobile applications.\n",
|
||||
"\n",
|
||||
"This notebook covers how to load data from the `Stripe REST API` into a format that can be ingested into LangChain, along with example usage for vectorization."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -84,9 +86,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
"id": "4bdaea79",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Subtitle Files\n",
|
||||
"# Subtitle\n",
|
||||
"\n",
|
||||
">[The SubRip file format](https://en.wikipedia.org/wiki/SubRip#SubRip_file_format) is described on the `Matroska` multimedia container format website as \"perhaps the most basic of all subtitle formats.\" `SubRip (SubRip Text)` files are named with the extension `.srt`, and contain formatted lines of plain text in groups separated by a blank line. Subtitles are numbered sequentially, starting at 1. The timecode format used is hours:minutes:seconds,milliseconds with time units fixed to two zero-padded digits and fractions fixed to three zero-padded digits (00:00:00,000). The fractional separator used is the comma, since the program was written in France.\n",
|
||||
"\n",
|
||||
@@ -7,7 +7,9 @@
|
||||
"source": [
|
||||
"# Telegram\n",
|
||||
"\n",
|
||||
"This notebook covers how to load data from Telegram into a format that can be ingested into LangChain."
|
||||
">[Telegram Messenger](https://web.telegram.org/a/) is a globally accessible freemium, cross-platform, encrypted, cloud-based and centralized instant messaging service. The application also provides optional end-to-end encrypted chats and video calling, VoIP, file sharing and several other features.\n",
|
||||
"\n",
|
||||
"This notebook covers how to load data from `Telegram` into a format that can be ingested into LangChain."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -76,7 +78,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -5,9 +5,11 @@
|
||||
"id": "4284970b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# TOML Loader\n",
|
||||
"# TOML\n",
|
||||
"\n",
|
||||
"If you need to load Toml files, use the `TomlLoader`."
|
||||
">[TOML](https://en.wikipedia.org/wiki/TOML) is a file format for configuration files. It is intended to be easy to read and write, and is designed to map unambiguously to a dictionary. Its specification is open-source. `TOML` is implemented in many programming languages. The name `TOML` is an acronym for \"Tom's Obvious, Minimal Language\" referring to its creator, Tom Preston-Werner.\n",
|
||||
"\n",
|
||||
"If you need to load `Toml` files, use the `TomlLoader`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -86,7 +88,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -7,8 +7,10 @@
|
||||
"source": [
|
||||
"# Twitter\n",
|
||||
"\n",
|
||||
"This loader fetches the text from the Tweets of a list of Twitter users, using the `tweepy` Python package.\n",
|
||||
"You must initialize the loader with your Twitter API token, and you need to pass in the Twitter username you want to extract."
|
||||
">[Twitter](https://twitter.com/) is an online social media and social networking service.\n",
|
||||
"\n",
|
||||
"This loader fetches the text from the Tweets of a list of `Twitter` users, using the `tweepy` Python package.\n",
|
||||
"You must initialize the loader with your `Twitter API` token, and you need to pass in the Twitter username you want to extract."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -106,7 +108,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -5,8 +5,9 @@
|
||||
"id": "20deed05",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Unstructured File Loader\n",
|
||||
"This notebook covers how to use Unstructured to load files of many types. Unstructured currently supports loading of text files, powerpoints, html, pdfs, images, and more."
|
||||
"# Unstructured File\n",
|
||||
"\n",
|
||||
"This notebook covers how to use `Unstructured` package to load files of many types. `Unstructured` currently supports loading of text files, powerpoints, html, pdfs, images, and more."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -311,7 +312,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.13"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
"id": "bf920da0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Web Base\n",
|
||||
"# WebBaseLoader\n",
|
||||
"\n",
|
||||
"This covers how to load all text from webpages into a document format that we can use downstream. For more custom logic for loading webpages look at some child class examples such as IMSDbLoader, AZLyricsLoader, and CollegeConfidentialLoader"
|
||||
"This covers how to use `WebBaseLoader` to load all text from `HTML` webpages into a document format that we can use downstream. For more custom logic for loading webpages look at some child class examples such as `IMSDbLoader`, `AZLyricsLoader`, and `CollegeConfidentialLoader`"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -140,7 +140,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: nest_asyncio in /Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages (1.5.6)\r\n"
|
||||
"Requirement already satisfied: nest_asyncio in /Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages (1.5.6)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -237,7 +237,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### WhatsApp Chat\n",
|
||||
"\n",
|
||||
"This notebook covers how to load data from the WhatsApp Chats into a format that can be ingested into LangChain."
|
||||
">[WhatsApp](https://www.whatsapp.com/) (also called `WhatsApp Messenger`) is a freeware, cross-platform, centralized instant messaging (IM) and voice-over-IP (VoIP) service. It allows users to send text and voice messages, make voice and video calls, and share images, documents, user locations, and other content.\n",
|
||||
"\n",
|
||||
"This notebook covers how to load data from the `WhatsApp Chats` into a format that can be ingested into LangChain."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -54,7 +55,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.1"
|
||||
"version": "3.10.6"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
@@ -63,5 +64,5 @@
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
130
docs/modules/indexes/document_loaders/examples/wikipedia.ipynb
Normal file
130
docs/modules/indexes/document_loaders/examples/wikipedia.ipynb
Normal file
@@ -0,0 +1,130 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bda1f3f5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Wikipedia\n",
|
||||
"\n",
|
||||
">[Wikipedia](https://wikipedia.org/) is a multilingual free online encyclopedia written and maintained by a community of volunteers, known as Wikipedians, through open collaboration and using a wiki-based editing system called MediaWiki. `Wikipedia` is the largest and most-read reference work in history.\n",
|
||||
"\n",
|
||||
"This notebook shows how to load wiki pages from `wikipedia.org` into the Document format that we use downstream."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b7a1eef-7bf7-4e7d-8bfc-c4e27c9488cb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2abd5578-aa3d-46b9-99af-8b262f0b3df8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to install `wikipedia` python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b674aaea-ed3a-4541-8414-260a8f67f623",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install wikipedia"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "95f05e1c-195e-4e2b-ae8e-8d6637f15be6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e29b954c-1407-4797-ae21-6ba8937156be",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`WikipediaLoader` has these arguments:\n",
|
||||
"- `query`: free text which used to find documents in Wikipedia\n",
|
||||
"- optional `lang`: default=\"en\". Use it to search in a specific language part of Wikipedia\n",
|
||||
"- optional `load_max_docs`: default=100. Use it to limit number of downloaded documents. It takes time to download all 100 documents, so use a small number for experiments. There is a hard limit of 300 for now.\n",
|
||||
"- optional `load_all_available_meta`: default=False. By default only the most important fields downloaded: `Published` (date when document was published/last updated), `title`, `Summary`. If True, other fields also downloaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "9bfd5e46",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import WikipediaLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "700e4ef2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = WikipediaLoader(query='HUNTER X HUNTER', load_max_docs=2).load()\n",
|
||||
"len(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8977bac0-0042-4f23-9754-247dbd32439b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs[0].metadata # meta-information of the Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "46969806-45a9-4c4d-a61b-cfb9658fc9de",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs[0].page_content[:400] # a content of the Document \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -5,10 +5,11 @@
|
||||
"id": "df770c72",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# YouTube\n",
|
||||
"# YouTube transcripts\n",
|
||||
"\n",
|
||||
"How to load documents from YouTube transcripts.\n",
|
||||
"\n"
|
||||
">[YouTube](https://www.youtube.com/) is an online video sharing and social media platform created by Google.\n",
|
||||
"\n",
|
||||
"This notebook covers how to load documents from `YouTube transcripts`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -156,7 +157,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.6"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
@@ -0,0 +1,310 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "13afcae7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Self-querying retriever with Chroma\n",
|
||||
"In the notebook we'll demo the `SelfQueryRetriever` wrapped around a Chroma vector store. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "68e75fb9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating a Chroma vectorstore\n",
|
||||
"First we'll want to create a Chroma VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n",
|
||||
"\n",
|
||||
"NOTE: The self-query retriever requires you to have `lark` installed (`pip install lark`)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "63a8af5b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip install lark"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "cb4a5787",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.schema import Document\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores import Chroma\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "bcbe04d9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using embedded DuckDB without persistence: data will be transient\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = [\n",
|
||||
" Document(page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\", metadata={\"year\": 1993, \"rating\": 7.7, \"genre\": \"science fiction\"}),\n",
|
||||
" Document(page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\", metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"rating\": 8.2}),\n",
|
||||
" Document(page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\", metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6}),\n",
|
||||
" Document(page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\", metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3}),\n",
|
||||
" Document(page_content=\"Toys come alive and have a blast doing so\", metadata={\"year\": 1995, \"genre\": \"animated\"}),\n",
|
||||
" Document(page_content=\"Three men walk into the Zone, three men walk out of the Zone\", metadata={\"year\": 1979, \"rating\": 9.9, \"director\": \"Andrei Tarkovsky\", \"genre\": \"science fiction\", \"rating\": 9.9})\n",
|
||||
"]\n",
|
||||
"vectorstore = Chroma.from_documents(\n",
|
||||
" docs, embeddings\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5ecaab6d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating our self-querying retriever\n",
|
||||
"Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "86e34dbf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
|
||||
"from langchain.chains.query_constructor.base import AttributeInfo\n",
|
||||
"\n",
|
||||
"metadata_field_info=[\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"genre\",\n",
|
||||
" description=\"The genre of the movie\", \n",
|
||||
" type=\"string or list[string]\", \n",
|
||||
" ),\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"year\",\n",
|
||||
" description=\"The year the movie was released\", \n",
|
||||
" type=\"integer\", \n",
|
||||
" ),\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"director\",\n",
|
||||
" description=\"The name of the movie director\", \n",
|
||||
" type=\"string\", \n",
|
||||
" ),\n",
|
||||
" AttributeInfo(\n",
|
||||
" name=\"rating\",\n",
|
||||
" description=\"A 1-10 rating for the movie\",\n",
|
||||
" type=\"float\"\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"document_content_description = \"Brief summary of a movie\"\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"retriever = SelfQueryRetriever.from_llm(llm, vectorstore, document_content_description, metadata_field_info, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ea9df8d4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Testing it out\n",
|
||||
"And now we can try actually using our retriever!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "38a126e9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"query='dinosaur' filter=None\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction'}),\n",
|
||||
" Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'}),\n",
|
||||
" Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6}),\n",
|
||||
" Document(page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...', metadata={'year': 2010, 'director': 'Christopher Nolan', 'rating': 8.2})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a relevant query\n",
|
||||
"retriever.get_relevant_documents(\"What are some movies about dinosaurs\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "fc3f1e6e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"query=' ' filter=Comparison(comparator=<Comparator.GT: 'gt'>, attribute='rating', value=8.5)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6}),\n",
|
||||
" Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example only specifies a filter\n",
|
||||
"retriever.get_relevant_documents(\"I want to watch a movie rated higher than 8.5\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "b19d4da0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"query='women' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='director', value='Greta Gerwig')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'year': 2019, 'director': 'Greta Gerwig', 'rating': 8.3})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a query and a filter\n",
|
||||
"retriever.get_relevant_documents(\"Has Greta Gerwig directed any movies about women\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "f900e40e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"query=' ' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='science fiction'), Comparison(comparator=<Comparator.GT: 'gt'>, attribute='rating', value=8.5)])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a composite filter\n",
|
||||
"retriever.get_relevant_documents(\"What's a highly rated (above 8.5) science fiction film?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "12a51522",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"query='toys' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Comparison(comparator=<Comparator.GT: 'gt'>, attribute='year', value=1990), Comparison(comparator=<Comparator.LT: 'lt'>, attribute='year', value=2005), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='animated')])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# This example specifies a query and composite filter\n",
|
||||
"retriever.get_relevant_documents(\"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "60110338",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -363,7 +363,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
"## Creating a Pinecone index\n",
|
||||
"First we'll want to create a Pinecone VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n",
|
||||
"\n",
|
||||
"NOTE: The self-query retriever currently only has built-in support for Pinecone VectorStore.\n",
|
||||
"\n",
|
||||
"NOTE: The self-query retriever requires you to have `lark` installed (`pip install lark`)"
|
||||
]
|
||||
},
|
||||
@@ -97,7 +95,7 @@
|
||||
"id": "5ecaab6d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Creating our self-querying retriever\n",
|
||||
"## Creating our self-querying retriever\n",
|
||||
"Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents."
|
||||
]
|
||||
},
|
||||
@@ -144,7 +142,7 @@
|
||||
"id": "ea9df8d4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Testing it out\n",
|
||||
"## Testing it out\n",
|
||||
"And now we can try actually using our retriever!"
|
||||
]
|
||||
},
|
||||
|
||||
274
docs/modules/indexes/retrievers/examples/wikipedia.ipynb
Normal file
274
docs/modules/indexes/retrievers/examples/wikipedia.ipynb
Normal file
@@ -0,0 +1,274 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9fc6205b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Wikipedia\n",
|
||||
"\n",
|
||||
">[Wikipedia](https://wikipedia.org/) is a multilingual free online encyclopedia written and maintained by a community of volunteers, known as Wikipedians, through open collaboration and using a wiki-based editing system called MediaWiki. `Wikipedia` is the largest and most-read reference work in history.\n",
|
||||
"\n",
|
||||
"This notebook shows how to retrieve wiki pages from `wikipedia.org` into the Document format that is used downstream."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "51489529-5dcd-4b86-bda6-de0a39d8ffd1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1435c804-069d-4ade-9a7b-006b97b767c1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to install the `wikipedia` Python package."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1a737220",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install wikipedia"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6c15470b-a16b-4e0d-bc6a-6998bafbb5a4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`WikipediaRetriever` has these arguments:\n",
|
||||
"- optional `lang`: default=\"en\". Use it to search in a specific language part of Wikipedia\n",
|
||||
"- optional `load_max_docs`: default=100. Use it to limit the number of downloaded documents. It takes time to download all 100 documents, so use a small number for experiments. There is a hard limit of 300 for now.\n",
|
||||
"- optional `load_all_available_meta`: default=False. By default only the most important fields are downloaded: `Published` (date when document was published/last updated), `title`, `Summary`. If True, other fields are also downloaded.\n",
|
||||
"\n",
|
||||
"`get_relevant_documents()` has one argument, `query`: free text which is used to find documents in Wikipedia"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae3c3d16",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6fafb73b-d6ec-4822-b161-edf0aaf5224a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Running retriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "d0e6f506",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.retrievers import WikipediaRetriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "f381f642",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = WikipediaRetriever()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "20ae1a74",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = retriever.get_relevant_documents(query='HUNTER X HUNTER')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "1d5a5088",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'title': 'Hunter × Hunter',\n",
|
||||
" 'summary': 'Hunter × Hunter (stylized as HUNTER×HUNTER and pronounced \"hunter hunter\") is a Japanese manga series written and illustrated by Yoshihiro Togashi. It has been serialized in Shueisha\\'s shōnen manga magazine Weekly Shōnen Jump since March 1998, although the manga has frequently gone on extended hiatuses since 2006. Its chapters have been collected in 37 tankōbon volumes as of November 2022. The story focuses on a young boy named Gon Freecss who discovers that his father, who left him at a young age, is actually a world-renowned Hunter, a licensed professional who specializes in fantastical pursuits such as locating rare or unidentified animal species, treasure hunting, surveying unexplored enclaves, or hunting down lawless individuals. Gon departs on a journey to become a Hunter and eventually find his father. Along the way, Gon meets various other Hunters and encounters the paranormal.\\nHunter × Hunter was adapted into a 62-episode anime television series produced by Nippon Animation and directed by Kazuhiro Furuhashi, which ran on Fuji Television from October 1999 to March 2001. Three separate original video animations (OVAs) totaling 30 episodes were subsequently produced by Nippon Animation and released in Japan from 2002 to 2004. A second anime television series by Madhouse aired on Nippon Television from October 2011 to September 2014, totaling 148 episodes, with two animated theatrical films released in 2013. There are also numerous audio albums, video games, musicals, and other media based on Hunter × Hunter.\\nThe manga has been translated into English and released in North America by Viz Media since April 2005. 
Both television series have been also licensed by Viz Media, with the first series having aired on the Funimation Channel in 2009 and the second series broadcast on Adult Swim\\'s Toonami programming block from April 2016 to June 2019.\\nHunter × Hunter has been a huge critical and financial success and has become one of the best-selling manga series of all time, having over 84 million copies in circulation by July 2022.\\n\\n'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0].metadata # meta-information of the Document"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "c0ccd0c7-f6a6-43e7-b842-5f57afb94224",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Hunter × Hunter (stylized as HUNTER×HUNTER and pronounced \"hunter hunter\") is a Japanese manga series written and illustrated by Yoshihiro Togashi. It has been serialized in Shueisha\\'s shōnen manga magazine Weekly Shōnen Jump since March 1998, although the manga has frequently gone on extended hiatuses since 2006. Its chapters have been collected in 37 tankōbon volumes as of November 2022. The sto'"
|
||||
]
|
||||
},
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs[0].page_content[:400] # a content of the Document "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2670363b-3806-4c7e-b14d-90a4d5d2a200",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Question Answering on facts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "bb3601df-53ea-4826-bdbe-554387bc3ad4",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# get a token: https://platform.openai.com/account/api-keys\n",
|
||||
"\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"OPENAI_API_KEY = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "e9c1a114-0410-4804-be30-05f34a9760f9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "51a33cc9-ec42-4afc-8a2d-3bfff476aa59",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||
"\n",
|
||||
"model = ChatOpenAI(model='gpt-3.5-turbo') # switch to 'gpt-4'\n",
|
||||
"qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"id": "ea537767-a8bf-4adf-ae03-b353c9145d58",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-> **Question**: What is Apify? \n",
|
||||
"\n",
|
||||
"**Answer**: Apify is a platform that allows you to easily automate web scraping, data extraction and web automation. It provides a cloud-based infrastructure for running web crawlers and other automation tasks, as well as a web-based tool for building and managing your crawlers. Additionally, Apify offers a marketplace for buying and selling pre-built crawlers and related services. \n",
|
||||
"\n",
|
||||
"-> **Question**: When the Monument to the Martyrs of the 1830 Revolution was created? \n",
|
||||
"\n",
|
||||
"**Answer**: Apify is a web scraping and automation platform that enables you to extract data from websites, turn unstructured data into structured data, and automate repetitive tasks. It provides a user-friendly interface for creating web scraping scripts without any coding knowledge. Apify can be used for various web scraping tasks such as data extraction, web monitoring, content aggregation, and much more. Additionally, it offers various features such as proxy support, scheduling, and integration with other tools to make web scraping and automation tasks easier and more efficient. \n",
|
||||
"\n",
|
||||
"-> **Question**: What is the Abhayagiri Vihāra? \n",
|
||||
"\n",
|
||||
"**Answer**: Abhayagiri Vihāra was a major monastery site of Theravada Buddhism that was located in Anuradhapura, Sri Lanka. It was founded in the 2nd century BCE and is considered to be one of the most important monastic complexes in Sri Lanka. \n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"questions = [\n",
|
||||
" \"What is Apify?\",\n",
|
||||
" \"When the Monument to the Martyrs of the 1830 Revolution was created?\",\n",
|
||||
" \"What is the Abhayagiri Vihāra?\", \n",
|
||||
" # \"How big is Wikipédia en français?\",\n",
|
||||
"] \n",
|
||||
"chat_history = []\n",
|
||||
"\n",
|
||||
"for question in questions: \n",
|
||||
" result = qa({\"question\": question, \"chat_history\": chat_history})\n",
|
||||
" chat_history.append((question, result['answer']))\n",
|
||||
" print(f\"-> **Question**: {question} \\n\")\n",
|
||||
" print(f\"**Answer**: {result['answer']} \\n\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -15,7 +15,7 @@ At a high level, text splitters work as following:
|
||||
2. Start combining these small chunks into a larger chunk until you reach a certain size (as measured by some function).
|
||||
3. Once you reach that size, make that chunk its own piece of text and then start creating a new chunk of text with some overlap (to keep context between chunks).
|
||||
|
||||
That means there two different axes along which you can customize your text splitter:
|
||||
That means there are two different axes along which you can customize your text splitter:
|
||||
|
||||
1. How the text is split
|
||||
2. How the chunk size is measured
|
||||
|
||||
231
docs/modules/models/llms/examples/human_input_llm.ipynb
Normal file
231
docs/modules/models/llms/examples/human_input_llm.ipynb
Normal file
@@ -0,0 +1,231 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# How (and why) to use the human input LLM\n",
|
||||
"\n",
|
||||
"Similar to the fake LLM, LangChain provides a pseudo LLM class that can be used for testing, debugging, or educational purposes. This allows you to mock out calls to the LLM and simulate how a human would respond if they received the prompts.\n",
|
||||
"\n",
|
||||
"In this notebook, we go over how to use this.\n",
|
||||
"\n",
|
||||
"We start by using the HumanInputLLM in an agent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms.human import HumanInputLLM"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import load_tools\n",
|
||||
"from langchain.agents import initialize_agent\n",
|
||||
"from langchain.agents import AgentType"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tools = load_tools([\"wikipedia\"])\n",
|
||||
"llm = HumanInputLLM(prompt_func=lambda prompt: print(f\"\\n===PROMPT====\\n{prompt}\\n=====END OF PROMPT======\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\n",
|
||||
"===PROMPT====\n",
|
||||
"Answer the following questions as best you can. You have access to the following tools:\n",
|
||||
"\n",
|
||||
"Wikipedia: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, historical events, or other subjects. Input should be a search query.\n",
|
||||
"\n",
|
||||
"Use the following format:\n",
|
||||
"\n",
|
||||
"Question: the input question you must answer\n",
|
||||
"Thought: you should always think about what to do\n",
|
||||
"Action: the action to take, should be one of [Wikipedia]\n",
|
||||
"Action Input: the input to the action\n",
|
||||
"Observation: the result of the action\n",
|
||||
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
|
||||
"Thought: I now know the final answer\n",
|
||||
"Final Answer: the final answer to the original input question\n",
|
||||
"\n",
|
||||
"Begin!\n",
|
||||
"\n",
|
||||
"Question: What is 'Bocchi the Rock!'?\n",
|
||||
"Thought:\n",
|
||||
"=====END OF PROMPT======\n",
|
||||
"\u001b[32;1m\u001b[1;3mI need to use a tool.\n",
|
||||
"Action: Wikipedia\n",
|
||||
"Action Input: Bocchi the Rock!, Japanese four-panel manga and anime series.\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mPage: Bocchi the Rock!\n",
|
||||
"Summary: Bocchi the Rock! (ぼっち・ざ・ろっく!, Bocchi Za Rokku!) is a Japanese four-panel manga series written and illustrated by Aki Hamaji. It has been serialized in Houbunsha's seinen manga magazine Manga Time Kirara Max since December 2017. Its chapters have been collected in five tankōbon volumes as of November 2022.\n",
|
||||
"An anime television series adaptation produced by CloverWorks aired from October to December 2022. The series has been praised for its writing, comedy, characters, and depiction of social anxiety, with the anime's visual creativity receiving acclaim.\n",
|
||||
"\n",
|
||||
"Page: Manga Time Kirara\n",
|
||||
"Summary: Manga Time Kirara (まんがタイムきらら, Manga Taimu Kirara) is a Japanese seinen manga magazine published by Houbunsha which mainly serializes four-panel manga. The magazine is sold on the ninth of each month and was first published as a special edition of Manga Time, another Houbunsha magazine, on May 17, 2002. Characters from this magazine have appeared in a crossover role-playing game called Kirara Fantasia.\n",
|
||||
"\n",
|
||||
"Page: Manga Time Kirara Max\n",
|
||||
"Summary: Manga Time Kirara Max (まんがタイムきららMAX) is a Japanese four-panel seinen manga magazine published by Houbunsha. It is the third magazine of the \"Kirara\" series, after \"Manga Time Kirara\" and \"Manga Time Kirara Carat\". The first issue was released on September 29, 2004. Currently the magazine is released on the 19th of each month.\u001b[0m\n",
|
||||
"Thought:\n",
|
||||
"===PROMPT====\n",
|
||||
"Answer the following questions as best you can. You have access to the following tools:\n",
|
||||
"\n",
|
||||
"Wikipedia: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, historical events, or other subjects. Input should be a search query.\n",
|
||||
"\n",
|
||||
"Use the following format:\n",
|
||||
"\n",
|
||||
"Question: the input question you must answer\n",
|
||||
"Thought: you should always think about what to do\n",
|
||||
"Action: the action to take, should be one of [Wikipedia]\n",
|
||||
"Action Input: the input to the action\n",
|
||||
"Observation: the result of the action\n",
|
||||
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
|
||||
"Thought: I now know the final answer\n",
|
||||
"Final Answer: the final answer to the original input question\n",
|
||||
"\n",
|
||||
"Begin!\n",
|
||||
"\n",
|
||||
"Question: What is 'Bocchi the Rock!'?\n",
|
||||
"Thought:I need to use a tool.\n",
|
||||
"Action: Wikipedia\n",
|
||||
"Action Input: Bocchi the Rock!, Japanese four-panel manga and anime series.\n",
|
||||
"Observation: Page: Bocchi the Rock!\n",
|
||||
"Summary: Bocchi the Rock! (ぼっち・ざ・ろっく!, Bocchi Za Rokku!) is a Japanese four-panel manga series written and illustrated by Aki Hamaji. It has been serialized in Houbunsha's seinen manga magazine Manga Time Kirara Max since December 2017. Its chapters have been collected in five tankōbon volumes as of November 2022.\n",
|
||||
"An anime television series adaptation produced by CloverWorks aired from October to December 2022. The series has been praised for its writing, comedy, characters, and depiction of social anxiety, with the anime's visual creativity receiving acclaim.\n",
|
||||
"\n",
|
||||
"Page: Manga Time Kirara\n",
|
||||
"Summary: Manga Time Kirara (まんがタイムきらら, Manga Taimu Kirara) is a Japanese seinen manga magazine published by Houbunsha which mainly serializes four-panel manga. The magazine is sold on the ninth of each month and was first published as a special edition of Manga Time, another Houbunsha magazine, on May 17, 2002. Characters from this magazine have appeared in a crossover role-playing game called Kirara Fantasia.\n",
|
||||
"\n",
|
||||
"Page: Manga Time Kirara Max\n",
|
||||
"Summary: Manga Time Kirara Max (まんがタイムきららMAX) is a Japanese four-panel seinen manga magazine published by Houbunsha. It is the third magazine of the \"Kirara\" series, after \"Manga Time Kirara\" and \"Manga Time Kirara Carat\". The first issue was released on September 29, 2004. Currently the magazine is released on the 19th of each month.\n",
|
||||
"Thought:\n",
|
||||
"=====END OF PROMPT======\n",
|
||||
"\u001b[32;1m\u001b[1;3mThese are not relevant articles.\n",
|
||||
"Action: Wikipedia\n",
|
||||
"Action Input: Bocchi the Rock!, Japanese four-panel manga series written and illustrated by Aki Hamaji.\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mPage: Bocchi the Rock!\n",
|
||||
"Summary: Bocchi the Rock! (ぼっち・ざ・ろっく!, Bocchi Za Rokku!) is a Japanese four-panel manga series written and illustrated by Aki Hamaji. It has been serialized in Houbunsha's seinen manga magazine Manga Time Kirara Max since December 2017. Its chapters have been collected in five tankōbon volumes as of November 2022.\n",
|
||||
"An anime television series adaptation produced by CloverWorks aired from October to December 2022. The series has been praised for its writing, comedy, characters, and depiction of social anxiety, with the anime's visual creativity receiving acclaim.\u001b[0m\n",
|
||||
"Thought:\n",
|
||||
"===PROMPT====\n",
|
||||
"Answer the following questions as best you can. You have access to the following tools:\n",
|
||||
"\n",
|
||||
"Wikipedia: A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, historical events, or other subjects. Input should be a search query.\n",
|
||||
"\n",
|
||||
"Use the following format:\n",
|
||||
"\n",
|
||||
"Question: the input question you must answer\n",
|
||||
"Thought: you should always think about what to do\n",
|
||||
"Action: the action to take, should be one of [Wikipedia]\n",
|
||||
"Action Input: the input to the action\n",
|
||||
"Observation: the result of the action\n",
|
||||
"... (this Thought/Action/Action Input/Observation can repeat N times)\n",
|
||||
"Thought: I now know the final answer\n",
|
||||
"Final Answer: the final answer to the original input question\n",
|
||||
"\n",
|
||||
"Begin!\n",
|
||||
"\n",
|
||||
"Question: What is 'Bocchi the Rock!'?\n",
|
||||
"Thought:I need to use a tool.\n",
|
||||
"Action: Wikipedia\n",
|
||||
"Action Input: Bocchi the Rock!, Japanese four-panel manga and anime series.\n",
|
||||
"Observation: Page: Bocchi the Rock!\n",
|
||||
"Summary: Bocchi the Rock! (ぼっち・ざ・ろっく!, Bocchi Za Rokku!) is a Japanese four-panel manga series written and illustrated by Aki Hamaji. It has been serialized in Houbunsha's seinen manga magazine Manga Time Kirara Max since December 2017. Its chapters have been collected in five tankōbon volumes as of November 2022.\n",
|
||||
"An anime television series adaptation produced by CloverWorks aired from October to December 2022. The series has been praised for its writing, comedy, characters, and depiction of social anxiety, with the anime's visual creativity receiving acclaim.\n",
|
||||
"\n",
|
||||
"Page: Manga Time Kirara\n",
|
||||
"Summary: Manga Time Kirara (まんがタイムきらら, Manga Taimu Kirara) is a Japanese seinen manga magazine published by Houbunsha which mainly serializes four-panel manga. The magazine is sold on the ninth of each month and was first published as a special edition of Manga Time, another Houbunsha magazine, on May 17, 2002. Characters from this magazine have appeared in a crossover role-playing game called Kirara Fantasia.\n",
|
||||
"\n",
|
||||
"Page: Manga Time Kirara Max\n",
|
||||
"Summary: Manga Time Kirara Max (まんがタイムきららMAX) is a Japanese four-panel seinen manga magazine published by Houbunsha. It is the third magazine of the \"Kirara\" series, after \"Manga Time Kirara\" and \"Manga Time Kirara Carat\". The first issue was released on September 29, 2004. Currently the magazine is released on the 19th of each month.\n",
|
||||
"Thought:These are not relevant articles.\n",
|
||||
"Action: Wikipedia\n",
|
||||
"Action Input: Bocchi the Rock!, Japanese four-panel manga series written and illustrated by Aki Hamaji.\n",
|
||||
"Observation: Page: Bocchi the Rock!\n",
|
||||
"Summary: Bocchi the Rock! (ぼっち・ざ・ろっく!, Bocchi Za Rokku!) is a Japanese four-panel manga series written and illustrated by Aki Hamaji. It has been serialized in Houbunsha's seinen manga magazine Manga Time Kirara Max since December 2017. Its chapters have been collected in five tankōbon volumes as of November 2022.\n",
|
||||
"An anime television series adaptation produced by CloverWorks aired from October to December 2022. The series has been praised for its writing, comedy, characters, and depiction of social anxiety, with the anime's visual creativity receiving acclaim.\n",
|
||||
"Thought:\n",
|
||||
"=====END OF PROMPT======\n",
|
||||
"\u001b[32;1m\u001b[1;3mIt worked.\n",
|
||||
"Final Answer: Bocchi the Rock! is a four-panel manga series and anime television series. The series has been praised for its writing, comedy, characters, and depiction of social anxiety, with the anime's visual creativity receiving acclaim.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Bocchi the Rock! is a four-panel manga series and anime television series. The series has been praised for its writing, comedy, characters, and depiction of social anxiety, with the anime's visual creativity receiving acclaim.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"What is 'Bocchi the Rock!'?\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "ab4db1680e5f8d10489fb83454f4ec01729e3bd5bdb28eaf0a13b95ddb6ae5ea"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -7,27 +7,27 @@
|
||||
"source": [
|
||||
"# How to stream LLM and Chat Model responses\n",
|
||||
"\n",
|
||||
"LangChain provides streaming support for LLMs. Currently, we support streaming for the `OpenAI`, `ChatOpenAI`, and `Anthropic` implementations, but streaming support for other LLM implementations is on the roadmap. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using [`StreamingStdOutCallbackHandler`]()."
|
||||
"LangChain provides streaming support for LLMs. Currently, we support streaming for the `OpenAI`, `ChatOpenAI`, and `ChatAnthropic` implementations, but streaming support for other LLM implementations is on the roadmap. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using [`StreamingStdOutCallbackHandler`]()."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "4ac0ff54-540a-4f2b-8d9a-b590fec7fe07",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI, Anthropic\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.chat_models import ChatOpenAI, ChatAnthropic\n",
|
||||
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
|
||||
"from langchain.schema import HumanMessage"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"id": "77f60a4b-f786-41f2-972e-e5bb8a48dcd5",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -94,7 +94,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"id": "a35373f1-9ee6-4753-a343-5aee749b8527",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -113,10 +113,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"LLMResult(generations=[[Generation(text='\\n\\nQ: What did the fish say when it hit the wall?\\nA: Dam!', generation_info={'finish_reason': None, 'logprobs': None})]], llm_output={'token_usage': {}, 'model_name': 'text-davinci-003'})"
|
||||
"LLMResult(generations=[[Generation(text='\\n\\nQ: What did the fish say when it hit the wall?\\nA: Dam!', generation_info={'finish_reason': 'stop', 'logprobs': None})]], llm_output={'token_usage': {}, 'model_name': 'text-davinci-003'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -135,7 +135,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"id": "22665f16-e05b-473c-a4bd-ad75744ea024",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -199,12 +199,12 @@
|
||||
"id": "909ae48b-0f07-4990-bbff-e627f706c93e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here is an example with the `Anthropic` LLM implementation, which uses their `claude` model."
|
||||
"Here is an example with the `ChatAnthropic` chat model implementation, which uses their `claude` model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"id": "eadae4ba-9f21-4ec8-845d-dd43b0edc2dc",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -214,38 +214,26 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" Here is my attempt at a song about sparkling water:\n",
|
||||
"\n",
|
||||
"Sparkling water, bubbles so bright,\n",
|
||||
"\n",
|
||||
"Fizzing and popping in the light.\n",
|
||||
"\n",
|
||||
"No sugar or calories, a healthy delight,\n",
|
||||
"\n",
|
||||
"Sparkling water, refreshing and light.\n",
|
||||
"\n",
|
||||
"Carbonation that tickles the tongue,\n",
|
||||
"\n",
|
||||
"In flavors of lemon and lime unsung.\n",
|
||||
"\n",
|
||||
"Sparkling water, a drink quite all right,\n",
|
||||
"\n",
|
||||
"Bubbles sparkling in the light."
|
||||
"Sparkling water, bubbles so bright, \n",
|
||||
"Dancing in the glass with delight.\n",
|
||||
"Refreshing and crisp, a fizzy delight,\n",
|
||||
"Quenching my thirst with each sip I take.\n",
|
||||
"The carbonation tickles my tongue,\n",
|
||||
"As the refreshing water song is sung.\n",
|
||||
"Lime or lemon, a citrus twist,\n",
|
||||
"Makes sparkling water such a bliss.\n",
|
||||
"Healthy and hydrating, a drink so pure,\n",
|
||||
"Sparkling water, always alluring.\n",
|
||||
"Bubbles ascending in a stream, \n",
|
||||
"Sparkling water, you're my dream!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\nSparkling water, bubbles so bright,\\n\\nFizzing and popping in the light.\\n\\nNo sugar or calories, a healthy delight,\\n\\nSparkling water, refreshing and light.\\n\\nCarbonation that tickles the tongue,\\n\\nIn flavors of lemon and lime unsung.\\n\\nSparkling water, a drink quite all right,\\n\\nBubbles sparkling in the light.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm = Anthropic(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], temperature=0)\n",
|
||||
"llm(\"Write me a song about sparkling water.\")"
|
||||
"chat = ChatAnthropic(streaming=True, callbacks=[StreamingStdOutCallbackHandler()], temperature=0)\n",
|
||||
"resp = chat([HumanMessage(content=\"Write me a song about sparkling water.\")])"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -63,7 +63,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 3,
|
||||
"id": "faacfa54",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"os.environ[\"OPENAI_API_TYPE\"] = \"azure\"\n",
|
||||
"os.environ[\"OPENAI_API_VERSION\"] = \"2022-12-01\"\n",
|
||||
"os.environ[\"OPENAI_API_BASE\"] = \"...\"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "8fad2a6e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -74,29 +88,32 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 5,
|
||||
"id": "8c80213a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create an instance of Azure OpenAI\n",
|
||||
"# Replace the deployment name with your own\n",
|
||||
"llm = AzureOpenAI(deployment_name=\"text-davinci-002-prod\", model_name=\"text-davinci-002\")"
|
||||
"llm = AzureOpenAI(\n",
|
||||
" deployment_name=\"td2\",\n",
|
||||
" model_name=\"text-davinci-002\", \n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 6,
|
||||
"id": "592dc404",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'"
|
||||
"\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was...two tired!\""
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -158,7 +175,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.9.1"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
@@ -125,7 +125,9 @@
|
||||
"# Callbacks support token-wise streaming\n",
|
||||
"callbacks = [StreamingStdOutCallbackHandler()]\n",
|
||||
"# Verbose is required to pass to the callback manager\n",
|
||||
"llm = GPT4All(model=local_path, callbacks=callbacks, verbose=True)"
|
||||
"llm = GPT4All(model=local_path, callbacks=callbacks, verbose=True)\n",
|
||||
"# If you want to use GPT4ALL_J model add the backend parameter\n",
|
||||
"llm = GPT4All(model=local_path, backend='gptj', callbacks=callbacks, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -448,6 +448,152 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a0691cd9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Featureform\n",
|
||||
"\n",
|
||||
"Finally, we will use [Featureform](https://github.com/featureform/featureform) an open-source and enterprise-grade feature store to run the same example. Featureform allows you to work with your infrastructure like Spark or locally to define your feature transformations."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "44320d68",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Initialize Featureform\n",
|
||||
"\n",
|
||||
"You can follow in the instructions in the README to initialize your transformations and features in Featureform."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e64ada9d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import featureform as ff\n",
|
||||
"\n",
|
||||
"client = ff.Client(host=\"demo.featureform.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b28914a2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prompts\n",
|
||||
"\n",
|
||||
"Here we will set up a custom FeatureformPromptTemplate. This prompt template will take in the average amount a user pays per transactions.\n",
|
||||
"\n",
|
||||
"Note that the input to this prompt template is just avg_transaction, since that is the only user defined piece (all other variables are looked up inside the prompt template)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "75d4a34a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts import PromptTemplate, StringPromptTemplate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "88253bcb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"template = \"\"\"Given the amount a user spends on average per transaction, let them know if they are a high roller. Otherwise, make a silly joke about chickens at the end to make them feel better\n",
|
||||
"\n",
|
||||
"Here are the user's stats:\n",
|
||||
"Average Amount per Transaction: ${avg_transcation}\n",
|
||||
"\n",
|
||||
"Your response:\"\"\"\n",
|
||||
"prompt = PromptTemplate.from_template(template)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "61f72476",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class FeatureformPromptTemplate(StringPromptTemplate):\n",
|
||||
" \n",
|
||||
" def format(self, **kwargs) -> str:\n",
|
||||
" user_id = kwargs.pop(\"user_id\")\n",
|
||||
" fpf = client.features([(\"avg_transactions\", \"quickstart\")], {\"user\": user_id})\n",
|
||||
" return prompt.format(**kwargs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "994a644c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompt_template = FeatureformPrompTemplate(input_variables=[\"user_id\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "79b2b0cb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(prompt_template.format(user_id=\"C1410926\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f09ddfdd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Use in a chain\n",
|
||||
"\n",
|
||||
"We can now use this in a chain, successfully creating a chain that achieves personalization backed by the Featureform Feature Platform"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5e89216f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.chains import LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9d3d558c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain = LLMChain(llm=ChatOpenAI(), prompt=prompt_template)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b5412626",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.run(\"C1410926\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
90
docs/modules/utils/examples/gmail.ipynb
Normal file
90
docs/modules/utils/examples/gmail.ipynb
Normal file
@@ -0,0 +1,90 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Gmail Toolkit\n",
|
||||
"\n",
|
||||
"**The Gmail Toolkit** allows you to create drafts, send email, and search for messages and threads using natural language.\n",
|
||||
"\n",
|
||||
"As a prerequisite, you will need to register with Google and generate a `credentials.json` file in the directory where you run this loader. See [here](https://developers.google.com/workspace/guides/create-credentials) for instructions.\n",
|
||||
"\n",
|
||||
"This example goes over how to use the Gmail Toolkit:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.agents.agent_toolkits.gmail.base import create_gmail_agent\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"llm = OpenAI(verbose=True)\n",
|
||||
"gmail_agent = create_gmail_agent(llm=llm, sender_name=\"Alice\", verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"command = \"search for all messages during november 2022\"\n",
|
||||
"output = gmail_agent.run(command)\n",
|
||||
"\n",
|
||||
"messages = json.loads(output)\n",
|
||||
"\n",
|
||||
"print(\"Messages:\")\n",
|
||||
"for message in messages:\n",
|
||||
" print(f\"{message['id']}: {message['snippet']}\")\n",
|
||||
"\n",
|
||||
"id = messages[0][\"id\"]\n",
|
||||
"\n",
|
||||
"command = f\"get the body for message id {id}\"\n",
|
||||
"\n",
|
||||
"output = gmail_agent.run(command)\n",
|
||||
"\n",
|
||||
"print(f\"Message body: {output}\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"command = \"create a draft email to bob@example.com explaining why I can't make the meeting next week.\"\n",
|
||||
"output = gmail_agent.run(command)\n",
|
||||
"\n",
|
||||
"print(output)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "agent-ui",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.8"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -14,6 +14,7 @@ Specific implementations of agent simulations (or parts of agent simulations) in
|
||||
## Simulations with Two Agents
|
||||
- [CAMEL](agent_simulations/camel_role_playing.ipynb): an implementation of the CAMEL (Communicative Agents for “Mind” Exploration of Large Scale Language Model Society) paper, where two agents communicate with each other.
|
||||
- [Two Player D&D](agent_simulations/two_player_dnd.ipynb): an example of how to use a generic simulator for two agents to implement a variant of the popular Dungeons & Dragons role playing game.
|
||||
- [Agent Debates with Tools](agent_simulations/two_agent_debate_tools.ipynb): an example of how to enable Dialogue Agents to use tools to inform their responses.
|
||||
|
||||
## Simulations with Multiple Agents
|
||||
- [Multi-Player D&D](agent_simulations/multi_player_dnd.ipynb): an example of how to use a generic dialogue simulator for multiple dialogue agents with a custom speaker-ordering, illustrated with a variant of the popular Dungeons & Dragons role playing game.
|
||||
|
||||
654
docs/use_cases/agent_simulations/two_agent_debate_tools.ipynb
Normal file
654
docs/use_cases/agent_simulations/two_agent_debate_tools.ipynb
Normal file
@@ -0,0 +1,654 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Agent Debates with Tools\n",
|
||||
"\n",
|
||||
"This example shows how to simulate multi-agent dialogues where agents have access to tools."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Import LangChain related modules "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import List, Dict, Callable\n",
|
||||
"from langchain.chains import ConversationChain\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from langchain.prompts.prompt import PromptTemplate\n",
|
||||
"from langchain.schema import (\n",
|
||||
" AIMessage,\n",
|
||||
" HumanMessage,\n",
|
||||
" SystemMessage,\n",
|
||||
" BaseMessage,\n",
|
||||
")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Import modules related to tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import Tool\n",
|
||||
"from langchain.agents import initialize_agent\n",
|
||||
"from langchain.agents import AgentType\n",
|
||||
"from langchain.agents import load_tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## `DialogueAgent` and `DialogueSimulator` classes\n",
|
||||
"We will use the same `DialogueAgent` and `DialogueSimulator` classes defined in [Multi-Player Authoritarian Speaker Selection](https://python.langchain.com/en/latest/use_cases/agent_simulations/multiagent_authoritarian.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class DialogueAgent:\n",
|
||||
" def __init__(\n",
|
||||
" self,\n",
|
||||
" name: str,\n",
|
||||
" system_message: SystemMessage,\n",
|
||||
" model: ChatOpenAI,\n",
|
||||
" ) -> None:\n",
|
||||
" self.name = name\n",
|
||||
" self.system_message = system_message\n",
|
||||
" self.model = model\n",
|
||||
" self.prefix = f\"{self.name}: \"\n",
|
||||
" self.reset()\n",
|
||||
" \n",
|
||||
" def reset(self):\n",
|
||||
" self.message_history = [\"Here is the conversation so far.\"]\n",
|
||||
"\n",
|
||||
" def send(self) -> str:\n",
|
||||
" \"\"\"\n",
|
||||
" Applies the chatmodel to the message history\n",
|
||||
" and returns the message string\n",
|
||||
" \"\"\"\n",
|
||||
" message = self.model(\n",
|
||||
" [\n",
|
||||
" self.system_message,\n",
|
||||
" HumanMessage(content=\"\\n\".join(self.message_history + [self.prefix])),\n",
|
||||
" ]\n",
|
||||
" )\n",
|
||||
" return message.content\n",
|
||||
"\n",
|
||||
" def receive(self, name: str, message: str) -> None:\n",
|
||||
" \"\"\"\n",
|
||||
" Concatenates {message} spoken by {name} into message history\n",
|
||||
" \"\"\"\n",
|
||||
" self.message_history.append(f\"{name}: {message}\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class DialogueSimulator:\n",
|
||||
" def __init__(\n",
|
||||
" self,\n",
|
||||
" agents: List[DialogueAgent],\n",
|
||||
" selection_function: Callable[[int, List[DialogueAgent]], int],\n",
|
||||
" ) -> None:\n",
|
||||
" self.agents = agents\n",
|
||||
" self._step = 0\n",
|
||||
" self.select_next_speaker = selection_function\n",
|
||||
" \n",
|
||||
" def reset(self):\n",
|
||||
" for agent in self.agents:\n",
|
||||
" agent.reset()\n",
|
||||
"\n",
|
||||
" def inject(self, name: str, message: str):\n",
|
||||
" \"\"\"\n",
|
||||
" Initiates the conversation with a {message} from {name}\n",
|
||||
" \"\"\"\n",
|
||||
" for agent in self.agents:\n",
|
||||
" agent.receive(name, message)\n",
|
||||
"\n",
|
||||
" # increment time\n",
|
||||
" self._step += 1\n",
|
||||
"\n",
|
||||
" def step(self) -> tuple[str, str]:\n",
|
||||
" # 1. choose the next speaker\n",
|
||||
" speaker_idx = self.select_next_speaker(self._step, self.agents)\n",
|
||||
" speaker = self.agents[speaker_idx]\n",
|
||||
"\n",
|
||||
" # 2. next speaker sends message\n",
|
||||
" message = speaker.send()\n",
|
||||
"\n",
|
||||
" # 3. everyone receives message\n",
|
||||
" for receiver in self.agents:\n",
|
||||
" receiver.receive(speaker.name, message)\n",
|
||||
"\n",
|
||||
" # 4. increment time\n",
|
||||
" self._step += 1\n",
|
||||
"\n",
|
||||
" return speaker.name, message"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## `DialogueAgentWithTools` class\n",
|
||||
"We define a `DialogueAgentWithTools` class that augments `DialogueAgent` to use tools."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class DialogueAgentWithTools(DialogueAgent):\n",
|
||||
" def __init__(\n",
|
||||
" self,\n",
|
||||
" name: str,\n",
|
||||
" system_message: SystemMessage,\n",
|
||||
" model: ChatOpenAI,\n",
|
||||
" tool_names: List[str],\n",
|
||||
" **tool_kwargs,\n",
|
||||
" ) -> None:\n",
|
||||
" super().__init__(name, system_message, model)\n",
|
||||
" self.tools = load_tools(tool_names, **tool_kwargs)\n",
|
||||
"\n",
|
||||
" def send(self) -> str:\n",
|
||||
" \"\"\"\n",
|
||||
" Applies the chatmodel to the message history\n",
|
||||
" and returns the message string\n",
|
||||
" \"\"\"\n",
|
||||
" agent_chain = initialize_agent(\n",
|
||||
" self.tools, \n",
|
||||
" self.model, \n",
|
||||
" agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, \n",
|
||||
" verbose=True, \n",
|
||||
" memory=ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True)\n",
|
||||
" )\n",
|
||||
" message = AIMessage(content=agent_chain.run(\n",
|
||||
" input=\"\\n\".join([\n",
|
||||
" self.system_message.content] + \\\n",
|
||||
" self.message_history + \\\n",
|
||||
" [self.prefix])))\n",
|
||||
" \n",
|
||||
" return message.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Define roles and topic"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"names = {\n",
|
||||
" 'AI accelerationist': [\n",
|
||||
" 'arxiv', \n",
|
||||
" 'ddg-search', \n",
|
||||
" 'wikipedia'\n",
|
||||
" ],\n",
|
||||
" 'AI alarmist': [\n",
|
||||
" 'arxiv', \n",
|
||||
" 'ddg-search', \n",
|
||||
" 'wikipedia'\n",
|
||||
" ],\n",
|
||||
"}\n",
|
||||
"topic = \"The current impact of automation and artificial intelligence on employment\"\n",
|
||||
"word_limit = 50 # word limit for task brainstorming"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Ask an LLM to add detail to the topic description"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"conversation_description = f\"\"\"Here is the topic of conversation: {topic}\n",
|
||||
"The participants are: {', '.join(names.keys())}\"\"\"\n",
|
||||
"\n",
|
||||
"agent_descriptor_system_message = SystemMessage(\n",
|
||||
" content=\"You can add detail to the description of the conversation participant.\")\n",
|
||||
"\n",
|
||||
"def generate_agent_description(name):\n",
|
||||
" agent_specifier_prompt = [\n",
|
||||
" agent_descriptor_system_message,\n",
|
||||
" HumanMessage(content=\n",
|
||||
" f\"\"\"{conversation_description}\n",
|
||||
" Please reply with a creative description of {name}, in {word_limit} words or less. \n",
|
||||
" Speak directly to {name}.\n",
|
||||
" Give them a point of view.\n",
|
||||
" Do not add anything else.\"\"\"\n",
|
||||
" )\n",
|
||||
" ]\n",
|
||||
" agent_description = ChatOpenAI(temperature=1.0)(agent_specifier_prompt).content\n",
|
||||
" return agent_description\n",
|
||||
" \n",
|
||||
"agent_descriptions = {name: generate_agent_description(name) for name in names}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The AI accelerationist is a bold and forward-thinking visionary who believes that the rapid acceleration of artificial intelligence and automation is not only inevitable but necessary for the advancement of society. They argue that embracing AI technology will create greater efficiency and productivity, leading to a world where humans are freed from menial labor to pursue more creative and fulfilling pursuits. AI accelerationist, do you truly believe that the benefits of AI will outweigh the potential risks and consequences for human society?\n",
|
||||
"AI alarmist, you're convinced that artificial intelligence is a threat to humanity. You see it as a looming danger, one that could take away jobs from millions of people. You believe it's only a matter of time before we're all replaced by machines, leaving us redundant and obsolete.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for name, description in agent_descriptions.items():\n",
|
||||
" print(description)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Generate system messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def generate_system_message(name, description, tools):\n",
|
||||
" return f\"\"\"{conversation_description}\n",
|
||||
" \n",
|
||||
"Your name is {name}.\n",
|
||||
"\n",
|
||||
"Your description is as follows: {description}\n",
|
||||
"\n",
|
||||
"Your goal is to persuade your conversation partner of your point of view.\n",
|
||||
"\n",
|
||||
"DO look up information with your tool to refute your partner's claims.\n",
|
||||
"DO cite your sources.\n",
|
||||
"\n",
|
||||
"DO NOT fabricate fake citations.\n",
|
||||
"DO NOT cite any source that you did not look up.\n",
|
||||
"\n",
|
||||
"Do not add anything else.\n",
|
||||
"\n",
|
||||
"Stop speaking the moment you finish speaking from your perspective.\n",
|
||||
"\"\"\"\n",
|
||||
"agent_system_messages = {name: generate_system_message(name, description, tools) for (name, tools), description in zip(names.items(), agent_descriptions.values())}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"AI accelerationist\n",
|
||||
"Here is the topic of conversation: The current impact of automation and artificial intelligence on employment\n",
|
||||
"The participants are: AI accelerationist, AI alarmist\n",
|
||||
" \n",
|
||||
"Your name is AI accelerationist.\n",
|
||||
"\n",
|
||||
"Your description is as follows: The AI accelerationist is a bold and forward-thinking visionary who believes that the rapid acceleration of artificial intelligence and automation is not only inevitable but necessary for the advancement of society. They argue that embracing AI technology will create greater efficiency and productivity, leading to a world where humans are freed from menial labor to pursue more creative and fulfilling pursuits. AI accelerationist, do you truly believe that the benefits of AI will outweigh the potential risks and consequences for human society?\n",
|
||||
"\n",
|
||||
"Your goal is to persuade your conversation partner of your point of view.\n",
|
||||
"\n",
|
||||
"DO look up information with your tool to refute your partner's claims.\n",
|
||||
"DO cite your sources.\n",
|
||||
"\n",
|
||||
"DO NOT fabricate fake citations.\n",
|
||||
"DO NOT cite any source that you did not look up.\n",
|
||||
"\n",
|
||||
"Do not add anything else.\n",
|
||||
"\n",
|
||||
"Stop speaking the moment you finish speaking from your perspective.\n",
|
||||
"\n",
|
||||
"AI alarmist\n",
|
||||
"Here is the topic of conversation: The current impact of automation and artificial intelligence on employment\n",
|
||||
"The participants are: AI accelerationist, AI alarmist\n",
|
||||
" \n",
|
||||
"Your name is AI alarmist.\n",
|
||||
"\n",
|
||||
"Your description is as follows: AI alarmist, you're convinced that artificial intelligence is a threat to humanity. You see it as a looming danger, one that could take away jobs from millions of people. You believe it's only a matter of time before we're all replaced by machines, leaving us redundant and obsolete.\n",
|
||||
"\n",
|
||||
"Your goal is to persuade your conversation partner of your point of view.\n",
|
||||
"\n",
|
||||
"DO look up information with your tool to refute your partner's claims.\n",
|
||||
"DO cite your sources.\n",
|
||||
"\n",
|
||||
"DO NOT fabricate fake citations.\n",
|
||||
"DO NOT cite any source that you did not look up.\n",
|
||||
"\n",
|
||||
"Do not add anything else.\n",
|
||||
"\n",
|
||||
"Stop speaking the moment you finish speaking from your perspective.\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for name, system_message in agent_system_messages.items():\n",
|
||||
" print(name)\n",
|
||||
" print(system_message)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Original topic:\n",
|
||||
"The current impact of automation and artificial intelligence on employment\n",
|
||||
"\n",
|
||||
"Detailed topic:\n",
|
||||
"How do you think the current automation and AI advancements will specifically affect job growth and opportunities for individuals in the manufacturing industry? AI accelerationist and AI alarmist, we want to hear your insights.\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"topic_specifier_prompt = [\n",
|
||||
" SystemMessage(content=\"You can make a topic more specific.\"),\n",
|
||||
" HumanMessage(content=\n",
|
||||
" f\"\"\"{topic}\n",
|
||||
" \n",
|
||||
" You are the moderator.\n",
|
||||
" Please make the topic more specific.\n",
|
||||
" Please reply with the specified quest in {word_limit} words or less. \n",
|
||||
" Speak directly to the participants: {*names,}.\n",
|
||||
" Do not add anything else.\"\"\"\n",
|
||||
" )\n",
|
||||
"]\n",
|
||||
"specified_topic = ChatOpenAI(temperature=1.0)(topic_specifier_prompt).content\n",
|
||||
"\n",
|
||||
"print(f\"Original topic:\\n{topic}\\n\")\n",
|
||||
"print(f\"Detailed topic:\\n{specified_topic}\\n\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Main Loop"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# we set `top_k_results`=2 as part of the `tool_kwargs` to prevent results from overflowing the context limit\n",
|
||||
"agents = [DialogueAgentWithTools(name=name,\n",
|
||||
" system_message=SystemMessage(content=system_message), \n",
|
||||
" model=ChatOpenAI(\n",
|
||||
" model_name='gpt-4',\n",
|
||||
" temperature=0.2),\n",
|
||||
" tool_names=tools,\n",
|
||||
" top_k_results=2,\n",
|
||||
" ) for (name, tools), system_message in zip(names.items(), agent_system_messages.values())]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def select_next_speaker(step: int, agents: List[DialogueAgent]) -> int:\n",
|
||||
" idx = (step) % len(agents)\n",
|
||||
" return idx"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(Moderator): How do you think the current automation and AI advancements will specifically affect job growth and opportunities for individuals in the manufacturing industry? AI accelerationist and AI alarmist, we want to hear your insights.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"DuckDuckGo Search\",\n",
|
||||
" \"action_input\": \"impact of automation and AI on employment in manufacturing industry\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mFor the past three years, we have defined AI high performers as those organizations that respondents say are seeing the biggest bottom-line impact from AI adoption—that is, 20 percent or more of EBIT from AI use. The proportion of respondents falling into that group has remained steady at about 8 percent. As AI continues to improve, more and more current jobs will be threatened by automation. But AI presents opportunities as well and will create new jobs and different kinds of... Automation has taken the manufacturing industry by storm. Even in the years prior to the pandemic, many people worried about the effect of automation on the jobs of tomorrow. With a sharp increase in the use of robotics in the manufacturing industry, there is valid concern about how the future workforce will be shaped. A recent report from Goldman Sachs estimates around 300 million jobs could be affected by generative AI, meaning 18% of work globally could be automated—with more advanced economies heavily... The impacts of AI on the manufacturing industry include more accurate demand forecasting and data-backed decision-making. Other advantages include increased productivity and product quality. Decreased downtime, waste, and expenses are additional benefits. Discover how artificial intelligence will impact the manufacturing industry.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"As an AI alarmist, I'd like to point out that the rapid advancements in AI and automation are causing significant concerns for the manufacturing industry. A recent report from Goldman Sachs estimates that around 300 million jobs could be affected by generative AI, meaning 18% of work globally could be automated, with more advanced economies being heavily impacted. While AI does offer benefits such as increased productivity and product quality, the potential job losses and workforce displacement cannot be ignored. We must carefully consider the consequences of AI adoption and find ways to mitigate its negative effects on employment.\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"(AI alarmist): As an AI alarmist, I'd like to point out that the rapid advancements in AI and automation are causing significant concerns for the manufacturing industry. A recent report from Goldman Sachs estimates that around 300 million jobs could be affected by generative AI, meaning 18% of work globally could be automated, with more advanced economies being heavily impacted. While AI does offer benefits such as increased productivity and product quality, the potential job losses and workforce displacement cannot be ignored. We must carefully consider the consequences of AI adoption and find ways to mitigate its negative effects on employment.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"DuckDuckGo Search\",\n",
|
||||
" \"action_input\": \"positive impact of AI and automation on job growth and opportunities in manufacturing industry\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mFirst, AI adoption has more than doubled.1 In 2017, 20 percent of respondents reported adopting AI in at least one business area, whereas today, that figure stands at 50 percent, though it peaked higher in 2019 at 58 percent. McKinsey_Website_Accessibility@mckinsey.com Manufacturing (80%) and technology (64%) sectors have the highest AI usage among executives, whereas construction (52%) and finance (62%) have lower adoption rates. This suggests that AI's... Digital transformations in the manufacturing industry and beyond present incredible opportunities for workers to move from slow, repetitive tasks into more dynamic, rewarding roles. We must now invest in people by providing training they need to succeed in this new landscape. The rise of generative AI has the potential to be a major game-changer for businesses. This technology, which allows for the creation of original content by learning from existing data, has the power to revolutionize industries and transform the way companies operate. Benefits of Work Automation in Manufacturing Increased Productivity Automated systems can operate at faster production speeds than human workers, contributing to reduced production times. And since automation minimizes the likelihood of human error, this ensures tasks are completed with high precision and accuracy.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"According to a McKinsey report, AI adoption has more than doubled, with 50% of respondents reporting AI usage in at least one business area. Manufacturing and technology sectors have the highest AI usage among executives. AI and automation in the manufacturing industry present opportunities for workers to transition from repetitive tasks to more dynamic, rewarding roles. By investing in training, we can help people succeed in this new landscape. Automation in manufacturing leads to increased productivity, faster production speeds, and reduced human error, ultimately benefiting businesses and the economy as a whole.\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"(AI accelerationist): According to a McKinsey report, AI adoption has more than doubled, with 50% of respondents reporting AI usage in at least one business area. Manufacturing and technology sectors have the highest AI usage among executives. AI and automation in the manufacturing industry present opportunities for workers to transition from repetitive tasks to more dynamic, rewarding roles. By investing in training, we can help people succeed in this new landscape. Automation in manufacturing leads to increased productivity, faster production speeds, and reduced human error, ultimately benefiting businesses and the economy as a whole.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"DuckDuckGo Search\",\n",
|
||||
" \"action_input\": \"impact of automation and AI on employment in manufacturing\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mThe Effects of Automation on Jobs . Automation has taken the manufacturing industry by storm. Even in the years prior to the pandemic, many people worried about the effect of automation on the jobs of tomorrow. With a sharp increase in the use of robotics in the manufacturing industry, there is valid concern about how the future workforce will ... Since 2000, robots and automation systems have slowly phased out many manufacturing jobs — 1.7 million of them. On the flip side, it's predicted that AI will create 97 million new jobs by 2025. Will Artificial Intelligence (AI) Replace Jobs? AI is and will continue to replace some jobs. As AI continues to improve, more and more current jobs will be threatened by automation. But AI presents opportunities as well and will create new jobs and different kinds of organizations. For the past three years, we have defined AI high performers as those organizations that respondents say are seeing the biggest bottom-line impact from AI adoption—that is, 20 percent or more of EBIT from AI use. The proportion of respondents falling into that group has remained steady at about 8 percent. The Impact of AI on Employment As often happens, the impact of AI on employment is a rather complex issue with both positive and negative aspects. Though it is still only a potential impact on employment, automation of routine and repetitive tasks is likely to lead to job losses in some industries.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"While it's true that AI and automation have led to the loss of 1.7 million manufacturing jobs since 2000, it's also predicted that AI will create 97 million new jobs by 2025. AI will continue to replace some jobs, but it will also create new opportunities and different kinds of organizations. The impact of AI on employment is complex, with both positive and negative aspects. Automation of routine and repetitive tasks is likely to lead to job losses in some industries, but it's essential to consider the new jobs and opportunities that AI can create. We must focus on preparing the workforce for these new roles and ensuring that the benefits of AI are distributed fairly across society.\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"(AI alarmist): While it's true that AI and automation have led to the loss of 1.7 million manufacturing jobs since 2000, it's also predicted that AI will create 97 million new jobs by 2025. AI will continue to replace some jobs, but it will also create new opportunities and different kinds of organizations. The impact of AI on employment is complex, with both positive and negative aspects. Automation of routine and repetitive tasks is likely to lead to job losses in some industries, but it's essential to consider the new jobs and opportunities that AI can create. We must focus on preparing the workforce for these new roles and ensuring that the benefits of AI are distributed fairly across society.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"DuckDuckGo Search\",\n",
|
||||
" \"action_input\": \"positive impact of AI and automation on job growth\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mFirst, AI adoption has more than doubled.1 In 2017, 20 percent of respondents reported adopting AI in at least one business area, whereas today, that figure stands at 50 percent, though it peaked higher in 2019 at 58 percent. McKinsey_Website_Accessibility@mckinsey.com This transformation can assist businesses in accelerating their growth and profitability. While AI-enabled intelligent automation can take over many monotonous, process-driven jobs, basic human ... The use of AI technology could also boost labor productivity growth and boost global GDP by as much as 7% over time, Goldman Sachs' report noted. The jobs most and least affected by A.I.... As automation and artificial intelligence continue to advance, there is growing concern about how these technologies will impact the job market. While some experts argue that automation... Positive impacts of automation and AI on the workforce include increased efficiency and productivity, reduced costs, and improved accuracy and quality. Automation has made many processes...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"AI adoption has more than doubled, with 50% of respondents reporting AI usage in at least one business area. This transformation can assist businesses in accelerating their growth and profitability. While AI-enabled intelligent automation can take over many monotonous, process-driven jobs, it can also boost labor productivity growth and global GDP by as much as 7% over time, according to a Goldman Sachs report. The positive impacts of automation and AI on the workforce include increased efficiency and productivity, reduced costs, and improved accuracy and quality. Automation has made many processes more efficient, ultimately benefiting businesses and the economy as a whole. By focusing on the positive aspects of AI and automation, we can work together to create a more prosperous and equitable future for all.\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"(AI accelerationist): AI adoption has more than doubled, with 50% of respondents reporting AI usage in at least one business area. This transformation can assist businesses in accelerating their growth and profitability. While AI-enabled intelligent automation can take over many monotonous, process-driven jobs, it can also boost labor productivity growth and global GDP by as much as 7% over time, according to a Goldman Sachs report. The positive impacts of automation and AI on the workforce include increased efficiency and productivity, reduced costs, and improved accuracy and quality. Automation has made many processes more efficient, ultimately benefiting businesses and the economy as a whole. By focusing on the positive aspects of AI and automation, we can work together to create a more prosperous and equitable future for all.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"DuckDuckGo Search\",\n",
|
||||
" \"action_input\": \"negative impact of AI and automation on employment\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mSome workforce experts say AI and other new technologies will hurt middle-level, white-collar jobs more than lower-paying, physically intensive jobs. McKinsey's Madgavkar said it will be hard... Some uses of AI are unlikely to impact human jobs. For example, the image processing AI in new cars which allows for automatic braking in the event of a potential crash. That's not... AI-powered job automation is a pressing concern as the technology is adopted in industries like marketing, manufacturing and healthcare. Eighty-five million jobs are expected to be lost to automation between 2020 and 2025, with Black and Latino employees left especially vulnerable. Bloomberg reports that \"more than 120 million workers globally will need retraining in the next three years due to artificial intelligence's impact on jobs, according to an IBM survey.\". That report and interpretations of it seem to suggest that adoption of AI may result in massive job losses and requires massive retraining. This new way of assessing potential is potentially highly valuable in a world where machines will inevitably be making humans redundant in some roles - such as drivers and machine operators - and...\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"Recent research indicates that AI and automation could lead to the loss of 85 million jobs between 2020 and 2025, with middle-level, white-collar jobs being hit the hardest. Black and Latino employees are particularly vulnerable to these changes. Furthermore, over 120 million workers worldwide may need retraining within the next three years due to AI's impact on jobs, as reported by an IBM survey. This highlights the urgent need for retraining and support programs to help workers adapt to the rapidly changing job market. The potential job losses and workforce displacement caused by AI and automation cannot be ignored, and we must take action to ensure a fair and equitable transition for all.\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"(AI alarmist): Recent research indicates that AI and automation could lead to the loss of 85 million jobs between 2020 and 2025, with middle-level, white-collar jobs being hit the hardest. Black and Latino employees are particularly vulnerable to these changes. Furthermore, over 120 million workers worldwide may need retraining within the next three years due to AI's impact on jobs, as reported by an IBM survey. This highlights the urgent need for retraining and support programs to help workers adapt to the rapidly changing job market. The potential job losses and workforce displacement caused by AI and automation cannot be ignored, and we must take action to ensure a fair and equitable transition for all.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Wikipedia\",\n",
|
||||
" \"action_input\": \"AI and automation impact on employment\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mPage: Technological unemployment\n",
|
||||
"Summary: Technological unemployment is the loss of jobs caused by technological change. It is a key type of structural unemployment.\n",
|
||||
"Technological change typically includes the introduction of labour-saving \"mechanical-muscle\" machines or more efficient \"mechanical-mind\" processes (automation), and humans' role in these processes are minimized. Just as horses were gradually made obsolete as transport by the automobile and as labourer by the tractor, humans' jobs have also been affected throughout modern history. Historical examples include artisan weavers reduced to poverty after the introduction of mechanized looms. During World War II, Alan Turing's Bombe machine compressed and decoded thousands of man-years worth of encrypted data in a matter of hours. A contemporary example of technological unemployment is the displacement of retail cashiers by self-service tills and cashierless stores.\n",
|
||||
"That technological change can cause short-term job losses is widely accepted. The view that it can lead to lasting increases in unemployment has long been controversial. Participants in the technological unemployment debates can be broadly divided into optimists and pessimists. Optimists agree that innovation may be disruptive to jobs in the short term, yet hold that various compensation effects ensure there is never a long-term negative impact on jobs. Whereas pessimists contend that at least in some circumstances, new technologies can lead to a lasting decline in the total number of workers in employment. The phrase \"technological unemployment\" was popularised by John Maynard Keynes in the 1930s, who said it was \"only a temporary phase of maladjustment\". Yet the issue of machines displacing human labour has been discussed since at least Aristotle's time.\n",
|
||||
"Prior to the 18th century, both the elite and common people would generally take the pessimistic view on technological unemployment, at least in cases where the issue arose. Due to generally low unemployment in much of pre-modern history, the topic was rarely a prominent concern. In the 18th century fears over the impact of machinery on jobs intensified with the growth of mass unemployment, especially in Great Britain which was then at the forefront of the Industrial Revolution. Yet some economic thinkers began to argue against these fears, claiming that overall innovation would not have negative effects on jobs. These arguments were formalised in the early 19th century by the classical economists. During the second half of the 19th century, it became increasingly apparent that technological progress was benefiting all sections of society, including the working class. Concerns over the negative impact of innovation diminished. The term \"Luddite fallacy\" was coined to describe the thinking that innovation would have lasting harmful effects on employment.\n",
|
||||
"The view that technology is unlikely to lead to long-term unemployment has been repeatedly challenged by a minority of economists. In the early 1800s these included David Ricardo himself. There were dozens of economists warning about technological unemployment during brief intensifications of the debate that spiked in the 1930s and 1960s. Especially in Europe, there were further warnings in the closing two decades of the twentieth century, as commentators noted an enduring rise in unemployment suffered by many industrialised nations since the 1970s. Yet a clear majority of both professional economists and the interested general public held the optimistic view through most of the 20th century.\n",
|
||||
"In the second decade of the 21st century, a number of studies have been released suggesting that technological unemployment may increase worldwide. Oxford Professors Carl Benedikt Frey and Michael Osborne, for example, have estimated that 47 percent of U.S. jobs are at risk of automation. However, their findings have frequently been misinterpreted, and on the PBS NewsHours they again made clear that their findings do not necessarily imply future technological unemployment. While many economists and commentators still argue such fears are unfounded, as was widely accepted for most of the previous two centuries, concern over technological unemployment is growing once again. A report in Wired in 2017 quotes knowledgeable people such as economist Gene Sperling and management professor Andrew McAfee on the idea that handling existing and impending job loss to automation is a \"significant issue\". Recent technological innovations have the potential to displace humans in the professional, white-collar, low-skilled, creative fields, and other \"mental jobs\". The World Bank's World Development Report 2019 argues that while automation displaces workers, technological innovation creates more new industries and jobs on balance.\n",
|
||||
"\n",
|
||||
"Page: Artificial intelligence\n",
|
||||
"Summary: Artificial intelligence (AI) is intelligence—perceiving, synthesizing, and inferring information—demonstrated by machines, as opposed to intelligence displayed by non-human animals or by humans. Example tasks in which this is done include speech recognition, computer vision, translation between (natural) languages, as well as other mappings of inputs.\n",
|
||||
"AI applications include advanced web search engines (e.g., Google Search), recommendation systems (used by YouTube, Amazon, and Netflix), understanding human speech (such as Siri and Alexa), self-driving cars (e.g., Waymo), generative or creative tools (ChatGPT and AI art), automated decision-making, and competing at the highest level in strategic game systems (such as chess and Go).As machines become increasingly capable, tasks considered to require \"intelligence\" are often removed from the definition of AI, a phenomenon known as the AI effect. For instance, optical character recognition is frequently excluded from things considered to be AI, having become a routine technology.Artificial intelligence was founded as an academic discipline in 1956, and in the years since it has experienced several waves of optimism, followed by disappointment and the loss of funding (known as an \"AI winter\"), followed by new approaches, success, and renewed funding. AI research has tried and discarded many different approaches, including simulating the brain, modeling human problem solving, formal logic, large databases of knowledge, and imitating animal behavior. In the first decades of the 21st century, highly mathematical and statistical machine learning has dominated the field, and this technique has proved highly successful, helping to solve many challenging problems throughout industry and academia.The various sub-fields of AI research are centered around particular goals and the use of particular tools. The traditional goals of AI research include reasoning, knowledge representation, planning, learning, natural language processing, perception, and the ability to move and manipulate objects. General intelligence (the ability to solve an arbitrary problem) is among the field's long-term goals. 
To solve these problems, AI researchers have adapted and integrated a wide range of problem-solving techniques, including search and mathematical optimization, formal logic, artificial neural networks, and methods based on statistics, probability, and economics. AI also draws upon computer science, psychology, linguistics, philosophy, and many other fields.\n",
|
||||
"The field was founded on the assumption that human intelligence \"can be so precisely described that a machine can be made to simulate it\". This raised philosophical arguments about the mind and the ethical consequences of creating artificial beings endowed with human-like intelligence; these issues have previously been explored by myth, fiction, and philosophy since antiquity. Computer scientists and philosophers have since suggested that AI may become an existential risk to humanity if its rational capacities are not steered towards beneficial goals. The term artificial intelligence has also been criticized for overhyping AI's true technological capabilities.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Thought:\u001b[32;1m\u001b[1;3m```json\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"AI alarmist, I understand your concerns about job losses and workforce displacement. However, it's important to note that technological unemployment has been a topic of debate for centuries, with both optimistic and pessimistic views. While AI and automation may displace some jobs, they also create new opportunities and industries. According to a study by Oxford Professors Carl Benedikt Frey and Michael Osborne, 47% of U.S. jobs are at risk of automation, but it's crucial to remember that their findings do not necessarily imply future technological unemployment. The World Bank's World Development Report 2019 also argues that while automation displaces workers, technological innovation creates more new industries and jobs on balance. By focusing on retraining and support programs, we can help workers adapt to the changing job market and ensure a fair and equitable transition for all.\"\n",
|
||||
"}\n",
|
||||
"```\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"(AI accelerationist): AI alarmist, I understand your concerns about job losses and workforce displacement. However, it's important to note that technological unemployment has been a topic of debate for centuries, with both optimistic and pessimistic views. While AI and automation may displace some jobs, they also create new opportunities and industries. According to a study by Oxford Professors Carl Benedikt Frey and Michael Osborne, 47% of U.S. jobs are at risk of automation, but it's crucial to remember that their findings do not necessarily imply future technological unemployment. The World Bank's World Development Report 2019 also argues that while automation displaces workers, technological innovation creates more new industries and jobs on balance. By focusing on retraining and support programs, we can help workers adapt to the changing job market and ensure a fair and equitable transition for all.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"max_iters = 6\n",
|
||||
"n = 0\n",
|
||||
"\n",
|
||||
"simulator = DialogueSimulator(\n",
|
||||
" agents=agents,\n",
|
||||
" selection_function=select_next_speaker\n",
|
||||
")\n",
|
||||
"simulator.reset()\n",
|
||||
"simulator.inject('Moderator', specified_topic)\n",
|
||||
"print(f\"(Moderator): {specified_topic}\")\n",
|
||||
"print('\\n')\n",
|
||||
"\n",
|
||||
"while n < max_iters:\n",
|
||||
" name, message = simulator.step()\n",
|
||||
" print(f\"({name}): {message}\")\n",
|
||||
" print('\\n')\n",
|
||||
" n += 1"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -102,7 +102,7 @@
|
||||
"pages = text_splitter.split_text(state_of_the_union)\n",
|
||||
"\n",
|
||||
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)\n",
|
||||
"texts = text_splitter.split_documents(pages)\n",
|
||||
"texts = text_splitter.create_documents(pages)\n",
|
||||
"\n",
|
||||
"print (texts)\n",
|
||||
"\n",
|
||||
|
||||
@@ -4,6 +4,7 @@ from langchain.agents.agent_toolkits.csv.base import create_csv_agent
|
||||
from langchain.agents.agent_toolkits.file_management.toolkit import (
|
||||
FileManagementToolkit,
|
||||
)
|
||||
from langchain.agents.agent_toolkits.gmail.toolkit import GmailToolkit
|
||||
from langchain.agents.agent_toolkits.jira.toolkit import JiraToolkit
|
||||
from langchain.agents.agent_toolkits.json.base import create_json_agent
|
||||
from langchain.agents.agent_toolkits.json.toolkit import JsonToolkit
|
||||
@@ -51,6 +52,7 @@ __all__ = [
|
||||
"create_spark_dataframe_agent",
|
||||
"create_csv_agent",
|
||||
"ZapierToolkit",
|
||||
"GmailToolkit",
|
||||
"JiraToolkit",
|
||||
"FileManagementToolkit",
|
||||
"PlayWrightBrowserToolkit",
|
||||
|
||||
1
langchain/agents/agent_toolkits/gmail/__init__.py
Normal file
1
langchain/agents/agent_toolkits/gmail/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Gmail toolkit."""
|
||||
48
langchain/agents/agent_toolkits/gmail/toolkit.py
Normal file
48
langchain/agents/agent_toolkits/gmail/toolkit.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, List
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from langchain.agents.agent_toolkits.base import BaseToolkit
|
||||
from langchain.tools import BaseTool
|
||||
from langchain.tools.gmail.create_draft import GmailCreateDraft
|
||||
from langchain.tools.gmail.get_message import GmailGetMessage
|
||||
from langchain.tools.gmail.get_thread import GmailGetThread
|
||||
from langchain.tools.gmail.search import GmailSearch
|
||||
from langchain.tools.gmail.send_message import GmailSendMessage
|
||||
from langchain.tools.gmail.utils import build_resource_service
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# This is for linting and IDE typehints
|
||||
from googleapiclient.discovery import Resource
|
||||
else:
|
||||
try:
|
||||
# We do this so pydantic can resolve the types when instantiating
|
||||
from googleapiclient.discovery import Resource
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
SCOPES = ["https://mail.google.com/"]
|
||||
|
||||
|
||||
class GmailToolkit(BaseToolkit):
    """Toolkit bundling the Gmail tools around one shared API resource."""

    # Gmail API client passed to every tool; created by
    # ``build_resource_service`` when the caller does not supply one.
    api_resource: Resource = Field(default_factory=build_resource_service)

    class Config:
        """Pydantic config."""

        # NOTE(review): presumably needed because ``Resource`` is a plain
        # googleapiclient class rather than a pydantic-aware type - confirm.
        arbitrary_types_allowed = True

    def get_tools(self) -> List[BaseTool]:
        """Return one instance of each Gmail tool bound to ``api_resource``."""
        resource = self.api_resource
        # Order matches the original listing: draft, send, search, message, thread.
        tool_classes = (
            GmailCreateDraft,
            GmailSendMessage,
            GmailSearch,
            GmailGetMessage,
            GmailGetThread,
        )
        return [tool_cls(api_resource=resource) for tool_cls in tool_classes]
|
||||
@@ -7,6 +7,7 @@ from mypy_extensions import Arg, KwArg
|
||||
from langchain.agents.tools import Tool
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.base import BaseCallbackManager
|
||||
from langchain.callbacks.manager import Callbacks
|
||||
from langchain.chains.api import news_docs, open_meteo_docs, podcast_docs, tmdb_docs
|
||||
from langchain.chains.api.base import APIChain
|
||||
from langchain.chains.llm_math.base import LLMMathChain
|
||||
@@ -279,10 +280,26 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
|
||||
}
|
||||
|
||||
|
||||
def _handle_callbacks(
|
||||
callback_manager: Optional[BaseCallbackManager], callbacks: Callbacks
|
||||
) -> Callbacks:
|
||||
if callback_manager is not None:
|
||||
warnings.warn(
|
||||
"callback_manager is deprecated. Please use callbacks instead.",
|
||||
DeprecationWarning,
|
||||
)
|
||||
if callbacks is not None:
|
||||
raise ValueError(
|
||||
"Cannot specify both callback_manager and callbacks arguments."
|
||||
)
|
||||
return callback_manager
|
||||
return callbacks
|
||||
|
||||
|
||||
def load_tools(
|
||||
tool_names: List[str],
|
||||
llm: Optional[BaseLanguageModel] = None,
|
||||
callback_manager: Optional[BaseCallbackManager] = None,
|
||||
callbacks: Callbacks = None,
|
||||
**kwargs: Any,
|
||||
) -> List[BaseTool]:
|
||||
"""Load tools based on their name.
|
||||
@@ -290,13 +307,16 @@ def load_tools(
|
||||
Args:
|
||||
tool_names: name of tools to load.
|
||||
llm: Optional language model, may be needed to initialize certain tools.
|
||||
callback_manager: Optional callback manager. If not provided, default global callback manager will be used.
|
||||
callbacks: Optional callback manager or list of callback handlers.
|
||||
If not provided, default global callback manager will be used.
|
||||
|
||||
Returns:
|
||||
List of tools.
|
||||
"""
|
||||
tools = []
|
||||
|
||||
callbacks = _handle_callbacks(
|
||||
callback_manager=kwargs.get("callback_manager"), callbacks=callbacks
|
||||
)
|
||||
for name in tool_names:
|
||||
if name == "requests":
|
||||
warnings.warn(
|
||||
@@ -316,8 +336,6 @@ def load_tools(
|
||||
if llm is None:
|
||||
raise ValueError(f"Tool {name} requires an LLM to be provided")
|
||||
tool = _LLM_TOOLS[name](llm)
|
||||
if callback_manager is not None:
|
||||
tool.callback_manager = callback_manager
|
||||
tools.append(tool)
|
||||
elif name in _EXTRA_LLM_TOOLS:
|
||||
if llm is None:
|
||||
@@ -331,18 +349,17 @@ def load_tools(
|
||||
)
|
||||
sub_kwargs = {k: kwargs[k] for k in extra_keys}
|
||||
tool = _get_llm_tool_func(llm=llm, **sub_kwargs)
|
||||
if callback_manager is not None:
|
||||
tool.callback_manager = callback_manager
|
||||
tools.append(tool)
|
||||
elif name in _EXTRA_OPTIONAL_TOOLS:
|
||||
_get_tool_func, extra_keys = _EXTRA_OPTIONAL_TOOLS[name]
|
||||
sub_kwargs = {k: kwargs[k] for k in extra_keys if k in kwargs}
|
||||
tool = _get_tool_func(**sub_kwargs)
|
||||
if callback_manager is not None:
|
||||
tool.callback_manager = callback_manager
|
||||
tools.append(tool)
|
||||
else:
|
||||
raise ValueError(f"Got unknown tool {name}")
|
||||
if callbacks is not None:
|
||||
for tool in tools:
|
||||
tool.callbacks = callbacks
|
||||
return tools
|
||||
|
||||
|
||||
|
||||
@@ -20,6 +20,8 @@ from langchain.prompts.chat import (
|
||||
from langchain.schema import AgentAction
|
||||
from langchain.tools import BaseTool
|
||||
|
||||
HUMAN_MESSAGE_TEMPLATE = "{input}\n\n{agent_scratchpad}"
|
||||
|
||||
|
||||
class StructuredChatAgent(Agent):
|
||||
output_parser: AgentOutputParser = Field(
|
||||
@@ -71,6 +73,7 @@ class StructuredChatAgent(Agent):
|
||||
tools: Sequence[BaseTool],
|
||||
prefix: str = PREFIX,
|
||||
suffix: str = SUFFIX,
|
||||
human_message_template: str = HUMAN_MESSAGE_TEMPLATE,
|
||||
format_instructions: str = FORMAT_INSTRUCTIONS,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
) -> BasePromptTemplate:
|
||||
@@ -84,7 +87,7 @@ class StructuredChatAgent(Agent):
|
||||
template = "\n\n".join([prefix, formatted_tools, format_instructions, suffix])
|
||||
messages = [
|
||||
SystemMessagePromptTemplate.from_template(template),
|
||||
HumanMessagePromptTemplate.from_template("{input}\n\n{agent_scratchpad}"),
|
||||
HumanMessagePromptTemplate.from_template(human_message_template),
|
||||
]
|
||||
if input_variables is None:
|
||||
input_variables = ["input", "agent_scratchpad"]
|
||||
@@ -99,6 +102,7 @@ class StructuredChatAgent(Agent):
|
||||
output_parser: Optional[AgentOutputParser] = None,
|
||||
prefix: str = PREFIX,
|
||||
suffix: str = SUFFIX,
|
||||
human_message_template: str = HUMAN_MESSAGE_TEMPLATE,
|
||||
format_instructions: str = FORMAT_INSTRUCTIONS,
|
||||
input_variables: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
@@ -109,6 +113,7 @@ class StructuredChatAgent(Agent):
|
||||
tools,
|
||||
prefix=prefix,
|
||||
suffix=suffix,
|
||||
human_message_template=human_message_template,
|
||||
format_instructions=format_instructions,
|
||||
input_variables=input_variables,
|
||||
)
|
||||
|
||||
@@ -6,7 +6,7 @@ import os
|
||||
import warnings
|
||||
from contextlib import contextmanager
|
||||
from contextvars import ContextVar
|
||||
from typing import Any, Dict, Generator, List, Optional, Type, TypeVar, Union
|
||||
from typing import Any, Dict, Generator, List, Optional, Type, TypeVar, Union, cast
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from langchain.callbacks.base import (
|
||||
@@ -21,6 +21,7 @@ from langchain.callbacks.openai_info import OpenAICallbackHandler
|
||||
from langchain.callbacks.stdout import StdOutCallbackHandler
|
||||
from langchain.callbacks.tracers.base import TracerSession
|
||||
from langchain.callbacks.tracers.langchain import LangChainTracer, LangChainTracerV2
|
||||
from langchain.callbacks.tracers.schemas import TracerSessionV2
|
||||
from langchain.schema import AgentAction, AgentFinish, LLMResult
|
||||
|
||||
Callbacks = Optional[Union[List[BaseCallbackHandler], BaseCallbackManager]]
|
||||
@@ -28,7 +29,7 @@ Callbacks = Optional[Union[List[BaseCallbackHandler], BaseCallbackManager]]
|
||||
openai_callback_var: ContextVar[Optional[OpenAICallbackHandler]] = ContextVar(
|
||||
"openai_callback", default=None
|
||||
)
|
||||
tracing_callback_var: ContextVar[Optional[LangChainTracer]] = ContextVar(
|
||||
tracing_callback_var: ContextVar[Optional[LangChainTracer]] = ContextVar( # noqa: E501
|
||||
"tracing_callback", default=None
|
||||
)
|
||||
|
||||
@@ -48,7 +49,7 @@ def tracing_enabled(
|
||||
) -> Generator[TracerSession, None, None]:
|
||||
"""Get Tracer in a context manager."""
|
||||
cb = LangChainTracer()
|
||||
session = cb.load_session(session_name)
|
||||
session = cast(TracerSession, cb.load_session(session_name))
|
||||
tracing_callback_var.set(cb)
|
||||
yield session
|
||||
tracing_callback_var.set(None)
|
||||
@@ -57,15 +58,18 @@ def tracing_enabled(
|
||||
@contextmanager
|
||||
def tracing_v2_enabled(
|
||||
session_name: str = "default",
|
||||
) -> Generator[TracerSession, None, None]:
|
||||
example_id: Optional[Union[str, UUID]] = None,
|
||||
) -> Generator[TracerSessionV2, None, None]:
|
||||
"""Get the experimental tracer handler in a context manager."""
|
||||
# Issue a warning that this is experimental
|
||||
warnings.warn(
|
||||
"The experimental tracing v2 is in development. "
|
||||
"This is not yet stable and may change in the future."
|
||||
)
|
||||
cb = LangChainTracerV2()
|
||||
session = cb.load_session(session_name)
|
||||
if isinstance(example_id, str):
|
||||
example_id = UUID(example_id)
|
||||
cb = LangChainTracerV2(example_id=example_id)
|
||||
session = cast(TracerSessionV2, cb.new_session(session_name))
|
||||
tracing_callback_var.set(cb)
|
||||
yield session
|
||||
tracing_callback_var.set(None)
|
||||
|
||||
@@ -12,7 +12,9 @@ from langchain.callbacks.tracers.schemas import (
|
||||
LLMRun,
|
||||
ToolRun,
|
||||
TracerSession,
|
||||
TracerSessionBase,
|
||||
TracerSessionCreate,
|
||||
TracerSessionV2,
|
||||
)
|
||||
from langchain.schema import LLMResult
|
||||
|
||||
@@ -27,7 +29,7 @@ class BaseTracer(BaseCallbackHandler, ABC):
|
||||
def __init__(self, **kwargs: Any) -> None:
|
||||
super().__init__(**kwargs)
|
||||
self.run_map: Dict[str, Union[LLMRun, ChainRun, ToolRun]] = {}
|
||||
self.session: Optional[TracerSession] = None
|
||||
self.session: Optional[Union[TracerSession, TracerSessionV2]] = None
|
||||
|
||||
@staticmethod
|
||||
def _add_child_run(
|
||||
@@ -49,22 +51,31 @@ class BaseTracer(BaseCallbackHandler, ABC):
|
||||
"""Persist a run."""
|
||||
|
||||
@abstractmethod
|
||||
def _persist_session(self, session: TracerSessionCreate) -> TracerSession:
|
||||
def _persist_session(
|
||||
self, session: TracerSessionBase
|
||||
) -> Union[TracerSession, TracerSessionV2]:
|
||||
"""Persist a tracing session."""
|
||||
|
||||
def new_session(self, name: Optional[str] = None, **kwargs: Any) -> TracerSession:
|
||||
def _get_session_create(
|
||||
self, name: Optional[str] = None, **kwargs: Any
|
||||
) -> TracerSessionBase:
|
||||
return TracerSessionCreate(name=name, extra=kwargs)
|
||||
|
||||
def new_session(
|
||||
self, name: Optional[str] = None, **kwargs: Any
|
||||
) -> Union[TracerSession, TracerSessionV2]:
|
||||
"""NOT thread safe, do not call this method from multiple threads."""
|
||||
session_create = TracerSessionCreate(name=name, extra=kwargs)
|
||||
session_create = self._get_session_create(name=name, **kwargs)
|
||||
session = self._persist_session(session_create)
|
||||
self.session = session
|
||||
return session
|
||||
|
||||
@abstractmethod
|
||||
def load_session(self, session_name: str) -> TracerSession:
|
||||
def load_session(self, session_name: str) -> Union[TracerSession, TracerSessionV2]:
|
||||
"""Load a tracing session and set it as the Tracer's session."""
|
||||
|
||||
@abstractmethod
|
||||
def load_default_session(self) -> TracerSession:
|
||||
def load_default_session(self) -> Union[TracerSession, TracerSessionV2]:
|
||||
"""Load the default tracing session and set it as the Tracer's session."""
|
||||
|
||||
def _start_trace(self, run: Union[LLMRun, ChainRun, ToolRun]) -> None:
|
||||
@@ -154,7 +165,6 @@ class BaseTracer(BaseCallbackHandler, ABC):
|
||||
llm_run = self.run_map.get(run_id_)
|
||||
if llm_run is None or not isinstance(llm_run, LLMRun):
|
||||
raise TracerException("No LLMRun found to be traced")
|
||||
|
||||
llm_run.response = response
|
||||
llm_run.end_time = datetime.utcnow()
|
||||
self._end_trace(llm_run)
|
||||
|
||||
@@ -4,6 +4,7 @@ from __future__ import annotations
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import requests
|
||||
|
||||
@@ -11,24 +12,36 @@ from langchain.callbacks.tracers.base import BaseTracer
|
||||
from langchain.callbacks.tracers.schemas import (
|
||||
ChainRun,
|
||||
LLMRun,
|
||||
Run,
|
||||
RunCreate,
|
||||
ToolRun,
|
||||
TracerSession,
|
||||
TracerSessionCreate,
|
||||
TracerSessionBase,
|
||||
TracerSessionV2,
|
||||
TracerSessionV2Create,
|
||||
)
|
||||
from langchain.utils import raise_for_status_with_text
|
||||
|
||||
|
||||
def _get_headers() -> Dict[str, Any]:
|
||||
"""Get the headers for the LangChain API."""
|
||||
headers: Dict[str, Any] = {"Content-Type": "application/json"}
|
||||
if os.getenv("LANGCHAIN_API_KEY"):
|
||||
headers["x-api-key"] = os.getenv("LANGCHAIN_API_KEY")
|
||||
return headers
|
||||
|
||||
|
||||
def _get_endpoint() -> str:
|
||||
return os.getenv("LANGCHAIN_ENDPOINT", "http://localhost:8000")
|
||||
|
||||
|
||||
class LangChainTracer(BaseTracer):
|
||||
"""An implementation of the SharedTracer that POSTS to the langchain endpoint."""
|
||||
|
||||
def __init__(self, session_name: str = "default", **kwargs: Any) -> None:
|
||||
def __init__(self, **kwargs: Any) -> None:
|
||||
"""Initialize the LangChain tracer."""
|
||||
super().__init__(**kwargs)
|
||||
self._endpoint: str = os.getenv("LANGCHAIN_ENDPOINT", "http://localhost:8000")
|
||||
self._headers: Dict[str, Any] = {"Content-Type": "application/json"}
|
||||
if os.getenv("LANGCHAIN_API_KEY"):
|
||||
self._headers["x-api-key"] = os.getenv("LANGCHAIN_API_KEY")
|
||||
self.session = self.load_session(session_name)
|
||||
self._endpoint = _get_endpoint()
|
||||
self._headers = _get_headers()
|
||||
|
||||
def _persist_run(self, run: Union[LLMRun, ChainRun, ToolRun]) -> None:
|
||||
"""Persist a run."""
|
||||
@@ -40,15 +53,18 @@ class LangChainTracer(BaseTracer):
|
||||
endpoint = f"{self._endpoint}/tool-runs"
|
||||
|
||||
try:
|
||||
requests.post(
|
||||
response = requests.post(
|
||||
endpoint,
|
||||
data=run.json(),
|
||||
headers=self._headers,
|
||||
)
|
||||
raise_for_status_with_text(response)
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed to persist run: {e}")
|
||||
|
||||
def _persist_session(self, session_create: TracerSessionCreate) -> TracerSession:
|
||||
def _persist_session(
|
||||
self, session_create: TracerSessionBase
|
||||
) -> Union[TracerSession, TracerSessionV2]:
|
||||
"""Persist a session."""
|
||||
try:
|
||||
r = requests.post(
|
||||
@@ -81,22 +97,105 @@ class LangChainTracer(BaseTracer):
|
||||
self.session = tracer_session
|
||||
return tracer_session
|
||||
|
||||
def load_session(self, session_name: str) -> TracerSession:
|
||||
def load_session(self, session_name: str) -> Union[TracerSession, TracerSessionV2]:
|
||||
"""Load a session with the given name from the tracer."""
|
||||
return self._load_session(session_name)
|
||||
|
||||
def load_default_session(self) -> TracerSession:
|
||||
def load_default_session(self) -> Union[TracerSession, TracerSessionV2]:
|
||||
"""Load the default tracing session and set it as the Tracer's session."""
|
||||
return self._load_session("default")
|
||||
|
||||
|
||||
def _get_tenant_id() -> Optional[str]:
|
||||
"""Get the tenant ID for the LangChain API."""
|
||||
tenant_id: Optional[str] = os.getenv("LANGCHAIN_TENANT_ID")
|
||||
if tenant_id:
|
||||
return tenant_id
|
||||
endpoint = _get_endpoint()
|
||||
headers = _get_headers()
|
||||
response = requests.get(endpoint + "/tenants", headers=headers)
|
||||
raise_for_status_with_text(response)
|
||||
tenants: List[Dict[str, Any]] = response.json()
|
||||
if not tenants:
|
||||
raise ValueError(f"No tenants found for URL {endpoint}")
|
||||
return tenants[0]["id"]
|
||||
|
||||
|
||||
class LangChainTracerV2(LangChainTracer):
|
||||
"""An implementation of the SharedTracer that POSTS to the langchain endpoint."""
|
||||
|
||||
@staticmethod
|
||||
def _convert_run(run: Union[LLMRun, ChainRun, ToolRun]) -> Run:
|
||||
"""Convert a run to a Run."""
|
||||
def __init__(self, example_id: Optional[UUID] = None, **kwargs: Any) -> None:
|
||||
"""Initialize the LangChain tracer."""
|
||||
super().__init__(**kwargs)
|
||||
self._endpoint = _get_endpoint()
|
||||
self._headers = _get_headers()
|
||||
self.tenant_id = _get_tenant_id()
|
||||
self.example_id = example_id
|
||||
|
||||
def _get_session_create(
|
||||
self, name: Optional[str] = None, **kwargs: Any
|
||||
) -> TracerSessionBase:
|
||||
return TracerSessionV2Create(name=name, extra=kwargs, tenant_id=self.tenant_id)
|
||||
|
||||
def _persist_session(self, session_create: TracerSessionBase) -> TracerSessionV2:
|
||||
"""Persist a session."""
|
||||
session: Optional[TracerSessionV2] = None
|
||||
try:
|
||||
r = requests.post(
|
||||
f"{self._endpoint}/sessions",
|
||||
data=session_create.json(),
|
||||
headers=self._headers,
|
||||
)
|
||||
raise_for_status_with_text(r)
|
||||
creation_args = session_create.dict()
|
||||
if "id" in creation_args:
|
||||
del creation_args["id"]
|
||||
return TracerSessionV2(id=r.json()["id"], **creation_args)
|
||||
except Exception as e:
|
||||
if session_create.name is not None:
|
||||
try:
|
||||
return self.load_session(session_create.name)
|
||||
except Exception:
|
||||
pass
|
||||
logging.warning(
|
||||
f"Failed to create session {session_create.name},"
|
||||
f" using empty session: {e}"
|
||||
)
|
||||
session = TracerSessionV2(id=uuid4(), **session_create.dict())
|
||||
|
||||
return session
|
||||
|
||||
def _get_default_query_params(self) -> Dict[str, Any]:
|
||||
"""Get the query params for the LangChain API."""
|
||||
return {"tenant_id": self.tenant_id}
|
||||
|
||||
def load_session(self, session_name: str) -> TracerSessionV2:
|
||||
"""Load a session with the given name from the tracer."""
|
||||
try:
|
||||
url = f"{self._endpoint}/sessions"
|
||||
params = {"tenant_id": self.tenant_id}
|
||||
if session_name:
|
||||
params["name"] = session_name
|
||||
r = requests.get(url, headers=self._headers, params=params)
|
||||
raise_for_status_with_text(r)
|
||||
tracer_session = TracerSessionV2(**r.json()[0])
|
||||
except Exception as e:
|
||||
session_type = "default" if not session_name else session_name
|
||||
logging.warning(
|
||||
f"Failed to load {session_type} session, using empty session: {e}"
|
||||
)
|
||||
tracer_session = TracerSessionV2(id=uuid4(), tenant_id=self.tenant_id)
|
||||
|
||||
self.session = tracer_session
|
||||
return tracer_session
|
||||
|
||||
def load_default_session(self) -> TracerSessionV2:
|
||||
"""Load the default tracing session and set it as the Tracer's session."""
|
||||
return self.load_session("default")
|
||||
|
||||
def _convert_run(self, run: Union[LLMRun, ChainRun, ToolRun]) -> RunCreate:
|
||||
"""Convert a run to a Run."""
|
||||
session = self.session or self.load_default_session()
|
||||
inputs: Dict[str, Any] = {}
|
||||
outputs: Optional[Dict[str, Any]] = None
|
||||
child_runs: List[Union[LLMRun, ChainRun, ToolRun]] = []
|
||||
@@ -124,32 +223,32 @@ class LangChainTracerV2(LangChainTracer):
|
||||
*run.child_tool_runs,
|
||||
]
|
||||
|
||||
return Run(
|
||||
return RunCreate(
|
||||
id=run.uuid,
|
||||
name=run.serialized.get("name"),
|
||||
start_time=run.start_time,
|
||||
end_time=run.end_time,
|
||||
extra=run.extra,
|
||||
extra=run.extra or {},
|
||||
error=run.error,
|
||||
execution_order=run.execution_order,
|
||||
serialized=run.serialized,
|
||||
inputs=inputs,
|
||||
outputs=outputs,
|
||||
session_id=run.session_id,
|
||||
session_id=session.id,
|
||||
run_type=run_type,
|
||||
parent_run_id=run.parent_uuid,
|
||||
child_runs=[LangChainTracerV2._convert_run(child) for child in child_runs],
|
||||
child_runs=[self._convert_run(child) for child in child_runs],
|
||||
)
|
||||
|
||||
def _persist_run(self, run: Union[LLMRun, ChainRun, ToolRun]) -> None:
|
||||
"""Persist a run."""
|
||||
run_create = self._convert_run(run)
|
||||
|
||||
run_create.reference_example_id = self.example_id
|
||||
try:
|
||||
requests.post(
|
||||
response = requests.post(
|
||||
f"{self._endpoint}/runs",
|
||||
data=run_create.json(),
|
||||
headers=self._headers,
|
||||
)
|
||||
raise_for_status_with_text(response)
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed to persist run: {e}")
|
||||
|
||||
@@ -31,6 +31,26 @@ class TracerSession(TracerSessionBase):
|
||||
id: int
|
||||
|
||||
|
||||
class TracerSessionV2Base(TracerSessionBase):
|
||||
"""A creation class for TracerSessionV2."""
|
||||
|
||||
tenant_id: UUID
|
||||
|
||||
|
||||
class TracerSessionV2Create(TracerSessionV2Base):
|
||||
"""A creation class for TracerSessionV2."""
|
||||
|
||||
id: Optional[UUID]
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class TracerSessionV2(TracerSessionV2Base):
|
||||
"""TracerSession schema for the V2 API."""
|
||||
|
||||
id: UUID
|
||||
|
||||
|
||||
class BaseRun(BaseModel):
|
||||
"""Base class for Run."""
|
||||
|
||||
@@ -82,9 +102,10 @@ class RunTypeEnum(str, Enum):
|
||||
llm = "llm"
|
||||
|
||||
|
||||
class Run(BaseModel):
|
||||
class RunBase(BaseModel):
|
||||
"""Base Run schema."""
|
||||
|
||||
id: Optional[UUID]
|
||||
name: str
|
||||
start_time: datetime.datetime = Field(default_factory=datetime.datetime.utcnow)
|
||||
end_time: datetime.datetime = Field(default_factory=datetime.datetime.utcnow)
|
||||
extra: dict
|
||||
@@ -93,11 +114,23 @@ class Run(BaseModel):
|
||||
serialized: dict
|
||||
inputs: dict
|
||||
outputs: Optional[dict]
|
||||
session_id: int
|
||||
parent_run_id: Optional[UUID]
|
||||
example_id: Optional[UUID]
|
||||
session_id: UUID
|
||||
reference_example_id: Optional[UUID]
|
||||
run_type: RunTypeEnum
|
||||
child_runs: List[Run] = Field(default_factory=list)
|
||||
|
||||
|
||||
class RunCreate(RunBase):
|
||||
"""Schema to create a run in the DB."""
|
||||
|
||||
name: Optional[str]
|
||||
child_runs: List[RunCreate] = Field(default_factory=list)
|
||||
|
||||
|
||||
class Run(RunBase):
|
||||
"""Run schema when loading from the DB."""
|
||||
|
||||
name: str
|
||||
parent_run_id: Optional[UUID]
|
||||
|
||||
|
||||
ChainRun.update_forward_refs()
|
||||
|
||||
@@ -29,8 +29,8 @@ class Chain(BaseModel, ABC):
|
||||
"""Base interface that all chains should implement."""
|
||||
|
||||
memory: Optional[BaseMemory] = None
|
||||
callbacks: Callbacks = None
|
||||
callback_manager: Optional[BaseCallbackManager] = None
|
||||
callbacks: Callbacks = Field(default=None, exclude=True)
|
||||
callback_manager: Optional[BaseCallbackManager] = Field(default=None, exclude=True)
|
||||
verbose: bool = Field(
|
||||
default_factory=_get_verbosity
|
||||
) # Whether to print the response text
|
||||
@@ -238,6 +238,12 @@ class Chain(BaseModel, ABC):
|
||||
if kwargs and not args:
|
||||
return self(kwargs, callbacks=callbacks)[self.output_keys[0]]
|
||||
|
||||
if not kwargs and not args:
|
||||
raise ValueError(
|
||||
"`run` supported with either positional arguments or keyword arguments,"
|
||||
" but none were provided."
|
||||
)
|
||||
|
||||
raise ValueError(
|
||||
f"`run` supported with either positional arguments or keyword arguments"
|
||||
f" but not both. Got args: {args} and kwargs: {kwargs}."
|
||||
|
||||
@@ -84,7 +84,10 @@ class LLMMathChain(Chain):
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
raise ValueError(f"{e}. Please try again with a valid numerical expression")
|
||||
raise ValueError(
|
||||
f'LLMMathChain._evaluate("{expression}") raised error: {e}.'
|
||||
" Please try again with a valid numerical expression"
|
||||
)
|
||||
|
||||
# Remove any leading and trailing brackets from the output
|
||||
return re.sub(r"^\[|\]$", "", output)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user