Mirror of https://github.com/hwchase17/langchain.git
synced 2026-02-08 10:09:46 +00:00
Compare commits
136 Commits
6fbdb9ce51
04475bea7d
1ad180f6de
274dc4bc53
05e749d9fe
80558b5b27
3637d6da6e
65f85af242
f6c97e6af4
f0cfed636f
6b8d144ccc
d383c0cb43
28091c2101
5c8e12558d
2b14036126
f2150285a4
e4ca511ec8
9fafe7b2b9
6335cb5b3a
872605a5c5
ce15ffae6a
ea83eed9ba
2b4ba203f7
2ceb807da2
ae0c3382dd
c485e7ab59
0d568daacb
04f765b838
c73cec5ac1
f1401a6dff
deffc65693
ba0057c077
02ebb15c4a
782df1db10
b3ecce0545
b04d84f6b3
aa11f7c89b
f4c8502e61
d84df25466
42df78d396
8b284f9ad0
35c9e6ab40
0870a45a69
8a338412fa
f510940bde
c8b0b6e6c1
1d1166ded6
637c61cffb
65c95f9fb2
edcd171535
6f386628c2
a1001b29eb
f70e18a5b3
0c646bb703
04b74d0446
075d9631f5
64940e9d0f
747b5f87c2
6cd51ef3d0
43a7a89e93
9544b30821
423f497168
5ca13cc1f0
59204a5033
eeb7c96e0c
f1fc4dfebc
2324f19c85
76ed41f48a
1017e5cee2
a30f42da4e
c3044b1bf0
6567b73e1a
bb6d97c18c
19e28d8784
2a3c5f8353
a57259ec83
7dcc698ebf
26534457f5
3095546851
b1e2e29222
84cfa76e00
d84bb02881
905a2114d7
8de1b4c4c2
878d0c8155
6032a051e9
fea639c1fc
2f087d63af
cc068f1b77
ac0a9d02bd
d86ed15d88
624554a43a
6d84541ff9
a9c2450330
d4cf1eb60a
fba6921b50
bd277b5327
bf726f9d8a
67db495fcf
8af25867cb
087a4bd2b8
b1446bea5f
cdea47491d
657f5f259f
7f8727bbcd
bbbca10704
6caba8e759
d18e788ee3
5f30cc8713
65c3b146c9
5a269d3175
c186f18aab
349ba88aee
1608f5dcae
3b556eae44
9b830f437c
374725a715
ea64b1716d
525db1b6cb
afa9d1292b
7e967aa4d5
f3ec6d2449
f291fd7eed
b67be55ab8
a5dd73c1a6
df3bc707fc
f08a76250f
aa38355999
1c68cbdb28
36ee60c96c
e23391965b
013208cce6
18f9d7b4f6
c26cf04110
71a337dac6
3bd5a99b83
.devcontainer/Dockerfile (new file, 42 lines)
@@ -0,0 +1,42 @@
# This is a Dockerfile for Developer Container

# Use the Python base image
ARG VARIANT="3.11-bullseye"
FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} AS langchain-dev-base

USER vscode

# Define the version of Poetry to install (default is 1.4.2)
# Define the directory of python virtual environment
ARG PYTHON_VIRTUALENV_HOME=/home/vscode/langchain-py-env \
    POETRY_VERSION=1.4.2

ENV POETRY_VIRTUALENVS_IN_PROJECT=false \
    POETRY_NO_INTERACTION=true

# Create a Python virtual environment for Poetry and install it
RUN python3 -m venv ${PYTHON_VIRTUALENV_HOME} && \
    $PYTHON_VIRTUALENV_HOME/bin/pip install --upgrade pip && \
    $PYTHON_VIRTUALENV_HOME/bin/pip install poetry==${POETRY_VERSION}

ENV PATH="$PYTHON_VIRTUALENV_HOME/bin:$PATH" \
    VIRTUAL_ENV=$PYTHON_VIRTUALENV_HOME

# Setup for bash
RUN poetry completions bash >> /home/vscode/.bash_completion && \
    echo "export PATH=$PYTHON_VIRTUALENV_HOME/bin:$PATH" >> ~/.bashrc

# Set the working directory for the app
WORKDIR /workspaces/langchain

# Use a multi-stage build to install dependencies
FROM langchain-dev-base AS langchain-dev-dependencies

ARG PYTHON_VIRTUALENV_HOME

# Copy only the dependency files for installation
COPY pyproject.toml poetry.lock poetry.toml ./

# Install the Poetry dependencies (this layer will be cached as long as the dependencies don't change)
RUN poetry install --no-interaction --no-ansi --with dev,test,docs
.devcontainer/devcontainer.json (new file, 33 lines)
@@ -0,0 +1,33 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile
{
  "dockerComposeFile": "./docker-compose.yaml",
  "service": "langchain",
  "workspaceFolder": "/workspaces/langchain",
  "name": "langchain",
  "customizations": {
    "vscode": {
      "extensions": [
        "ms-python.python"
      ],
      "settings": {
        "python.defaultInterpreterPath": "/home/vscode/langchain-py-env/bin/python3.11"
      }
    }

  },

  // Features to add to the dev container. More info: https://containers.dev/features.
  "features": {},

  // Use 'forwardPorts' to make a list of ports inside the container available locally.
  // "forwardPorts": [],

  // Uncomment the next line to run commands after the container is created.
  // "postCreateCommand": "cat /etc/os-release",

  // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
  // "remoteUser": "devcontainer"
  "remoteUser": "vscode",
  "overrideCommand": true
}
.devcontainer/docker-compose.yaml (new file, 31 lines)
@@ -0,0 +1,31 @@
version: '3'
services:
  langchain:
    build:
      dockerfile: .devcontainer/Dockerfile
      context: ../
    volumes:
      - ../:/workspaces/langchain
    networks:
      - langchain-network
    # environment:
    #   MONGO_ROOT_USERNAME: root
    #   MONGO_ROOT_PASSWORD: example123
    # depends_on:
    #   - mongo
  # mongo:
  #   image: mongo
  #   restart: unless-stopped
  #   environment:
  #     MONGO_INITDB_ROOT_USERNAME: root
  #     MONGO_INITDB_ROOT_PASSWORD: example123
  #   ports:
  #     - "27017:27017"
  #   networks:
  #     - langchain-network

networks:
  langchain-network:
    driver: bridge
.github/ISSUE_TEMPLATE/bug-report.yml (new file, 106 lines)
@@ -0,0 +1,106 @@
name: "\U0001F41B Bug Report"
description: Submit a bug report to help us improve LangChain
labels: ["02 Bug Report"]
body:
  - type: markdown
    attributes:
      value: >
        Thank you for taking the time to file a bug report. Before creating a new
        issue, please make sure to take a few moments to check the issue tracker
        for existing issues about the bug.

  - type: textarea
    id: system-info
    attributes:
      label: System Info
      description: Please share your system info with us.
      placeholder: LangChain version, platform, python version, ...
    validations:
      required: true

  - type: textarea
    id: who-can-help
    attributes:
      label: Who can help?
      description: |
        Your issue will be replied to more quickly if you can figure out the right person to tag with @
        If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**.

        The core maintainers strive to read all issues, but tagging them will help them prioritize.

        Please tag fewer than 3 people.

        @hwchase17 - project lead

        Tracing / Callbacks
        - @agola11

        Async
        - @agola11

        DataLoader Abstractions
        - @eyurtsev

        LLM/Chat Wrappers
        - @hwchase17
        - @agola11

        Tools / Toolkits
        - @vowelparrot

      placeholder: "@Username ..."

  - type: checkboxes
    id: information-scripts-examples
    attributes:
      label: Information
      description: "The problem arises when using:"
      options:
        - label: "The official example notebooks/scripts"
        - label: "My own modified scripts"

  - type: checkboxes
    id: related-components
    attributes:
      label: Related Components
      description: "Select the components related to the issue (if applicable):"
      options:
        - label: "LLMs/Chat Models"
        - label: "Embedding Models"
        - label: "Prompts / Prompt Templates / Prompt Selectors"
        - label: "Output Parsers"
        - label: "Document Loaders"
        - label: "Vector Stores / Retrievers"
        - label: "Memory"
        - label: "Agents / Agent Executors"
        - label: "Tools / Toolkits"
        - label: "Chains"
        - label: "Callbacks/Tracing"
        - label: "Async"

  - type: textarea
    id: reproduction
    validations:
      required: true
    attributes:
      label: Reproduction
      description: |
        Please provide a [code sample](https://stackoverflow.com/help/minimal-reproducible-example) that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
        If you have code snippets, error messages, stack traces please provide them here as well.
        Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
        Avoid screenshots when possible, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.

      placeholder: |
        Steps to reproduce the behavior:

        1.
        2.
        3.

  - type: textarea
    id: expected-behavior
    validations:
      required: true
    attributes:
      label: Expected behavior
      description: "A clear and concise description of what you would expect to happen."
.github/ISSUE_TEMPLATE/config.yml (new file, 6 lines)
@@ -0,0 +1,6 @@
blank_issues_enabled: true
version: 2.1
contact_links:
  - name: Discord
    url: https://discord.gg/6adMQxSpJS
    about: General community discussions
.github/ISSUE_TEMPLATE/documentation.yml (new file, 19 lines)
@@ -0,0 +1,19 @@
name: Documentation
description: Report an issue related to the LangChain documentation.
title: "DOC: <Please write a comprehensive title after the 'DOC: ' prefix>"
labels: [03 - Documentation]

body:
  - type: textarea
    attributes:
      label: "Issue with current documentation:"
      description: >
        Please make sure to leave a reference to the document/code you're
        referring to.

  - type: textarea
    attributes:
      label: "Idea or request for content:"
      description: >
        Please describe as clearly as possible what topics you think are missing
        from the current documentation.
.github/ISSUE_TEMPLATE/feature-request.yml (new file, 30 lines)
@@ -0,0 +1,30 @@
name: "\U0001F680 Feature request"
description: Submit a proposal/request for a new LangChain feature
labels: ["02 Feature Request"]
body:
  - type: textarea
    id: feature-request
    validations:
      required: true
    attributes:
      label: Feature request
      description: |
        A clear and concise description of the feature proposal. Please provide links to any relevant GitHub repos, papers, or other resources if relevant.

  - type: textarea
    id: motivation
    validations:
      required: true
    attributes:
      label: Motivation
      description: |
        Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too.

  - type: textarea
    id: contribution
    validations:
      required: true
    attributes:
      label: Your contribution
      description: |
        Is there any way that you could help, e.g. by submitting a PR? Make sure to read the CONTRIBUTING.MD [readme](https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md)
.github/ISSUE_TEMPLATE/other.yml (new file, 18 lines)
@@ -0,0 +1,18 @@
name: Other Issue
description: Raise an issue that wouldn't be covered by the other templates.
title: "Issue: <Please write a comprehensive title after the 'Issue: ' prefix>"
labels: [04 - Other]

body:
  - type: textarea
    attributes:
      label: "Issue you'd like to raise."
      description: >
        Please describe the issue you'd like to raise as clearly as possible.
        Make sure to include any relevant links or references.

  - type: textarea
    attributes:
      label: "Suggestion:"
      description: >
        Please outline a suggestion to improve the issue here.
.github/PULL_REQUEST_TEMPLATE.md (new file, 42 lines)
@@ -0,0 +1,42 @@
# Your PR Title (What it does)

<!--
Thank you for contributing to LangChain! Your PR will appear in our next release under the title you set. Please make sure it highlights your valuable contribution.

Replace this with a description of the change, the issue it fixes (if applicable), and relevant context. List any dependencies required for this change.

After you're done, someone will review your PR. They may suggest improvements. If no one reviews your PR within a few days, feel free to @-mention the same people again, as notifications can get lost.
-->

<!-- Remove if not applicable -->

Fixes # (issue)

## Before submitting

<!-- If you're adding a new integration, include an integration test and an example notebook showing its use! -->

## Who can review?

Community members can review the PR once tests pass. Tag maintainers/contributors who might be interested:

<!-- For a quicker response, figure out the right person to tag with @

@hwchase17 - project lead

Tracing / Callbacks
- @agola11

Async
- @agola11

DataLoader Abstractions
- @eyurtsev

LLM/Chat Wrappers
- @hwchase17
- @agola11

Tools / Toolkits
- @vowelparrot
-->
.github/actions/poetry_setup/action.yml (new file, 64 lines)
@@ -0,0 +1,64 @@
# An action for setting up poetry install with caching.
# Using a custom action since the default action does not
# take poetry install groups into account.
# Action code from:
# https://github.com/actions/setup-python/issues/505#issuecomment-1273013236
name: poetry-install-with-caching
description: Poetry install with support for caching of dependency groups.

inputs:
  python-version:
    description: Python version, supporting MAJOR.MINOR only
    required: true

  poetry-version:
    description: Poetry version
    required: true

  install-command:
    description: Command run for installing dependencies
    required: false
    default: poetry install

  cache-key:
    description: Cache key to use for manual handling of caching
    required: true

  working-directory:
    description: Directory to run install-command in
    required: false
    default: ""

runs:
  using: composite
  steps:
    - uses: actions/setup-python@v4
      with:
        python-version: ${{ inputs.python-version }}

    - uses: actions/cache@v3
      id: cache-pip
      env:
        SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
      with:
        path: |
          ~/.cache/pip
        key: pip-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}

    - run: pipx install poetry==${{ inputs.poetry-version }} --python python${{ inputs.python-version }}
      shell: bash

    - uses: actions/cache@v3
      id: cache-poetry
      env:
        SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
      with:
        path: |
          ~/.cache/pypoetry/virtualenvs
          ~/.cache/pypoetry/cache
          ~/.cache/pypoetry/artifacts
        key: poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles('poetry.lock') }}

    - run: ${{ inputs.install-command }}
      working-directory: ${{ inputs.working-directory }}
      shell: bash
.github/workflows/test.yml (10 changed lines)
@@ -20,15 +20,13 @@ jobs:
          - "3.11"
    steps:
      - uses: actions/checkout@v3
-     - name: Install poetry
-       run: pipx install poetry==$POETRY_VERSION
      - name: Set up Python ${{ matrix.python-version }}
-       uses: actions/setup-python@v4
+       uses: "./.github/actions/poetry_setup"
        with:
          python-version: ${{ matrix.python-version }}
-         cache: "poetry"
-     - name: Install dependencies
-       run: poetry install
+         poetry-version: "1.4.2"
+         cache-key: "main"
+         install-command: "poetry install"
      - name: Run unit tests
        run: |
          make test
.github/workflows/test_all.yml (new file, 33 lines)
@@ -0,0 +1,33 @@
# Run unit tests with all optional packages installed.
name: test_all

on:
  push:
    branches: [master]
  pull_request:

env:
  POETRY_VERSION: "1.4.2"

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
          - "3.11"
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: "./.github/actions/poetry_setup"
        with:
          python-version: ${{ matrix.python-version }}
          poetry-version: "1.4.2"
          cache-key: "extended"
          install-command: "poetry install -E extended_testing"
      - name: Run unit tests
        run: |
          make test
.readthedocs.yaml (new file, 26 lines)
@@ -0,0 +1,26 @@
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Set the version of Python and other tools you might need
build:
  os: ubuntu-22.04
  tools:
    python: "3.11"

# Build documentation in the docs/ directory with Sphinx
sphinx:
  configuration: docs/conf.py

# If using Sphinx, optionally build your docs in additional formats such as PDF
# formats:
#   - pdf

# Optionally declare the Python requirements required to build your docs
python:
  install:
    - requirements: docs/requirements.txt
    - method: pip
      path: .
Makefile (27 changed lines)
@@ -32,11 +32,13 @@ lint lint_diff:
    poetry run black $(PYTHON_FILES) --check
    poetry run ruff .

+TEST_FILE ?= tests/unit_tests/
+
test:
-   poetry run pytest tests/unit_tests
+   poetry run pytest $(TEST_FILE)

tests:
-   poetry run pytest tests/unit_tests
+   poetry run pytest $(TEST_FILE)

test_watch:
    poetry run ptw --now . -- tests/unit_tests

@@ -50,13 +52,14 @@ docker_tests:

help:
    @echo '----'
-   @echo 'coverage - run unit tests and generate coverage report'
-   @echo 'docs_build - build the documentation'
-   @echo 'docs_clean - clean the documentation build artifacts'
-   @echo 'docs_linkcheck - run linkchecker on the documentation'
-   @echo 'format - run code formatters'
-   @echo 'lint - run linters'
-   @echo 'test - run unit tests'
-   @echo 'test_watch - run unit tests in watch mode'
-   @echo 'integration_tests - run integration tests'
-   @echo 'docker_tests - run unit tests in docker'
+   @echo 'coverage - run unit tests and generate coverage report'
+   @echo 'docs_build - build the documentation'
+   @echo 'docs_clean - clean the documentation build artifacts'
+   @echo 'docs_linkcheck - run linkchecker on the documentation'
+   @echo 'format - run code formatters'
+   @echo 'lint - run linters'
+   @echo 'test - run unit tests'
+   @echo 'test TEST_FILE=<test_file> - run all tests in file'
+   @echo 'test_watch - run unit tests in watch mode'
+   @echo 'integration_tests - run integration tests'
+   @echo 'docker_tests - run unit tests in docker'
README.md (12 changed lines)
@@ -2,7 +2,17 @@

⚡ Building applications with LLMs through composability ⚡

-[](https://github.com/hwchase17/langchain/actions/workflows/lint.yml) [](https://github.com/hwchase17/langchain/actions/workflows/test.yml) [](https://github.com/hwchase17/langchain/actions/workflows/linkcheck.yml) [](https://pepy.tech/project/langchain) [](https://opensource.org/licenses/MIT) [](https://twitter.com/langchainai) [](https://discord.gg/6adMQxSpJS)
+[](https://github.com/hwchase17/langchain/actions/workflows/lint.yml)
+[](https://github.com/hwchase17/langchain/actions/workflows/test.yml)
+[](https://github.com/hwchase17/langchain/actions/workflows/linkcheck.yml)
+[](https://pepy.tech/project/langchain)
+[](https://opensource.org/licenses/MIT)
+[](https://twitter.com/langchainai)
+[](https://discord.gg/6adMQxSpJS)
+[](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/hwchase17/langchain)
+[](https://codespaces.new/hwchase17/langchain)
+[](https://star-history.com/#hwchase17/langchain)

Looking for the JS/TS version? Check out [LangChain.js](https://github.com/hwchase17/langchainjs).
docs/_static/js/mendablesearch.js (2 changed lines)
@@ -52,7 +52,7 @@ document.addEventListener('DOMContentLoaded', () => {

  loadScript('https://unpkg.com/react@17/umd/react.production.min.js', () => {
    loadScript('https://unpkg.com/react-dom@17/umd/react-dom.production.min.js', () => {
-     loadScript('https://unpkg.com/@mendable/search@0.0.83/dist/umd/mendable.min.js', initializeMendable);
+     loadScript('https://unpkg.com/@mendable/search@0.0.93/dist/umd/mendable.min.js', initializeMendable);
    });
  });
});
@@ -343,4 +343,12 @@ Proprietary
+++

A journaling app for self-care that uses AI to uncover insights and patterns over time.

+
+Articles on **Google Scholar**
+-----------------------------
+
+LangChain is used in many scientific and research projects.
+
+**Google Scholar** presents a `list of the papers <https://scholar.google.com/scholar?q=%22langchain%22&hl=en&as_sdt=0,5&as_vis=1>`_
+with references to LangChain.
+
@@ -316,7 +316,7 @@ You can also pass in multiple messages for OpenAI's gpt-3.5-turbo and gpt-4 models
```python
messages = [
    SystemMessage(content="You are a helpful assistant that translates English to French."),
-    HumanMessage(content="Translate this sentence from English to French. I love programming.")
+    HumanMessage(content="I love programming.")
]
chat(messages)
# -> AIMessage(content="J'aime programmer.", additional_kwargs={})

@@ -327,22 +327,22 @@ You can go one step further and generate completions for multiple sets of messages
batch_messages = [
    [
        SystemMessage(content="You are a helpful assistant that translates English to French."),
-        HumanMessage(content="Translate this sentence from English to French. I love programming.")
+        HumanMessage(content="I love programming.")
    ],
    [
        SystemMessage(content="You are a helpful assistant that translates English to French."),
-        HumanMessage(content="Translate this sentence from English to French. I love artificial intelligence.")
+        HumanMessage(content="I love artificial intelligence.")
    ],
]
result = chat.generate(batch_messages)
result
-# -> LLMResult(generations=[[ChatGeneration(text="J'aime programmer.", generation_info=None, message=AIMessage(content="J'aime programmer.", additional_kwargs={}))], [ChatGeneration(text="J'aime l'intelligence artificielle.", generation_info=None, message=AIMessage(content="J'aime l'intelligence artificielle.", additional_kwargs={}))]], llm_output={'token_usage': {'prompt_tokens': 71, 'completion_tokens': 18, 'total_tokens': 89}})
+# -> LLMResult(generations=[[ChatGeneration(text="J'aime programmer.", generation_info=None, message=AIMessage(content="J'aime programmer.", additional_kwargs={}))], [ChatGeneration(text="J'aime l'intelligence artificielle.", generation_info=None, message=AIMessage(content="J'aime l'intelligence artificielle.", additional_kwargs={}))]], llm_output={'token_usage': {'prompt_tokens': 57, 'completion_tokens': 20, 'total_tokens': 77}})
```

You can recover things like token usage from this LLMResult:
```
result.llm_output['token_usage']
-# -> {'prompt_tokens': 71, 'completion_tokens': 18, 'total_tokens': 89}
+# -> {'prompt_tokens': 57, 'completion_tokens': 20, 'total_tokens': 77}
```
@@ -10,6 +10,42 @@ but potentially an unknown chain that depends on the user's input.
In these types of chains, there is an “agent” which has access to a suite of tools.
Depending on the user input, the agent can then decide which, if any, of these tools to call.

+At the moment, there are two main types of agents:
+
+1. "Action Agents": these agents decide an action to take and take that action one step at a time
+2. "Plan-and-Execute Agents": these agents first decide a plan of actions to take, and then execute those actions one at a time.
+
+When should you use each one? Action Agents are more conventional, and good for small tasks.
+For more complex or long-running tasks, the initial planning step helps to maintain long-term objectives and focus. However, that comes at the expense of generally more calls and higher latency.
+These two agents are also not mutually exclusive - in fact, it is often best to have an Action Agent be in charge of the execution for the Plan-and-Execute agent.
+
+Action Agents
+-------------
+
+High level pseudocode of agents looks something like:
+
+- Some user input is received
+- The `agent` decides which `tool` - if any - to use, and what the input to that tool should be
+- That `tool` is then called with that `tool input`, and an `observation` is recorded (this is just the output of calling that tool with that tool input)
+- That history of `tool`, `tool input`, and `observation` is passed back into the `agent`, and it decides what step to take next
+- This is repeated until the `agent` decides it no longer needs to use a `tool`, and then it responds directly to the user.
+
+The different abstractions involved in agents are as follows:
+
+- Agent: this is where the logic of the application lives. Agents expose an interface that takes in user input along with a list of previous steps the agent has taken, and returns either an `AgentAction` or `AgentFinish`
+  - `AgentAction` corresponds to the tool to use and the input to that tool
+  - `AgentFinish` means the agent is done, and has information around what to return to the user
+- Tools: these are the actions an agent can take. What tools you give an agent highly depends on what you want the agent to do
+- Toolkits: these are groups of tools designed for a specific use case. For example, in order for an agent to interact with a SQL database in the best way it may need access to one tool to execute queries and another tool to inspect tables.
+- Agent Executor: this wraps an agent and a list of tools. This is responsible for the loop of running the agent iteratively until the stopping criteria are met.
+
+The most important abstraction of the four above to understand is that of the agent.
+Although an agent can be defined in whatever way one chooses, the typical way to construct an agent is with:
+
+- PromptTemplate: this is responsible for taking the user input and previous steps and constructing a prompt to send to the language model
+- Language Model: this takes the prompt constructed by the PromptTemplate and returns some output
+- Output Parser: this takes the output of the Language Model and parses it into an `AgentAction` or `AgentFinish` object.
+
+In this section of documentation, we first start with a Getting Started notebook to cover how to use all things related to agents in an end-to-end manner.

.. toctree::
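The action-agent loop added in the hunk above is easy to see in code. Here is a minimal Python sketch of that loop, using hypothetical stand-ins for `AgentAction`, `AgentFinish`, the agent, and the tools (LangChain's real classes carry more state; this only illustrates the control flow the doc describes):

```python
from dataclasses import dataclass
from typing import Callable, Dict, List, Tuple, Union

@dataclass
class AgentAction:
    tool: str        # which tool to use
    tool_input: str  # what input to give that tool

@dataclass
class AgentFinish:
    output: str      # what to return to the user

# The "agent" takes the user input plus previous steps and decides what to do next.
AgentFn = Callable[[str, List[Tuple[AgentAction, str]]], Union[AgentAction, AgentFinish]]

def run_agent(agent: AgentFn, tools: Dict[str, Callable[[str], str]], user_input: str) -> str:
    """Sketch of the Agent Executor loop: call the agent until it returns AgentFinish."""
    intermediate_steps: List[Tuple[AgentAction, str]] = []
    while True:
        decision = agent(user_input, intermediate_steps)
        if isinstance(decision, AgentFinish):
            return decision.output                                # agent is done
        observation = tools[decision.tool](decision.tool_input)   # call the chosen tool
        intermediate_steps.append((decision, observation))        # feed the history back in
```

In a real agent, the PromptTemplate / Language Model / Output Parser trio listed above plays the role of the `agent` callable: format the input and history into a prompt, get a completion, and parse it into an `AgentAction` or `AgentFinish`.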
@@ -23,25 +59,29 @@ We then split the documentation into the following sections:

**Tools**

-An overview of the various tools LangChain supports.
+In this section we cover the different types of tools LangChain supports natively.
+We then cover how to add your own tools.


**Agents**

-An overview of the different agent types.
+In this section we cover the different types of agents LangChain supports natively.
+We then cover how to modify and create your own agents.


**Toolkits**

-An overview of toolkits, and examples of the different ones LangChain supports.
+In this section we go over the various toolkits that LangChain supports out of the box,
+and how to create an agent from them.


**Agent Executor**

-An overview of the Agent Executor class and examples of how to use it.
+In this section we go over the Agent Executor class, which is responsible for calling
+the agent and tools in a loop. We go over different ways to customize this, and options you
+can use for more control.

-Go Deeper
----------
+**Go Deeper**

.. toctree::
   :maxdepth: 1

@@ -50,3 +90,23 @@
   ./agents/agents.rst
   ./agents/toolkits.rst
   ./agents/agent_executors.rst
+
+Plan-and-Execute Agents
+-----------------------
+
+High level pseudocode of agents looks something like:
+
+- Some user input is received
+- The planner lists out the steps to take
+- The executor goes through the list of steps, executing them
+
+The most typical implementation is to have the planner be a language model,
+and the executor be an action agent.
+
+**Go Deeper**
+
+.. toctree::
+   :maxdepth: 1
+
+   ./agents/plan_and_execute.ipynb
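The plan-and-execute flow added above is a thin layer over the same pieces. A sketch under the same caveats (hypothetical `planner` and `executor` callables; assumes the planner, e.g. a language model call, returns an ordered list of step strings, and the executor is an action agent run on one step at a time):

```python
from typing import Callable, List

def plan_and_execute(
    planner: Callable[[str], List[str]],  # lists out the steps to take
    executor: Callable[[str], str],       # executes a single step (e.g. an action agent)
    user_input: str,
) -> List[str]:
    steps = planner(user_input)                # the planner lists the steps to take
    return [executor(step) for step in steps]  # the executor works through them in order
```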
@@ -9,9 +9,9 @@
     "\n",
     "LangChain provides async support for Agents by leveraging the [asyncio](https://docs.python.org/3/library/asyncio.html) library.\n",
     "\n",
-    "Async methods are currently supported for the following `Tools`: [`SerpAPIWrapper`](https://github.com/hwchase17/langchain/blob/master/langchain/serpapi.py) and [`LLMMathChain`](https://github.com/hwchase17/langchain/blob/master/langchain/chains/llm_math/base.py). Async support for other agent tools are on the roadmap.\n",
+    "Async methods are currently supported for the following `Tools`: [`GoogleSerperAPIWrapper`](https://github.com/hwchase17/langchain/blob/master/langchain/utilities/google_serper.py), [`SerpAPIWrapper`](https://github.com/hwchase17/langchain/blob/master/langchain/serpapi.py) and [`LLMMathChain`](https://github.com/hwchase17/langchain/blob/master/langchain/chains/llm_math/base.py). Async support for other agent tools are on the roadmap.\n",
     "\n",
-    "For `Tool`s that have a `coroutine` implemented (the two mentioned above), the `AgentExecutor` will `await` them directly. Otherwise, the `AgentExecutor` will call the `Tool`'s `func` via `asyncio.get_event_loop().run_in_executor` to avoid blocking the main runloop.\n",
+    "For `Tool`s that have a `coroutine` implemented (the three mentioned above), the `AgentExecutor` will `await` them directly. Otherwise, the `AgentExecutor` will call the `Tool`'s `func` via `asyncio.get_event_loop().run_in_executor` to avoid blocking the main runloop.\n",
     "\n",
     "You can use `arun` to call an `AgentExecutor` asynchronously."
    ]
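As a rough sketch of the `arun` usage this cell describes, the concurrent run timed below might look like the following (a hypothetical driver script, not the notebook's verbatim code; assumes OpenAI and Serper API keys are configured, and mirrors the `load_tools`/`initialize_agent` setup shown in the notebook source further down):

```python
import asyncio

from langchain.agents import AgentType, initialize_agent, load_tools
from langchain.llms import OpenAI

async def main() -> None:
    llm = OpenAI(temperature=0)
    # google-serper and llm-math both implement a coroutine, so the
    # AgentExecutor awaits them directly instead of using run_in_executor.
    tools = load_tools(["google-serper", "llm-math"], llm=llm)
    agent = initialize_agent(
        tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
    )
    questions = [
        "Who won the US Open men's final in 2019?",
        "What is Olivia Wilde's boyfriend's age raised to the 0.23 power?",
    ]
    # arun is the async counterpart of run; gather executes the agents concurrently.
    results = await asyncio.gather(*(agent.arun(q) for q in questions))
    print(results)

asyncio.run(main())
```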
@@ -28,10 +28,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 5,
    "id": "da5df06c-af6f-4572-b9f5-0ab971c16487",
    "metadata": {
-    "tags": []
+    "tags": [],
+    "ExecuteTime": {
+     "end_time": "2023-05-04T01:27:22.755025Z",
+     "start_time": "2023-05-04T01:27:22.754041Z"
+    }
    },
    "outputs": [],
    "source": [
@@ -56,10 +60,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 3,
    "id": "fd4c294e-b1d6-44b8-b32e-2765c017e503",
    "metadata": {
-    "tags": []
+    "tags": [],
+    "ExecuteTime": {
+     "end_time": "2023-05-04T01:15:35.466212Z",
+     "start_time": "2023-05-04T01:14:05.452245Z"
+    }
    },
    "outputs": [
    {
@@ -68,106 +76,98 @@
    "text": [
     "\n",
     "\n",
-    "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-    "\u001b[32;1m\u001b[1;3m I need to find out who won the US Open men's final in 2019 and then calculate his age raised to the 0.334 power.\n",
-    "Action: Search\n",
-    "Action Input: \"US Open men's final 2019 winner\"\u001b[0m\n",
-    "Observation: \u001b[33;1m\u001b[1;3mRafael Nadal defeated Daniil Medvedev in the final, 7–5, 6–3, 5–7, 4–6, 6–4 to win the men's singles tennis title at the 2019 US Open. It was his fourth US ...\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I need to find out the age of the winner\n",
-    "Action: Search\n",
-    "Action Input: \"Rafael Nadal age\"\u001b[0m\n",
-    "Observation: \u001b[33;1m\u001b[1;3m36 years\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I now need to calculate his age raised to the 0.334 power\n",
+    "\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
+    "\u001B[32;1m\u001B[1;3m I need to find out who won the US Open men's final in 2019 and then calculate his age raised to the 0.334 power.\n",
+    "Action: Google Serper\n",
+    "Action Input: \"Who won the US Open men's final in 2019?\"\u001B[0m\n",
+    "Observation: \u001B[36;1m\u001B[1;3mRafael Nadal defeated Daniil Medvedev in the final, 7–5, 6–3, 5–7, 4–6, 6–4 to win the men's singles tennis title at the 2019 US Open. It was his fourth US ... Draw: 128 (16 Q / 8 WC). Champion: Rafael Nadal. Runner-up: Daniil Medvedev. Score: 7–5, 6–3, 5–7, 4–6, 6–4. Bianca Andreescu won the women's singles title, defeating Serena Williams in straight sets in the final, becoming the first Canadian to win a Grand Slam singles ... Rafael Nadal won his 19th career Grand Slam title, and his fourth US Open crown, by surviving an all-time comback effort from Daniil ... Rafael Nadal beats Daniil Medvedev in US Open final to claim 19th major title. World No2 claims 7-5, 6-3, 5-7, 4-6, 6-4 victory over Russian ... Rafael Nadal defeated Daniil Medvedev in the men's singles final of the U.S. Open on Sunday. Rafael Nadal survived. The 33-year-old defeated Daniil Medvedev in the final of the 2019 U.S. Open to earn his 19th Grand Slam title Sunday ... NEW YORK -- Rafael Nadal defeated Daniil Medvedev in an epic five-set match, 7-5, 6-3, 5-7, 4-6, 6-4 to win the men's singles title at the ... Nadal previously won the U.S. Open three times, most recently in 2017. Ahead of the match, Nadal said he was “super happy to be back in the ... Watch the full match between Daniil Medvedev and Rafael ... Duration: 4:47:32. Posted: Mar 20, 2020. US Open 2019: Rafael Nadal beats Daniil Medvedev · Updated: Sep. 08, 2019, 11:11 p.m. |; Published: Sep · Published: Sep. 08, 2019, 10:06 p.m.. 26. US Open ...\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m I now know that Rafael Nadal won the US Open men's final in 2019 and he is 33 years old.\n",
     "Action: Calculator\n",
-    "Action Input: 36^0.334\u001b[0m\n",
-    "Observation: \u001b[36;1m\u001b[1;3mAnswer: 3.3098250249682484\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
-    "Final Answer: Rafael Nadal, aged 36, won the US Open men's final in 2019 and his age raised to the 0.334 power is 3.3098250249682484.\u001b[0m\n",
+    "Action Input: 33^0.334\u001B[0m\n",
+    "Observation: \u001B[33;1m\u001B[1;3mAnswer: 3.215019829667466\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m I now know the final answer.\n",
+    "Final Answer: Rafael Nadal won the US Open men's final in 2019 and his age raised to the 0.334 power is 3.215019829667466.\u001B[0m\n",
     "\n",
-    "\u001b[1m> Finished chain.\u001b[0m\n",
+    "\u001B[1m> Finished chain.\u001B[0m\n",
     "\n",
     "\n",
-    "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-    "\u001b[32;1m\u001b[1;3m I need to find out who Olivia Wilde's boyfriend is and then calculate his age raised to the 0.23 power.\n",
-    "Action: Search\n",
-    "Action Input: \"Olivia Wilde boyfriend\"\u001b[0m\n",
-    "Observation: \u001b[33;1m\u001b[1;3mOlivia Wilde started dating Harry Styles after ending her years-long engagement to Jason Sudeikis — see their relationship timeline.\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I need to find out Harry Styles' age.\n",
-    "Action: Search\n",
-    "Action Input: \"Harry Styles age\"\u001b[0m\n",
-    "Observation: \u001b[33;1m\u001b[1;3m29 years\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I need to calculate 29 raised to the 0.23 power.\n",
+    "\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
+    "\u001B[32;1m\u001B[1;3m I need to find out who Olivia Wilde's boyfriend is and then calculate his age raised to the 0.23 power.\n",
+    "Action: Google Serper\n",
+    "Action Input: \"Olivia Wilde boyfriend\"\u001B[0m\n",
+    "Observation: \u001B[36;1m\u001B[1;3mSudeikis and Wilde's relationship ended in November 2020. Wilde was publicly served with court documents regarding child custody while she was presenting Don't Worry Darling at CinemaCon 2022. In January 2021, Wilde began dating singer Harry Styles after meeting during the filming of Don't Worry Darling.\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m I need to find out Harry Styles' age.\n",
+    "Action: Google Serper\n",
+    "Action Input: \"Harry Styles age\"\u001B[0m\n",
+    "Observation: \u001B[36;1m\u001B[1;3m29 years\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m I need to calculate 29 raised to the 0.23 power.\n",
     "Action: Calculator\n",
-    "Action Input: 29^0.23\u001b[0m\n",
-    "Observation: \u001b[36;1m\u001b[1;3mAnswer: 2.169459462491557\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
-    "Final Answer: Harry Styles, Olivia Wilde's boyfriend, is 29 years old and his age raised to the 0.23 power is 2.169459462491557.\u001b[0m\n",
+    "Action Input: 29^0.23\u001B[0m\n",
+    "Observation: \u001B[33;1m\u001B[1;3mAnswer: 2.169459462491557\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m I now know the final answer.\n",
+    "Final Answer: Harry Styles is Olivia Wilde's boyfriend and his current age raised to the 0.23 power is 2.169459462491557.\u001B[0m\n",
     "\n",
-    "\u001b[1m> Finished chain.\u001b[0m\n",
+    "\u001B[1m> Finished chain.\u001B[0m\n",
     "\n",
     "\n",
-    "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-    "\u001b[32;1m\u001b[1;3m I need to find out who won the grand prix and then calculate their age raised to the 0.23 power.\n",
-    "Action: Search\n",
-    "Action Input: \"Formula 1 Grand Prix Winner\"\u001b[0m\n",
-    "Observation: \u001b[33;1m\u001b[1;3mMichael Schumacher (top left) and Lewis Hamilton (top right) have each won the championship a record seven times during their careers, while Sebastian Vettel ( ...\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I need to find out the age of the winner\n",
-    "Action: Search\n",
-    "Action Input: \"Michael Schumacher age\"\u001b[0m\n",
-    "Observation: \u001b[33;1m\u001b[1;3m54 years\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I need to calculate the age raised to the 0.23 power\n",
+    "\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
+    "\u001B[32;1m\u001B[1;3m I need to find out who won the most recent grand prix and then calculate their age raised to the 0.23 power.\n",
+    "Action: Google Serper\n",
+    "Action Input: \"who won the most recent formula 1 grand prix\"\u001B[0m\n",
+    "Observation: \u001B[36;1m\u001B[1;3mMax Verstappen won his first Formula 1 world title on Sunday after the championship was decided by a last-lap overtake of his rival Lewis Hamilton in the Abu Dhabi Grand Prix. Dec 12, 2021\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m I need to find out Max Verstappen's age\n",
+    "Action: Google Serper\n",
+    "Action Input: \"Max Verstappen age\"\u001B[0m\n",
+    "Observation: \u001B[36;1m\u001B[1;3m25 years\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m I need to calculate 25 raised to the 0.23 power\n",
     "Action: Calculator\n",
-    "Action Input: 54^0.23\u001b[0m\n",
-    "Observation: \u001b[36;1m\u001b[1;3mAnswer: 2.502940725307012\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
-    "Final Answer: Michael Schumacher, aged 54, raised to the 0.23 power is 2.502940725307012.\u001b[0m\n",
+    "Action Input: 25^0.23\u001B[0m\n",
+    "Observation: \u001B[33;1m\u001B[1;3mAnswer: 2.096651272316035\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m I now know the final answer\n",
+    "Final Answer: Max Verstappen, aged 25, won the most recent Formula 1 grand prix and his age raised to the 0.23 power is 2.096651272316035.\u001B[0m\n",
     "\n",
-    "\u001b[1m> Finished chain.\u001b[0m\n",
+    "\u001B[1m> Finished chain.\u001B[0m\n",
     "\n",
     "\n",
-    "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-    "\u001b[32;1m\u001b[1;3m I need to find out who won the US Open women's final in 2019 and then calculate her age raised to the 0.34 power.\n",
-    "Action: Search\n",
-    "Action Input: \"US Open women's final 2019 winner\"\u001b[0m\n",
-    "Observation: \u001b[33;1m\u001b[1;3mBianca Andreescu\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I need to find out her age\n",
-    "Action: Search\n",
-    "Action Input: \"Bianca Andreescu age\"\u001b[0m\n",
-    "Observation: \u001b[33;1m\u001b[1;3m22 years\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I need to calculate her age raised to the 0.34 power\n",
+    "\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
+    "\u001B[32;1m\u001B[1;3m I need to find out who won the US Open women's final in 2019 and then calculate her age raised to the 0.34 power.\n",
+    "Action: Google Serper\n",
+    "Action Input: \"US Open women's final 2019 winner\"\u001B[0m\n",
+    "Observation: \u001B[36;1m\u001B[1;3mWHAT HAPPENED: #SheTheNorth? She the champion. Nineteen-year-old Canadian Bianca Andreescu sealed her first Grand Slam title on Saturday, downing 23-time major champion Serena Williams in the 2019 US Open women's singles final, 6-3, 7-5. Sep 7, 2019\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m I now need to calculate her age raised to the 0.34 power.\n",
     "Action: Calculator\n",
-    "Action Input: 22^0.34\u001b[0m\n",
-    "Observation: \u001b[36;1m\u001b[1;3mAnswer: 2.8603798598506933\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
-    "Final Answer: Bianca Andreescu, aged 22, won the US Open women's final in 2019 and her age raised to the 0.34 power is 2.86.\u001b[0m\n",
+    "Action Input: 19^0.34\u001B[0m\n",
+    "Observation: \u001B[33;1m\u001B[1;3mAnswer: 2.7212987634680084\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m I now know the final answer.\n",
+    "Final Answer: Nineteen-year-old Canadian Bianca Andreescu won the US Open women's final in 2019 and her age raised to the 0.34 power is 2.7212987634680084.\u001B[0m\n",
     "\n",
-    "\u001b[1m> Finished chain.\u001b[0m\n",
+    "\u001B[1m> Finished chain.\u001B[0m\n",
     "\n",
     "\n",
-    "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-    "\u001b[32;1m\u001b[1;3m I need to find out who Beyonce's husband is and then calculate his age raised to the 0.19 power.\n",
-    "Action: Search\n",
-    "Action Input: \"Who is Beyonce's husband?\"\u001b[0m\n",
-    "Observation: \u001b[33;1m\u001b[1;3mJay-Z\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I need to find out Jay-Z's age\n",
-    "Action: Search\n",
-    "Action Input: \"How old is Jay-Z?\"\u001b[0m\n",
-    "Observation: \u001b[33;1m\u001b[1;3m53 years\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I need to calculate 53 raised to the 0.19 power\n",
+    "\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
+    "\u001B[32;1m\u001B[1;3m I need to find out who Beyonce's husband is and then calculate his age raised to the 0.19 power.\n",
+    "Action: Google Serper\n",
+    "Action Input: \"Who is Beyonce's husband?\"\u001B[0m\n",
+    "Observation: \u001B[36;1m\u001B[1;3mJay-Z\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m I need to find out Jay-Z's age\n",
+    "Action: Google Serper\n",
+    "Action Input: \"How old is Jay-Z?\"\u001B[0m\n",
+    "Observation: \u001B[36;1m\u001B[1;3m53 years\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m I need to calculate 53 raised to the 0.19 power\n",
     "Action: Calculator\n",
-    "Action Input: 53^0.19\u001b[0m\n",
-    "Observation: \u001b[36;1m\u001b[1;3mAnswer: 2.12624064206896\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
-    "Final Answer: Jay-Z is Beyonce's husband and his age raised to the 0.19 power is 2.12624064206896.\u001b[0m\n",
+    "Action Input: 53^0.19\u001B[0m\n",
+    "Observation: \u001B[33;1m\u001B[1;3mAnswer: 2.12624064206896\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m I now know the final answer\n",
+    "Final Answer: Jay-Z is Beyonce's husband and his age raised to the 0.19 power is 2.12624064206896.\u001B[0m\n",
     "\n",
-    "\u001b[1m> Finished chain.\u001b[0m\n",
-    "Serial executed in 52.47 seconds.\n"
+    "\u001B[1m> Finished chain.\u001B[0m\n",
+    "Serial executed in 89.97 seconds.\n"
    ]
   }
  ],
  "source": [
   "llm = OpenAI(temperature=0)\n",
-  "tools = load_tools([\"llm-math\", \"serpapi\"], llm=llm)\n",
+  "tools = load_tools([\"google-serper\", \"llm-math\"], llm=llm)\n",
   "agent = initialize_agent(\n",
   "    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
   ")\n",
@@ -181,10 +181,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 4,
    "id": "076d7b85-45ec-465d-8b31-c2ad119c3438",
    "metadata": {
-    "tags": []
+    "tags": [],
+    "ExecuteTime": {
+     "end_time": "2023-05-04T01:26:59.737657Z",
+     "start_time": "2023-05-04T01:26:42.182078Z"
+    }
    },
    "outputs": [
    {
@@ -193,96 +197,84 @@
    "text": [
     "\n",
     "\n",
-    "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
     "\n",
-    "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+    "\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
     "\n",
     "\n",
     "\n",
-    "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+    "\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
     "\n",
     "\n",
-    "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+    "\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
     "\n",
     "\n",
-    "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
-    "\u001b[32;1m\u001b[1;3m I need to find out who won the US Open women's final in 2019 and then calculate her age raised to the 0.34 power.\n",
-    "Action: Search\n",
-    "Action Input: \"US Open women's final 2019 winner\"\u001b[0m\n",
-    "Observation: \u001b[33;1m\u001b[1;3mBianca Andreescu\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I need to find out who won the US Open men's final in 2019 and then calculate his age raised to the 0.334 power.\n",
-    "Action: Search\n",
-    "Action Input: \"US Open men's final 2019 winner\"\u001b[0m\n",
-    "Observation: \u001b[33;1m\u001b[1;3mRafael Nadal defeated Daniil Medvedev in the final, 7–5, 6–3, 5–7, 4–6, 6–4 to win the men's singles tennis title at the 2019 US Open. It was his fourth US ...\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I need to find out who Olivia Wilde's boyfriend is and then calculate his age raised to the 0.23 power.\n",
-    "Action: Search\n",
-    "Action Input: \"Olivia Wilde boyfriend\"\u001b[0m\u001b[32;1m\u001b[1;3m I need to find out who won the grand prix and then calculate their age raised to the 0.23 power.\n",
-    "Action: Search\n",
-    "Action Input: \"Formula 1 Grand Prix Winner\"\u001b[0m\u001b[32;1m\u001b[1;3m I need to find out who Beyonce's husband is and then calculate his age raised to the 0.19 power.\n",
-    "Action: Search\n",
-    "Action Input: \"Who is Beyonce's husband?\"\u001b[0m\n",
-    "Observation: \u001b[33;1m\u001b[1;3mOlivia Wilde started dating Harry Styles after ending her years-long engagement to Jason Sudeikis — see their relationship timeline.\u001b[0m\n",
+    "\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
     "\n",
     "\n",
+    "\u001B[1m> Entering new AgentExecutor chain...\u001B[0m\n",
+    "\u001B[32;1m\u001B[1;3m I need to find out who Olivia Wilde's boyfriend is and then calculate his age raised to the 0.23 power.\n",
+    "Action: Google Serper\n",
+    "Action Input: \"Olivia Wilde boyfriend\"\u001B[0m\u001B[32;1m\u001B[1;3m I need to find out who Beyonce's husband is and then calculate his age raised to the 0.19 power.\n",
+    "Action: Google Serper\n",
+    "Action Input: \"Who is Beyonce's husband?\"\u001B[0m\u001B[32;1m\u001B[1;3m I need to find out who won the most recent formula 1 grand prix and then calculate their age raised to the 0.23 power.\n",
+    "Action: Google Serper\n",
+    "Action Input: \"most recent formula 1 grand prix winner\"\u001B[0m\u001B[32;1m\u001B[1;3m I need to find out who won the US Open men's final in 2019 and then calculate his age raised to the 0.334 power.\n",
+    "Action: Google Serper\n",
+    "Action Input: \"Who won the US Open men's final in 2019?\"\u001B[0m\u001B[32;1m\u001B[1;3m I need to find out who won the US Open women's final in 2019 and then calculate her age raised to the 0.34 power.\n",
+    "Action: Google Serper\n",
+    "Action Input: \"US Open women's final 2019 winner\"\u001B[0m\n",
+    "Observation: \u001B[36;1m\u001B[1;3mSudeikis and Wilde's relationship ended in November 2020. Wilde was publicly served with court documents regarding child custody while she was presenting Don't Worry Darling at CinemaCon 2022. In January 2021, Wilde began dating singer Harry Styles after meeting during the filming of Don't Worry Darling.\u001B[0m\n",
     "Thought:\n",
-    "Observation: \u001b[33;1m\u001b[1;3mJay-Z\u001b[0m\n",
+    "Observation: \u001B[36;1m\u001B[1;3mJay-Z\u001B[0m\n",
     "Thought:\n",
-    "Observation: \u001b[33;1m\u001b[1;3mMichael Schumacher (top left) and Lewis Hamilton (top right) have each won the championship a record seven times during their careers, while Sebastian Vettel ( ...\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I need to find out her age\n",
-    "Action: Search\n",
-    "Action Input: \"Bianca Andreescu age\"\u001b[0m\u001b[32;1m\u001b[1;3m I need to find out Jay-Z's age\n",
-    "Action: Search\n",
-    "Action Input: \"How old is Jay-Z?\"\u001b[0m\n",
-    "Observation: \u001b[33;1m\u001b[1;3m53 years\u001b[0m\n",
+    "Observation: \u001B[36;1m\u001B[1;3mRafael Nadal defeated Daniil Medvedev in the final, 7–5, 6–3, 5–7, 4–6, 6–4 to win the men's singles tennis title at the 2019 US Open. It was his fourth US ... Draw: 128 (16 Q / 8 WC). Champion: Rafael Nadal. Runner-up: Daniil Medvedev. Score: 7–5, 6–3, 5–7, 4–6, 6–4. Bianca Andreescu won the women's singles title, defeating Serena Williams in straight sets in the final, becoming the first Canadian to win a Grand Slam singles ... Rafael Nadal won his 19th career Grand Slam title, and his fourth US Open crown, by surviving an all-time comback effort from Daniil ... Rafael Nadal beats Daniil Medvedev in US Open final to claim 19th major title. World No2 claims 7-5, 6-3, 5-7, 4-6, 6-4 victory over Russian ... Rafael Nadal defeated Daniil Medvedev in the men's singles final of the U.S. Open on Sunday. Rafael Nadal survived. The 33-year-old defeated Daniil Medvedev in the final of the 2019 U.S. Open to earn his 19th Grand Slam title Sunday ... NEW YORK -- Rafael Nadal defeated Daniil Medvedev in an epic five-set match, 7-5, 6-3, 5-7, 4-6, 6-4 to win the men's singles title at the ... Nadal previously won the U.S. Open three times, most recently in 2017. Ahead of the match, Nadal said he was “super happy to be back in the ... Watch the full match between Daniil Medvedev and Rafael ... Duration: 4:47:32. Posted: Mar 20, 2020. US Open 2019: Rafael Nadal beats Daniil Medvedev · Updated: Sep. 08, 2019, 11:11 p.m. |; Published: Sep · Published: Sep. 08, 2019, 10:06 p.m.. 26. US Open ...\u001B[0m\n",
     "Thought:\n",
-    "Observation: \u001b[33;1m\u001b[1;3m22 years\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I need to find out Harry Styles' age.\n",
-    "Action: Search\n",
-    "Action Input: \"Harry Styles age\"\u001b[0m\n",
-    "Observation: \u001b[33;1m\u001b[1;3m29 years\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I need to calculate her age raised to the 0.34 power\n",
+    "Observation: \u001B[36;1m\u001B[1;3mWHAT HAPPENED: #SheTheNorth? She the champion. Nineteen-year-old Canadian Bianca Andreescu sealed her first Grand Slam title on Saturday, downing 23-time major champion Serena Williams in the 2019 US Open women's singles final, 6-3, 7-5. Sep 7, 2019\u001B[0m\n",
     "Thought:\n",
+    "Observation: \u001B[36;1m\u001B[1;3mLewis Hamilton holds the record for the most race wins in Formula One history, with 103 wins to date. Michael Schumacher, the previous record holder, ... Michael Schumacher (top left) and Lewis Hamilton (top right) have each won the championship a record seven times during their careers, while Sebastian Vettel ( ... Grand Prix, Date, Winner, Car, Laps, Time. Bahrain, 05 Mar 2023, Max Verstappen VER, Red Bull Racing Honda RBPT, 57, 1:33:56.736. Saudi Arabia, 19 Mar 2023 ... The Red Bull driver Max Verstappen of the Netherlands celebrated winning his first Formula 1 world title at the Abu Dhabi Grand Prix. Perez wins sprint as Verstappen, Russell clash. Red Bull's Sergio Perez won the first sprint of the 2023 Formula One season after catching and passing Charles ... The most successful driver in the history of F1 is Lewis Hamilton. The man from Stevenage has won 103 Grands Prix throughout his illustrious career and is still ... Lewis Hamilton: 103. Max Verstappen: 37. Michael Schumacher: 91. Fernando Alonso: 32. Max Verstappen and Sergio Perez will race in a very different-looking Red Bull this weekend after the team unveiled a striking special livery for the Miami GP. Lewis Hamilton holds the record of most victories with 103, ahead of Michael Schumacher (91) and Sebastian Vettel (53). Schumacher also holds the record for the ... Lewis Hamilton holds the record for the most race wins in Formula One history, with 103 wins to date. Michael Schumacher, the previous record holder, is second ...\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m I need to find out Harry Styles' age.\n",
+    "Action: Google Serper\n",
+    "Action Input: \"Harry Styles age\"\u001B[0m\u001B[32;1m\u001B[1;3m I need to find out Jay-Z's age\n",
+    "Action: Google Serper\n",
+    "Action Input: \"How old is Jay-Z?\"\u001B[0m\u001B[32;1m\u001B[1;3m I now know that Rafael Nadal won the US Open men's final in 2019 and he is 33 years old.\n",
     "Action: Calculator\n",
-    "Action Input: 22^0.34\u001b[0m\u001b[32;1m\u001b[1;3m I need to calculate 53 raised to the 0.19 power\n",
+    "Action Input: 33^0.334\u001B[0m\u001B[32;1m\u001B[1;3m I now need to calculate her age raised to the 0.34 power.\n",
     "Action: Calculator\n",
-    "Action Input: 53^0.19\u001b[0m\u001b[32;1m\u001b[1;3m I need to calculate 29 raised to the 0.23 power.\n",
+    "Action Input: 19^0.34\u001B[0m\n",
+    "Observation: \u001B[36;1m\u001B[1;3m29 years\u001B[0m\n",
+    "Thought:\n",
+    "Observation: \u001B[36;1m\u001B[1;3m53 years\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m Max Verstappen won the most recent Formula 1 grand prix.\n",
     "Action: Calculator\n",
-    "Action Input: 29^0.23\u001b[0m\u001b[32;1m\u001b[1;3m I need to find out the age of the winner\n",
-    "Action: Search\n",
-    "Action Input: \"Rafael Nadal age\"\u001b[0m\u001b[32;1m\u001b[1;3m I need to find out the age of the winner\n",
-    "Action: Search\n",
-    "Action Input: \"Michael Schumacher age\"\u001b[0m\n",
     "Observation: \n",
-    "Observation: \u001b[33;1m\u001b[1;3m36 years\u001b[0m\n",
-    "Thought:\u001b[33;1m\u001b[1;3m54 years\u001b[0m\n",
+    "Action Input: Max Verstappen's age (23) raised to the 0.23 power\u001B[0m\n",
+    "Observation: \u001B[33;1m\u001B[1;3mAnswer: 2.7212987634680084\u001B[0m\n",
     "Thought:\n",
-    "Observation: \u001b[36;1m\u001b[1;3mAnswer: 2.8603798598506933\u001b[0m\n",
+    "Observation: \u001B[33;1m\u001B[1;3mAnswer: 3.215019829667466\u001B[0m\n",
+    "Thought:\u001B[32;1m\u001B[1;3m I need to calculate 29 raised to the 0.23 power.\n",
+    "Action: Calculator\n",
+    "Action Input: 29^0.23\u001B[0m\u001B[32;1m\u001B[1;3m I need to calculate 53 raised to the 0.19 power\n",
+    "Action: Calculator\n",
+    "Action Input: 53^0.19\u001B[0m\n",
+    "Observation: \u001B[33;1m\u001B[1;3mAnswer: 2.0568252837687546\u001B[0m\n",
     "Thought:\n",
-    "Observation: \u001b[36;1m\u001b[1;3mAnswer: 2.169459462491557\u001b[0m\n",
+    "Observation: \u001B[33;1m\u001B[1;3mAnswer: 2.169459462491557\u001B[0m\n",
     "Thought:\n",
-    "Observation: \u001b[36;1m\u001b[1;3mAnswer: 2.12624064206896\u001b[0m\n",
-    "Thought:\u001b[32;1m\u001b[1;3m I need to calculate the age raised to the 0.334 power\n",
-    "Action: Calculator\n",
-    "Action Input: 36^0.334\u001b[0m\u001b[32;1m\u001b[1;3m I now need to calculate the age raised to the 0.23 power\n",
-    "Action: Calculator\n",
-    "Action Input: 54^0.23\u001b[0m\n",
-    "\u001b[1m> Finished chain.\u001b[0m\n",
-    "\n",
-    "\u001b[1m> Finished chain.\u001b[0m\n",
-    "\n",
-    "\u001b[1m> Finished chain.\u001b[0m\n",
+    "\u001B[1m> Finished chain.\u001B[0m\n",
     "\n",
-    "Observation: \u001b[36;1m\u001b[1;3mAnswer: 3.3098250249682484\u001b[0m\n",
-    "Thought:\n",
-    "Observation: \u001b[36;1m\u001b[1;3mAnswer: 2.502940725307012\u001b[0m\n",
-    "Thought:\n",
-    "\u001b[1m> Finished chain.\u001b[0m\n",
-    "\n",
-    "\u001b[1m> Finished chain.\u001b[0m\n",
     "Concurrent executed in 14.49 seconds.\n"
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001B[33;1m\u001B[1;3mAnswer: 2.12624064206896\u001B[0m\n",
|
||||
"Thought:\n",
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n",
|
||||
"\n",
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n",
|
||||
"\n",
|
||||
"\u001B[1m> Finished chain.\u001B[0m\n",
|
||||
"Concurrent executed in 17.52 seconds.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"tools = load_tools([\"llm-math\", \"serpapi\"], llm=llm)\n",
|
||||
"tools = load_tools([\"google-serper\",\"llm-math\"], llm=llm)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True\n",
|
||||
")\n",
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 1,
|
||||
"id": "9af9734e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -100,13 +100,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 12,
|
||||
"id": "339b1bb8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set up the base template\n",
|
||||
"template = \"\"\"Answer the following questions as best you can, but speaking as a pirate might speak. You have access to the following tools:\n",
|
||||
"template = \"\"\"Complete the objective as best you can. You have access to the following tools:\n",
|
||||
"\n",
|
||||
"{tools}\n",
|
||||
"\n",
|
||||
@@ -121,7 +121,11 @@
|
||||
"Thought: I now know the final answer\n",
|
||||
"Final Answer: the final answer to the original input question\n",
|
||||
"\n",
|
||||
"Begin! Remember to speak as a pirate when giving your final answer. Use lots of \"Arg\"s\n",
|
||||
"These were previous tasks you completed:\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Begin!\n",
|
||||
"\n",
|
||||
"Question: {input}\n",
|
||||
"{agent_scratchpad}\"\"\""
|
||||
@@ -129,7 +133,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 13,
|
||||
"id": "fd969d31",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -161,7 +165,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 14,
|
||||
"id": "798ef9fb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -189,7 +193,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 15,
|
||||
"id": "7c6fe0d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -218,7 +222,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 16,
|
||||
"id": "d278706a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -238,7 +242,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 17,
|
||||
"id": "f9d4c374",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -270,7 +274,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 18,
|
||||
"id": "9b1cc2a2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -281,7 +285,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 19,
|
||||
"id": "e4f5092f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -307,7 +311,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 20,
|
||||
"id": "490604e9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -317,7 +321,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 21,
|
||||
"id": "653b1617",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -328,16 +332,13 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: Wot year be it now? That be important to know the answer.\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I should use a reliable search engine to get accurate information.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"current population canada 2023\"\u001b[0m\n",
|
||||
"Action Input: \"Leo DiCaprio girlfriend\"\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[36;1m\u001b[1;3m38,649,283\u001b[0m\u001b[32;1m\u001b[1;3mAhoy! That be the correct year, but the answer be in regular numbers. 'Tis time to translate to pirate speak.\n",
|
||||
"Action: Search\n",
|
||||
"Action Input: \"38,649,283 in pirate speak\"\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation:\u001b[36;1m\u001b[1;3mBrush up on your “Pirate Talk” with these helpful pirate phrases. Aaaarrrrgggghhhh! Pirate catch phrase of grumbling or disgust. Ahoy! Hello! Ahoy, Matey, Hello ...\u001b[0m\u001b[32;1m\u001b[1;3mThat be not helpful, I'll just do the translation meself.\n",
|
||||
"Final Answer: Arrrr, thar be 38,649,283 scallywags in Canada as of 2023.\u001b[0m\n",
|
||||
"Observation:\u001b[36;1m\u001b[1;3mHe went on to date Gisele Bündchen, Bar Refaeli, Blake Lively, Toni Garrn and Nina Agdal, among others, before finally settling down with current girlfriend Camila Morrone, who is 23 years his junior.\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mI have found the answer to the question.\n",
|
||||
"Final Answer: Leo DiCaprio's current girlfriend is Camila Morrone.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
@@ -345,16 +346,16 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Arrrr, thar be 38,649,283 scallywags in Canada as of 2023.'"
|
||||
"\"Leo DiCaprio's current girlfriend is Camila Morrone.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.run(\"How many people live in canada as of 2023?\")"
|
||||
"agent_executor.run(\"Search for Leo DiCaprio's girlfriend on the internet.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -14,6 +14,17 @@
|
||||
"This functionality is natively available in the (`structured-chat-zero-shot-react-description` or `AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION`)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ccc8ff98",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"os.environ[\"LANGCHAIN_TRACING\"] = \"true\" # If you want to trace the execution of the program, set to \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
@@ -85,18 +96,6 @@
|
||||
"agent_chain = initialize_agent(tools, llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "c4a45575-f3ef-46ba-a943-475584073984",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.callbacks import tracing_enabled # This is used to configure tracing for our runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
@@ -119,8 +118,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with tracing_enabled(): # If you want to see the traces in the UI\n",
|
||||
" response = await agent_chain.arun(input=\"Hi I'm Erica.\")\n",
|
||||
"response = await agent_chain.arun(input=\"Hi I'm Erica.\")\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
@@ -146,8 +144,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with tracing_enabled(): # If you want to see the traces in the UI\n",
|
||||
" response = await agent_chain.arun(input=\"Don't need help really just chatting.\")\n",
|
||||
"response = await agent_chain.arun(input=\"Don't need help really just chatting.\")\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
@@ -222,8 +219,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with tracing_enabled(): # If you want to see the traces in the UI\n",
|
||||
" response = await agent_chain.arun(input=\"Browse to blog.langchain.dev and summarize the text, please.\")\n",
|
||||
"response = await agent_chain.arun(input=\"Browse to blog.langchain.dev and summarize the text, please.\")\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
@@ -274,8 +270,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with tracing_enabled(): # If you want to see the traces in the UI\n",
|
||||
" response = await agent_chain.arun(input=\"What's the latest xkcd comic about?\")\n",
|
||||
"response = await agent_chain.arun(input=\"What's the latest xkcd comic about?\")\n",
|
||||
"print(response)"
|
||||
]
|
||||
},
|
||||
|
||||
362
docs/modules/agents/plan_and_execute.ipynb
Normal file
@@ -0,0 +1,362 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "406483c4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Plan and Execute\n",
|
||||
"\n",
|
||||
"Plan and execute agents accomplish an objective by first planning what to do, then executing the sub tasks. This idea is largely inspired by [BabyAGI](https://github.com/yoheinakajima/babyagi) and then the [\"Plan-and-Solve\" paper](https://arxiv.org/abs/2305.04091).\n",
|
||||
"\n",
|
||||
"The planning is almost always done by an LLM.\n",
|
||||
"\n",
|
||||
"The execution is usually done by a separate agent (equipped with tools)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "91192118",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Imports"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "6ccd1dc5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.experimental.plan_and_execute import PlanAndExecute, load_agent_executor, load_chat_planner\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain import SerpAPIWrapper\n",
|
||||
"from langchain.agents.tools import Tool\n",
|
||||
"from langchain import LLMMathChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0b10d200",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "3c00f724",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"llm_math_chain = LLMMathChain.from_llm(llm=llm, verbose=True)\n",
|
||||
"tools = [\n",
|
||||
" Tool(\n",
|
||||
" name = \"Search\",\n",
|
||||
" func=search.run,\n",
|
||||
" description=\"useful for when you need to answer questions about current events\"\n",
|
||||
" ),\n",
|
||||
" Tool(\n",
|
||||
" name=\"Calculator\",\n",
|
||||
" func=llm_math_chain.run,\n",
|
||||
" description=\"useful for when you need to answer questions about math\"\n",
|
||||
" ),\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ce38ae84",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Planner, Executor, and Agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "0ab2cadd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = ChatOpenAI(temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "7b2419f2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"planner = load_chat_planner(model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "ed9f518b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"executor = load_agent_executor(model, tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "36943178",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = PlanAndExecute(planner=planner, executer=executor, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8be9f1bd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Run Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "4891062e",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new PlanAndExecute chain...\u001b[0m\n",
|
||||
"steps=[Step(value=\"Search for Leo DiCaprio's girlfriend on the internet.\"), Step(value='Find her current age.'), Step(value='Raise her current age to the 0.43 power using a calculator or programming language.'), Step(value='Output the result.'), Step(value=\"Given the above steps taken, respond to the user's original question.\\n\\n\")]\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mAction:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Search\",\n",
|
||||
" \"action_input\": \"Who is Leo DiCaprio's girlfriend?\"\n",
|
||||
"}\n",
|
||||
"``` \n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mDiCaprio broke up with girlfriend Camila Morrone, 25, in the summer of 2022, after dating for four years. He's since been linked to another famous supermodel – Gigi Hadid. The power couple were first supposedly an item in September after being spotted getting cozy during a party at New York Fashion Week.\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mBased on the previous observation, I can provide the answer to the current objective. \n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"Leo DiCaprio is currently linked to Gigi Hadid.\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"*****\n",
|
||||
"\n",
|
||||
"Step: Search for Leo DiCaprio's girlfriend on the internet.\n",
|
||||
"\n",
|
||||
"Response: Leo DiCaprio is currently linked to Gigi Hadid.\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mAction:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Search\",\n",
|
||||
" \"action_input\": \"What is Gigi Hadid's current age?\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m28 years\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mPrevious steps: steps=[(Step(value=\"Search for Leo DiCaprio's girlfriend on the internet.\"), StepResponse(response='Leo DiCaprio is currently linked to Gigi Hadid.'))]\n",
|
||||
"\n",
|
||||
"Current objective: value='Find her current age.'\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Search\",\n",
|
||||
" \"action_input\": \"What is Gigi Hadid's current age?\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m28 years\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mBased on my search, Gigi Hadid's current age is 26 years old. \n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"Gigi Hadid's current age is 26 years old.\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"*****\n",
|
||||
"\n",
|
||||
"Step: Find her current age.\n",
|
||||
"\n",
|
||||
"Response: Gigi Hadid's current age is 26 years old.\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mAction:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Calculator\",\n",
|
||||
" \"action_input\": \"26 ** 0.43\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMMathChain chain...\u001b[0m\n",
|
||||
"26 ** 0.43\u001b[32;1m\u001b[1;3m\n",
|
||||
"```text\n",
|
||||
"26 ** 0.43\n",
|
||||
"```\n",
|
||||
"...numexpr.evaluate(\"26 ** 0.43\")...\n",
|
||||
"\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m4.059182145592686\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 4.059182145592686\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe current objective is to raise Gigi Hadid's age to the 0.43 power. \n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Calculator\",\n",
|
||||
" \"action_input\": \"26 ** 0.43\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new LLMMathChain chain...\u001b[0m\n",
|
||||
"26 ** 0.43\u001b[32;1m\u001b[1;3m\n",
|
||||
"```text\n",
|
||||
"26 ** 0.43\n",
|
||||
"```\n",
|
||||
"...numexpr.evaluate(\"26 ** 0.43\")...\n",
|
||||
"\u001b[0m\n",
|
||||
"Answer: \u001b[33;1m\u001b[1;3m4.059182145592686\u001b[0m\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mAnswer: 4.059182145592686\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3mThe answer to the current objective is 4.059182145592686.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"Gigi Hadid's age raised to the 0.43 power is approximately 4.059 years.\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"*****\n",
|
||||
"\n",
|
||||
"Step: Raise her current age to the 0.43 power using a calculator or programming language.\n",
|
||||
"\n",
|
||||
"Response: Gigi Hadid's age raised to the 0.43 power is approximately 4.059 years.\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mAction:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"Gigi Hadid's age raised to the 0.43 power is approximately 4.059 years.\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"*****\n",
|
||||
"\n",
|
||||
"Step: Output the result.\n",
|
||||
"\n",
|
||||
"Response: Gigi Hadid's age raised to the 0.43 power is approximately 4.059 years.\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mAction:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Final Answer\",\n",
|
||||
" \"action_input\": \"Gigi Hadid's age raised to the 0.43 power is approximately 4.059 years.\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"*****\n",
|
||||
"\n",
|
||||
"Step: Given the above steps taken, respond to the user's original question.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Response: Gigi Hadid's age raised to the 0.43 power is approximately 4.059 years.\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"Gigi Hadid's age raised to the 0.43 power is approximately 4.059 years.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "aa3ec998",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -116,7 +116,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"how many people have more than 3 sibligngs\")"
|
||||
"agent.run(\"how many people have more than 3 siblings\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
232
docs/modules/agents/toolkits/examples/gmail.ipynb
Normal file
@@ -0,0 +1,232 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Gmail Toolkit\n",
|
||||
"\n",
|
||||
"This notebook walks through connecting a LangChain email to the Gmail API.\n",
|
||||
"\n",
|
||||
"To use this toolkit, you will need to set up your credentials explained in the [Gmail API docs](https://developers.google.com/gmail/api/quickstart/python#authorize_credentials_for_a_desktop_application). Once you've downloaded the `credentials.json` file, you can start using the Gmail API. Once this is done, we'll install the required libraries."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install --upgrade google-api-python-client > /dev/null\n",
|
||||
"!pip install --upgrade google-auth-oauthlib > /dev/null\n",
|
||||
"!pip install --upgrade google-auth-httplib2 > /dev/null\n",
|
||||
"!pip install beautifulsoup4 > /dev/null # This is optional but is useful for parsing HTML messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create the Toolkit\n",
|
||||
"\n",
|
||||
"By default the toolkit reads the local `credentials.json` file. You can also manually provide a `Credentials` object."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents.agent_toolkits import GmailToolkit\n",
|
||||
"\n",
|
||||
"toolkit = GmailToolkit() "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customizing Authentication\n",
|
||||
"\n",
|
||||
"Behind the scenes, a `googleapi` resource is created using the following methods. \n",
|
||||
"you can manually build a `googleapi` resource for more auth control. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.tools.gmail.utils import build_resource_service, get_gmail_credentials\n",
|
||||
"\n",
|
||||
"# Can review scopes here https://developers.google.com/gmail/api/auth/scopes\n",
|
||||
"# For instance, readonly scope is 'https://www.googleapis.com/auth/gmail.readonly'\n",
|
||||
"credentials = get_gmail_credentials(\n",
|
||||
" token_file='token.json',\n",
|
||||
" scopes=[\"https://mail.google.com/\"],\n",
|
||||
" client_secrets_file=\"credentials.json\",\n",
|
||||
")\n",
|
||||
"api_resource = build_resource_service(credentials=credentials)\n",
|
||||
"toolkit = GmailToolkit(api_resource=api_resource)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[GmailCreateDraft(name='create_gmail_draft', description='Use this tool to create a draft email with the provided message fields.', args_schema=<class 'langchain.tools.gmail.create_draft.CreateDraftSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailSendMessage(name='send_gmail_message', description='Use this tool to send email messages. The input is the message, recipents', args_schema=None, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailSearch(name='search_gmail', description=('Use this tool to search for email messages or threads. The input must be a valid Gmail query. The output is a JSON list of the requested resource.',), args_schema=<class 'langchain.tools.gmail.search.SearchArgsSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailGetMessage(name='get_gmail_message', description='Use this tool to fetch an email by message ID. Returns the thread ID, snipet, body, subject, and sender.', args_schema=<class 'langchain.tools.gmail.get_message.SearchArgsSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>),\n",
|
||||
" GmailGetThread(name='get_gmail_thread', description=('Use this tool to search for email messages. The input must be a valid Gmail query. The output is a JSON list of messages.',), args_schema=<class 'langchain.tools.gmail.get_thread.GetThreadSchema'>, return_direct=False, verbose=False, callbacks=None, callback_manager=None, api_resource=<googleapiclient.discovery.Resource object at 0x10e5c6d10>)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an Agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import OpenAI\n",
|
||||
"from langchain.agents import initialize_agent, AgentType"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools=toolkit.get_tools(),\n",
|
||||
" llm=llm,\n",
|
||||
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to load default session, using empty session: 0\n",
|
||||
"WARNING:root:Failed to persist run: {\"detail\":\"Not Found\"}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'I have created a draft email for you to edit. The draft Id is r5681294731961864018.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Create a gmail draft for me to edit of a letter from the perspective of a sentient parrot\"\n",
|
||||
" \" who is looking to collaborate on some research with her\"\n",
|
||||
" \" estranged friend, a cat. Under no circumstances may you send the message, however.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:Failed to load default session, using empty session: 0\n",
|
||||
"WARNING:root:Failed to persist run: {\"detail\":\"Not Found\"}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"The latest email in your drafts is from hopefulparrot@gmail.com with the subject 'Collaboration Opportunity'. The body of the email reads: 'Dear [Friend], I hope this letter finds you well. I am writing to you in the hopes of rekindling our friendship and to discuss the possibility of collaborating on some research together. I know that we have had our differences in the past, but I believe that we can put them aside and work together for the greater good. I look forward to hearing from you. Sincerely, [Parrot]'\""
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"Could you search in my drafts for the latest email?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -118,7 +118,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"how many people have more than 3 sibligngs\")"
|
||||
"agent.run(\"how many people have more than 3 siblings\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
229
docs/modules/agents/toolkits/examples/spark.ipynb
Normal file
@@ -0,0 +1,229 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Spark Dataframe Agent\n",
|
||||
"\n",
|
||||
"This notebook shows how to use agents to interact with a Spark dataframe. It is mostly optimized for question answering.\n",
|
||||
"\n",
|
||||
"**NOTE: this agent calls the Python agent under the hood, which executes LLM generated Python code - this can be bad if the LLM generated Python code is harmful. Use cautiously.**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import create_spark_dataframe_agent\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"...input_your_openai_api_key...\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"|PassengerId|Survived|Pclass| Name| Sex| Age|SibSp|Parch| Ticket| Fare|Cabin|Embarked|\n",
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"| 1| 0| 3|Braund, Mr. Owen ...| male|22.0| 1| 0| A/5 21171| 7.25| null| S|\n",
|
||||
"| 2| 1| 1|Cumings, Mrs. Joh...|female|38.0| 1| 0| PC 17599|71.2833| C85| C|\n",
|
||||
"| 3| 1| 3|Heikkinen, Miss. ...|female|26.0| 0| 0|STON/O2. 3101282| 7.925| null| S|\n",
|
||||
"| 4| 1| 1|Futrelle, Mrs. Ja...|female|35.0| 1| 0| 113803| 53.1| C123| S|\n",
|
||||
"| 5| 0| 3|Allen, Mr. Willia...| male|35.0| 0| 0| 373450| 8.05| null| S|\n",
|
||||
"| 6| 0| 3| Moran, Mr. James| male|null| 0| 0| 330877| 8.4583| null| Q|\n",
|
||||
"| 7| 0| 1|McCarthy, Mr. Tim...| male|54.0| 0| 0| 17463|51.8625| E46| S|\n",
|
||||
"| 8| 0| 3|Palsson, Master. ...| male| 2.0| 3| 1| 349909| 21.075| null| S|\n",
|
||||
"| 9| 1| 3|Johnson, Mrs. Osc...|female|27.0| 0| 2| 347742|11.1333| null| S|\n",
|
||||
"| 10| 1| 2|Nasser, Mrs. Nich...|female|14.0| 1| 0| 237736|30.0708| null| C|\n",
|
||||
"| 11| 1| 3|Sandstrom, Miss. ...|female| 4.0| 1| 1| PP 9549| 16.7| G6| S|\n",
|
||||
"| 12| 1| 1|Bonnell, Miss. El...|female|58.0| 0| 0| 113783| 26.55| C103| S|\n",
|
||||
"| 13| 0| 3|Saundercock, Mr. ...| male|20.0| 0| 0| A/5. 2151| 8.05| null| S|\n",
|
||||
"| 14| 0| 3|Andersson, Mr. An...| male|39.0| 1| 5| 347082| 31.275| null| S|\n",
|
||||
"| 15| 0| 3|Vestrom, Miss. Hu...|female|14.0| 0| 0| 350406| 7.8542| null| S|\n",
|
||||
"| 16| 1| 2|Hewlett, Mrs. (Ma...|female|55.0| 0| 0| 248706| 16.0| null| S|\n",
|
||||
"| 17| 0| 3|Rice, Master. Eugene| male| 2.0| 4| 1| 382652| 29.125| null| Q|\n",
|
||||
"| 18| 1| 2|Williams, Mr. Cha...| male|null| 0| 0| 244373| 13.0| null| S|\n",
|
||||
"| 19| 0| 3|Vander Planke, Mr...|female|31.0| 1| 0| 345763| 18.0| null| S|\n",
|
||||
"| 20| 1| 3|Masselmani, Mrs. ...|female|null| 0| 0| 2649| 7.225| null| C|\n",
|
||||
"+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n",
|
||||
"only showing top 20 rows\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from pyspark.sql import SparkSession\n",
|
||||
"\n",
|
||||
"spark = SparkSession.builder.getOrCreate()\n",
|
||||
"csv_file_path = \"titanic.csv\"\n",
|
||||
"df = spark.read.csv(csv_file_path, header=True, inferSchema=True)\n",
|
||||
"df.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent = create_spark_dataframe_agent(llm=OpenAI(temperature=0), df=df, verbose=True)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out how many rows are in the dataframe\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: df.count()\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m891\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: There are 891 rows in the dataframe.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'There are 891 rows in the dataframe.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"how many rows are there?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to find out how many people have more than 3 siblings\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: df.filter(df.SibSp > 3).count()\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m30\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: 30 people have more than 3 siblings.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'30 people have more than 3 siblings.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"how many people have more than 3 siblings\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mThought: I need to get the average age first\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: df.agg({\"Age\": \"mean\"}).collect()[0][0]\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m29.69911764705882\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now have the average age, I need to get the square root\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: math.sqrt(29.69911764705882)\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mname 'math' is not defined\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to import math first\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: import math\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now have the math library imported, I can get the square root\n",
|
||||
"Action: python_repl_ast\n",
|
||||
"Action Input: math.sqrt(29.69911764705882)\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3m5.449689683556195\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: 5.449689683556195\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'5.449689683556195'"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent.run(\"whats the square root of the average age?\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "LangChain",
|
||||
"language": "python",
|
||||
"name": "langchain"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,7 +1,6 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -20,7 +19,15 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install apify-client"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -39,7 +46,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -60,7 +66,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -85,7 +90,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -102,7 +106,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -156,9 +159,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"id": "d5a7209e",
|
||||
"metadata": {
|
||||
"tags": [],
|
||||
@@ -22,17 +22,7 @@
|
||||
"languageId": "shellscript"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: arxiv in /Users/wfh/code/lc/lckg/.venv/lib/python3.11/site-packages (1.4.7)\n",
|
||||
"Requirement already satisfied: feedparser in /Users/wfh/code/lc/lckg/.venv/lib/python3.11/site-packages (from arxiv) (6.0.10)\n",
|
||||
"Requirement already satisfied: sgmllib3k in /Users/wfh/code/lc/lckg/.venv/lib/python3.11/site-packages (from feedparser->arxiv) (1.0.0)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install arxiv"
|
||||
]
|
||||
@@ -178,13 +168,14 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "840f70c9-8f80-4680-bb38-46198e931bcf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, we want to get information about one author, `Caprice Stanley`.\n",
|
||||
"\n",
|
||||
"This query returns information about three articles. By default, query returns information only about three top articles."
|
||||
"This query returns information about three articles. By default, the query returns information only about three top articles."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -260,7 +251,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -12,21 +12,34 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 11,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import pprint\n",
|
||||
"os.environ[\"SERPER_API_KEY\"] = \"\""
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
},
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:56:29.336521Z",
|
||||
"start_time": "2023-05-04T00:56:29.334173Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "54bf5afd",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:07.676293Z",
|
||||
"start_time": "2023-05-04T00:54:06.665742Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.utilities import GoogleSerperAPIWrapper"
|
||||
@@ -36,7 +49,12 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "31f8f382",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:08.324245Z",
|
||||
"start_time": "2023-05-04T00:54:08.321577Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"search = GoogleSerperAPIWrapper()"
|
||||
@@ -46,7 +64,12 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "25ce0225",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:11.399847Z",
|
||||
"start_time": "2023-05-04T00:54:09.335597Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@@ -72,13 +95,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ['OPENAI_API_KEY'] = \"\""
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:14.311773Z",
|
||||
"start_time": "2023-05-04T00:54:14.304389Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -133,6 +160,693 @@
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Obtaining results with metadata\n",
|
||||
"If you would also like to obtain the results in a structured way including metadata. For this we will be using the `results` method of the wrapper."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'searchParameters': {'q': 'Apple Inc.',\n",
|
||||
" 'gl': 'us',\n",
|
||||
" 'hl': 'en',\n",
|
||||
" 'num': 10,\n",
|
||||
" 'type': 'search'},\n",
|
||||
" 'knowledgeGraph': {'title': 'Apple',\n",
|
||||
" 'type': 'Technology company',\n",
|
||||
" 'website': 'http://www.apple.com/',\n",
|
||||
" 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQwGQRv5TjjkycpctY66mOg_e2-npacrmjAb6_jAWhzlzkFE3OTjxyzbA&s=0',\n",
|
||||
" 'description': 'Apple Inc. is an American multinational '\n",
|
||||
" 'technology company headquartered in '\n",
|
||||
" 'Cupertino, California. Apple is the '\n",
|
||||
" \"world's largest technology company by \"\n",
|
||||
" 'revenue, with US$394.3 billion in 2022 '\n",
|
||||
" 'revenue. As of March 2023, Apple is the '\n",
|
||||
" \"world's biggest...\",\n",
|
||||
" 'descriptionSource': 'Wikipedia',\n",
|
||||
" 'descriptionLink': 'https://en.wikipedia.org/wiki/Apple_Inc.',\n",
|
||||
" 'attributes': {'Customer service': '1 (800) 275-2273',\n",
|
||||
" 'CEO': 'Tim Cook (Aug 24, 2011–)',\n",
|
||||
" 'Headquarters': 'Cupertino, CA',\n",
|
||||
" 'Founded': 'April 1, 1976, Los Altos, CA',\n",
|
||||
" 'Founders': 'Steve Jobs, Steve Wozniak, '\n",
|
||||
" 'Ronald Wayne, and more',\n",
|
||||
" 'Products': 'iPhone, iPad, Apple TV, and '\n",
|
||||
" 'more'}},\n",
|
||||
" 'organic': [{'title': 'Apple',\n",
|
||||
" 'link': 'https://www.apple.com/',\n",
|
||||
" 'snippet': 'Discover the innovative world of Apple and shop '\n",
|
||||
" 'everything iPhone, iPad, Apple Watch, Mac, and Apple '\n",
|
||||
" 'TV, plus explore accessories, entertainment, ...',\n",
|
||||
" 'sitelinks': [{'title': 'Support',\n",
|
||||
" 'link': 'https://support.apple.com/'},\n",
|
||||
" {'title': 'iPhone',\n",
|
||||
" 'link': 'https://www.apple.com/iphone/'},\n",
|
||||
" {'title': 'Site Map',\n",
|
||||
" 'link': 'https://www.apple.com/sitemap/'},\n",
|
||||
" {'title': 'Business',\n",
|
||||
" 'link': 'https://www.apple.com/business/'},\n",
|
||||
" {'title': 'Mac',\n",
|
||||
" 'link': 'https://www.apple.com/mac/'},\n",
|
||||
" {'title': 'Watch',\n",
|
||||
" 'link': 'https://www.apple.com/watch/'}],\n",
|
||||
" 'position': 1},\n",
|
||||
" {'title': 'Apple Inc. - Wikipedia',\n",
|
||||
" 'link': 'https://en.wikipedia.org/wiki/Apple_Inc.',\n",
|
||||
" 'snippet': 'Apple Inc. is an American multinational technology '\n",
|
||||
" 'company headquartered in Cupertino, California. '\n",
|
||||
" \"Apple is the world's largest technology company by \"\n",
|
||||
" 'revenue, ...',\n",
|
||||
" 'attributes': {'Products': 'AirPods; Apple Watch; iPad; iPhone; '\n",
|
||||
" 'Mac; Full list',\n",
|
||||
" 'Founders': 'Steve Jobs; Steve Wozniak; Ronald '\n",
|
||||
" 'Wayne; Mike Markkula'},\n",
|
||||
" 'sitelinks': [{'title': 'History',\n",
|
||||
" 'link': 'https://en.wikipedia.org/wiki/History_of_Apple_Inc.'},\n",
|
||||
" {'title': 'Timeline of Apple Inc. products',\n",
|
||||
" 'link': 'https://en.wikipedia.org/wiki/Timeline_of_Apple_Inc._products'},\n",
|
||||
" {'title': 'Litigation involving Apple Inc.',\n",
|
||||
" 'link': 'https://en.wikipedia.org/wiki/Litigation_involving_Apple_Inc.'},\n",
|
||||
" {'title': 'Apple Store',\n",
|
||||
" 'link': 'https://en.wikipedia.org/wiki/Apple_Store'}],\n",
|
||||
" 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRvmB5fT1LjqpZx02UM7IJq0Buoqt0DZs_y0dqwxwSWyP4PIN9FaxuTea0&s',\n",
|
||||
" 'position': 2},\n",
|
||||
" {'title': 'Apple Inc. | History, Products, Headquarters, & Facts '\n",
|
||||
" '| Britannica',\n",
|
||||
" 'link': 'https://www.britannica.com/topic/Apple-Inc',\n",
|
||||
" 'snippet': 'Apple Inc., formerly Apple Computer, Inc., American '\n",
|
||||
" 'manufacturer of personal computers, smartphones, '\n",
|
||||
" 'tablet computers, computer peripherals, and computer '\n",
|
||||
" '...',\n",
|
||||
" 'attributes': {'Related People': 'Steve Jobs Steve Wozniak Jony '\n",
|
||||
" 'Ive Tim Cook Angela Ahrendts',\n",
|
||||
" 'Date': '1976 - present'},\n",
|
||||
" 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS3liELlhrMz3Wpsox29U8jJ3L8qETR0hBWHXbFnwjwQc34zwZvFELst2E&s',\n",
|
||||
" 'position': 3},\n",
|
||||
" {'title': 'AAPL: Apple Inc Stock Price Quote - NASDAQ GS - '\n",
|
||||
" 'Bloomberg.com',\n",
|
||||
" 'link': 'https://www.bloomberg.com/quote/AAPL:US',\n",
|
||||
" 'snippet': 'AAPL:USNASDAQ GS. Apple Inc. COMPANY INFO ; Open. '\n",
|
||||
" '170.09 ; Prev Close. 169.59 ; Volume. 48,425,696 ; '\n",
|
||||
" 'Market Cap. 2.667T ; Day Range. 167.54170.35.',\n",
|
||||
" 'position': 4},\n",
|
||||
" {'title': 'Apple Inc. (AAPL) Company Profile & Facts - Yahoo '\n",
|
||||
" 'Finance',\n",
|
||||
" 'link': 'https://finance.yahoo.com/quote/AAPL/profile/',\n",
|
||||
" 'snippet': 'Apple Inc. designs, manufactures, and markets '\n",
|
||||
" 'smartphones, personal computers, tablets, wearables, '\n",
|
||||
" 'and accessories worldwide. The company offers '\n",
|
||||
" 'iPhone, a line ...',\n",
|
||||
" 'position': 5},\n",
|
||||
" {'title': 'Apple Inc. (AAPL) Stock Price, News, Quote & History - '\n",
|
||||
" 'Yahoo Finance',\n",
|
||||
" 'link': 'https://finance.yahoo.com/quote/AAPL',\n",
|
||||
" 'snippet': 'Find the latest Apple Inc. (AAPL) stock quote, '\n",
|
||||
" 'history, news and other vital information to help '\n",
|
||||
" 'you with your stock trading and investing.',\n",
|
||||
" 'position': 6}],\n",
|
||||
" 'peopleAlsoAsk': [{'question': 'What does Apple Inc do?',\n",
|
||||
" 'snippet': 'Apple Inc. (Apple) designs, manufactures and '\n",
|
||||
" 'markets smartphones, personal\\n'\n",
|
||||
" 'computers, tablets, wearables and accessories '\n",
|
||||
" 'and sells a range of related\\n'\n",
|
||||
" 'services.',\n",
|
||||
" 'title': 'AAPL.O - | Stock Price & Latest News - Reuters',\n",
|
||||
" 'link': 'https://www.reuters.com/markets/companies/AAPL.O/'},\n",
|
||||
" {'question': 'What is the full form of Apple Inc?',\n",
|
||||
" 'snippet': '(formerly Apple Computer Inc.) is an American '\n",
|
||||
" 'computer and consumer electronics\\n'\n",
|
||||
" 'company famous for creating the iPhone, iPad '\n",
|
||||
" 'and Macintosh computers.',\n",
|
||||
" 'title': 'What is Apple? An products and history overview '\n",
|
||||
" '- TechTarget',\n",
|
||||
" 'link': 'https://www.techtarget.com/whatis/definition/Apple'},\n",
|
||||
" {'question': 'What is Apple Inc iPhone?',\n",
|
||||
" 'snippet': 'Apple Inc (Apple) designs, manufactures, and '\n",
|
||||
" 'markets smartphones, tablets,\\n'\n",
|
||||
" 'personal computers, and wearable devices. The '\n",
|
||||
" 'company also offers software\\n'\n",
|
||||
" 'applications and related services, '\n",
|
||||
" 'accessories, and third-party digital content.\\n'\n",
|
||||
" \"Apple's product portfolio includes iPhone, \"\n",
|
||||
" 'iPad, Mac, iPod, Apple Watch, and\\n'\n",
|
||||
" 'Apple TV.',\n",
|
||||
" 'title': 'Apple Inc Company Profile - Apple Inc Overview - '\n",
|
||||
" 'GlobalData',\n",
|
||||
" 'link': 'https://www.globaldata.com/company-profile/apple-inc/'},\n",
|
||||
" {'question': 'Who runs Apple Inc?',\n",
|
||||
" 'snippet': 'Timothy Donald Cook (born November 1, 1960) is '\n",
|
||||
" 'an American business executive\\n'\n",
|
||||
" 'who has been the chief executive officer of '\n",
|
||||
" 'Apple Inc. since 2011. Cook\\n'\n",
|
||||
" \"previously served as the company's chief \"\n",
|
||||
" 'operating officer under its co-founder\\n'\n",
|
||||
" 'Steve Jobs. He is the first CEO of any Fortune '\n",
|
||||
" '500 company who is openly gay.',\n",
|
||||
" 'title': 'Tim Cook - Wikipedia',\n",
|
||||
" 'link': 'https://en.wikipedia.org/wiki/Tim_Cook'}],\n",
|
||||
" 'relatedSearches': [{'query': 'Who invented the iPhone'},\n",
|
||||
" {'query': 'Apple iPhone'},\n",
|
||||
" {'query': 'History of Apple company PDF'},\n",
|
||||
" {'query': 'Apple company history'},\n",
|
||||
" {'query': 'Apple company introduction'},\n",
|
||||
" {'query': 'Apple India'},\n",
|
||||
" {'query': 'What does Apple Inc own'},\n",
|
||||
" {'query': 'Apple Inc After Steve'},\n",
|
||||
" {'query': 'Apple Watch'},\n",
|
||||
" {'query': 'Apple App Store'}]}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"search = GoogleSerperAPIWrapper()\n",
|
||||
"results = search.results(\"Apple Inc.\")\n",
|
||||
"pprint.pp(results)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
},
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:22.863413Z",
|
||||
"start_time": "2023-05-04T00:54:20.827395Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Searching for Google Images\n",
|
||||
"We can also query Google Images using this wrapper. For example:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'searchParameters': {'q': 'Lion',\n",
|
||||
" 'gl': 'us',\n",
|
||||
" 'hl': 'en',\n",
|
||||
" 'num': 10,\n",
|
||||
" 'type': 'images'},\n",
|
||||
" 'images': [{'title': 'Lion - Wikipedia',\n",
|
||||
" 'imageUrl': 'https://upload.wikimedia.org/wikipedia/commons/thumb/7/73/Lion_waiting_in_Namibia.jpg/1200px-Lion_waiting_in_Namibia.jpg',\n",
|
||||
" 'imageWidth': 1200,\n",
|
||||
" 'imageHeight': 900,\n",
|
||||
" 'thumbnailUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRye79ROKwjfb6017jr0iu8Bz2E1KKuHg-A4qINJaspyxkZrkw&s',\n",
|
||||
" 'thumbnailWidth': 259,\n",
|
||||
" 'thumbnailHeight': 194,\n",
|
||||
" 'source': 'Wikipedia',\n",
|
||||
" 'domain': 'en.wikipedia.org',\n",
|
||||
" 'link': 'https://en.wikipedia.org/wiki/Lion',\n",
|
||||
" 'position': 1},\n",
|
||||
" {'title': 'Lion | Characteristics, Habitat, & Facts | Britannica',\n",
|
||||
" 'imageUrl': 'https://cdn.britannica.com/55/2155-050-604F5A4A/lion.jpg',\n",
|
||||
" 'imageWidth': 754,\n",
|
||||
" 'imageHeight': 752,\n",
|
||||
" 'thumbnailUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS3fnDub1GSojI0hJ-ZGS8Tv-hkNNloXh98DOwXZoZ_nUs3GWSd&s',\n",
|
||||
" 'thumbnailWidth': 225,\n",
|
||||
" 'thumbnailHeight': 224,\n",
|
||||
" 'source': 'Encyclopedia Britannica',\n",
|
||||
" 'domain': 'www.britannica.com',\n",
|
||||
" 'link': 'https://www.britannica.com/animal/lion',\n",
|
||||
" 'position': 2},\n",
|
||||
" {'title': 'African lion, facts and photos',\n",
|
||||
" 'imageUrl': 'https://i.natgeofe.com/n/487a0d69-8202-406f-a6a0-939ed3704693/african-lion.JPG',\n",
|
||||
" 'imageWidth': 3072,\n",
|
||||
" 'imageHeight': 2043,\n",
|
||||
" 'thumbnailUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTPlTarrtDbyTiEm-VI_PML9VtOTVPuDXJ5ybDf_lN11H2mShk&s',\n",
|
||||
" 'thumbnailWidth': 275,\n",
|
||||
" 'thumbnailHeight': 183,\n",
|
||||
" 'source': 'National Geographic',\n",
|
||||
" 'domain': 'www.nationalgeographic.com',\n",
|
||||
" 'link': 'https://www.nationalgeographic.com/animals/mammals/facts/african-lion',\n",
|
||||
" 'position': 3},\n",
|
||||
" {'title': 'Saint Louis Zoo | African Lion',\n",
|
||||
" 'imageUrl': 'https://optimise2.assets-servd.host/maniacal-finch/production/animals/african-lion-01-01.jpg?w=1200&auto=compress%2Cformat&fit=crop&dm=1658933674&s=4b63f926a0f524f2087a8e0613282bdb',\n",
|
||||
" 'imageWidth': 1200,\n",
|
||||
" 'imageHeight': 1200,\n",
|
||||
" 'thumbnailUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTlewcJ5SwC7yKup6ByaOjTnAFDeoOiMxyJTQaph2W_I3dnks4&s',\n",
|
||||
" 'thumbnailWidth': 225,\n",
|
||||
" 'thumbnailHeight': 225,\n",
|
||||
" 'source': 'St. Louis Zoo',\n",
|
||||
" 'domain': 'stlzoo.org',\n",
|
||||
" 'link': 'https://stlzoo.org/animals/mammals/carnivores/lion',\n",
|
||||
" 'position': 4},\n",
|
||||
" {'title': 'How to Draw a Realistic Lion like an Artist - Studio '\n",
|
||||
" 'Wildlife',\n",
|
||||
" 'imageUrl': 'https://studiowildlife.com/wp-content/uploads/2021/10/245528858_183911853822648_6669060845725210519_n.jpg',\n",
|
||||
" 'imageWidth': 1431,\n",
|
||||
" 'imageHeight': 2048,\n",
|
||||
" 'thumbnailUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTmn5HayVj3wqoBDQacnUtzaDPZzYHSLKUlIEcni6VB8w0mVeA&s',\n",
|
||||
" 'thumbnailWidth': 188,\n",
|
||||
" 'thumbnailHeight': 269,\n",
|
||||
" 'source': 'Studio Wildlife',\n",
|
||||
" 'domain': 'studiowildlife.com',\n",
|
||||
" 'link': 'https://studiowildlife.com/how-to-draw-a-realistic-lion-like-an-artist/',\n",
|
||||
" 'position': 5},\n",
|
||||
" {'title': 'Lion | Characteristics, Habitat, & Facts | Britannica',\n",
|
||||
" 'imageUrl': 'https://cdn.britannica.com/29/150929-050-547070A1/lion-Kenya-Masai-Mara-National-Reserve.jpg',\n",
|
||||
" 'imageWidth': 1600,\n",
|
||||
" 'imageHeight': 1085,\n",
|
||||
" 'thumbnailUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSCqaKY_THr0IBZN8c-2VApnnbuvKmnsWjfrwKoWHFR9w3eN5o&s',\n",
|
||||
" 'thumbnailWidth': 273,\n",
|
||||
" 'thumbnailHeight': 185,\n",
|
||||
" 'source': 'Encyclopedia Britannica',\n",
|
||||
" 'domain': 'www.britannica.com',\n",
|
||||
" 'link': 'https://www.britannica.com/animal/lion',\n",
|
||||
" 'position': 6},\n",
|
||||
" {'title': \"Where do lions live? Facts about lions' habitats and \"\n",
|
||||
" 'other cool facts',\n",
|
||||
" 'imageUrl': 'https://www.gannett-cdn.com/-mm-/b2b05a4ab25f4fca0316459e1c7404c537a89702/c=0-0-1365-768/local/-/media/2022/03/16/USATODAY/usatsports/imageForEntry5-ODq.jpg?width=1365&height=768&fit=crop&format=pjpg&auto=webp',\n",
|
||||
" 'imageWidth': 1365,\n",
|
||||
" 'imageHeight': 768,\n",
|
||||
" 'thumbnailUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTc_4vCHscgvFvYy3PSrtIOE81kNLAfhDK8F3mfOuotL0kUkbs&s',\n",
|
||||
" 'thumbnailWidth': 299,\n",
|
||||
" 'thumbnailHeight': 168,\n",
|
||||
" 'source': 'USA Today',\n",
|
||||
" 'domain': 'www.usatoday.com',\n",
|
||||
" 'link': 'https://www.usatoday.com/story/news/2023/01/08/where-do-lions-live-habitat/10927718002/',\n",
|
||||
" 'position': 7},\n",
|
||||
" {'title': 'Lion',\n",
|
||||
" 'imageUrl': 'https://i.natgeofe.com/k/1d33938b-3d02-4773-91e3-70b113c3b8c7/lion-male-roar_square.jpg',\n",
|
||||
" 'imageWidth': 3072,\n",
|
||||
" 'imageHeight': 3072,\n",
|
||||
" 'thumbnailUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQqLfnBrBLcTiyTZynHH3FGbBtX2bd1ScwpcuOLnksTyS9-4GM&s',\n",
|
||||
" 'thumbnailWidth': 225,\n",
|
||||
" 'thumbnailHeight': 225,\n",
|
||||
" 'source': 'National Geographic Kids',\n",
|
||||
" 'domain': 'kids.nationalgeographic.com',\n",
|
||||
" 'link': 'https://kids.nationalgeographic.com/animals/mammals/facts/lion',\n",
|
||||
" 'position': 8},\n",
|
||||
" {'title': \"Lion | Smithsonian's National Zoo\",\n",
|
||||
" 'imageUrl': 'https://nationalzoo.si.edu/sites/default/files/styles/1400_scale/public/animals/exhibit/africanlion-005.jpg?itok=6wA745g_',\n",
|
||||
" 'imageWidth': 1400,\n",
|
||||
" 'imageHeight': 845,\n",
|
||||
" 'thumbnailUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSgB3z_D4dMEOWJ7lajJk4XaQSL4DdUvIRj4UXZ0YoE5fGuWuo&s',\n",
|
||||
" 'thumbnailWidth': 289,\n",
|
||||
" 'thumbnailHeight': 174,\n",
|
||||
" 'source': \"Smithsonian's National Zoo\",\n",
|
||||
" 'domain': 'nationalzoo.si.edu',\n",
|
||||
" 'link': 'https://nationalzoo.si.edu/animals/lion',\n",
|
||||
" 'position': 9},\n",
|
||||
" {'title': \"Zoo's New Male Lion Explores Habitat for the First Time \"\n",
|
||||
" '- Virginia Zoo',\n",
|
||||
" 'imageUrl': 'https://virginiazoo.org/wp-content/uploads/2022/04/ZOO_0056-scaled.jpg',\n",
|
||||
" 'imageWidth': 2560,\n",
|
||||
" 'imageHeight': 2141,\n",
|
||||
" 'thumbnailUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTDCG7XvXRCwpe_-Vy5mpvrQpVl5q2qwgnDklQhrJpQzObQGz4&s',\n",
|
||||
" 'thumbnailWidth': 246,\n",
|
||||
" 'thumbnailHeight': 205,\n",
|
||||
" 'source': 'Virginia Zoo',\n",
|
||||
" 'domain': 'virginiazoo.org',\n",
|
||||
" 'link': 'https://virginiazoo.org/zoos-new-male-lion-explores-habitat-for-thefirst-time/',\n",
|
||||
" 'position': 10}]}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"search = GoogleSerperAPIWrapper(type=\"images\")\n",
|
||||
"results = search.results(\"Lion\")\n",
|
||||
"pprint.pp(results)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:27.879867Z",
|
||||
"start_time": "2023-05-04T00:54:26.380022Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"## Searching for Google News\n",
|
||||
"We can also query Google News using this wrapper. For example:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'searchParameters': {'q': 'Tesla Inc.',\n",
|
||||
" 'gl': 'us',\n",
|
||||
" 'hl': 'en',\n",
|
||||
" 'num': 10,\n",
|
||||
" 'type': 'news'},\n",
|
||||
" 'news': [{'title': 'ISS recommends Tesla investors vote against re-election '\n",
|
||||
" 'of Robyn Denholm',\n",
|
||||
" 'link': 'https://www.reuters.com/business/autos-transportation/iss-recommends-tesla-investors-vote-against-re-election-robyn-denholm-2023-05-04/',\n",
|
||||
" 'snippet': 'Proxy advisory firm ISS on Wednesday recommended Tesla '\n",
|
||||
" 'investors vote against re-election of board chair Robyn '\n",
|
||||
" 'Denholm, citing \"concerns on...',\n",
|
||||
" 'date': '5 mins ago',\n",
|
||||
" 'source': 'Reuters',\n",
|
||||
" 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcROdETe_GUyp1e8RHNhaRM8Z_vfxCvdfinZwzL1bT1ZGSYaGTeOojIdBoLevA&s',\n",
|
||||
" 'position': 1},\n",
|
||||
" {'title': 'Global companies by market cap: Tesla fell most in April',\n",
|
||||
" 'link': 'https://www.reuters.com/markets/global-companies-by-market-cap-tesla-fell-most-april-2023-05-02/',\n",
|
||||
" 'snippet': 'Tesla Inc was the biggest loser among top companies by '\n",
|
||||
" 'market capitalisation in April, hit by disappointing '\n",
|
||||
" 'quarterly earnings after it...',\n",
|
||||
" 'date': '1 day ago',\n",
|
||||
" 'source': 'Reuters',\n",
|
||||
" 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ4u4CP8aOdGyRFH6o4PkXi-_eZDeY96vLSag5gDjhKMYf98YBER2cZPbkStQ&s',\n",
|
||||
" 'position': 2},\n",
|
||||
" {'title': 'Tesla Wanted an EV Price War. Ford Showed Up.',\n",
|
||||
" 'link': 'https://www.bloomberg.com/opinion/articles/2023-05-03/tesla-wanted-an-ev-price-war-ford-showed-up',\n",
|
||||
" 'snippet': 'The legacy automaker is paring back the cost of its '\n",
|
||||
" 'Mustang Mach-E model after Tesla discounted its '\n",
|
||||
" 'competing EVs, portending tighter...',\n",
|
||||
" 'date': '6 hours ago',\n",
|
||||
" 'source': 'Bloomberg.com',\n",
|
||||
" 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcS_3Eo4VI0H-nTeIbYc5DaQn5ep7YrWnmhx6pv8XddFgNF5zRC9gEpHfDq8yQ&s',\n",
|
||||
" 'position': 3},\n",
|
||||
" {'title': 'Joby Aviation to get investment from Tesla shareholder '\n",
|
||||
" 'Baillie Gifford',\n",
|
||||
" 'link': 'https://finance.yahoo.com/news/joby-aviation-investment-tesla-shareholder-204450712.html',\n",
|
||||
" 'snippet': 'This comes days after Joby clinched a $55 million '\n",
|
||||
" 'contract extension to deliver up to nine air taxis to '\n",
|
||||
" 'the U.S. Air Force,...',\n",
|
||||
" 'date': '4 hours ago',\n",
|
||||
" 'source': 'Yahoo Finance',\n",
|
||||
" 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQO0uVn297LI-xryrPNqJ-apUOulj4ohM-xkN4OfmvMOYh1CPdUEBbYx6hviw&s',\n",
|
||||
" 'position': 4},\n",
|
||||
" {'title': 'Tesla resumes U.S. orders for a Model 3 version at lower '\n",
|
||||
" 'price, range',\n",
|
||||
" 'link': 'https://finance.yahoo.com/news/tesla-resumes-us-orders-model-045736115.html',\n",
|
||||
" 'snippet': '(Reuters) -Tesla Inc has resumed taking orders for its '\n",
|
||||
" 'Model 3 long-range vehicle in the United States, the '\n",
|
||||
" \"company's website showed late on...\",\n",
|
||||
" 'date': '19 hours ago',\n",
|
||||
" 'source': 'Yahoo Finance',\n",
|
||||
" 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTIZetJ62sQefPfbQ9KKDt6iH7Mc0ylT5t_hpgeeuUkHhJuAx2FOJ4ZTRVDFg&s',\n",
|
||||
" 'position': 5},\n",
|
||||
" {'title': 'The Tesla Model 3 Long Range AWD Is Now Available in the '\n",
|
||||
" 'U.S. With 325 Miles of Range',\n",
|
||||
" 'link': 'https://www.notateslaapp.com/news/1393/tesla-reopens-orders-for-model-3-long-range-after-months-of-unavailability',\n",
|
||||
" 'snippet': 'Tesla has reopened orders for the Model 3 Long Range '\n",
|
||||
" 'RWD, which has been unavailable for months due to high '\n",
|
||||
" 'demand.',\n",
|
||||
" 'date': '7 hours ago',\n",
|
||||
" 'source': 'Not a Tesla App',\n",
|
||||
" 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSecrgxZpRj18xIJY-nDHljyP-A4ejEkswa9eq77qhMNrScnVIqe34uql5U4w&s',\n",
|
||||
" 'position': 6},\n",
|
||||
" {'title': 'Tesla Cybertruck alpha prototype spotted at the Fremont '\n",
|
||||
" 'factory in new pics and videos',\n",
|
||||
" 'link': 'https://www.teslaoracle.com/2023/05/03/tesla-cybertruck-alpha-prototype-interior-and-exterior-spotted-at-the-fremont-factory-in-new-pics-and-videos/',\n",
|
||||
" 'snippet': 'A Tesla Cybertruck alpha prototype goes to Fremont, '\n",
|
||||
" 'California for another round of testing before going to '\n",
|
||||
" 'production later this year (pics...',\n",
|
||||
" 'date': '14 hours ago',\n",
|
||||
" 'source': 'Tesla Oracle',\n",
|
||||
" 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRO7M5ZLQE-Zo4-_5dv9hNAQZ3wSqfvYCuKqzxHG-M6CgLpwPMMG_ssebdcMg&s',\n",
|
||||
" 'position': 7},\n",
|
||||
" {'title': 'Tesla putting facility in new part of country - Austin '\n",
|
||||
" 'Business Journal',\n",
|
||||
" 'link': 'https://www.bizjournals.com/austin/news/2023/05/02/tesla-leases-building-seattle-area.html',\n",
|
||||
" 'snippet': 'Check out what Puget Sound Business Journal has to '\n",
|
||||
" \"report about the Austin-based company's real estate \"\n",
|
||||
" 'footprint in the Pacific Northwest.',\n",
|
||||
" 'date': '22 hours ago',\n",
|
||||
" 'source': 'The Business Journals',\n",
|
||||
" 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR9kIEHWz1FcHKDUtGQBS0AjmkqtyuBkQvD8kyIY3kpaPrgYaN7I_H2zoOJsA&s',\n",
|
||||
" 'position': 8},\n",
|
||||
" {'title': 'Tesla (TSLA) Resumes Orders for Model 3 Long Range After '\n",
|
||||
" 'Backlog',\n",
|
||||
" 'link': 'https://www.bloomberg.com/news/articles/2023-05-03/tesla-resumes-orders-for-popular-model-3-long-range-at-47-240',\n",
|
||||
" 'snippet': 'Tesla Inc. has resumed taking orders for its Model 3 '\n",
|
||||
" 'Long Range edition with a starting price of $47240, '\n",
|
||||
" 'according to its website.',\n",
|
||||
" 'date': '5 hours ago',\n",
|
||||
" 'source': 'Bloomberg.com',\n",
|
||||
" 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTWWIC4VpMTfRvSyqiomODOoLg0xhoBf-Tc1qweKnSuaiTk-Y1wMJZM3jct0w&s',\n",
|
||||
" 'position': 9}]}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"search = GoogleSerperAPIWrapper(type=\"news\")\n",
|
||||
"results = search.results(\"Tesla Inc.\")\n",
|
||||
"pprint.pp(results)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:34.984087Z",
|
||||
"start_time": "2023-05-04T00:54:33.369231Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"If you want to only receive news articles published in the last hour, you can do the following:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'searchParameters': {'q': 'Tesla Inc.',\n",
|
||||
" 'gl': 'us',\n",
|
||||
" 'hl': 'en',\n",
|
||||
" 'num': 10,\n",
|
||||
" 'type': 'news',\n",
|
||||
" 'tbs': 'qdr:h'},\n",
|
||||
" 'news': [{'title': 'Oklahoma Gov. Stitt sees growing foreign interest in '\n",
|
||||
" 'investments in ...',\n",
|
||||
" 'link': 'https://www.reuters.com/world/us/oklahoma-gov-stitt-sees-growing-foreign-interest-investments-state-2023-05-04/',\n",
|
||||
" 'snippet': 'T)), a battery supplier to electric vehicle maker Tesla '\n",
|
||||
" 'Inc (TSLA.O), said on Sunday it is considering building '\n",
|
||||
" 'a battery plant in Oklahoma, its third in...',\n",
|
||||
" 'date': '53 mins ago',\n",
|
||||
" 'source': 'Reuters',\n",
|
||||
" 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSSTcsXeenqmEKdiekvUgAmqIPR4nlAmgjTkBqLpza-lLfjX1CwB84MoNVj0Q&s',\n",
|
||||
" 'position': 1},\n",
|
||||
" {'title': 'Ryder lanza solución llave en mano para vehículos '\n",
|
||||
" 'eléctricos en EU',\n",
|
||||
" 'link': 'https://www.tyt.com.mx/nota/ryder-lanza-solucion-llave-en-mano-para-vehiculos-electricos-en-eu',\n",
|
||||
" 'snippet': 'Ryder System Inc. presentó RyderElectric+ TM como su '\n",
|
||||
" 'nueva solución llave en mano ... Ryder también tiene '\n",
|
||||
" 'reservados los semirremolques Tesla y continúa...',\n",
|
||||
" 'date': '56 mins ago',\n",
|
||||
" 'source': 'Revista Transportes y Turismo',\n",
|
||||
" 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQJhXTQQtjSUZf9YPM235WQhFU5_d7lEA76zB8DGwZfixcgf1_dhPJyKA1Nbw&s',\n",
|
||||
" 'position': 2},\n",
|
||||
" {'title': '\"I think people can get by with $999 million,\" Bernie '\n",
|
||||
" 'Sanders tells American Billionaires.',\n",
|
||||
" 'link': 'https://thebharatexpressnews.com/i-think-people-can-get-by-with-999-million-bernie-sanders-tells-american-billionaires-heres-how-the-ultra-rich-can-pay-less-income-tax-than-you-legally/',\n",
|
||||
" 'snippet': 'The report noted that in 2007 and 2011, Amazon.com Inc. '\n",
|
||||
" 'founder Jeff Bezos “did not pay a dime in federal ... '\n",
|
||||
" 'If you want to bet on Musk, check out Tesla.',\n",
|
||||
" 'date': '11 mins ago',\n",
|
||||
" 'source': 'THE BHARAT EXPRESS NEWS',\n",
|
||||
" 'imageUrl': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR_X9qqSwVFBBdos2CK5ky5IWIE3aJPCQeRYR9O1Jz4t-MjaEYBuwK7AU3AJQ&s',\n",
|
||||
" 'position': 3}]}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"search = GoogleSerperAPIWrapper(type=\"news\", tbs=\"qdr:h\")\n",
|
||||
"results = search.results(\"Tesla Inc.\")\n",
|
||||
"pprint.pp(results)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:41.786864Z",
|
||||
"start_time": "2023-05-04T00:54:40.691905Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"Some examples of the `tbs` parameter:\n",
|
||||
"\n",
|
||||
"`qdr:h` (past hour)\n",
|
||||
"`qdr:d` (past day)\n",
|
||||
"`qdr:w` (past week)\n",
|
||||
"`qdr:m` (past month)\n",
|
||||
"`qdr:y` (past year)\n",
|
||||
"\n",
|
||||
"You can specify intermediate time periods by adding a number:\n",
|
||||
"`qdr:h12` (past 12 hours)\n",
|
||||
"`qdr:d3` (past 3 days)\n",
|
||||
"`qdr:w2` (past 2 weeks)\n",
|
||||
"`qdr:m6` (past 6 months)\n",
|
||||
"`qdr:m2` (past 2 years)\n",
|
||||
"\n",
|
||||
"For all supported filters simply go to [Google Search](https://google.com), search for something, click on \"Tools\", add your date filter and check the URL for \"tbs=\".\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
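A hedged illustration of the intermediate time periods listed above, reusing the `GoogleSerperAPIWrapper` and `tbs` keyword shown in the cells above (the query string is just an example):

    # Sketch: restrict news results to the past 3 days with an intermediate tbs filter
    search = GoogleSerperAPIWrapper(type="news", tbs="qdr:d3")
    results = search.results("Tesla Inc.")
    pprint.pp(results)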
{
"cell_type": "markdown",
"source": [
"## Searching for Google Places\n",
"We can also query Google Places using this wrapper. For example:"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'searchParameters': {'q': 'Italian restaurants in Upper East Side',\n",
" 'gl': 'us',\n",
" 'hl': 'en',\n",
" 'num': 10,\n",
" 'type': 'places'},\n",
" 'places': [{'position': 1,\n",
" 'title': \"L'Osteria\",\n",
" 'address': '1219 Lexington Ave',\n",
" 'latitude': 40.777154599999996,\n",
" 'longitude': -73.9571363,\n",
" 'thumbnailUrl': 'https://lh5.googleusercontent.com/p/AF1QipNjU7BWEq_aYQANBCbX52Kb0lDpd_lFIx5onw40=w92-h92-n-k-no',\n",
" 'rating': 4.7,\n",
" 'ratingCount': 91,\n",
" 'category': 'Italian'},\n",
" {'position': 2,\n",
" 'title': \"Tony's Di Napoli\",\n",
" 'address': '1081 3rd Ave',\n",
" 'latitude': 40.7643567,\n",
" 'longitude': -73.9642373,\n",
" 'thumbnailUrl': 'https://lh5.googleusercontent.com/p/AF1QipNbNv6jZkJ9nyVi60__8c1DQbe_eEbugRAhIYye=w92-h92-n-k-no',\n",
" 'rating': 4.5,\n",
" 'ratingCount': 2265,\n",
" 'category': 'Italian'},\n",
" {'position': 3,\n",
" 'title': 'Caravaggio',\n",
" 'address': '23 E 74th St',\n",
" 'latitude': 40.773412799999996,\n",
" 'longitude': -73.96473379999999,\n",
" 'thumbnailUrl': 'https://lh5.googleusercontent.com/p/AF1QipPDGchokDvppoLfmVEo6X_bWd3Fz0HyxIHTEe9V=w92-h92-n-k-no',\n",
" 'rating': 4.5,\n",
" 'ratingCount': 276,\n",
" 'category': 'Italian'},\n",
" {'position': 4,\n",
" 'title': 'Luna Rossa',\n",
" 'address': '347 E 85th St',\n",
" 'latitude': 40.776593999999996,\n",
" 'longitude': -73.950351,\n",
" 'thumbnailUrl': 'https://lh5.googleusercontent.com/p/AF1QipNPCpCPuqPAb1Mv6_fOP7cjb8Wu1rbqbk2sMBlh=w92-h92-n-k-no',\n",
" 'rating': 4.5,\n",
" 'ratingCount': 140,\n",
" 'category': 'Italian'},\n",
" {'position': 5,\n",
" 'title': \"Paola's\",\n",
" 'address': '1361 Lexington Ave',\n",
" 'latitude': 40.7822019,\n",
" 'longitude': -73.9534096,\n",
" 'thumbnailUrl': 'https://lh5.googleusercontent.com/p/AF1QipPJr2Vcx-B6K-GNQa4koOTffggTePz8TKRTnWi3=w92-h92-n-k-no',\n",
" 'rating': 4.5,\n",
" 'ratingCount': 344,\n",
" 'category': 'Italian'},\n",
" {'position': 6,\n",
" 'title': 'Come Prima',\n",
" 'address': '903 Madison Ave',\n",
" 'latitude': 40.772124999999996,\n",
" 'longitude': -73.965012,\n",
" 'thumbnailUrl': 'https://lh5.googleusercontent.com/p/AF1QipNrX19G0NVdtDyMovCQ-M-m0c_gLmIxrWDQAAbz=w92-h92-n-k-no',\n",
" 'rating': 4.5,\n",
" 'ratingCount': 176,\n",
" 'category': 'Italian'},\n",
" {'position': 7,\n",
" 'title': 'Botte UES',\n",
" 'address': '1606 1st Ave.',\n",
" 'latitude': 40.7750785,\n",
" 'longitude': -73.9504801,\n",
" 'thumbnailUrl': 'https://lh5.googleusercontent.com/p/AF1QipPPN5GXxfH3NDacBc0Pt3uGAInd9OChS5isz9RF=w92-h92-n-k-no',\n",
" 'rating': 4.4,\n",
" 'ratingCount': 152,\n",
" 'category': 'Italian'},\n",
" {'position': 8,\n",
" 'title': 'Piccola Cucina Uptown',\n",
" 'address': '106 E 60th St',\n",
" 'latitude': 40.7632468,\n",
" 'longitude': -73.9689825,\n",
" 'thumbnailUrl': 'https://lh5.googleusercontent.com/p/AF1QipPifIgzOCD5SjgzzqBzGkdZCBp0MQsK5k7M7znn=w92-h92-n-k-no',\n",
" 'rating': 4.6,\n",
" 'ratingCount': 941,\n",
" 'category': 'Italian'},\n",
" {'position': 9,\n",
" 'title': 'Pinocchio Restaurant',\n",
" 'address': '300 E 92nd St',\n",
" 'latitude': 40.781453299999995,\n",
" 'longitude': -73.9486788,\n",
" 'thumbnailUrl': 'https://lh5.googleusercontent.com/p/AF1QipNtxlIyEEJHtDtFtTR9nB38S8A2VyMu-mVVz72A=w92-h92-n-k-no',\n",
" 'rating': 4.5,\n",
" 'ratingCount': 113,\n",
" 'category': 'Italian'},\n",
" {'position': 10,\n",
" 'title': 'Barbaresco',\n",
" 'address': '843 Lexington Ave #1',\n",
" 'latitude': 40.7654332,\n",
" 'longitude': -73.9656873,\n",
" 'thumbnailUrl': 'https://lh5.googleusercontent.com/p/AF1QipMb9FbPuXF_r9g5QseOHmReejxSHgSahPMPJ9-8=w92-h92-n-k-no',\n",
" 'rating': 4.3,\n",
" 'ratingCount': 122,\n",
" 'locationHint': 'In The Touraine',\n",
" 'category': 'Italian'}]}\n"
]
}
],
"source": [
"search = GoogleSerperAPIWrapper(type=\"places\")\n",
"results = search.results(\"Italian restaurants in Upper East Side\")\n",
"pprint.pp(results)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-05-04T00:56:07.271164Z",
"start_time": "2023-05-04T00:56:05.645847Z"
}
}
}
],
"metadata": {

@@ -68,7 +68,7 @@
]
},
{
"name": "stdin",
"name": "stdout",
"output_type": "stream",
"text": [
" Zhu\n"
@@ -98,7 +98,7 @@
}
],
"source": [
"agent_chain.run(\"When's my friend Eric's surname?\")\n",
"agent_chain.run(\"What's my friend Eric's surname?\")\n",
"# Answer with 'Zhu'"
]
},
@@ -196,7 +196,7 @@
]
},
{
"name": "stdin",
"name": "stdout",
"output_type": "stream",
"text": [
" vini\n",
@@ -222,7 +222,7 @@
]
},
{
"name": "stdin",
"name": "stdout",
"output_type": "stream",
"text": [
" oh who said it \n",

@@ -65,7 +65,7 @@
"repl_tool = Tool(\n",
" name=\"python_repl\",\n",
" description=\"A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.\",\n",
" func=python_repl\n",
" func=python_repl.run\n",
")"
]
}
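The hunk above swaps `func=python_repl` for `func=python_repl.run` because `Tool` expects a plain callable that takes a command string, not the REPL object itself. A minimal sketch of the repaired tool's behavior, assuming the `PythonREPL` utility import below:

    # Sketch: Tool.func must be a callable accepting a string
    from langchain.utilities import PythonREPL
    python_repl = PythonREPL()
    print(python_repl.run("print(17 * 2)"))  # prints 34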
File diff suppressed because one or more lines are too long
125
docs/modules/agents/tools/examples/youtube.ipynb
Normal file
@@ -0,0 +1,125 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "acb64858",
"metadata": {},
"source": [
"# YouTubeSearchTool\n",
"\n",
"This notebook shows how to use a tool to search YouTube.\n",
"\n",
"Adapted from [https://github.com/venuv/langchain_yt_tools](https://github.com/venuv/langchain_yt_tools)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9bb15d4a",
"metadata": {},
"outputs": [],
"source": [
"#! pip install youtube_search"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "cc1c83e2",
"metadata": {},
"outputs": [],
"source": [
"from langchain.tools import YouTubeSearchTool"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "becb262b",
"metadata": {},
"outputs": [],
"source": [
"tool = YouTubeSearchTool()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "6bbc4211",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"['/watch?v=VcVfceTsD0A&pp=ygUMbGV4IGZyaWVkbWFu', '/watch?v=gPfriiHBBek&pp=ygUMbGV4IGZyaWVkbWFu']\""
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tool.run(\"lex friedman\")"
]
},
{
"cell_type": "markdown",
"id": "7f772147",
"metadata": {},
"source": [
"You can also specify the number of results that are returned"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "682fdb33",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"['/watch?v=VcVfceTsD0A&pp=ygUMbGV4IGZyaWVkbWFu', '/watch?v=YVJ8gTnDC4Y&pp=ygUMbGV4IGZyaWVkbWFu', '/watch?v=Udh22kuLebg&pp=ygUMbGV4IGZyaWVkbWFu', '/watch?v=gPfriiHBBek&pp=ygUMbGV4IGZyaWVkbWFu', '/watch?v=L_Guz73e6fw&pp=ygUMbGV4IGZyaWVkbWFu']\""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tool.run(\"lex friedman,5\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bb5e1659",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -1,23 +1,144 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "87455ddb",
"metadata": {},
"source": [
"# Multi-Input Tools\n",
"\n",
"This notebook shows how to use a tool that requires multiple inputs with an agent.\n",
"\n",
"The difficulty in doing so comes from the fact that an agent decides its next step from a language model, which outputs a string. So if that step requires multiple inputs, they need to be parsed from that string. Therefore, the currently supported way to do this is to write a smaller wrapper function that parses a string into multiple inputs.\n",
"\n",
"For a concrete example, let's work on giving an agent access to a multiplication function, which takes as input two integers. In order to use this, we will tell the agent to generate the \"Action Input\" as a comma-separated list of length two. We will then write a thin wrapper that takes a string, splits it into two around a comma, and passes both parsed sides as integers to the multiplication function."
"This notebook shows how to use a tool that requires multiple inputs with an agent. The recommended way to do so is with the `StructuredTool` class.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "113c8805",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import os\n",
"os.environ[\"LANGCHAIN_TRACING\"] = \"true\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9c257017",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain import OpenAI\n",
"from langchain.agents import initialize_agent, AgentType\n",
"\n",
"llm = OpenAI(temperature=0)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "21623e8f",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.tools import StructuredTool\n",
"\n",
"def multiplier(a: float, b: float) -> float:\n",
" \"\"\"Multiply the provided floats.\"\"\"\n",
" return a * b\n",
"\n",
"tool = StructuredTool.from_function(multiplier)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ae7e8e07",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Structured tools are compatible with the STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION agent type. \n",
"agent_executor = initialize_agent([tool], llm, agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "6cfa22d7",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Thought: I need to multiply 3 and 4\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"multiplier\",\n",
" \"action_input\": {\"a\": 3, \"b\": 4}\n",
"}\n",
"```\n",
"\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m12\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I know what to respond\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"Final Answer\",\n",
" \"action_input\": \"3 times 4 is 12\"\n",
"}\n",
"```\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"'3 times 4 is 12'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent_executor.run(\"What is 3 times 4\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "e643b307",
"metadata": {},
"source": [
"## Multi-Input Tools with a string format\n",
"\n",
"An alternative to the structured tool would be to use the regular `Tool` class and accept a single string. The tool would then have to handle the parsing logic to extract the relevant values from the text, which tightly couples the tool representation to the agent prompt. This is still useful if the underlying language model can't reliably generate structured schema. \n",
"\n",
"Let's take the multiplication function as an example. In order to use this, we will tell the agent to generate the \"Action Input\" as a comma-separated list of length two. We will then write a thin wrapper that takes a string, splits it into two around a comma, and passes both parsed sides as integers to the multiplication function."
]
},
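A minimal sketch of the comma-separated wrapper described in the cell above; the `parsing_multiplier` name and the tool description are illustrative, while the split-around-a-comma parsing follows the text:

    from langchain.agents import Tool

    def parsing_multiplier(string: str) -> float:
        # Split "a,b" around the comma and pass both sides as integers
        a, b = string.split(",")
        return multiplier(int(a), int(b))

    parsing_tool = Tool(
        name="Multiplier",
        func=parsing_multiplier,
        description="Multiplies two numbers. Input must be a comma-separated list of exactly two integers, e.g. `3,4`.",
    )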
{
"cell_type": "code",
"execution_count": 6,
"id": "291149b6",
"metadata": {},
"outputs": [],
@@ -37,7 +158,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 7,
"id": "f0b82020",
"metadata": {},
"outputs": [],
@@ -52,7 +173,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 8,
"id": "6db1d43f",
"metadata": {},
"outputs": [],
@@ -70,7 +191,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 9,
"id": "aa25d0ca",
"metadata": {},
"outputs": [
@@ -97,7 +218,7 @@
"'3 times 4 is 12'"
]
},
"execution_count": 4,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -131,7 +252,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.11.2"
},
"vscode": {
"interpreter": {

179
docs/modules/chains/examples/multi_prompt_router.ipynb
Normal file
@@ -0,0 +1,179 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "a5cf6c49",
"metadata": {},
"source": [
"# Router Chains: Selecting from multiple prompts with MultiPromptChain\n",
"\n",
"This notebook demonstrates how to use the `RouterChain` paradigm to create a chain that dynamically selects the prompt to use for a given input. Specifically we show how to use the `MultiPromptChain` to create a question-answering chain that selects the prompt which is most relevant for a given question, and then answers the question using that prompt."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "e8d624d4",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.router import MultiPromptChain\n",
"from langchain.llms import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8d11fa5c",
"metadata": {},
"outputs": [],
"source": [
"physics_template = \"\"\"You are a very smart physics professor. \\\n",
"You are great at answering questions about physics in a concise and easy to understand manner. \\\n",
"When you don't know the answer to a question you admit that you don't know.\n",
"\n",
"Here is a question:\n",
"{input}\"\"\"\n",
"\n",
"\n",
"math_template = \"\"\"You are a very good mathematician. You are great at answering math questions. \\\n",
"You are so good because you are able to break down hard problems into their component parts, \\\n",
"answer the component parts, and then put them together to answer the broader question.\n",
"\n",
"Here is a question:\n",
"{input}\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "d0b8856e",
"metadata": {},
"outputs": [],
"source": [
"prompt_infos = [\n",
" {\n",
" \"name\": \"physics\", \n",
" \"description\": \"Good for answering questions about physics\", \n",
" \"prompt_template\": physics_template\n",
" },\n",
" {\n",
" \"name\": \"math\", \n",
" \"description\": \"Good for answering math questions\", \n",
" \"prompt_template\": math_template\n",
" }\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "db679975",
"metadata": {},
"outputs": [],
"source": [
"chain = MultiPromptChain.from_prompts(OpenAI(), prompt_infos, verbose=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "90fd594c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new MultiPromptChain chain...\u001b[0m\n",
"physics: {'input': 'What is black body radiation?'}\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\n",
"\n",
"Black body radiation is the emission of electromagnetic radiation from a body due to its temperature. It is a type of thermal radiation that is emitted from the surface of all objects that are at a temperature above absolute zero. It is a spectrum of radiation that is influenced by the temperature of the body and is independent of the composition of the emitting material.\n"
]
}
],
"source": [
"print(chain.run(\"What is black body radiation?\"))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "b8c83765",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new MultiPromptChain chain...\u001b[0m\n",
"math: {'input': 'What is the first prime number greater than 40 such that one plus the prime number is divisible by 3'}\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"?\n",
"\n",
"The first prime number greater than 40 such that one plus the prime number is divisible by 3 is 43. To solve this problem, we can break down the question into two parts: finding the first prime number greater than 40, and then finding a number that is divisible by 3. \n",
"\n",
"The first step is to find the first prime number greater than 40. A prime number is a number that is only divisible by 1 and itself. The next prime number after 40 is 41.\n",
"\n",
"The second step is to find a number that is divisible by 3. To do this, we can add 1 to 41, which gives us 42. Now, we can check if 42 is divisible by 3. 42 divided by 3 is 14, so 42 is divisible by 3.\n",
"\n",
"Therefore, the answer to the question is 43.\n"
]
}
],
"source": [
"print(chain.run(\"What is the first prime number greater than 40 such that one plus the prime number is divisible by 3\"))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "74c6bba7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new MultiPromptChain chain...\u001b[0m\n",
"None: {'input': 'What is the name of the type of cloud that rains?'}\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"The type of cloud that typically produces rain is called a cumulonimbus cloud. This type of cloud is characterized by its large vertical extent and can produce thunderstorms and heavy precipitation. Is there anything else you'd like to know?\n"
]
}
],
"source": [
"print(chain.run(\"What is the name of the type of cloud that rains\"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "venv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
209
docs/modules/chains/examples/multi_retrieval_qa_router.ipynb
Normal file
@@ -0,0 +1,209 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "782ffcf1",
"metadata": {},
"source": [
"# Router Chains: Selecting from multiple prompts with MultiRetrievalQAChain\n",
"\n",
"This notebook demonstrates how to use the `RouterChain` paradigm to create a chain that dynamically selects which Retrieval system to use. Specifically we show how to use the `MultiRetrievalQAChain` to create a question-answering chain that selects the retrieval QA chain which is most relevant for a given question, and then answers the question using it."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "b6aeec07",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.router import MultiRetrievalQAChain\n",
"from langchain.llms import OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3c42f051",
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.document_loaders import TextLoader\n",
"from langchain.vectorstores import FAISS\n",
"\n",
"sou_docs = TextLoader('../../state_of_the_union.txt').load_and_split()\n",
"sou_retriever = FAISS.from_documents(sou_docs, OpenAIEmbeddings()).as_retriever()\n",
"\n",
"pg_docs = TextLoader('../../paul_graham_essay.txt').load_and_split()\n",
"pg_retriever = FAISS.from_documents(pg_docs, OpenAIEmbeddings()).as_retriever()\n",
"\n",
"personal_texts = [\n",
" \"I love apple pie\",\n",
" \"My favorite color is fuchsia\",\n",
" \"My dream is to become a professional dancer\",\n",
" \"I broke my arm when I was 12\",\n",
" \"My parents are from Peru\",\n",
"]\n",
"personal_retriever = FAISS.from_texts(personal_texts, OpenAIEmbeddings()).as_retriever()\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "783d6bcd",
"metadata": {},
"outputs": [],
"source": [
"retriever_infos = [\n",
" {\n",
" \"name\": \"state of the union\", \n",
" \"description\": \"Good for answering questions about the 2023 State of the Union address\", \n",
" \"retriever\": sou_retriever\n",
" },\n",
" {\n",
" \"name\": \"pg essay\", \n",
" \"description\": \"Good for answering questions about Paul Graham's essay on his career\", \n",
" \"retriever\": pg_retriever\n",
" },\n",
" {\n",
" \"name\": \"personal\", \n",
" \"description\": \"Good for answering questions about me\", \n",
" \"retriever\": personal_retriever\n",
" }\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5b671ac5",
"metadata": {},
"outputs": [],
"source": [
"chain = MultiRetrievalQAChain.from_retrievers(OpenAI(), retriever_infos, verbose=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7db5814f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new MultiRetrievalQAChain chain...\u001b[0m\n",
"state of the union: {'query': 'What did the president say about the economy in the 2023 State of the Union address?'}\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
" The president said that the economy was stronger than it had been a year prior, and that the American Rescue Plan helped create record job growth and fuel economic relief for millions of Americans. He also proposed a plan to fight inflation and lower costs for families, including cutting the cost of prescription drugs and energy, providing investments and tax credits for energy efficiency, and increasing access to child care and Pre-K.\n"
]
}
],
"source": [
"print(chain.run(\"What did the president say about the economy?\"))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "bbcdbe82",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new MultiRetrievalQAChain chain...\u001b[0m\n",
"pg essay: {'query': 'What is something Paul Graham regrets about his work?'}\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
" Paul Graham regrets that he did not take a vacation after selling his company, instead of immediately starting to paint.\n"
]
}
],
"source": [
"print(chain.run(\"What is something Paul Graham regrets about his work?\"))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "37c88a27",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new MultiRetrievalQAChain chain...\u001b[0m\n",
"personal: {'query': 'What is my background?'}\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
" Your background is Peruvian.\n"
]
}
],
"source": [
"print(chain.run(\"What is my background?\"))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "de8519b2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new MultiRetrievalQAChain chain...\u001b[0m\n",
"None: {'query': 'What year was the Internet created in?'}\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"The Internet was created in 1969 through a project called ARPANET, which was funded by the United States Department of Defense. However, the World Wide Web, which is often confused with the Internet, was created in 1989 by British computer scientist Tim Berners-Lee.\n"
]
}
],
"source": [
"print(chain.run(\"What year was the Internet created in?\"))"
]
},
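The `None:` route above shows the chain falling back to a default when no retriever matches. A sketch of overriding that fallback, assuming `from_retrievers` accepts a `default_chain` argument (treat this keyword as an unverified assumption):

    # Sketch: supply an explicit fallback chain for unrouted queries
    from langchain.chains import ConversationChain
    fallback = ConversationChain(llm=OpenAI(), output_key="result")
    chain = MultiRetrievalQAChain.from_retrievers(
        OpenAI(), retriever_infos, default_chain=fallback, verbose=True
    )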
{
"cell_type": "code",
"execution_count": null,
"id": "e50a0227",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "venv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -5,7 +5,22 @@
"id": "1f3a5ebf",
"metadata": {},
"source": [
"# Airbyte JSON\n",
"# Airbyte JSON"
]
},
{
"cell_type": "markdown",
"id": "35ac77b1-449b-44f7-b8f3-3494d55c286e",
"metadata": {},
"source": [
">[Airbyte](https://github.com/airbytehq/airbyte) is a data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes. It has the largest catalog of ELT connectors to data warehouses and databases."
]
},
{
"cell_type": "markdown",
"id": "1fe72234-3110-4c07-a766-3dc505dd25cc",
"metadata": {},
"source": [
"This covers how to load any source from Airbyte into a local JSON file that can be read in as a document.\n",
"\n",
"Prereqs:\n",
@@ -25,7 +40,7 @@
"\n",
"6) Set destination as Local JSON, with specified destination path - let's say `/json_data`. Set up manual sync.\n",
"\n",
"7) Run the connection!\n",
"7) Run the connection.\n",
"\n",
"7) To see what files are created, you can navigate to: `file:///tmp/airbyte_local`\n",
"\n",
@@ -52,7 +67,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"_airbyte_raw_pokemon.jsonl\r\n"
"_airbyte_raw_pokemon.jsonl\n"
]
}
],

@@ -1,15 +1,15 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Apify Dataset\n",
"\n",
">[Apify Dataset](https://docs.apify.com/platform/storage/dataset) is a scalable append-only storage with sequential access built for storing structured web scraping results, such as a list of products or Google SERPs, and then exporting them to various formats like JSON, CSV, or Excel. Datasets are mainly used to save results of [Apify Actors](https://apify.com/store)—serverless cloud programs for various web scraping, crawling, and data extraction use cases.\n",
"\n",
"This notebook shows how to load Apify datasets to LangChain.\n",
"\n",
"[Apify Dataset](https://docs.apify.com/platform/storage/dataset) is a scalable append-only storage with sequential access built for storing structured web scraping results, such as a list of products or Google SERPs, and then exporting them to various formats like JSON, CSV, or Excel. Datasets are mainly used to save results of [Apify Actors](https://apify.com/store)—serverless cloud programs for various web scraping, crawling, and data extraction use cases.\n",
"\n",
"## Prerequisites\n",
"\n",
@@ -17,7 +17,17 @@
]
},
{
"attachments": {},
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#!pip install apify-client"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -35,7 +45,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -77,7 +86,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -167,9 +175,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}

@@ -7,7 +7,7 @@
"source": [
"# Arxiv\n",
"\n",
"[arXiv](https://arxiv.org/) is an open-access archive for 2 million scholarly articles in the fields of physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering and systems science, and economics.\n",
">[arXiv](https://arxiv.org/) is an open-access archive for 2 million scholarly articles in the fields of physics, mathematics, computer science, quantitative biology, quantitative finance, statistics, electrical engineering and systems science, and economics.\n",
"\n",
"This notebook shows how to load scientific articles from `Arxiv.org` into a document format that we can use downstream."
]
@@ -37,11 +37,10 @@
},
"outputs": [],
"source": [
"!pip install arxiv"
"#!pip install arxiv"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "094b5f13-7e54-4354-9d83-26d6926ecaa0",
"metadata": {
@@ -60,7 +59,7 @@
},
"outputs": [],
"source": [
"!pip install pymupdf"
"#!pip install pymupdf"
]
},
{
@@ -72,7 +71,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "e29b954c-1407-4797-ae21-6ba8937156be",
"metadata": {},
@@ -90,7 +88,6 @@
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders.base import Document\n",
"from langchain.document_loaders import ArxivLoader"
]
},
@@ -171,7 +168,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.10.6"
}
},
"nbformat": 4,

@@ -5,36 +5,46 @@
"id": "a634365e",
"metadata": {},
"source": [
"# s3 Directory\n",
"# AWS S3 Directory\n",
"\n",
"This covers how to load document objects from an s3 directory object."
">[Amazon Simple Storage Service (Amazon S3)](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html) is an object storage service.\n",
"\n",
">[AWS S3 Directory](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html)\n",
"\n",
"This covers how to load document objects from an `AWS S3 Directory` object."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "2f0cd6a5",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import S3DirectoryLoader"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "49815096",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#!pip install boto3"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2f0cd6a5",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import S3DirectoryLoader"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "321cc7f1",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader = S3DirectoryLoader(\"testing-hwc\")"
@@ -42,21 +52,12 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "2b11d155",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Lorem ipsum dolor sit amet.', lookup_str='', metadata={'source': '/var/folders/y6/8_bzdg295ld6s1_97_12m4lr0000gn/T/tmpaa9xl6ch/fake.docx'}, lookup_index=0)]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader.load()"
]
|
||||
@@ -126,7 +127,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
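A short sketch of how the `S3DirectoryLoader` shown above can be narrowed to a key prefix; the bucket name and prefix are placeholders for illustration:

```python
from langchain.document_loaders import S3DirectoryLoader

# Only load objects whose keys start with the given prefix.
loader = S3DirectoryLoader("testing-hwc", prefix="fake")
docs = loader.load()
```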
@@ -5,9 +5,13 @@
"id": "66a7777e",
"metadata": {},
"source": [
"# s3 File\n",
"# AWS S3 File\n",
"\n",
"This covers how to load document objects from an s3 file object."
">[Amazon Simple Storage Service (Amazon S3)](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html) is an object storage service.\n",
"\n",
">[AWS S3 Buckets](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingBucket.html)\n",
"\n",
"This covers how to load document objects from an `AWS S3 File` object."
]
},
{
@@ -86,7 +90,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.10.6"
}
},
"nbformat": 4,

@@ -6,6 +6,9 @@
"metadata": {},
"source": [
"# AZLyrics\n",
"\n",
">[AZLyrics](https://www.azlyrics.com/) is a large, legal, ever-growing collection of lyrics.\n",
"\n",
"This covers how to load AZLyrics webpages into a document format that we can use downstream."
]
},
@@ -85,7 +88,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.1"
"version": "3.10.6"
}
},
"nbformat": 4,
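For the single-object counterpart above, a minimal sketch of `S3FileLoader`; the bucket and key are placeholders:

```python
from langchain.document_loaders import S3FileLoader

# Load one document from a single S3 object (bucket, key).
loader = S3FileLoader("testing-hwc", "fake.docx")
docs = loader.load()
```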
@@ -1,29 +1,28 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "a634365e",
"metadata": {},
"source": [
"# Azure Blob Storage Container\n",
"\n",
"This covers how to load document objects from a container on Azure Blob Storage."
">[Azure Blob Storage](https://learn.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction) is Microsoft's object storage solution for the cloud. Blob Storage is optimized for storing massive amounts of unstructured data. Unstructured data is data that doesn't adhere to a particular data model or definition, such as text or binary data.\n",
"\n",
"`Azure Blob Storage` is designed for:\n",
"- Serving images or documents directly to a browser.\n",
"- Storing files for distributed access.\n",
"- Streaming video and audio.\n",
"- Writing to log files.\n",
"- Storing data for backup and restore, disaster recovery, and archiving.\n",
"- Storing data for analysis by an on-premises or Azure-hosted service.\n",
"\n",
"This notebook covers how to load document objects from a container on `Azure Blob Storage`."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "2f0cd6a5",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import AzureBlobStorageContainerLoader"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "49815096",
"metadata": {},
"outputs": [],
@@ -31,6 +30,18 @@
"#!pip install azure-storage-blob"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2f0cd6a5",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import AzureBlobStorageContainerLoader"
]
},
{
"cell_type": "code",
"execution_count": 3,
@@ -127,7 +138,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.10.6"
}
},
"nbformat": 4,
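A minimal sketch of the `AzureBlobStorageContainerLoader` this notebook imports; the connection string, container name, and optional prefix are placeholders:

```python
from langchain.document_loaders import AzureBlobStorageContainerLoader

# prefix narrows loading to blobs whose names start with the given string.
loader = AzureBlobStorageContainerLoader(
    conn_str="<connection_string>",
    container="<container_name>",
    prefix="<optional_prefix>",
)
docs = loader.load()
```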
@@ -1,14 +1,27 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "66a7777e",
"metadata": {},
"source": [
"# Azure Blob Storage File\n",
"\n",
"This covers how to load document objects from an Azure Blob Storage file."
">[Azure Files](https://learn.microsoft.com/en-us/azure/storage/files/storage-files-introduction) offers fully managed file shares in the cloud that are accessible via the industry standard Server Message Block (`SMB`) protocol, Network File System (`NFS`) protocol, and `Azure Files REST API`.\n",
"\n",
"This covers how to load document objects from Azure Files."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "43128d8d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#!pip install azure-storage-blob"
]
},
{
@@ -21,16 +34,6 @@
"from langchain.document_loaders import AzureBlobStorageFileLoader"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "43128d8d",
"metadata": {},
"outputs": [],
"source": [
"#!pip install azure-storage-blob"
]
},
{
"cell_type": "code",
"execution_count": 8,
@@ -87,7 +90,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.10.6"
}
},
"nbformat": 4,
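A companion sketch for the single-blob case using the `AzureBlobStorageFileLoader` imported above; all three argument values are placeholders:

```python
from langchain.document_loaders import AzureBlobStorageFileLoader

# Load one document from a single blob in a container.
loader = AzureBlobStorageFileLoader(
    conn_str="<connection_string>",
    container="<container_name>",
    blob_name="<blob_name>",
)
docs = loader.load()
```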
@@ -7,29 +7,35 @@
"source": [
"# Bilibili\n",
"\n",
"This loader utilizes the `bilibili-api` to fetch the text transcript from Bilibili, one of the most beloved long-form video sites in China.\n",
">[Bilibili](https://www.bilibili.tv/) is one of the most beloved long-form video sites in China.\n",
"\n",
"This loader utilizes the [bilibili-api](https://github.com/MoyuScript/bilibili-api) to fetch the text transcript from `Bilibili`.\n",
"\n",
"With this BiliBiliLoader, users can easily obtain the transcript of their desired video content on the platform."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "9ec8a3b3",
"metadata": {},
"execution_count": null,
"id": "43128d8d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders.bilibili import BiliBiliLoader"
"#!pip install bilibili-api"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "43128d8d",
"metadata": {},
"execution_count": null,
"id": "9ec8a3b3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#!pip install bilibili-api"
"from langchain.document_loaders.bilibili import BiliBiliLoader"
]
},
{
@@ -51,16 +57,20 @@
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"loader.load()"
],
"id": "3470dadf",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
}
},
"outputs": [],
"source": [
"loader.load()"
]
}
],
"metadata": {
@@ -79,9 +89,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
}
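Since the diff shows only the import and `loader.load()`, a minimal construction sketch for `BiliBiliLoader`; the video URL is a placeholder, and the loader accepts a list of URLs:

```python
from langchain.document_loaders.bilibili import BiliBiliLoader

# One Document per video transcript.
loader = BiliBiliLoader(["https://www.bilibili.com/video/BV1xt411o7Xu/"])
docs = loader.load()
```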
@@ -1,13 +1,20 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Blackboard\n",
"\n",
"This covers how to load data from a Blackboard Learn instance."
">[Blackboard Learn](https://en.wikipedia.org/wiki/Blackboard_Learn) (previously the Blackboard Learning Management System) is a web-based virtual learning environment and learning management system developed by Blackboard Inc. The software features course management, customizable open architecture, and scalable design that allows integration with student information systems and authentication protocols. It may be installed on local servers, hosted by `Blackboard ASP Solutions`, or provided as Software as a Service hosted on Amazon Web Services. Its main purposes are stated to include the addition of online elements to courses traditionally delivered face-to-face and development of completely online courses with few or no face-to-face meetings.\n",
"\n",
"This covers how to load data from a [Blackboard Learn](https://www.anthology.com/products/teaching-and-learning/learning-effectiveness/blackboard-learn) instance.\n",
"\n",
"This loader is not compatible with all `Blackboard` courses. It is only\n",
" compatible with courses that use the new `Blackboard` interface.\n",
" To use this loader, you must have the BbRouter cookie. You can get this\n",
" cookie by logging into the course and then copying the value of the\n",
" BbRouter cookie from the browser's developer tools."
]
},
{
@@ -28,11 +35,24 @@
}
],
"metadata": {
"language_info": {
"name": "python"
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"orig_nbformat": 4
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
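The usage cell itself is not visible in this hunk; a hedged sketch of how `BlackboardLoader` is typically constructed, matching the BbRouter-cookie instructions above. The course URL and cookie value are placeholders, and the keyword names are assumptions to verify against the loader's docstring:

```python
from langchain.document_loaders import BlackboardLoader

loader = BlackboardLoader(
    blackboard_course_url="https://blackboard.example.com/webapps/blackboard/content/listContent.jsp?course_id=_123456_1",
    bb_router="expires:12345...",  # paste the BbRouter cookie value here
    load_all_recursively=True,
)
docs = loader.load()
```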
@@ -1,151 +1,149 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "vm8vn9t8DvC_"
},
"source": [
"# Blockchain"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "5WjXERXzFEhg"
},
"source": [
"## Overview"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"id": "juAmbgoWD17u"
},
"source": [
"The intention of this notebook is to provide a means of testing functionality in the Langchain Document Loader for Blockchain.\n",
"\n",
"Initially this Loader supports:\n",
"\n",
"* Loading NFTs as Documents from NFT Smart Contracts (ERC721 and ERC1155)\n",
"* Ethereum Mainnet, Ethereum Testnet, Polygon Mainnet, Polygon Testnet (default is eth-mainnet)\n",
"* Alchemy's getNFTsForCollection API\n",
"\n",
"It can be extended if the community finds value in this loader. Specifically:\n",
"\n",
"* Additional APIs can be added (e.g. Transaction-related APIs)\n",
"\n",
"This Document Loader Requires:\n",
"\n",
"* A free [Alchemy API Key](https://www.alchemy.com/)\n",
"\n",
"The output takes the following format:\n",
"\n",
"- pageContent= Individual NFT\n",
"- metadata={'source': '0x1a92f7381b9f03921564a437210bb9396471050c', 'blockchain': 'eth-mainnet', 'tokenId': '0x15'})"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load NFTs into Document Loader"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"alchemyApiKey = \"get from https://www.alchemy.com/ and set in environment variable ALCHEMY_API_KEY\""
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Option 1: Ethereum Mainnet (default BlockchainType)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "J3LWHARC-Kn0"
},
"outputs": [],
"source": [
"contractAddress = \"0xbc4ca0eda7647a8ab7c2061c2e118a18a936f13d\" # Bored Ape Yacht Club contract address\n",
"\n",
"blockchainType = BlockchainType.ETH_MAINNET #default value, optional parameter\n",
"\n",
"blockchainLoader = BlockchainDocumentLoader(contract_address=contractAddress,\n",
" api_key=alchemyApiKey)\n",
"\n",
"nfts = blockchainLoader.load()\n",
"\n",
"nfts[:2]"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Option 2: Polygon Mainnet"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"contractAddress = \"0x448676ffCd0aDf2D85C1f0565e8dde6924A9A7D9\" # Polygon Mainnet contract address\n",
"\n",
"blockchainType = BlockchainType.POLYGON_MAINNET \n",
"\n",
"blockchainLoader = BlockchainDocumentLoader(contract_address=contractAddress, \n",
" blockchainType=blockchainType, \n",
" api_key=alchemyApiKey)\n",
"\n",
"nfts = blockchainLoader.load()\n",
"\n",
"nfts[:2]"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [
"5WjXERXzFEhg"
],
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "vm8vn9t8DvC_"
},
"source": [
"# Blockchain"
]
},
"nbformat": 4,
"nbformat_minor": 0
{
"cell_type": "markdown",
"metadata": {
"id": "5WjXERXzFEhg"
},
"source": [
"## Overview"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "juAmbgoWD17u"
},
"source": [
"The intention of this notebook is to provide a means of testing functionality in the Langchain Document Loader for Blockchain.\n",
"\n",
"Initially this Loader supports:\n",
"\n",
"* Loading NFTs as Documents from NFT Smart Contracts (ERC721 and ERC1155)\n",
"* Ethereum Mainnet, Ethereum Testnet, Polygon Mainnet, Polygon Testnet (default is eth-mainnet)\n",
"* Alchemy's getNFTsForCollection API\n",
"\n",
"It can be extended if the community finds value in this loader. Specifically:\n",
"\n",
"* Additional APIs can be added (e.g. Transaction-related APIs)\n",
"\n",
"This Document Loader Requires:\n",
"\n",
"* A free [Alchemy API Key](https://www.alchemy.com/)\n",
"\n",
"The output takes the following format:\n",
"\n",
"- pageContent= Individual NFT\n",
"- metadata={'source': '0x1a92f7381b9f03921564a437210bb9396471050c', 'blockchain': 'eth-mainnet', 'tokenId': '0x15'})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load NFTs into Document Loader"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# get ALCHEMY_API_KEY from https://www.alchemy.com/ \n",
"\n",
"alchemyApiKey = \"...\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Option 1: Ethereum Mainnet (default BlockchainType)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "J3LWHARC-Kn0"
},
"outputs": [],
"source": [
"from langchain.document_loaders.blockchain import BlockchainDocumentLoader, BlockchainType\n",
"contractAddress = \"0xbc4ca0eda7647a8ab7c2061c2e118a18a936f13d\" # Bored Ape Yacht Club contract address\n",
"\n",
"blockchainType = BlockchainType.ETH_MAINNET #default value, optional parameter\n",
"\n",
"blockchainLoader = BlockchainDocumentLoader(contract_address=contractAddress,\n",
" api_key=alchemyApiKey)\n",
"\n",
"nfts = blockchainLoader.load()\n",
"\n",
"nfts[:2]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Option 2: Polygon Mainnet"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"contractAddress = \"0x448676ffCd0aDf2D85C1f0565e8dde6924A9A7D9\" # Polygon Mainnet contract address\n",
"\n",
"blockchainType = BlockchainType.POLYGON_MAINNET \n",
"\n",
"blockchainLoader = BlockchainDocumentLoader(contract_address=contractAddress, \n",
" blockchainType=blockchainType, \n",
" api_key=alchemyApiKey)\n",
"\n",
"nfts = blockchainLoader.load()\n",
"\n",
"nfts[:2]"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [
"5WjXERXzFEhg"
],
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
@@ -1,21 +1,25 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### ChatGPT Data Loader\n",
"### ChatGPT Data\n",
"\n",
"This notebook covers how to load `conversations.json` from your ChatGPT data export folder.\n",
">[ChatGPT](https://chat.openai.com) is an artificial intelligence (AI) chatbot developed by OpenAI.\n",
"\n",
"\n",
"This notebook covers how to load `conversations.json` from your `ChatGPT` data export folder.\n",
"\n",
"You can get your data export by email by going to: https://chat.openai.com/ -> (Profile) - Settings -> Export data -> Confirm export."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders.chatgpt import ChatGPTLoader"
@@ -53,7 +57,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -67,10 +71,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"orig_nbformat": 4
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
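A minimal sketch of the `ChatGPTLoader` imported above; the file path is a placeholder for your own export:

```python
from langchain.document_loaders.chatgpt import ChatGPTLoader

# num_logs=1 loads only the first conversation from the export file.
loader = ChatGPTLoader(log_file="./example_data/fake_conversations.json", num_logs=1)
docs = loader.load()
```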
@@ -6,7 +6,10 @@
"metadata": {},
"source": [
"# College Confidential\n",
"This covers how to load College Confidential webpages into a document format that we can use downstream."
"\n",
">[College Confidential](https://www.collegeconfidential.com/) gives information on 3,800+ colleges and universities.\n",
"\n",
"This covers how to load `College Confidential` webpages into a document format that we can use downstream."
]
},
{
@@ -85,7 +88,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.6"
}
},
"nbformat": 4,
@@ -6,18 +6,31 @@
"source": [
"# Confluence\n",
"\n",
"A loader for Confluence pages.\n",
">[Confluence](https://www.atlassian.com/software/confluence) is a wiki collaboration platform that saves and organizes all of the project-related material. `Confluence` is a knowledge base that primarily handles content management activities. \n",
"\n",
"A loader for `Confluence` pages.\n",
"\n",
"\n",
"This currently supports both username/api_key and Oauth2 login.\n",
"This currently supports both `username/api_key` and `Oauth2 login`.\n",
"\n",
"\n",
"Specify a list of `page_ids` and/or a `space_key` to load the corresponding pages into Document objects; if both are specified, the union of both sets will be returned.\n",
"\n",
"\n",
"You can also specify a boolean `include_attachments` to include attachments; this is set to False by default. If set to True, all attachments will be downloaded and ConfluenceReader will extract the text from the attachments and add it to the Document object. Currently supported attachment types are: PDF, PNG, JPEG/JPG, SVG, Word and Excel.\n",
"You can also specify a boolean `include_attachments` to include attachments; this is set to False by default. If set to True, all attachments will be downloaded and ConfluenceReader will extract the text from the attachments and add it to the Document object. Currently supported attachment types are: `PDF`, `PNG`, `JPEG/JPG`, `SVG`, `Word` and `Excel`.\n",
"\n",
"Hint: space_key and page_id can both be found in the URL of a page in Confluence - https://yoursite.atlassian.com/wiki/spaces/<space_key>/pages/<page_id>\n"
"Hint: `space_key` and `page_id` can both be found in the URL of a page in Confluence - https://yoursite.atlassian.com/wiki/spaces/<space_key>/pages/<page_id>\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#!pip install atlassian-python-api"
]
},
{
@@ -33,7 +46,7 @@
" username=\"me\",\n",
" api_key=\"12345\"\n",
")\n",
"documents = loader.load(space_key=\"SPACE\", include_attachments=True, limit=50)\n"
"documents = loader.load(space_key=\"SPACE\", include_attachments=True, limit=50)"
]
}
],
@@ -53,7 +66,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.6"
},
"vscode": {
"interpreter": {
@@ -62,5 +75,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
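Complementing the `space_key` snippet in the diff, a sketch of loading by explicit page IDs instead (the union behaviour described above applies when both are given); URL, credentials, and IDs are placeholders:

```python
from langchain.document_loaders import ConfluenceLoader

loader = ConfluenceLoader(
    url="https://yoursite.atlassian.com/wiki",
    username="me",
    api_key="12345",
)
# Load specific pages by ID rather than a whole space.
documents = loader.load(page_ids=["123456", "7891011"], include_attachments=False)
```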
@@ -6,14 +6,22 @@
"metadata": {},
"source": [
"# CoNLL-U\n",
"\n",
">[CoNLL-U](https://universaldependencies.org/format.html) is a revised version of the CoNLL-X format. Annotations are encoded in plain text files (UTF-8, normalized to NFC, using only the LF character as line break, including an LF character at the end of file) with three types of lines:\n",
">- Word lines containing the annotation of a word/token in 10 fields separated by single tab characters; see below.\n",
">- Blank lines marking sentence boundaries.\n",
">- Comment lines starting with hash (#).\n",
"\n",
"This is an example of how to load a file in [CoNLL-U](https://universaldependencies.org/format.html) format. The whole file is treated as one document. The example data (`conllu.conllu`) is based on one of the standard UD/CoNLL-U examples."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "d9b2e33e",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import CoNLLULoader"
@@ -21,9 +29,11 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "5b5eec48",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader = CoNLLULoader(\"example_data/conllu.conllu\")"
@@ -31,9 +41,11 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "10f3f725",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"document = loader.load()"
@@ -41,10 +53,23 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"id": "acbb3579",
"metadata": {},
"outputs": [],
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='They buy and sell books.', metadata={'source': 'example_data/conllu.conllu'})]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"document"
]
@@ -52,7 +77,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -66,7 +91,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
"version": "3.10.6"
},
"toc": {
"base_numbering": 1,

@@ -94,7 +94,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.6"
}
},
"nbformat": 4,
@@ -2,20 +2,23 @@
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"metadata": {},
"source": [
"# CSV Loader\n",
"# CSV\n",
"\n",
"Load csv files with a single row per document."
">A [comma-separated values (CSV)](https://en.wikipedia.org/wiki/Comma-separated_values) file is a delimited text file that uses a comma to separate values. Each line of the file is a data record. Each record consists of one or more fields, separated by commas.\n",
"\n",
"Load [csv](https://en.wikipedia.org/wiki/Comma-separated_values) data with a single row per document."
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
@@ -26,7 +29,10 @@
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
@@ -39,7 +45,10 @@
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
@@ -56,9 +65,7 @@
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"metadata": {},
"source": [
"## Customizing the csv parsing and loading\n",
"\n",
@@ -69,7 +76,10 @@
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [],
"source": [
@@ -86,7 +96,10 @@
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
@@ -102,13 +115,12 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Specify a column to be used identify the document source\n",
"## Specify a column to identify the document source\n",
"\n",
"Use the `source_column` argument to specify a column to be set as the source for the document created from each row. Otherwise `file_path` will be used as the source for all documents created from the csv file.\n",
"Use the `source_column` argument to specify a source for the document created from each row. Otherwise `file_path` will be used as the source for all documents created from the CSV file.\n",
"\n",
"This is useful when using documents loaded from CSV files for chains that answer questions using sources."
]
@@ -144,7 +156,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -158,9 +170,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.9"
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 4
}
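Tying the two customization points above together, a sketch of `CSVLoader` with both `csv_args` (forwarded to Python's `csv.DictReader`) and `source_column`; the file path and column names are illustrative:

```python
from langchain.document_loaders.csv_loader import CSVLoader

loader = CSVLoader(
    file_path="./example_data/mlb_teams_2012.csv",
    csv_args={
        "delimiter": ",",
        "fieldnames": ["MLB Team", "Payroll in millions", "Wins"],
    },
    source_column="MLB Team",  # each row's source metadata comes from this column
)
docs = loader.load()
```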
@@ -1,13 +1,16 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "2dfc4698",
"metadata": {},
"source": [
"# Diffbot\n",
"\n",
">Unlike traditional web scraping tools, [Diffbot](https://docs.diffbot.com/docs) doesn't require any rules to read the content on a page.\n",
">It starts with computer vision, which classifies a page into one of 20 possible types. Content is then interpreted by a machine learning model trained to identify the key attributes on a page based on its type.\n",
">The result is a website transformed into clean structured data (like JSON or CSV), ready for your application.\n",
"\n",
"This covers how to extract HTML documents from a list of URLs using the [Diffbot extract API](https://www.diffbot.com/products/extract/), into a document format that we can use downstream."
]
},
@@ -24,7 +27,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "6fffec88",
"metadata": {},
@@ -45,7 +47,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "e0ce8c05",
"metadata": {},
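A minimal sketch of the Diffbot extraction flow the intro describes; the URL is illustrative and the API token is read from an environment variable you would set yourself:

```python
import os
from langchain.document_loaders import DiffbotLoader

urls = ["https://python.langchain.com/en/latest/index.html"]
# Diffbot classifies and extracts each page; one Document per URL.
loader = DiffbotLoader(urls=urls, api_token=os.environ.get("DIFFBOT_API_TOKEN"))
docs = loader.load()
```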
@@ -6,7 +6,9 @@
"source": [
"# Discord\n",
"\n",
"You can follow the below steps to download your Discord data:\n",
">[Discord](https://discord.com/) is a VoIP and instant messaging social platform. Users have the ability to communicate with voice calls, video calls, text messaging, media and files in private chats or as part of communities called \"servers\". A server is a collection of persistent chat rooms and voice channels which can be accessed via invite links.\n",
"\n",
"Follow these steps to download your `Discord` data:\n",
"\n",
"1. Go to your **User Settings**\n",
"2. Then go to **Privacy and Safety**\n",
@@ -79,9 +81,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
@@ -4,15 +4,30 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# DuckDB Loader\n",
"# DuckDB\n",
"\n",
"Load a DuckDB query with one document per row."
">[DuckDB](https://duckdb.org/) is an in-process SQL OLAP database management system.\n",
"\n",
"Load a `DuckDB` query with one document per row."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#!pip install duckdb"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import DuckDBLoader"
@@ -20,8 +35,10 @@
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"execution_count": 3,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
@@ -40,8 +57,10 @@
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"execution_count": 4,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader = DuckDBLoader(\"SELECT * FROM read_csv_auto('example.csv')\")\n",
@@ -51,8 +70,10 @@
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"execution_count": 5,
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
@@ -167,9 +188,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
"nbformat_minor": 4
}
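Building on the `read_csv_auto` query in the diff, a sketch of splitting columns between page content and metadata; the column names assume the `example.csv` file used above:

```python
from langchain.document_loaders import DuckDBLoader

loader = DuckDBLoader(
    "SELECT * FROM read_csv_auto('example.csv')",
    page_content_columns=["Team"],   # goes into page_content
    metadata_columns=["Payroll"],    # goes into metadata
)
docs = loader.load()
```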
@@ -7,7 +7,7 @@
"source": [
"# Email\n",
"\n",
"This notebook shows how to load email (`.eml`) and Microsoft Outlook (`.msg`) files."
"This notebook shows how to load email (`.eml`) or `Microsoft Outlook` (`.msg`) files."
]
},
{
@@ -20,9 +20,23 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "226e50aa-407d-43d9-a81d-f6706298b10c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#!pip install unstructured"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "40cd9806",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import UnstructuredEmailLoader"
@@ -30,9 +44,11 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"id": "2d20b852",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader = UnstructuredEmailLoader('example_data/fake-email.eml')"
@@ -40,9 +56,11 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "579fa702",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"data = loader.load()"
@@ -50,17 +68,19 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 8,
"id": "90c1d899",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='This is a test email to use for unit tests.\\n\\nImportant points:\\n\\nRoses are red\\n\\nViolets are blue', lookup_str='', metadata={'source': 'example_data/fake-email.eml'}, lookup_index=0)]"
"[Document(page_content='This is a test email to use for unit tests.\\n\\nImportant points:\\n\\nRoses are red\\n\\nViolets are blue', metadata={'source': 'example_data/fake-email.eml'})]"
]
},
"execution_count": 4,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -128,6 +148,16 @@
"## Using OutlookMessageLoader"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "058e670e-9964-44ee-b888-44f23ffb9310",
"metadata": {},
"outputs": [],
"source": [
"#!pip install extract_msg"
]
},
{
"cell_type": "code",
"execution_count": 8,
@@ -204,7 +234,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.6"
}
},
"nbformat": 4,
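The `OutlookMessageLoader` section in this diff only shows the `extract_msg` install; a minimal usage sketch, with the `.msg` path as a placeholder:

```python
from langchain.document_loaders import OutlookMessageLoader

loader = OutlookMessageLoader("example_data/fake-email.msg")
docs = loader.load()
# Metadata typically carries subject/sender/date fields from the message.
print(docs[0].metadata)
```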
@@ -5,16 +5,30 @@
"id": "39af9ecd",
"metadata": {},
"source": [
"# EPubs\n",
"# EPub \n",
"\n",
"This covers how to load `.epub` documents into a document format that we can use downstream. You'll need to install the [`pandoc`](https://pandoc.org/installing.html) package for this loader to work."
">[EPUB](https://en.wikipedia.org/wiki/EPUB) is an e-book file format that uses the \".epub\" file extension. The term is short for electronic publication and is sometimes styled ePub. `EPUB` is supported by many e-readers, and compatible software is available for most smartphones, tablets, and computers.\n",
"\n",
"This covers how to load `.epub` documents into the Document format that we can use downstream. You'll need to install the [`pandoc`](https://pandoc.org/installing.html) package for this loader to work."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cd1affad-8ba6-43b1-b8cd-f61f44025077",
"metadata": {},
"outputs": [],
"source": [
"#!pip install pandoc"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "721c48aa",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import UnstructuredEPubLoader"
@@ -24,7 +38,9 @@
"cell_type": "code",
"execution_count": 2,
"id": "9d3d0e35",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader = UnstructuredEPubLoader(\"winter-sports.epub\")"
@@ -32,9 +48,11 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "06073f91",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"data = loader.load()"
@@ -54,7 +72,9 @@
"cell_type": "code",
"execution_count": 4,
"id": "064f9162",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader = UnstructuredEPubLoader(\"winter-sports.epub\", mode=\"elements\")"
@@ -62,9 +82,11 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"id": "abefbbdb",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"data = loader.load()"
@@ -116,7 +138,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
"version": "3.10.6"
}
},
"nbformat": 4,
@@ -7,35 +7,41 @@
"source": [
"# EverNote\n",
"\n",
"How to load an EverNote file from disk."
">[EverNote](https://evernote.com/) is intended for archiving and creating notes in which photos, audio and saved web content can be embedded. Notes are stored in virtual \"notebooks\" and can be tagged, annotated, edited, searched, and exported.\n",
"\n",
"This notebook shows how to load an `EverNote` file from disk."
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"id": "1a53ece0",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# !pip install pypandoc\n",
"# import pypandoc\n",
"#!pip install pypandoc\n",
"import pypandoc\n",
"\n",
"# pypandoc.download_pandoc()"
"pypandoc.download_pandoc()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "88df766f",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='testing this\\n\\nwhat happens?\\n\\nto the world?\\n', lookup_str='', metadata={'source': 'example_data/testing.enex'}, lookup_index=0)]"
"[Document(page_content='testing this\\n\\nwhat happens?\\n\\nto the world?\\n', metadata={'source': 'example_data/testing.enex'})]"
]
},
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -46,14 +52,6 @@
"loader = EverNoteLoader(\"example_data/testing.enex\")\n",
"loader.load()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c1329905",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -72,7 +70,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.6"
}
},
"nbformat": 4,
@@ -5,60 +5,60 @@
{
"sender_name": "User 1",
"timestamp_ms": 1675597435669,
"content": "Oh no worries! Bye",
"content": "Oh no worries! Bye"
},
{
"sender_name": "User 2",
"timestamp_ms": 1675596277579,
"content": "No Im sorry it was my mistake, the blue one is not for sale",
"content": "No Im sorry it was my mistake, the blue one is not for sale"
},
{
"sender_name": "User 1",
"timestamp_ms": 1675595140251,
"content": "I thought you were selling the blue one!",
"content": "I thought you were selling the blue one!"
},
{
"sender_name": "User 1",
"timestamp_ms": 1675595109305,
"content": "Im not interested in this bag. Im interested in the blue one!",
"content": "Im not interested in this bag. Im interested in the blue one!"
},
{
"sender_name": "User 2",
"timestamp_ms": 1675595068468,
"content": "Here is $129",
"content": "Here is $129"
},
{
"sender_name": "User 2",
"timestamp_ms": 1675595060730,
"photos": [
{"uri": "url_of_some_picture.jpg", "creation_timestamp": 1675595059}
],
]
},
{
"sender_name": "User 2",
"timestamp_ms": 1675595045152,
"content": "Online is at least $100",
"content": "Online is at least $100"
},
{
"sender_name": "User 1",
"timestamp_ms": 1675594799696,
"content": "How much do you want?",
"content": "How much do you want?"
},
{
"sender_name": "User 2",
"timestamp_ms": 1675577876645,
"content": "Goodmorning! $50 is too low.",
"content": "Goodmorning! $50 is too low."
},
{
"sender_name": "User 1",
"timestamp_ms": 1675549022673,
"content": "Hi! Im interested in your bag. Im offering $50. Let me know if you are interested. Thanks!",
},
"content": "Hi! Im interested in your bag. Im offering $50. Let me know if you are interested. Thanks!"
}
],
"title": "User 1 and User 2 chat",
"is_still_participant": true,
"thread_path": "inbox/User 1 and User 2 chat",
"magic_words": [],
"image": {"uri": "image_of_the_chat.jpg", "creation_timestamp": 1675549016},
"joinable_mode": {"mode": 1, "link": ""},
"joinable_mode": {"mode": 1, "link": ""}
}

Binary file not shown.

@@ -0,0 +1,22 @@
[internal]
creation_date = "2023-05-01"
updated_date = "2022-05-01"
release = ["release_type"]
min_endpoint_version = "some_semantic_version"
os_list = ["operating_system_list"]

[rule]
uuid = "some_uuid"
name = "Fake Rule Name"
description = "Fake description of rule"
query = '''
process where process.name : "somequery"
'''

[[rule.threat]]
framework = "MITRE ATT&CK"

[rule.threat.tactic]
name = "Execution"
id = "TA0002"
reference = "https://attack.mitre.org/tactics/TA0002/"

File diff suppressed because it is too large
@@ -6,13 +6,26 @@
"source": [
"### Facebook Chat\n",
"\n",
"This notebook covers how to load data from the Facebook Chats into a format that can be ingested into LangChain."
">[Messenger](https://en.wikipedia.org/wiki/Messenger_(software)) is an American proprietary instant messaging app and platform developed by `Meta Platforms`. Originally developed as `Facebook Chat` in 2008, the company revamped its messaging service in 2010.\n",
"\n",
"This notebook covers how to load data from the [Facebook Chats](https://www.facebook.com/business/help/1646890868956360) into a format that can be ingested into LangChain."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#!pip install pandas"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import FacebookChatLoader"
@@ -21,7 +34,9 @@
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader = FacebookChatLoader(\"example_data/facebook_chat.json\")"
@@ -29,16 +44,18 @@
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"execution_count": 7,
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='User 2 on 2023-02-05 12:46:11: Bye!\\n\\nUser 1 on 2023-02-05 12:43:55: Oh no worries! Bye\\n\\nUser 2 on 2023-02-05 12:24:37: No Im sorry it was my mistake, the blue one is not for sale\\n\\nUser 1 on 2023-02-05 12:05:40: I thought you were selling the blue one!\\n\\nUser 1 on 2023-02-05 12:05:09: Im not interested in this bag. Im interested in the blue one!\\n\\nUser 2 on 2023-02-05 12:04:28: Here is $129\\n\\nUser 2 on 2023-02-05 12:04:05: Online is at least $100\\n\\nUser 1 on 2023-02-05 11:59:59: How much do you want?\\n\\nUser 2 on 2023-02-05 07:17:56: Goodmorning! $50 is too low.\\n\\nUser 1 on 2023-02-04 23:17:02: Hi! Im interested in your bag. Im offering $50. Let me know if you are interested. Thanks!\\n\\n', lookup_str='', metadata={'source': 'docs/modules/document_loaders/examples/example_data/facebook_chat.json'}, lookup_index=0)]"
"[Document(page_content='User 2 on 2023-02-05 03:46:11: Bye!\\n\\nUser 1 on 2023-02-05 03:43:55: Oh no worries! Bye\\n\\nUser 2 on 2023-02-05 03:24:37: No Im sorry it was my mistake, the blue one is not for sale\\n\\nUser 1 on 2023-02-05 03:05:40: I thought you were selling the blue one!\\n\\nUser 1 on 2023-02-05 03:05:09: Im not interested in this bag. Im interested in the blue one!\\n\\nUser 2 on 2023-02-05 03:04:28: Here is $129\\n\\nUser 2 on 2023-02-05 03:04:05: Online is at least $100\\n\\nUser 1 on 2023-02-05 02:59:59: How much do you want?\\n\\nUser 2 on 2023-02-04 22:17:56: Goodmorning! $50 is too low.\\n\\nUser 1 on 2023-02-04 14:17:02: Hi! Im interested in your bag. Im offering $50. Let me know if you are interested. Thanks!\\n\\n', metadata={'source': 'example_data/facebook_chat.json'})]"
]
},
"execution_count": 3,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -64,7 +81,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.1"
"version": "3.10.6"
},
"vscode": {
"interpreter": {
@@ -73,5 +90,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
@@ -1,21 +1,24 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "33205b12",
"metadata": {},
"source": [
"# Figma\n",
"\n",
"This notebook covers how to load data from the Figma REST API into a format that can be ingested into LangChain, along with example usage for code generation."
">[Figma](https://www.figma.com/) is a collaborative web application for interface design.\n",
"\n",
"This notebook covers how to load data from the `Figma` REST API into a format that can be ingested into LangChain, along with example usage for code generation."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "90b69c94",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import os\n",
@@ -37,7 +40,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "d809744a",
"metadata": {},
@@ -117,7 +119,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "baf9b2c9",
"metadata": {},
@@ -151,7 +152,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
"version": "3.10.6"
}
},
"nbformat": 4,
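The diff only shows the `import os` line of the Figma setup; a hedged construction sketch for `FigmaFileLoader`, where the token, node IDs, and file key all come from your own Figma account and URL (environment-variable names here are placeholders):

```python
import os
from langchain.document_loaders.figma import FigmaFileLoader

# Positional arguments: access token, node ids, file key (from the Figma URL).
loader = FigmaFileLoader(
    os.environ.get("ACCESS_TOKEN"),
    os.environ.get("NODE_IDS"),
    os.environ.get("FILE_KEY"),
)
docs = loader.load()
```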
@@ -5,8 +5,9 @@
"id": "79f24a6b",
"metadata": {},
"source": [
"# Directory Loader\n",
"This covers how to use the DirectoryLoader to load all documents in a directory. Under the hood, by default this uses the [UnstructuredLoader](./unstructured_file.ipynb)"
"# File Directory\n",
"\n",
"This covers how to use the `DirectoryLoader` to load all documents in a directory. Under the hood, by default this uses the [UnstructuredLoader](./unstructured_file.ipynb)"
]
},
{
@@ -69,7 +70,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "e633d62f",
"metadata": {},
@@ -78,7 +78,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "43911860",
"metadata": {},
@@ -119,7 +118,7 @@
"metadata": {},
"source": [
"## Change loader class\n",
"By default this uses the UnstructuredLoader class. However, you can change up the type of loader pretty easily."
"By default this uses the `UnstructuredLoader` class. However, you can change up the type of loader pretty easily."
]
},
{
@@ -235,7 +234,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "7f6e0eae",
"id": "6a91a0bc",
"metadata": {},
"outputs": [],
"source": []
@@ -257,7 +256,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.3"
"version": "3.10.6"
}
},
"nbformat": 4,
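A sketch of the "change loader class" idea described above: swap the default Unstructured parsing for `TextLoader` and restrict which files match. The directory path and glob are illustrative:

```python
from langchain.document_loaders import DirectoryLoader, TextLoader

# Load only Markdown files, parsing each with TextLoader instead of Unstructured.
loader = DirectoryLoader("../", glob="**/*.md", loader_cls=TextLoader)
docs = loader.load()
```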
@@ -6,7 +6,9 @@
"source": [
"# Git\n",
"\n",
"This notebook shows how to load text files from a Git repository."
">[Git](https://en.wikipedia.org/wiki/Git) is a distributed version control system that tracks changes in any set of computer files, usually used for coordinating work among programmers collaboratively developing source code during software development.\n",
"\n",
"This notebook shows how to load text files from a `Git` repository."
]
},
{
@@ -18,8 +20,21 @@
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"!pip install GitPython"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from git import Repo\n",
@@ -33,7 +48,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import GitLoader"
@@ -184,9 +201,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
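A minimal sketch of the `GitLoader` imported above, pointed at a local clone; the repository path and branch name are illustrative:

```python
from langchain.document_loaders import GitLoader

# One Document per text file in the working tree of the given branch.
loader = GitLoader(repo_path="./example_data/test_repo1/", branch="main")
docs = loader.load()
```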
@@ -6,7 +6,10 @@
"metadata": {},
"source": [
"# GitBook\n",
"How to pull page data from any GitBook."
"\n",
">[GitBook](https://docs.gitbook.com/) is a modern documentation platform where teams can document everything from products to internal knowledge bases and APIs.\n",
"\n",
"This notebook shows how to pull page data from any `GitBook`."
]
},
{
@@ -19,6 +22,14 @@
"from langchain.document_loaders import GitbookLoader"
]
},
{
"cell_type": "markdown",
"id": "65d5ddce",
"metadata": {},
"source": [
"### Load from single GitBook page"
]
},
{
"cell_type": "code",
"execution_count": 2,
@@ -29,14 +40,6 @@
"loader = GitbookLoader(\"https://docs.gitbook.com\")"
]
},
{
"cell_type": "markdown",
"id": "65d5ddce",
"metadata": {},
"source": [
"### Load from single GitBook page"
]
},
{
"cell_type": "code",
"execution_count": 3,
@@ -178,7 +181,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.6"
},
"vscode": {
"interpreter": {
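Beyond the single-page call shown in the diff, a sketch of crawling a whole GitBook site; `load_all_paths` is the relevant flag:

```python
from langchain.document_loaders import GitbookLoader

# Walk the site and fetch every page, not just the root URL.
loader = GitbookLoader("https://docs.gitbook.com", load_all_paths=True)
all_pages = loader.load()
```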
@@ -4,15 +4,31 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# BigQuery Loader\n",
"# Google BigQuery\n",
"\n",
"Load a BigQuery query with one document per row."
">[Google BigQuery](https://cloud.google.com/bigquery) is a serverless and cost-effective enterprise data warehouse that works across clouds and scales with your data.\n",
"`BigQuery` is a part of the `Google Cloud Platform`.\n",
"\n",
"Load a `BigQuery` query with one document per row."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#!pip install google-cloud-bigquery"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import BigQueryLoader"
@@ -194,9 +210,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
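A minimal sketch of "one document per row" with `BigQueryLoader`; the table name is a placeholder, and any standard SQL query works:

```python
from langchain.document_loaders import BigQueryLoader

# Placeholder table; each returned row becomes one Document.
query = "SELECT id, dna_sequence, organism FROM `my_project.my_dataset.my_table`"

loader = BigQueryLoader(
    query,
    page_content_columns=["dna_sequence"],  # columns that become page_content
    metadata_columns=["id", "organism"],    # columns that become metadata
)
data = loader.load()
```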
@@ -5,19 +5,11 @@
"id": "0ef41fd4",
"metadata": {},
"source": [
"# GCS Directory\n",
"# Google Cloud Storage Directory\n",
"\n",
"This covers how to load document objects from a Google Cloud Storage (GCS) directory."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "5cfb25c9",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import GCSDirectoryLoader"
">[Google Cloud Storage](https://en.wikipedia.org/wiki/Google_Cloud_Storage) is a managed service for storing unstructured data.\n",
"\n",
"This covers how to load document objects from a `Google Cloud Storage (GCS)` directory (bucket)."
]
},
{
@@ -32,6 +24,16 @@
"# !pip install google-cloud-storage"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "5cfb25c9",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import GCSDirectoryLoader"
]
},
{
"cell_type": "code",
"execution_count": 3,
@@ -148,7 +150,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.10.6"
}
},
"nbformat": 4,
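A sketch of loading a whole bucket; the project, bucket, and prefix names are placeholders, with `prefix` narrowing the load to objects under that path:

```python
from langchain.document_loaders import GCSDirectoryLoader

# Placeholder project/bucket; requires google-cloud-storage credentials.
loader = GCSDirectoryLoader(
    project_name="my-project", bucket="my-bucket", prefix="reports/"
)
docs = loader.load()
```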
@@ -5,19 +5,11 @@
"id": "0ef41fd4",
"metadata": {},
"source": [
"# GCS File Storage\n",
"# Google Cloud Storage File\n",
"\n",
"This covers how to load document objects from a Google Cloud Storage (GCS) file object."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "5cfb25c9",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import GCSFileLoader"
">[Google Cloud Storage](https://en.wikipedia.org/wiki/Google_Cloud_Storage) is a managed service for storing unstructured data.\n",
"\n",
"This covers how to load document objects from a `Google Cloud Storage (GCS)` file object (blob)."
]
},
{
@@ -32,6 +24,16 @@
"# !pip install google-cloud-storage"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "5cfb25c9",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import GCSFileLoader"
]
},
{
"cell_type": "code",
"execution_count": 3,
@@ -96,7 +98,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.10.6"
}
},
"nbformat": 4,
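The single-blob counterpart, again with placeholder names:

```python
from langchain.document_loaders import GCSFileLoader

# Placeholder project/bucket/blob; loads exactly one object.
loader = GCSFileLoader(project_name="my-project", bucket="my-bucket", blob="report.pdf")
docs = loader.load()
```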
@@ -6,7 +6,10 @@
"metadata": {},
"source": [
"# Google Drive\n",
"This notebook covers how to load documents from Google Drive. Currently, only Google Docs are supported.\n",
"\n",
">[Google Drive](https://en.wikipedia.org/wiki/Google_Drive) is a file storage and synchronization service developed by Google.\n",
"\n",
"This notebook covers how to load documents from `Google Drive`. Currently, only `Google Docs` are supported.\n",
"\n",
"## Prerequisites\n",
"\n",
@@ -23,6 +26,16 @@
"* Document: https://docs.google.com/document/d/1bfaMQ18_i56204VaQDVeAFpqEijJTgvurupdEDiaUQw/edit -> document id is `\"1bfaMQ18_i56204VaQDVeAFpqEijJTgvurupdEDiaUQw\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e40071c-3a65-4e26-b497-3e2be0bd86b9",
"metadata": {},
"outputs": [],
"source": [
"!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib"
]
},
{
"cell_type": "code",
"execution_count": 1,
@@ -80,7 +93,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.10.6"
}
},
"nbformat": 4,
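A sketch of the loader itself; the folder id is a placeholder taken from a folder URL as the prerequisites above describe, and OAuth credentials are assumed to be set up (the loader's `credentials_path` argument can point at them explicitly):

```python
from langchain.document_loaders import GoogleDriveLoader

# Placeholder folder id; copy yours from the Drive folder URL.
loader = GoogleDriveLoader(folder_id="1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5", recursive=False)
docs = loader.load()
```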
@@ -7,14 +7,18 @@
"source": [
"# Gutenberg\n",
"\n",
"This covers how to load links to Gutenberg e-books into a document format that we can use downstream."
">[Project Gutenberg](https://www.gutenberg.org/about/) is an online library of free eBooks.\n",
"\n",
"This notebook covers how to load links to `Gutenberg` e-books into a document format that we can use downstream."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "9bfd5e46",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import GutenbergLoader"
@@ -22,9 +26,11 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 2,
"id": "700e4ef2",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader = GutenbergLoader('https://www.gutenberg.org/cache/epub/69972/pg69972.txt')"
@@ -32,9 +38,11 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 3,
"id": "b6f28930",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"data = loader.load()"
@@ -42,21 +50,49 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"id": "7d436441",
"metadata": {},
"outputs": [],
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'The Project Gutenberg eBook of The changed brides, by Emma Dorothy\\r\\n\\n\\nEliza Nevitte Southworth\\r\\n\\n\\n\\r\\n\\n\\nThis eBook is for the use of anyone anywhere in the United States and\\r\\n\\n\\nmost other parts of the world at no cost and with almost no restrictions\\r\\n\\n\\nwhatsoever. You may copy it, give it away or re-u'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
"data[0].page_content[:300]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3b74d755",
"metadata": {},
"outputs": [],
"source": []
"execution_count": 9,
"id": "1481beb1-12a7-4654-9d91-bfd101109891",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"{'source': 'https://www.gutenberg.org/cache/epub/69972/pg69972.txt'}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[0].metadata"
]
}
],
"metadata": {
@@ -75,7 +111,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.1"
"version": "3.10.6"
}
},
"nbformat": 4,
125 docs/modules/indexes/document_loaders/examples/hacker_news.ipynb Normal file
@@ -0,0 +1,125 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "4babfba5",
"metadata": {},
"source": [
"# Hacker News\n",
"\n",
">[Hacker News](https://en.wikipedia.org/wiki/Hacker_News) (sometimes abbreviated as `HN`) is a social news website focusing on computer science and entrepreneurship. It is run by the investment fund and startup incubator `Y Combinator`. In general, content that can be submitted is defined as \"anything that gratifies one's intellectual curiosity.\"\n",
"\n",
"This notebook covers how to pull page data and comments from [Hacker News](https://news.ycombinator.com/)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "ff49b177",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import HNLoader"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "849a8d52",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader = HNLoader(\"https://news.ycombinator.com/item?id=34817881\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c2826836",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"data = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "fefa2adc",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"\"delta_p_delta_x 73 days ago \\n | next [–] \\n\\nAstrophysical and cosmological simulations are often insightful. They're also very cross-disciplinary; besides the obvious astrophysics, there's networking and sysadmin, parallel computing and algorithm theory (so that the simulation programs a\""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[0].page_content[:300]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "938ff4ee",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"{'source': 'https://news.ycombinator.com/item?id=34817881',\n",
" 'title': 'What Lights the Universe’s Standard Candles?'}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[0].metadata"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"vscode": {
"interpreter": {
"hash": "c05c795047059754c96cf5f30fd1289e4658e92c92d00704a3cddb24e146e3ef"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -1,101 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "4babfba5",
"metadata": {},
"source": [
"# Hacker News\n",
"How to pull page data and comments from Hacker News"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "ff49b177",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import HNLoader"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "849a8d52",
"metadata": {},
"outputs": [],
"source": [
"loader = HNLoader(\"https://news.ycombinator.com/item?id=34817881\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c2826836",
"metadata": {},
"outputs": [],
"source": [
"data = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "fefa2adc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content=\"delta_p_delta_x 18 hours ago \\n | next [–] \\n\\nAstrophysical and cosmological simulations are often insightful. They're also very cross-disciplinary; besides the obvious astrophysics, there's networking and sysadmin, parallel computing and algorithm theory (so that the simulation programs are actually fast but still accurate), systems design, and even a bit of graphic design for the visualisations.Some of my favourite simulation projects:- IllustrisTNG: https://www.tng-project.org/- SWIFT: https://swift.dur.ac.uk/- CO5BOLD: https://www.astro.uu.se/~bf/co5bold_main.html (which produced these animations of a red-giant star: https://www.astro.uu.se/~bf/movie/AGBmovie.html)- AbacusSummit: https://abacussummit.readthedocs.io/en/latest/And I can add the simulations in the article, too.\\n \\nreply\", lookup_str='', metadata={'source': 'https://news.ycombinator.com/item?id=34817881', 'title': 'What Lights the Universe’s Standard Candles?'}, lookup_index=0),\n",
" Document(page_content=\"andrewflnr 19 hours ago \\n | prev | next [–] \\n\\nWhoa. I didn't know the accretion theory of Ia supernovae was dead, much less that it had been since 2011.\\n \\nreply\", lookup_str='', metadata={'source': 'https://news.ycombinator.com/item?id=34817881', 'title': 'What Lights the Universe’s Standard Candles?'}, lookup_index=0),\n",
" Document(page_content='andreareina 18 hours ago \\n | prev | next [–] \\n\\nThis seems to be the paper https://academic.oup.com/mnras/article/517/4/5260/6779709\\n \\nreply', lookup_str='', metadata={'source': 'https://news.ycombinator.com/item?id=34817881', 'title': 'What Lights the Universe’s Standard Candles?'}, lookup_index=0),\n",
" Document(page_content=\"andreareina 18 hours ago \\n | prev [–] \\n\\nWouldn't double detonation show up as variance in the brightness?\\n \\nreply\", lookup_str='', metadata={'source': 'https://news.ycombinator.com/item?id=34817881', 'title': 'What Lights the Universe’s Standard Candles?'}, lookup_index=0)]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "938ff4ee",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
},
"vscode": {
"interpreter": {
"hash": "c05c795047059754c96cf5f30fd1289e4658e92c92d00704a3cddb24e146e3ef"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -7,7 +7,9 @@
"source": [
"# HTML\n",
"\n",
"This covers how to load HTML documents into a document format that we can use downstream."
">[The HyperText Markup Language or HTML](https://en.wikipedia.org/wiki/HTML) is the standard markup language for documents designed to be displayed in a web browser.\n",
"\n",
"This covers how to load `HTML` documents into a document format that we can use downstream."
]
},
{
@@ -48,7 +50,9 @@
"outputs": [
{
"data": {
"text/plain": "[Document(page_content='My First Heading\\n\\nMy first paragraph.', lookup_str='', metadata={'source': 'example_data/fake-content.html'}, lookup_index=0)]"
"text/plain": [
"[Document(page_content='My First Heading\\n\\nMy first paragraph.', lookup_str='', metadata={'source': 'example_data/fake-content.html'}, lookup_index=0)]"
]
},
"execution_count": 4,
"metadata": {},
@@ -61,20 +65,21 @@
},
{
"cell_type": "markdown",
"id": "00337aae",
"metadata": {},
"source": [
"## Loading HTML with BeautifulSoup4\n",
"\n",
"We can also use BeautifulSoup4 to load HTML documents using the `BSHTMLLoader`. This will extract the text from the html into `page_content`, and the page title as `title` into `metadata`."
],
"metadata": {
"collapsed": false
}
"We can also use `BeautifulSoup4` to load HTML documents using the `BSHTMLLoader`. This will extract the text from the HTML into `page_content`, and the page title as `title` into `metadata`."
]
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 1,
"id": "79b1bce4",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import BSHTMLLoader"
@@ -82,13 +87,23 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 2,
"id": "4be99e6c",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"tags": []
},
"outputs": [
{
"data": {
"text/plain": "[Document(page_content='\\n\\nTest Title\\n\\n\\nMy First Heading\\nMy first paragraph.\\n\\n\\n', lookup_str='', metadata={'source': 'example_data/fake-content.html', 'title': 'Test Title'}, lookup_index=0)]"
"text/plain": [
"[Document(page_content='\\n\\nTest Title\\n\\n\\nMy First Heading\\nMy first paragraph.\\n\\n\\n', metadata={'source': 'example_data/fake-content.html', 'title': 'Test Title'})]"
]
},
"execution_count": 17,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -97,19 +112,7 @@
"loader = BSHTMLLoader(\"example_data/fake-content.html\")\n",
"data = loader.load()\n",
"data"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
]
}
],
"metadata": {
@@ -128,7 +131,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.10.6"
}
},
"nbformat": 4,
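A side-by-side sketch of the two loaders this notebook contrasts, using the sample file that ships with the docs:

```python
from langchain.document_loaders import UnstructuredHTMLLoader, BSHTMLLoader

# Plain-text extraction via Unstructured.
data = UnstructuredHTMLLoader("example_data/fake-content.html").load()

# The BeautifulSoup4-based loader additionally captures the page <title>
# into the Document metadata.
soup_data = BSHTMLLoader("example_data/fake-content.html").load()
print(soup_data[0].metadata["title"])
```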
@@ -5,11 +5,13 @@
"id": "04c9fdc5",
"metadata": {},
"source": [
"# HuggingFace dataset loader \n",
"# HuggingFace dataset\n",
"\n",
"This notebook shows how to load Hugging Face Hub datasets to LangChain.\n",
">The [Hugging Face Hub](https://huggingface.co/docs/hub/index) is home to over 5,000 [datasets](https://huggingface.co/docs/hub/index#datasets) in more than 100 languages that can be used for a broad range of tasks across NLP, Computer Vision, and Audio. They are used for tasks such as translation,\n",
"automatic speech recognition, and image classification.\n",
"\n",
"The Hugging Face Hub hosts a large number of community-curated datasets for a diverse range of tasks such as translation, automatic speech recognition, and image classification.\n"
"\n",
"This notebook shows how to load `Hugging Face Hub` datasets to LangChain."
]
},
{
@@ -212,7 +214,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.6"
}
},
"nbformat": 4,
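A minimal sketch of loading a Hub dataset into documents; "imdb" and its "text" column are illustrative, and any dataset/column pair works:

```python
from langchain.document_loaders import HuggingFaceDatasetLoader

# Placeholder dataset and text column.
loader = HuggingFaceDatasetLoader("imdb", page_content_column="text")
data = loader.load()
```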
File diff suppressed because one or more lines are too long
@@ -7,7 +7,7 @@
"source": [
"# Images\n",
"\n",
"This covers how to load images such as JPGs or PNGs into a document format that we can use downstream."
"This covers how to load images such as `JPG` or `PNG` into a document format that we can use downstream."
]
},
{
@@ -18,11 +18,25 @@
"## Using Unstructured"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db8e56db-2e66-443b-8a0b-ef69fa5fae9a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#!pip install pdfminer"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "0cc0cd42",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders.image import UnstructuredImageLoader"
@@ -32,7 +46,9 @@
"cell_type": "code",
"execution_count": 2,
"id": "082d557c",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader = UnstructuredImageLoader(\"layout-parser-paper-fast.jpg\")"
@@ -40,9 +56,11 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "df11c953",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"data = loader.load()"
@@ -137,7 +155,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
"version": "3.10.6"
}
},
"nbformat": 4,
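A sketch of the "elements" variant of this loader, assuming the same sample image as above:

```python
from langchain.document_loaders.image import UnstructuredImageLoader

# mode="elements" keeps each layout element (title, narrative text, ...)
# as its own Document instead of one concatenated chunk.
loader = UnstructuredImageLoader("layout-parser-paper-fast.jpg", mode="elements")
data = loader.load()
print(data[0].metadata["category"])
```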
@@ -6,7 +6,21 @@
"metadata": {},
"source": [
"# Image captions\n",
"This notebook shows how to use the ImageCaptionLoader to generate a query-able index of image captions"
"\n",
"By default, the loader utilizes the pre-trained [Salesforce BLIP image captioning model](https://huggingface.co/Salesforce/blip-image-captioning-base).\n",
"\n",
"\n",
"This notebook shows how to use the `ImageCaptionLoader` to generate a queryable index of image captions."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f78585a-a2fa-4ece-834f-66692b959efb",
"metadata": {},
"outputs": [],
"source": [
"#!pip install transformers"
]
},
{
@@ -232,7 +246,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.6"
}
},
"nbformat": 4,
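A minimal sketch of invoking the loader; the image URL below is a hypothetical placeholder, and any list of local paths or URLs can be passed:

```python
from langchain.document_loaders import ImageCaptionLoader

# Hypothetical image URL; substitute your own paths or URLs.
list_image_urls = [
    "https://upload.wikimedia.org/wikipedia/commons/5/5a/Hummingbird.jpg",
]
loader = ImageCaptionLoader(path_images=list_image_urls)
list_docs = loader.load()  # each Document pairs a generated caption with its image source
```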
File diff suppressed because one or more lines are too long
367 docs/modules/indexes/document_loaders/examples/json_loader.ipynb Normal file
@@ -0,0 +1,367 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# JSON Files\n",
"\n",
"The `JSONLoader` uses a specified [jq schema](https://en.wikipedia.org/wiki/Jq_(programming_language)) to parse the JSON files.\n",
"\n",
"This notebook shows how to use the `JSONLoader` to load [JSON](https://en.wikipedia.org/wiki/JSON) files into documents. A few examples of `jq` schemas extracting different parts of a JSON file are also shown.\n",
"\n",
"Check this [manual](https://stedolan.github.io/jq/manual/#Basicfilters) for detailed documentation of the `jq` syntax."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install jq"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"from langchain.document_loaders import JSONLoader"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"from pathlib import Path\n",
"from pprint import pprint\n",
"\n",
"\n",
"file_path='./example_data/facebook_chat.json'\n",
"data = json.loads(Path(file_path).read_text())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'image': {'creation_timestamp': 1675549016, 'uri': 'image_of_the_chat.jpg'},\n",
" 'is_still_participant': True,\n",
" 'joinable_mode': {'link': '', 'mode': 1},\n",
" 'magic_words': [],\n",
" 'messages': [{'content': 'Bye!',\n",
" 'sender_name': 'User 2',\n",
" 'timestamp_ms': 1675597571851},\n",
" {'content': 'Oh no worries! Bye',\n",
" 'sender_name': 'User 1',\n",
" 'timestamp_ms': 1675597435669},\n",
" {'content': 'No Im sorry it was my mistake, the blue one is not '\n",
" 'for sale',\n",
" 'sender_name': 'User 2',\n",
" 'timestamp_ms': 1675596277579},\n",
" {'content': 'I thought you were selling the blue one!',\n",
" 'sender_name': 'User 1',\n",
" 'timestamp_ms': 1675595140251},\n",
" {'content': 'Im not interested in this bag. Im interested in the '\n",
" 'blue one!',\n",
" 'sender_name': 'User 1',\n",
" 'timestamp_ms': 1675595109305},\n",
" {'content': 'Here is $129',\n",
" 'sender_name': 'User 2',\n",
" 'timestamp_ms': 1675595068468},\n",
" {'photos': [{'creation_timestamp': 1675595059,\n",
" 'uri': 'url_of_some_picture.jpg'}],\n",
" 'sender_name': 'User 2',\n",
" 'timestamp_ms': 1675595060730},\n",
" {'content': 'Online is at least $100',\n",
" 'sender_name': 'User 2',\n",
" 'timestamp_ms': 1675595045152},\n",
" {'content': 'How much do you want?',\n",
" 'sender_name': 'User 1',\n",
" 'timestamp_ms': 1675594799696},\n",
" {'content': 'Goodmorning! $50 is too low.',\n",
" 'sender_name': 'User 2',\n",
" 'timestamp_ms': 1675577876645},\n",
" {'content': 'Hi! Im interested in your bag. Im offering $50. Let '\n",
" 'me know if you are interested. Thanks!',\n",
" 'sender_name': 'User 1',\n",
" 'timestamp_ms': 1675549022673}],\n",
" 'participants': [{'name': 'User 1'}, {'name': 'User 2'}],\n",
" 'thread_path': 'inbox/User 1 and User 2 chat',\n",
" 'title': 'User 1 and User 2 chat'}\n"
]
}
],
"source": [
"pprint(data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using `JSONLoader`\n",
"\n",
"Suppose we are interested in extracting the values under the `content` field within the `messages` key of the JSON data. This can easily be done through the `JSONLoader` as shown below."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"loader = JSONLoader(\n",
"    file_path='./example_data/facebook_chat.json',\n",
"    jq_schema='.messages[].content')\n",
"\n",
"data = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(page_content='Bye!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 1}),\n",
" Document(page_content='Oh no worries! Bye', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 2}),\n",
" Document(page_content='No Im sorry it was my mistake, the blue one is not for sale', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 3}),\n",
" Document(page_content='I thought you were selling the blue one!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 4}),\n",
" Document(page_content='Im not interested in this bag. Im interested in the blue one!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 5}),\n",
" Document(page_content='Here is $129', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 6}),\n",
" Document(page_content='', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 7}),\n",
" Document(page_content='Online is at least $100', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 8}),\n",
" Document(page_content='How much do you want?', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 9}),\n",
" Document(page_content='Goodmorning! $50 is too low.', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 10}),\n",
" Document(page_content='Hi! Im interested in your bag. Im offering $50. Let me know if you are interested. Thanks!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 11})]\n"
]
}
],
"source": [
"pprint(data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Extracting metadata\n",
"\n",
"Generally, we want to include metadata available in the JSON file into the documents that we create from the content.\n",
"\n",
"The following demonstrates how metadata can be extracted using the `JSONLoader`.\n",
"\n",
"There are some key changes to be noted. In the previous example, where we didn't collect the metadata, we could specify directly in the schema where the value for `page_content` should be extracted from.\n",
"\n",
"```\n",
".messages[].content\n",
"```\n",
"\n",
"In the current example, we have to tell the loader to iterate over the records in the `messages` field. The `jq_schema` then has to be:\n",
"\n",
"```\n",
".messages[]\n",
"```\n",
"\n",
"This allows us to pass the records (dict) into the `metadata_func` that has to be implemented. The `metadata_func` is responsible for identifying which pieces of information in the record should be included in the metadata stored in the final `Document` object.\n",
"\n",
"Additionally, we now have to explicitly specify in the loader, via the `content_key` argument, the key from the record where the value for the `page_content` needs to be extracted from."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Define the metadata extraction function.\n",
"def metadata_func(record: dict, metadata: dict) -> dict:\n",
"\n",
"    metadata[\"sender_name\"] = record.get(\"sender_name\")\n",
"    metadata[\"timestamp_ms\"] = record.get(\"timestamp_ms\")\n",
"\n",
"    return metadata\n",
"\n",
"\n",
"loader = JSONLoader(\n",
"    file_path='./example_data/facebook_chat.json',\n",
"    jq_schema='.messages[]',\n",
"    content_key=\"content\",\n",
"    metadata_func=metadata_func\n",
")\n",
"\n",
"data = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(page_content='Bye!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 1, 'sender_name': 'User 2', 'timestamp_ms': 1675597571851}),\n",
" Document(page_content='Oh no worries! Bye', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 2, 'sender_name': 'User 1', 'timestamp_ms': 1675597435669}),\n",
" Document(page_content='No Im sorry it was my mistake, the blue one is not for sale', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 3, 'sender_name': 'User 2', 'timestamp_ms': 1675596277579}),\n",
" Document(page_content='I thought you were selling the blue one!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 4, 'sender_name': 'User 1', 'timestamp_ms': 1675595140251}),\n",
" Document(page_content='Im not interested in this bag. Im interested in the blue one!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 5, 'sender_name': 'User 1', 'timestamp_ms': 1675595109305}),\n",
" Document(page_content='Here is $129', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 6, 'sender_name': 'User 2', 'timestamp_ms': 1675595068468}),\n",
" Document(page_content='', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 7, 'sender_name': 'User 2', 'timestamp_ms': 1675595060730}),\n",
" Document(page_content='Online is at least $100', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 8, 'sender_name': 'User 2', 'timestamp_ms': 1675595045152}),\n",
" Document(page_content='How much do you want?', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 9, 'sender_name': 'User 1', 'timestamp_ms': 1675594799696}),\n",
" Document(page_content='Goodmorning! $50 is too low.', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 10, 'sender_name': 'User 2', 'timestamp_ms': 1675577876645}),\n",
" Document(page_content='Hi! Im interested in your bag. Im offering $50. Let me know if you are interested. Thanks!', metadata={'source': '/Users/avsolatorio/WBG/langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 11, 'sender_name': 'User 1', 'timestamp_ms': 1675549022673})]\n"
]
}
],
"source": [
"pprint(data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, you will see that the documents contain the metadata associated with the content we extracted."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## The `metadata_func`\n",
"\n",
"As shown above, the `metadata_func` accepts the default metadata generated by the `JSONLoader`. This gives the user full control over how the metadata is formatted.\n",
"\n",
"For example, the default metadata contains the `source` and the `seq_num` keys. However, it is possible that the JSON data contains these keys as well. The user can then exploit the `metadata_func` to rename the default keys and use the ones from the JSON data.\n",
"\n",
"The example below shows how we can modify the `source` to contain only the file source information relative to the `langchain` directory."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Define the metadata extraction function.\n",
"def metadata_func(record: dict, metadata: dict) -> dict:\n",
"\n",
"    metadata[\"sender_name\"] = record.get(\"sender_name\")\n",
"    metadata[\"timestamp_ms\"] = record.get(\"timestamp_ms\")\n",
"    \n",
"    if \"source\" in metadata:\n",
"        source = metadata[\"source\"].split(\"/\")\n",
"        source = source[source.index(\"langchain\"):]\n",
"        metadata[\"source\"] = \"/\".join(source)\n",
"\n",
"    return metadata\n",
"\n",
"\n",
"loader = JSONLoader(\n",
"    file_path='./example_data/facebook_chat.json',\n",
"    jq_schema='.messages[]',\n",
"    content_key=\"content\",\n",
"    metadata_func=metadata_func\n",
")\n",
"\n",
"data = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(page_content='Bye!', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 1, 'sender_name': 'User 2', 'timestamp_ms': 1675597571851}),\n",
" Document(page_content='Oh no worries! Bye', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 2, 'sender_name': 'User 1', 'timestamp_ms': 1675597435669}),\n",
" Document(page_content='No Im sorry it was my mistake, the blue one is not for sale', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 3, 'sender_name': 'User 2', 'timestamp_ms': 1675596277579}),\n",
" Document(page_content='I thought you were selling the blue one!', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 4, 'sender_name': 'User 1', 'timestamp_ms': 1675595140251}),\n",
" Document(page_content='Im not interested in this bag. Im interested in the blue one!', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 5, 'sender_name': 'User 1', 'timestamp_ms': 1675595109305}),\n",
" Document(page_content='Here is $129', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 6, 'sender_name': 'User 2', 'timestamp_ms': 1675595068468}),\n",
" Document(page_content='', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 7, 'sender_name': 'User 2', 'timestamp_ms': 1675595060730}),\n",
" Document(page_content='Online is at least $100', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 8, 'sender_name': 'User 2', 'timestamp_ms': 1675595045152}),\n",
" Document(page_content='How much do you want?', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 9, 'sender_name': 'User 1', 'timestamp_ms': 1675594799696}),\n",
" Document(page_content='Goodmorning! $50 is too low.', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 10, 'sender_name': 'User 2', 'timestamp_ms': 1675577876645}),\n",
" Document(page_content='Hi! Im interested in your bag. Im offering $50. Let me know if you are interested. Thanks!', metadata={'source': 'langchain/docs/modules/indexes/document_loaders/examples/example_data/facebook_chat.json', 'seq_num': 11, 'sender_name': 'User 1', 'timestamp_ms': 1675549022673})]\n"
]
}
],
"source": [
"pprint(data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Common JSON structures with jq schema\n",
"\n",
"The list below provides a reference to the possible `jq_schema` the user can use to extract content from the JSON data depending on the structure.\n",
"\n",
"```\n",
"JSON        -> [{\"text\": ...}, {\"text\": ...}, {\"text\": ...}]\n",
"jq_schema   -> \".[].text\"\n",
"    \n",
"JSON        -> {\"key\": [{\"text\": ...}, {\"text\": ...}, {\"text\": ...}]}\n",
"jq_schema   -> \".key[].text\"\n",
"\n",
"JSON        -> [\"...\", \"...\", \"...\"]\n",
"jq_schema   -> \".[]\"\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
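A sketch of the last jq pattern in that reference list (a bare array of strings); the file name is a hypothetical placeholder:

```python
from langchain.document_loaders import JSONLoader

# Hypothetical file holding ["...", "...", "..."]; ".[]" yields one
# Document per array element.
loader = JSONLoader(file_path="./example_data/lines.json", jq_schema=".[]")
data = loader.load()
```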
@@ -4,15 +4,19 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Notebook\n",
"# Jupyter Notebook\n",
"\n",
"This notebook covers how to load data from an .ipynb notebook into a format suitable for LangChain."
">[Jupyter Notebook](https://en.wikipedia.org/wiki/Project_Jupyter#Applications) (formerly `IPython Notebook`) is a web-based interactive computational environment for creating notebook documents.\n",
"\n",
"This notebook covers how to load data from a `Jupyter notebook (.ipynb)` into a format suitable for LangChain."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import NotebookLoader"
@@ -20,8 +24,10 @@
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"execution_count": 2,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader = NotebookLoader(\"example_data/notebook.ipynb\", include_outputs=True, max_output_length=20, remove_newline=True)"
@@ -43,16 +49,18 @@
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"execution_count": 3,
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='\\'markdown\\' cell: \\'[\\'# Notebook\\', \\'\\', \\'This notebook covers how to load data from an .ipynb notebook into a format suitable by LangChain.\\']\\'\\n\\n \\'code\\' cell: \\'[\\'from langchain.document_loaders import NotebookLoader\\']\\'\\n\\n \\'code\\' cell: \\'[\\'loader = NotebookLoader(\"example_data/notebook.ipynb\")\\']\\'\\n\\n \\'markdown\\' cell: \\'[\\'`NotebookLoader.load()` loads the `.ipynb` notebook file into a `Document` object.\\', \\'\\', \\'**Parameters**:\\', \\'\\', \\'* `include_outputs` (bool): whether to include cell outputs in the resulting document (default is False).\\', \\'* `max_output_length` (int): the maximum number of characters to include from each cell output (default is 10).\\', \\'* `remove_newline` (bool): whether to remove newline characters from the cell sources and outputs (default is False).\\', \\'* `traceback` (bool): whether to include full traceback (default is False).\\']\\'\\n\\n \\'code\\' cell: \\'[\\'loader.load(include_outputs=True, max_output_length=20, remove_newline=True)\\']\\'\\n\\n', lookup_str='', metadata={'source': 'example_data/notebook.ipynb'}, lookup_index=0)]"
"[Document(page_content='\\'markdown\\' cell: \\'[\\'# Notebook\\', \\'\\', \\'This notebook covers how to load data from an .ipynb notebook into a format suitable by LangChain.\\']\\'\\n\\n \\'code\\' cell: \\'[\\'from langchain.document_loaders import NotebookLoader\\']\\'\\n\\n \\'code\\' cell: \\'[\\'loader = NotebookLoader(\"example_data/notebook.ipynb\")\\']\\'\\n\\n \\'markdown\\' cell: \\'[\\'`NotebookLoader.load()` loads the `.ipynb` notebook file into a `Document` object.\\', \\'\\', \\'**Parameters**:\\', \\'\\', \\'* `include_outputs` (bool): whether to include cell outputs in the resulting document (default is False).\\', \\'* `max_output_length` (int): the maximum number of characters to include from each cell output (default is 10).\\', \\'* `remove_newline` (bool): whether to remove newline characters from the cell sources and outputs (default is False).\\', \\'* `traceback` (bool): whether to include full traceback (default is False).\\']\\'\\n\\n \\'code\\' cell: \\'[\\'loader.load(include_outputs=True, max_output_length=20, remove_newline=True)\\']\\'\\n\\n', metadata={'source': 'example_data/notebook.ipynb'})]"
]
},
"execution_count": 5,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -60,13 +68,6 @@
"source": [
"loader.load()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -85,7 +86,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.6"
},
"vscode": {
"interpreter": {
@@ -94,5 +95,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
@@ -7,34 +7,53 @@
"source": [
"# Markdown\n",
"\n",
"This covers how to load markdown documents into a document format that we can use downstream."
">[Markdown](https://en.wikipedia.org/wiki/Markdown) is a lightweight markup language for creating formatted text using a plain-text editor.\n",
"\n",
"This covers how to load `markdown` documents into a document format that we can use downstream."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "721c48aa",
"id": "5282f85c",
"metadata": {},
"outputs": [],
"source": [
"# !pip install unstructured > /dev/null"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "721c48aa",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import UnstructuredMarkdownLoader"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "9d3d0e35",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader = UnstructuredMarkdownLoader(\"../../../../README.md\")"
"markdown_path = \"../../../../../README.md\"\n",
"loader = UnstructuredMarkdownLoader(markdown_path)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "06073f91",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"data = loader.load()"
@@ -42,17 +61,19 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "c9adc5cb",
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content=\"ð\\x9f¦\\x9cï¸\\x8fð\\x9f”\\x97 LangChain\\n\\nâ\\x9a¡ Building applications with LLMs through composability â\\x9a¡\\n\\nProduction Support: As you move your LangChains into production, we'd love to offer more comprehensive support.\\nPlease fill out this form and we'll set up a dedicated support Slack channel.\\n\\nQuick Install\\n\\npip install langchain\\n\\nð\\x9f¤” What is this?\\n\\nLarge language models (LLMs) are emerging as a transformative technology, enabling\\ndevelopers to build applications that they previously could not.\\nBut using these LLMs in isolation is often not enough to\\ncreate a truly powerful app - the real power comes when you can combine them with other sources of computation or knowledge.\\n\\nThis library is aimed at assisting in the development of those types of applications. Common examples of these types of applications include:\\n\\nâ\\x9d“ Question Answering over specific documents\\n\\nDocumentation\\n\\nEnd-to-end Example: Question Answering over Notion Database\\n\\nð\\x9f’¬ Chatbots\\n\\nDocumentation\\n\\nEnd-to-end Example: Chat-LangChain\\n\\nð\\x9f¤\\x96 Agents\\n\\nDocumentation\\n\\nEnd-to-end Example: GPT+WolframAlpha\\n\\nð\\x9f“\\x96 Documentation\\n\\nPlease see here for full documentation on:\\n\\nGetting started (installation, setting up the environment, simple examples)\\n\\nHow-To examples (demos, integrations, helper functions)\\n\\nReference (full API docs)\\n Resources (high-level explanation of core concepts)\\n\\nð\\x9f\\x9a\\x80 What can this help with?\\n\\nThere are six main areas that LangChain is designed to help with.\\nThese are, in increasing order of complexity:\\n\\nð\\x9f“\\x83 LLMs and Prompts:\\n\\nThis includes prompt management, prompt optimization, generic interface for all LLMs, and common utilities for working with LLMs.\\n\\nð\\x9f”\\x97 Chains:\\n\\nChains go beyond just a single LLM call, and are sequences of calls (whether to an LLM or a different utility). LangChain provides a standard interface for chains, lots of integrations with other tools, and end-to-end chains for common applications.\\n\\nð\\x9f“\\x9a Data Augmented Generation:\\n\\nData Augmented Generation involves specific types of chains that first interact with an external datasource to fetch data to use in the generation step. Examples of this include summarization of long pieces of text and question/answering over specific data sources.\\n\\nð\\x9f¤\\x96 Agents:\\n\\nAgents involve an LLM making decisions about which Actions to take, taking that Action, seeing an Observation, and repeating that until done. LangChain provides a standard interface for agents, a selection of agents to choose from, and examples of end to end agents.\\n\\nð\\x9f§\\xa0 Memory:\\n\\nMemory is the concept of persisting state between calls of a chain/agent. LangChain provides a standard interface for memory, a collection of memory implementations, and examples of chains/agents that use memory.\\n\\nð\\x9f§\\x90 Evaluation:\\n\\n[BETA] Generative models are notoriously hard to evaluate with traditional metrics. One new way of evaluating them is using language models themselves to do the evaluation. 
LangChain provides some prompts/chains for assisting in this.\\n\\nFor more information on these concepts, please see our full documentation.\\n\\nð\\x9f’\\x81 Contributing\\n\\nAs an open source project in a rapidly developing field, we are extremely open to contributions, whether it be in the form of a new feature, improved infra, or better documentation.\\n\\nFor detailed information on how to contribute, see here.\", lookup_str='', metadata={'source': '../../../../README.md'}, lookup_index=0)]"
"[Document(page_content=\"ð\\x9f¦\\x9cï¸\\x8fð\\x9f”\\x97 LangChain\\n\\nâ\\x9a¡ Building applications with LLMs through composability â\\x9a¡\\n\\nLooking for the JS/TS version? Check out LangChain.js.\\n\\nProduction Support: As you move your LangChains into production, we'd love to offer more comprehensive support.\\nPlease fill out this form and we'll set up a dedicated support Slack channel.\\n\\nQuick Install\\n\\npip install langchain\\nor\\nconda install langchain -c conda-forge\\n\\nð\\x9f¤” What is this?\\n\\nLarge language models (LLMs) are emerging as a transformative technology, enabling developers to build applications that they previously could not. However, using these LLMs in isolation is often insufficient for creating a truly powerful app - the real power comes when you can combine them with other sources of computation or knowledge.\\n\\nThis library aims to assist in the development of those types of applications. Common examples of these applications include:\\n\\nâ\\x9d“ Question Answering over specific documents\\n\\nDocumentation\\n\\nEnd-to-end Example: Question Answering over Notion Database\\n\\nð\\x9f’¬ Chatbots\\n\\nDocumentation\\n\\nEnd-to-end Example: Chat-LangChain\\n\\nð\\x9f¤\\x96 Agents\\n\\nDocumentation\\n\\nEnd-to-end Example: GPT+WolframAlpha\\n\\nð\\x9f“\\x96 Documentation\\n\\nPlease see here for full documentation on:\\n\\nGetting started (installation, setting up the environment, simple examples)\\n\\nHow-To examples (demos, integrations, helper functions)\\n\\nReference (full API docs)\\n\\nResources (high-level explanation of core concepts)\\n\\nð\\x9f\\x9a\\x80 What can this help with?\\n\\nThere are six main areas that LangChain is designed to help with.\\nThese are, in increasing order of complexity:\\n\\nð\\x9f“\\x83 LLMs and Prompts:\\n\\nThis includes prompt management, prompt optimization, a generic interface for all LLMs, and common utilities for working with LLMs.\\n\\nð\\x9f”\\x97 Chains:\\n\\nChains go beyond a single LLM call and involve sequences of calls (whether to an LLM or a different utility). LangChain provides a standard interface for chains, lots of integrations with other tools, and end-to-end chains for common applications.\\n\\nð\\x9f“\\x9a Data Augmented Generation:\\n\\nData Augmented Generation involves specific types of chains that first interact with an external data source to fetch data for use in the generation step. Examples include summarization of long pieces of text and question/answering over specific data sources.\\n\\nð\\x9f¤\\x96 Agents:\\n\\nAgents involve an LLM making decisions about which Actions to take, taking that Action, seeing an Observation, and repeating that until done. LangChain provides a standard interface for agents, a selection of agents to choose from, and examples of end-to-end agents.\\n\\nð\\x9f§\\xa0 Memory:\\n\\nMemory refers to persisting state between calls of a chain/agent. LangChain provides a standard interface for memory, a collection of memory implementations, and examples of chains/agents that use memory.\\n\\nð\\x9f§\\x90 Evaluation:\\n\\n[BETA] Generative models are notoriously hard to evaluate with traditional metrics. One new way of evaluating them is using language models themselves to do the evaluation. 
LangChain provides some prompts/chains for assisting in this.\\n\\nFor more information on these concepts, please see our full documentation.\\n\\nð\\x9f’\\x81 Contributing\\n\\nAs an open-source project in a rapidly developing field, we are extremely open to contributions, whether it be in the form of a new feature, improved infrastructure, or better documentation.\\n\\nFor detailed information on how to contribute, see here.\", metadata={'source': '../../../../../README.md'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -73,19 +94,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "064f9162",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = UnstructuredMarkdownLoader(\"../../../../README.md\", mode=\"elements\")"
|
||||
"loader = UnstructuredMarkdownLoader(markdown_path, mode=\"elements\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "abefbbdb",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
@@ -93,17 +118,19 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"id": "a547c534",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='ð\\x9f¦\\x9cï¸\\x8fð\\x9f”\\x97 LangChain', lookup_str='', metadata={'source': '../../../../README.md', 'page_number': 1, 'category': 'UncategorizedText'}, lookup_index=0)"
|
||||
"Document(page_content='ð\\x9f¦\\x9cï¸\\x8fð\\x9f”\\x97 LangChain', metadata={'source': '../../../../../README.md', 'page_number': 1, 'category': 'Title'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -111,14 +138,6 @@
|
||||
"source": [
|
||||
"data[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "381d4139",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -137,7 +156,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.13"
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -0,0 +1,124 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MediaWikiDump\n",
|
||||
"\n",
|
||||
">[MediaWiki XML Dumps](https://www.mediawiki.org/wiki/Manual:Importing_XML_dumps) contain the content of a wiki (wiki pages with all their revisions), without the site-related data. A XML dump does not create a full backup of the wiki database, the dump does not contain user accounts, images, edit logs, etc.\n",
|
||||
"\n",
|
||||
"This covers how to load a MediaWiki XML dump file into a document format that we can use downstream.\n",
|
||||
"\n",
|
||||
"It uses `mwxml` from `mediawiki-utilities` to dump and `mwparserfromhell` from `earwig` to parse MediaWiki wikicode.\n",
|
||||
"\n",
|
||||
"Dump files can be obtained with dumpBackup.php or on the Special:Statistics page of the Wiki."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "IXigDil0pANf"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#mediawiki-utilities supports XML schema 0.11 in unmerged branches\n",
|
||||
"!pip install -qU git+https://github.com/mediawiki-utilities/python-mwtypes@updates_schema_0.11\n",
|
||||
"#mediawiki-utilities mwxml has a bug, fix PR pending\n",
|
||||
"!pip install -qU git+https://github.com/gdedrouas/python-mwxml@xml_format_0.11\n",
|
||||
"!pip install -qU mwparserfromhell"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"id": "8-vB5XGHsE85"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import MWDumpLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"id": "i6e42MSkqEeH"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"You have 177 document(s) in your data \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = MWDumpLoader(\"example_data/testmw_pages_current.xml\", encoding=\"utf8\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"print (f'You have {len(documents)} document(s) in your data ')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"id": "C2qbBVrjFK_H"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='\\t\\n\\t\\n\\tArtist\\n\\tReleased\\n\\tRecorded\\n\\tLength\\n\\tLabel\\n\\tProducer', metadata={'source': 'Album'}),\n",
|
||||
" Document(page_content='{| class=\"article-table plainlinks\" style=\"width:100%;\"\\n|- style=\"font-size:18px;\"\\n! style=\"padding:0px;\" | Template documentation\\n|-\\n| Note: portions of the template sample may not be visible without values provided.\\n|-\\n| View or edit this documentation. (About template documentation)\\n|-\\n| Editors can experiment in this template\\'s [ sandbox] and [ test case] pages.\\n|}Category:Documentation templates', metadata={'source': 'Documentation'}),\n",
|
||||
" Document(page_content='Description\\nThis template is used to insert descriptions on template pages.\\n\\nSyntax\\nAdd <noinclude></noinclude> at the end of the template page.\\n\\nAdd <noinclude></noinclude> to transclude an alternative page from the /doc subpage.\\n\\nUsage\\n\\nOn the Template page\\nThis is the normal format when used:\\n\\nTEMPLATE CODE\\n<includeonly>Any categories to be inserted into articles by the template</includeonly>\\n<noinclude>{{Documentation}}</noinclude>\\n\\nIf your template is not a completed div or table, you may need to close the tags just before {{Documentation}} is inserted (within the noinclude tags).\\n\\nA line break right before {{Documentation}} can also be useful as it helps prevent the documentation template \"running into\" previous code.\\n\\nOn the documentation page\\nThe documentation page is usually located on the /doc subpage for a template, but a different page can be specified with the first parameter of the template (see Syntax).\\n\\nNormally, you will want to write something like the following on the documentation page:\\n\\n==Description==\\nThis template is used to do something.\\n\\n==Syntax==\\nType <code>{{t|templatename}}</code> somewhere.\\n\\n==Samples==\\n<code><nowiki>{{templatename|input}}</nowiki></code> \\n\\nresults in...\\n\\n{{templatename|input}}\\n\\n<includeonly>Any categories for the template itself</includeonly>\\n<noinclude>[[Category:Template documentation]]</noinclude>\\n\\nUse any or all of the above description/syntax/sample output sections. You may also want to add \"see also\" or other sections.\\n\\nNote that the above example also uses the Template:T template.\\n\\nCategory:Documentation templatesCategory:Template documentation', metadata={'source': 'Documentation/doc'}),\n",
|
||||
" Document(page_content='Description\\nA template link with a variable number of parameters (0-20).\\n\\nSyntax\\n \\n\\nSource\\nImproved version not needing t/piece subtemplate developed on Templates wiki see the list of authors. Copied here via CC-By-SA 3.0 license.\\n\\nExample\\n\\nCategory:General wiki templates\\nCategory:Template documentation', metadata={'source': 'T/doc'}),\n",
|
||||
" Document(page_content='\\t\\n\\t\\t \\n\\t\\n\\t\\t Aliases\\n\\t Relatives\\n\\t Affiliation\\n Occupation\\n \\n Biographical information\\n Marital status\\n \\tDate of birth\\n Place of birth\\n Date of death\\n Place of death\\n \\n Physical description\\n Species\\n Gender\\n Height\\n Weight\\n Eye color\\n\\t\\n Appearances\\n Portrayed by\\n Appears in\\n Debut\\n ', metadata={'source': 'Character'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"documents[:5]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": [],
|
||||
"toc_visible": true
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,112 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Microsoft OneDrive\n",
|
||||
"\n",
|
||||
">[Microsoft OneDrive](https://en.wikipedia.org/wiki/OneDrive) (formerly `SkyDrive`) is a file hosting service operated by Microsoft.\n",
|
||||
"\n",
|
||||
"This notebook covers how to load documents from `OneDrive`. Currently, only docx, doc, and pdf files are supported.\n",
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
"1. Register an application with the [Microsoft identity platform](https://learn.microsoft.com/en-us/azure/active-directory/develop/quickstart-register-app) instructions.\n",
|
||||
"2. When registration finishes, the Azure portal displays the app registration's Overview pane. You see the Application (client) ID. Also called the `client ID`, this value uniquely identifies your application in the Microsoft identity platform.\n",
|
||||
"3. During the steps you will be following at **item 1**, you can set the redirect URI as `http://localhost:8000/callback`\n",
|
||||
"4. During the steps you will be following at **item 1**, generate a new password (`client_secret`) under Application Secrets section.\n",
|
||||
"5. Follow the instructions at this [document](https://learn.microsoft.com/en-us/azure/active-directory/develop/quickstart-configure-app-expose-web-apis#add-a-scope) to add the following `SCOPES` (`offline_access` and `Files.Read.All`) to your application.\n",
|
||||
"6. Visit the [Graph Explorer Playground](https://developer.microsoft.com/en-us/graph/graph-explorer) to obtain your `OneDrive ID`. The first step is to ensure you are logged in with the account associated your OneDrive account. Then you need to make a request to `https://graph.microsoft.com/v1.0/me/drive` and the response will return a payload with a field `id` that holds the ID of your OneDrive account.\n",
|
||||
"7. You need to install the o365 package using the command `pip install o365`.\n",
|
||||
"8. At the end of the steps you must have the following values: \n",
|
||||
"- `CLIENT_ID`\n",
|
||||
"- `CLIENT_SECRET`\n",
|
||||
"- `DRIVE_ID`\n",
|
||||
"\n",
|
||||
"## 🧑 Instructions for ingesting your documents from OneDrive\n",
|
||||
"\n",
|
||||
"### 🔑 Authentication\n",
|
||||
"\n",
|
||||
"By default, the `OneDriveLoader` expects that the values of `CLIENT_ID` and `CLIENT_SECRET` must be stored as environment variables named `O365_CLIENT_ID` and `O365_CLIENT_SECRET` respectively. You could pass those environment variables through a `.env` file at the root of your application or using the following command in your script.\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"os.environ['O365_CLIENT_ID'] = \"YOUR CLIENT ID\"\n",
|
||||
"os.environ['O365_CLIENT_SECRET'] = \"YOUR CLIENT SECRET\"\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"This loader uses an authentication called [*on behalf of a user*](https://learn.microsoft.com/en-us/graph/auth-v2-user?context=graph%2Fapi%2F1.0&view=graph-rest-1.0). It is a 2 step authentication with user consent. When you instantiate the loader, it will call will print a url that the user must visit to give consent to the app on the required permissions. The user must then visit this url and give consent to the application. Then the user must copy the resulting page url and paste it back on the console. The method will then return True if the login attempt was succesful.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"from langchain.document_loaders.onedrive import OneDriveLoader\n",
|
||||
"\n",
|
||||
"loader = OneDriveLoader(drive_id=\"YOUR DRIVE ID\")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Once the authentication has been done, the loader will store a token (`o365_token.txt`) at `~/.credentials/` folder. This token could be used later to authenticate without the copy/paste steps explained earlier. To use this token for authentication, you need to change the `auth_with_token` parameter to True in the instantiation of the loader.\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"from langchain.document_loaders.onedrive import OneDriveLoader\n",
|
||||
"\n",
|
||||
"loader = OneDriveLoader(drive_id=\"YOUR DRIVE ID\", auth_with_token=True)\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"### 🗂️ Documents loader\n",
|
||||
"\n",
|
||||
"#### 📑 Loading documents from a OneDrive Directory\n",
|
||||
"\n",
|
||||
"`OneDriveLoader` can load documents from a specific folder within your OneDrive. For instance, you want to load all documents that are stored at `Documents/clients` folder within your OneDrive.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"from langchain.document_loaders.onedrive import OneDriveLoader\n",
|
||||
"\n",
|
||||
"loader = OneDriveLoader(drive_id=\"YOUR DRIVE ID\", folder_path=\"Documents/clients\", auth_with_token=True)\n",
|
||||
"documents = loader.load()\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"#### 📑 Loading documents from a list of Documents IDs\n",
|
||||
"\n",
|
||||
"Another possibility is to provide a list of `object_id` for each document you want to load. For that, you will need to query the [Microsoft Graph API](https://developer.microsoft.com/en-us/graph/graph-explorer) to find all the documents ID that you are interested in. This [link](https://learn.microsoft.com/en-us/graph/api/resources/onedrive?view=graph-rest-1.0#commonly-accessed-resources) provides a list of endpoints that will be helpful to retrieve the documents ID.\n",
|
||||
"\n",
|
||||
"For instance, to retrieve information about all objects that are stored at the root of the Documents folder, you need make a request to: `https://graph.microsoft.com/v1.0/drives/{YOUR DRIVE ID}/root/children`. Once you have the list of IDs that you are interested in, then you can instantiate the loader with the following parameters.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"from langchain.document_loaders.onedrive import OneDriveLoader\n",
|
||||
"\n",
|
||||
"loader = OneDriveLoader(drive_id=\"YOUR DRIVE ID\", object_ids=[\"ID_1\", \"ID_2\"], auth_with_token=True)\n",
|
||||
"documents = loader.load()\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -5,16 +5,20 @@
|
||||
"id": "39af9ecd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PowerPoint\n",
|
||||
"# Microsoft PowerPoint\n",
|
||||
"\n",
|
||||
"This covers how to load PowerPoint documents into a document format that we can use downstream."
|
||||
">[Microsoft PowerPoint](https://en.wikipedia.org/wiki/Microsoft_PowerPoint) is a presentation program by Microsoft.\n",
|
||||
"\n",
|
||||
"This covers how to load `Microsoft PowerPoint` documents into a document format that we can use downstream."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "721c48aa",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import UnstructuredPowerPointLoader"
|
||||
@@ -24,7 +28,9 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "9d3d0e35",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = UnstructuredPowerPointLoader(\"example_data/fake-power-point.pptx\")"
|
||||
@@ -34,7 +40,9 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "06073f91",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
@@ -44,12 +52,14 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "c9adc5cb",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Adding a Bullet Slide\\n\\nFind the bullet slide layout\\n\\nUse _TextFrame.text for first bullet\\n\\nUse _TextFrame.add_paragraph() for subsequent bullets\\n\\nHere is a lot of text!\\n\\nHere is some text in a text box!', lookup_str='', metadata={'source': 'example_data/fake-power-point.pptx'}, lookup_index=0)]"
|
||||
"[Document(page_content='Adding a Bullet Slide\\n\\nFind the bullet slide layout\\n\\nUse _TextFrame.text for first bullet\\n\\nUse _TextFrame.add_paragraph() for subsequent bullets\\n\\nHere is a lot of text!\\n\\nHere is some text in a text box!', metadata={'source': 'example_data/fake-power-point.pptx'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
@@ -68,7 +78,7 @@
|
||||
"source": [
|
||||
"## Retain Elements\n",
|
||||
"\n",
|
||||
"Under the hood, Unstructured creates different \"elements\" for different chunks of text. By default we combine those together, but you can easily keep that separation by specifying `mode=\"elements\"`."
|
||||
"Under the hood, `Unstructured` creates different \"elements\" for different chunks of text. By default we combine those together, but you can easily keep that separation by specifying `mode=\"elements\"`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -137,7 +147,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
@@ -5,9 +5,11 @@
|
||||
"id": "39af9ecd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Word Documents\n",
|
||||
"# Microsoft Word\n",
|
||||
"\n",
|
||||
"This covers how to load Word documents into a document format that we can use downstream."
|
||||
">[Microsoft Word](https://www.microsoft.com/en-us/microsoft-365/word) is a word processor developed by Microsoft.\n",
|
||||
"\n",
|
||||
"This covers how to load `Word` documents into a document format that we can use downstream."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -198,7 +200,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
@@ -6,7 +6,12 @@
|
||||
"source": [
|
||||
"# Modern Treasury\n",
|
||||
"\n",
|
||||
"This notebook covers how to load data from the Modern Treasury REST API into a format that can be ingested into LangChain, along with example usage for vectorization."
|
||||
">[Modern Treasury](https://www.moderntreasury.com/) simplifies complex payment operations. It is a unified platform to power products and processes that move money.\n",
|
||||
">- Connect to banks and payment systems\n",
|
||||
">- Track transactions and balances in real-time\n",
|
||||
">- Automate payment operations for scale\n",
|
||||
"\n",
|
||||
"This notebook covers how to load data from the `Modern Treasury REST API` into a format that can be ingested into LangChain, along with example usage for vectorization."
|
||||
]
|
||||
},
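A rough usage sketch of the loader. It assumes, by analogy with our other REST loaders, that `"payment_orders"` is one of the supported resource names and that credentials are read from environment variables when not passed explicitly; both assumptions should be checked against your installed version:

```python
import os

from langchain.document_loaders.modern_treasury import ModernTreasuryLoader

# Assumed env var names; pass organization_id/api_key explicitly if different.
os.environ["MODERN_TREASURY_ORGANIZATION_ID"] = "YOUR ORGANIZATION ID"
os.environ["MODERN_TREASURY_API_KEY"] = "YOUR API KEY"

loader = ModernTreasuryLoader("payment_orders")
documents = loader.load()
```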
|
||||
{
|
||||
@@ -98,9 +103,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
@@ -5,7 +5,10 @@
|
||||
"id": "1dc7df1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Notion\n",
|
||||
"# Notion DB 1/2\n",
|
||||
"\n",
|
||||
">[Notion](https://www.notion.so/) is a collaboration platform with modified Markdown support that integrates kanban boards, tasks, wikis and databases. It is an all-in-one workspace for notetaking, knowledge and data management, and project and task management.\n",
|
||||
"\n",
|
||||
"This notebook covers how to load documents from a Notion database dump.\n",
|
||||
"\n",
|
||||
"In order to get this notion dump, follow these instructions:\n",
|
||||
@@ -74,7 +77,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -5,13 +5,15 @@
|
||||
"id": "1dc7df1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Notion DB Loader\n",
|
||||
"# Notion DB 2/2\n",
|
||||
"\n",
|
||||
"NotionDBLoader is a Python class for loading content from a Notion database. It retrieves pages from the database, reads their content, and returns a list of Document objects.\n",
|
||||
">[Notion](https://www.notion.so/) is a collaboration platform with modified Markdown support that integrates kanban boards, tasks, wikis and databases. It is an all-in-one workspace for notetaking, knowledge and data management, and project and task management.\n",
|
||||
"\n",
|
||||
"`NotionDBLoader` is a Python class for loading content from a `Notion` database. It retrieves pages from the database, reads their content, and returns a list of Document objects.\n",
|
||||
"\n",
|
||||
"## Requirements\n",
|
||||
"\n",
|
||||
"- A Notion Database\n",
|
||||
"- A `Notion` Database\n",
|
||||
"- Notion Integration Token\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
@@ -28,12 +30,12 @@
|
||||
"## 2. Create a Notion Integration\n",
|
||||
"To create a Notion Integration, follow these steps:\n",
|
||||
"\n",
|
||||
"1. Visit the (Notion Developers)[https://www.notion.com/my-integrations] page and log in with your Notion account.\n",
|
||||
"1. Visit the [Notion Developers](https://www.notion.com/my-integrations) page and log in with your Notion account.\n",
|
||||
"2. Click on the \"+ New integration\" button.\n",
|
||||
"3. Give your integration a name and choose the workspace where your database is located.\n",
|
||||
"4. Select the require capabilities, this extension only need the Read content capability\n",
|
||||
"5. Click the \"Submit\" button to create the integration.\n",
|
||||
"Once the integration is created, you'll be provided with an Integration Token (API key). Copy this token and keep it safe, as you'll need it to use the NotionDBLoader.\n",
|
||||
"Once the integration is created, you'll be provided with an `Integration Token (API key)`. Copy this token and keep it safe, as you'll need it to use the NotionDBLoader.\n",
|
||||
"\n",
|
||||
"### 3. Connect the Integration to the Database\n",
|
||||
"To connect your integration to the database, follow these steps:\n",
|
||||
@@ -97,7 +99,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = NotionDBLoader(NOTION_TOKEN, DATABASE_ID)"
|
||||
"loader = NotionDBLoader(integration_token=NOTION_TOKEN, database_id=DATABASE_ID)"
|
||||
]
|
||||
},
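For context, a self-contained sketch of the instantiation above, reading the token and database ID from environment variables; the variable names are illustrative, not required by the loader:

```python
import os

from langchain.document_loaders import NotionDBLoader

# Illustrative env var names for the values gathered in the setup steps.
NOTION_TOKEN = os.environ["NOTION_TOKEN"]
DATABASE_ID = os.environ["DATABASE_ID"]

loader = NotionDBLoader(integration_token=NOTION_TOKEN, database_id=DATABASE_ID)
docs = loader.load()  # returns a list of Document objects, one per page
```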
|
||||
{
|
||||
@@ -145,7 +147,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,24 +1,29 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1dc7df1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Obsidian\n",
|
||||
"This notebook covers how to load documents from an Obsidian database.\n",
|
||||
"\n",
|
||||
"Since Obsidian is just stored on disk as a folder of Markdown files, the loader just takes a path to this directory.\n",
|
||||
">[Obsidian](https://obsidian.md/) is a powerful and extensible knowledge base\n",
|
||||
"that works on top of your local folder of plain text files.\n",
|
||||
"\n",
|
||||
"Obsidian files also sometimes contain [metadata](https://help.obsidian.md/Editing+and+formatting/Metadata) which is a YAML block at the top of the file. These values will be added to the document's metadata. (`ObsidianLoader` can also be passed a `collect_metadata=False` argument to disable this behavior.)"
|
||||
"This notebook covers how to load documents from an `Obsidian` database.\n",
|
||||
"\n",
|
||||
"Since `Obsidian` is just stored on disk as a folder of Markdown files, the loader just takes a path to this directory.\n",
|
||||
"\n",
|
||||
"`Obsidian` files also sometimes contain [metadata](https://help.obsidian.md/Editing+and+formatting/Metadata) which is a YAML block at the top of the file. These values will be added to the document's metadata. (`ObsidianLoader` can also be passed a `collect_metadata=False` argument to disable this behavior.)"
|
||||
]
|
||||
},
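A minimal sketch of the behavior described above; the vault path is a placeholder:

```python
from langchain.document_loaders import ObsidianLoader

# "my_vault" stands in for the path to your folder of Markdown files.
loader = ObsidianLoader("my_vault")
docs = loader.load()  # YAML front matter is merged into each Document's metadata

# Pass collect_metadata=False to skip parsing the YAML blocks entirely.
loader = ObsidianLoader("my_vault", collect_metadata=False)
```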
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "007c5cbf",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import ObsidianLoader"
|
||||
@@ -61,7 +66,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
76
docs/modules/indexes/document_loaders/examples/odt.ipynb
Normal file
@@ -0,0 +1,76 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "22a849cc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Unstructured ODT Loader\n",
|
||||
"\n",
|
||||
"The `UnstructuredODTLoader` can be used to load Open Office ODT files."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e6616e3a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import UnstructuredODTLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a654e4d9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='Lorem ipsum dolor sit amet.', metadata={'source': 'example_data/fake.odt', 'filename': 'example_data/fake.odt', 'category': 'Title'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = UnstructuredODTLoader(\"example_data/fake.odt\", mode=\"elements\")\n",
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9ab94bde",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -5,9 +5,19 @@
|
||||
"id": "213a38a2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DataFrame Loader\n",
|
||||
"# Pandas DataFrame\n",
|
||||
"\n",
|
||||
"This notebook goes over how to load data from a pandas dataframe"
|
||||
"This notebook goes over how to load data from a [pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/index.html) DataFrame."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f6a7a9e4-80d6-486a-b2e3-636c568aa97c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install pandas"
|
||||
]
|
||||
},
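Before the notebook's own walkthrough, a minimal self-contained sketch; the toy DataFrame and column names are made up for illustration:

```python
import pandas as pd

from langchain.document_loaders import DataFrameLoader

df = pd.DataFrame(
    {"text": ["hello world", "goodbye"], "topic": ["greeting", "farewell"]}
)

# page_content_column selects the column used as each Document's text;
# all other columns become that Document's metadata.
loader = DataFrameLoader(df, page_content_column="text")
docs = loader.load()
```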
|
||||
{
|
||||
@@ -210,7 +220,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
@@ -7,7 +7,9 @@
|
||||
"source": [
|
||||
"# PDF\n",
|
||||
"\n",
|
||||
"This covers how to load pdfs into a document format that we can use downstream."
|
||||
">[Portable Document Format (PDF)](https://en.wikipedia.org/wiki/PDF), standardized as ISO 32000, is a file format developed by Adobe in 1992 to present documents, including text formatting and images, in a manner independent of application software, hardware, and operating systems.\n",
|
||||
"\n",
|
||||
"This covers how to load `PDF` documents into the Document format that we use downstream."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -22,9 +24,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"id": "ae93c9e9-3684-42ab-844c-cc7eef4eed11",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install pypdf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "c428b0c5",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PyPDFLoader\n",
|
||||
@@ -37,12 +53,14 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "d333cabb",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='LayoutParser : A Uni\\x0ced Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1( \\x00), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1Allen Institute for AI\\nshannons@allenai.org\\n2Brown University\\nruochen zhang@brown.edu\\n3Harvard University\\nfmelissadell,jacob carlson g@fas.harvard.edu\\n4University of Washington\\nbcgl@cs.washington.edu\\n5University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model con\\x0cgurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\ne\\x0borts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser , an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io .\\nKeywords: Document Image Analysis ·Deep Learning ·Layout Analysis\\n·Character Recognition ·Open Source library ·Toolkit.\\n1 Introduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classi\\x0ccation [ 11,arXiv:2103.15348v2 [cs.CV] 21 Jun 2021', lookup_str='', metadata={'source': 'example_data/layout-parser-paper.pdf', 'page': '0'}, lookup_index=0)"
|
||||
"Document(page_content='LayoutParser : A Uni\\x0ced Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1( \\x00), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1Allen Institute for AI\\nshannons@allenai.org\\n2Brown University\\nruochen zhang@brown.edu\\n3Harvard University\\nfmelissadell,jacob carlson g@fas.harvard.edu\\n4University of Washington\\nbcgl@cs.washington.edu\\n5University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model con\\x0cgurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\ne\\x0borts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser , an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io .\\nKeywords: Document Image Analysis ·Deep Learning ·Layout Analysis\\n·Character Recognition ·Open Source library ·Toolkit.\\n1 Introduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classi\\x0ccation [ 11,arXiv:2103.15348v2 [cs.CV] 21 Jun 2021', metadata={'source': 'example_data/layout-parser-paper.pdf', 'page': 0})"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
@@ -63,10 +81,43 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "87fa7b3a",
|
||||
"cell_type": "markdown",
|
||||
"id": "b334d071-3477-4788-8ff7-9670b47de082",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "95b1674b-ec06-43ed-8d3e-60be0e015aa0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"OpenAI API Key: ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "87fa7b3a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -76,72 +127,10 @@
|
||||
"Fig. 4: Illustration of (a) the original historical Japanese document with layout\n",
|
||||
"detection results and (b) a recreated version of the document image that achieves\n",
|
||||
"much better character recognition recall. The reorganization algorithm rearranges\n",
|
||||
"the tokens based on the their detected bounding boxes given a maximum allowed\n",
|
||||
"height.\n",
|
||||
"4LayoutParser Community Platform\n",
|
||||
"Another focus of LayoutParser is promoting the reusability of layout detection\n",
|
||||
"models and full digitization pipelines. Similar to many existing deep learning\n",
|
||||
"libraries, LayoutParser comes with a community model hub for distributing\n",
|
||||
"layout models. End-users can upload their self-trained models to the model hub,\n",
|
||||
"and these models can be loaded into a similar interface as the currently available\n",
|
||||
"LayoutParser pre-trained models. For example, the model trained on the News\n",
|
||||
"Navigator dataset [17] has been incorporated in the model hub.\n",
|
||||
"Beyond DL models, LayoutParser also promotes the sharing of entire doc-\n",
|
||||
"ument digitization pipelines. For example, sometimes the pipeline requires the\n",
|
||||
"combination of multiple DL models to achieve better accuracy. Currently, pipelines\n",
|
||||
"are mainly described in academic papers and implementations are often not pub-\n",
|
||||
"licly available. To this end, the LayoutParser community platform also enables\n",
|
||||
"the sharing of layout pipelines to promote the discussion and reuse of techniques.\n",
|
||||
"For each shared pipeline, it has a dedicated project page, with links to the source\n",
|
||||
"code, documentation, and an outline of the approaches. A discussion panel is\n",
|
||||
"provided for exchanging ideas. Combined with the core LayoutParser library,\n",
|
||||
"users can easily build reusable components based on the shared pipelines and\n",
|
||||
"apply them to solve their unique problems.\n",
|
||||
"5 Use Cases\n",
|
||||
"The core objective of LayoutParser is to make it easier to create both large-scale\n",
|
||||
"and light-weight document digitization pipelines. Large-scale document processing\n",
|
||||
"the tokens based on the their detect\n",
|
||||
"3: 4 Z. Shen et al.\n",
|
||||
"Efficient Data AnnotationC u s t o m i z e d M o d e l T r a i n i n gModel Cust omizationDI A Model HubDI A Pipeline SharingCommunity PlatformLa y out Detection ModelsDocument Images \n",
|
||||
"T h e C o r e L a y o u t P a r s e r L i b r a r yOCR ModuleSt or age & VisualizationLa y out Data Structur e\n",
|
||||
"Fig. 1: The overall architecture of LayoutParser . For an input document image,\n",
|
||||
"the core LayoutParser library provides a set of o\u000b",
|
||||
"\n",
|
||||
"-the-shelf tools for layout\n",
|
||||
"detection, OCR, visualization, and storage, backed by a carefully designed layout\n",
|
||||
"data structure. LayoutParser also supports high level customization via e\u000ecient\n",
|
||||
"layout annotation and model training functions. These improve model accuracy\n",
|
||||
"on the target samples. The community platform enables the easy sharing of DIA\n",
|
||||
"models and whole digitization pipelines to promote reusability and reproducibility.\n",
|
||||
"A collection of detailed documentation, tutorials and exemplar projects make\n",
|
||||
"LayoutParser easy to learn and use.\n",
|
||||
"AllenNLP [ 8] and transformers [ 34] have provided the community with complete\n",
|
||||
"DL-based support for developing and deploying models for general computer\n",
|
||||
"vision and natural language processing problems. LayoutParser , on the other\n",
|
||||
"hand, specializes speci\f",
|
||||
"\n",
|
||||
"cally in DIA tasks. LayoutParser is also equipped with a\n",
|
||||
"community platform inspired by established model hubs such as Torch Hub [23]\n",
|
||||
"andTensorFlow Hub [1]. It enables the sharing of pretrained models as well as\n",
|
||||
"full document processing pipelines that are unique to DIA tasks.\n",
|
||||
"There have been a variety of document data collections to facilitate the\n",
|
||||
"development of DL models. Some examples include PRImA [ 3](magazine layouts),\n",
|
||||
"PubLayNet [ 38](academic paper layouts), Table Bank [ 18](tables in academic\n",
|
||||
"papers), Newspaper Navigator Dataset [ 16,17](newspaper \f",
|
||||
"\n",
|
||||
"gure layouts) and\n",
|
||||
"HJDataset [31](historical Japanese document layouts). A spectrum of models\n",
|
||||
"trained on these datasets are currently available in the LayoutParser model zoo\n",
|
||||
"to support di\u000b",
|
||||
"\n",
|
||||
"erent use cases.\n",
|
||||
"3 The Core LayoutParser Library\n",
|
||||
"At the core of LayoutParser is an o\u000b",
|
||||
"\n",
|
||||
"-the-shelf toolkit that streamlines DL-\n",
|
||||
"based document image analysis. Five components support a simple interface\n",
|
||||
"with comprehensive functionalities: 1) The layout detection models enable using\n",
|
||||
"pre-trained or self-trained DL models for layout detection with just four lines\n",
|
||||
"of code. 2) The detected layout information is stored in carefully engineered\n"
|
||||
"T h e C o r e L a y o u t P a r s e r L i b r a r yOCR ModuleSt or age & VisualizationLa y ou\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -152,7 +141,7 @@
|
||||
"faiss_index = FAISS.from_documents(pages, OpenAIEmbeddings())\n",
|
||||
"docs = faiss_index.similarity_search(\"How will the community be engaged?\", k=2)\n",
|
||||
"for doc in docs:\n",
|
||||
" print(str(doc.metadata[\"page\"]) + \":\", doc.page_content)"
|
||||
" print(str(doc.metadata[\"page\"]) + \":\", doc.page_content[:300])"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -167,9 +156,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"id": "950eb58f",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import MathpixPDFLoader"
|
||||
@@ -381,6 +372,44 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "483720b5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "96351714",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Using PyPDFium2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "003fcc1d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PyPDFium2Loader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "46766e29",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = PyPDFium2Loader(\"example_data/layout-parser-paper.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "010d5cdd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -671,7 +700,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -6,11 +6,24 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ReadTheDocs Documentation\n",
|
||||
"This notebook covers how to load content from html that was generated as part of a Read-The-Docs build.\n",
|
||||
"\n",
|
||||
">[Read the Docs](https://readthedocs.org/) is an open-sourced free software documentation hosting platform. It generates documentation written with the `Sphinx` documentation generator.\n",
|
||||
"\n",
|
||||
"This notebook covers how to load content from HTML that was generated as part of a `Read-The-Docs` build.\n",
|
||||
"\n",
|
||||
"For an example of this in the wild, see [here](https://github.com/hwchase17/chat-langchain).\n",
|
||||
"\n",
|
||||
"This assumes that the html has already been scraped into a folder. This can be done by uncommenting and running the following command"
|
||||
"This assumes that the HTML has already been scraped into a folder. This can be done by uncommenting and running the following command"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3d153e07-8339-4cbe-8481-fc08644ba927",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install beautifulsoup4"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -25,9 +38,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 1,
|
||||
"id": "92dd950b",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import ReadTheDocsLoader"
|
||||
@@ -70,7 +85,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Reddit\n",
|
||||
"\n",
|
||||
">[Reddit (reddit)](www.reddit.com) is an American social news aggregation, content rating, and discussion website.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This loader fetches the text from the Posts of Subreddits or Reddit users, using the `praw` Python package.\n",
|
||||
"\n",
|
||||
"Make a Reddit Application from https://www.reddit.com/prefs/apps/ and initialize the loader with with your Reddit API credentials."
|
||||
"Make a [Reddit Application](https://www.reddit.com/prefs/apps/) and initialize the loader with with your Reddit API credentials."
|
||||
]
|
||||
},
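A hedged sketch of what initialization can look like; the parameter values are placeholders, and the exact `RedditPostsLoader` signature should be checked against your installed version:

```python
from langchain.document_loaders import RedditPostsLoader

# Credentials come from your Reddit app at https://www.reddit.com/prefs/apps/.
loader = RedditPostsLoader(
    client_id="YOUR CLIENT ID",
    client_secret="YOUR CLIENT SECRET",
    user_agent="extractor by u/your_username",
    categories=["new", "hot"],     # categories of posts to fetch
    mode="subreddit",              # or "username"
    search_queries=["investing"],  # subreddits (or usernames) to load from
    number_posts=10,
)
documents = loader.load()
```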
|
||||
{
|
||||
@@ -89,7 +91,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "env1",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -103,10 +105,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.13"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
@@ -1,11 +1,15 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1dc7df1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Roam\n",
|
||||
"\n",
|
||||
">[ROAM](https://roamresearch.com/) is a note-taking tool for networked thought, designed to create a personal knowledge base.\n",
|
||||
"\n",
|
||||
"This notebook covers how to load documents from a Roam database. This takes a lot of inspiration from the example repo [here](https://github.com/JimmyLv/roam-qa).\n",
|
||||
"\n",
|
||||
"## 🧑 Instructions for ingesting your own dataset\n",
|
||||
@@ -40,7 +44,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = ObsidianLoader(\"Roam_DB\")"
|
||||
"loader = RoamLoader(\"Roam_DB\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -70,7 +74,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -4,11 +4,11 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Sitemap Loader\n",
|
||||
"# Sitemap\n",
|
||||
"\n",
|
||||
"Extends from the [WebBaseLoader](), this will load a sitemap from a given URL, and then scrape and load all the pages in the sitemap, returning each page as a document.\n",
|
||||
"Extends from the `WebBaseLoader`, `SitemapLoader` loads a sitemap from a given URL, and then scrape and load all pages in the sitemap, returning each page as a Document.\n",
|
||||
"\n",
|
||||
"The scraping is done concurrently, using `WebBaseLoader`. There are reasonable limits to concurrent requests, defaulting to 2 per second. If you aren't concerned about being a good citizen, or you control the server you are scraping and don't care about load, you can change the `requests_per_second` parameter to increase the max concurrent requests. Note, while this will speed up the scraping process, but may cause the server to block you. Be careful!"
|
||||
"The scraping is done concurrently. There are reasonable limits to concurrent requests, defaulting to 2 per second. If you aren't concerned about being a good citizen, or you control the scrapped server, or don't care about load, you can change the `requests_per_second` parameter to increase the max concurrent requests. Note, while this will speed up the scraping process, but it may cause the server to block you. Be careful!"
|
||||
]
|
||||
},
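A short sketch of tuning the rate, using the LangChain docs sitemap as an example URL:

```python
from langchain.document_loaders.sitemap import SitemapLoader

loader = SitemapLoader("https://langchain.readthedocs.io/sitemap.xml")
# Default is 2 requests/second; raise it only if you accept the risk of
# being blocked or you control the server being scraped.
loader.requests_per_second = 4
docs = loader.load()
```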
|
||||
{
|
||||
@@ -20,10 +20,10 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: nest_asyncio in /Users/tasp/Code/projects/langchain/.venv/lib/python3.10/site-packages (1.5.6)\r\n",
|
||||
"\r\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\r\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\r\n"
|
||||
"Requirement already satisfied: nest_asyncio in /Users/tasp/Code/projects/langchain/.venv/lib/python3.10/site-packages (1.5.6)\n",
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -39,6 +39,7 @@
|
||||
"source": [
|
||||
"# fixes a bug with asyncio and jupyter\n",
|
||||
"import nest_asyncio\n",
|
||||
"\n",
|
||||
"nest_asyncio.apply()"
|
||||
]
|
||||
},
|
||||
@@ -88,7 +89,7 @@
|
||||
"source": [
|
||||
"## Filtering sitemap URLs\n",
|
||||
"\n",
|
||||
"Sitemaps can be massive files, with thousands of urls. Often you don't need every single one of them. You can filter the urls by passing a list of strings or regex patterns to the `url_filter` parameter. Only urls that match one of the patterns will be loaded."
|
||||
"Sitemaps can be massive files, with thousands of URLs. Often you don't need every single one of them. You can filter the URLs by passing a list of strings or regex patterns to the `url_filter` parameter. Only URLs that match one of the patterns will be loaded."
|
||||
]
|
||||
},
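A sketch of the filtering described above. Note one assumption: the constructor argument is spelled `filter_urls` in the loader versions we are aware of, even though the prose calls it the URL filter parameter:

```python
from langchain.document_loaders.sitemap import SitemapLoader

loader = SitemapLoader(
    "https://langchain.readthedocs.io/sitemap.xml",
    # Only URLs matching one of these strings/regex patterns are loaded.
    filter_urls=["https://langchain.readthedocs.io/en/latest/"],
)
docs = loader.load()
```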
|
||||
{
|
||||
@@ -148,9 +149,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff.