This commit is contained in:
Bagatur 2023-08-17 13:03:28 -07:00
commit 25cbcd9374
801 changed files with 33305 additions and 217039 deletions

View File

@ -1,28 +1,20 @@
<!-- Thank you for contributing to LangChain!
Replace this comment with:
Replace this entire comment with:
- Description: a description of the change,
- Issue: the issue # it fixes (if applicable),
- Dependencies: any dependencies required for this change,
- Tag maintainer: for a quicker response, tag the relevant maintainer (see below),
- Twitter handle: we announce bigger features on Twitter. If your PR gets announced and you'd like a mention, we'll gladly shout you out!
Please make sure you're PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally.
Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally.
See contribution guidelines for more information on how to write/run tests, lint, etc:
https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md
If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on network access,
2. an example notebook showing its use.
2. an example notebook showing its use. These live in the docs/extras directory.
Maintainer responsibilities:
- General / Misc / if you don't know who to tag: @baskaryan
- DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev
- Models / Prompts: @hwchase17, @baskaryan
- Memory: @hwchase17
- Agents / Tools / Toolkits: @hinthornw
- Tracing / Callbacks: @agola11
- Async: @agola11
If no one reviews your PR within a few days, feel free to @-mention the same people again.
See contribution guidelines for more information on how to write/run tests, lint, etc: https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md
If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17, @rlancemartin.
-->

View File

@ -62,6 +62,19 @@ runs:
run: |
poetry lock --check
- name: Set proper poetry.lock file
shell: bash
env:
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
run: |
if [ -f "$WORKDIR/poetry.lock" ]; then
echo 'Using working directory poetry.lock in cache key'
cp "$WORKDIR/poetry.lock" poetry-lock.cache-key
else
echo 'Using the top-level poetry.lock in cache key'
cp poetry.lock poetry-lock.cache-key
fi
- uses: actions/cache@v3
id: cache-poetry
env:
@ -71,7 +84,8 @@ runs:
~/.cache/pypoetry/virtualenvs
~/.cache/pypoetry/cache
~/.cache/pypoetry/artifacts
key: poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles('poetry.lock') }}
${{ inputs.working-directory == '' && '.' || inputs.working-directory }}/.venv
key: poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles('poetry-lock.cache-key') }}
- run: ${{ inputs.install-command }}
working-directory: ${{ inputs.working-directory }}

View File

@ -13,9 +13,6 @@ env:
jobs:
build:
defaults:
run:
working-directory: ${{ inputs.working-directory }}
runs-on: ubuntu-latest
strategy:
matrix:
@ -26,21 +23,47 @@ jobs:
- "3.11"
steps:
- uses: actions/checkout@v3
- uses: actions/cache@v3
id: cache-pip
name: Cache langchain editable pip install - ${{ matrix.python-version }}
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
with:
path: |
~/.cache/pip
key: pip-editable-langchain-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ matrix.python-version }}
- name: Install poetry
run: |
pipx install poetry==$POETRY_VERSION
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
with:
python-version: ${{ matrix.python-version }}
cache: poetry
cache-dependency-path: |
${{ inputs.working-directory == '' && '.' || inputs.working-directory }}/**/poetry.lock
- name: Install dependencies
working-directory: ${{ inputs.working-directory }}
run: |
poetry install
- name: Install langchain editable
working-directory: ${{ inputs.working-directory }}
if: ${{ inputs.working-directory != 'langchain' }}
run: |
pip install -e ../langchain
- name: Get .mypy_cache to speed up mypy
uses: actions/cache@v3
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
with:
path: |
${{ env.WORKDIR }}/.mypy_cache
key: mypy-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}
- name: Analysing the code with our lint
working-directory: ${{ inputs.working-directory }}
run: |
make lint

View File

@ -10,7 +10,7 @@ on:
test_type:
type: string
description: "Test types to run"
default: '["core", "extended"]'
default: '["core", "extended", "core-pydantic-2"]'
env:
POETRY_VERSION: "1.4.2"
@ -43,19 +43,42 @@ jobs:
if [ "${{ matrix.test_type }}" == "core" ]; then
echo "Running core tests, installing dependencies with poetry..."
poetry install
elif [ "${{ matrix.test_type }}" == "core-pydantic-2" ]; then
echo "Running core-pydantic-v2 tests, installing dependencies with poetry..."
poetry install
poetry add pydantic@2.1
else
echo "Running extended tests, installing dependencies with poetry..."
poetry install -E extended_testing
fi
- name: Install langchain editable
if: ${{ inputs.working-directory != 'langchain' }}
- name: Verify pydantic version
run: |
pip install -e ../langchain
if [ "${{ matrix.test_type }}" == "core-pydantic-2" ]; then
EXPECTED_VERSION=2
else
EXPECTED_VERSION=1
fi
echo "Checking pydantic version... Expecting ${EXPECTED_VERSION}"
# Determine the major part of pydantic version
VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
# Check that the major part of pydantic version is as expected, if not
# raise an error
if [[ "$VERSION" -ne $EXPECTED_VERSION ]]; then
echo "Error: pydantic version must be equal to ${EXPECTED_VERSION}; Found: ${VERSION}"
exit 1
fi
echo "Found pydantic version ${VERSION}, as expected"
shell: bash
- name: Run ${{matrix.test_type}} tests
run: |
if [ "${{ matrix.test_type }}" == "core" ]; then
case "${{ matrix.test_type }}" in
core | core-pydantic-2)
make test
else
;;
*)
make extended_tests
fi
;;
esac
shell: bash

View File

@ -24,4 +24,5 @@ jobs:
./.github/workflows/_test.yml
with:
working-directory: libs/langchain
test_type: '["core", "extended", "core-pydantic-2"]'
secrets: inherit

View File

@ -1,6 +1,7 @@
name: Scheduled tests
on:
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
schedule:
- cron: '0 13 * * *'
@ -9,6 +10,9 @@ env:
jobs:
build:
defaults:
run:
working-directory: libs/langchain
runs-on: ubuntu-latest
environment: Scheduled testing
strategy:
@ -26,13 +30,13 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
poetry-version: "1.4.2"
working-directory: libs/langchain
install-command: |
echo "Running scheduled tests, installing dependencies with poetry..."
poetry install -E scheduled_testing
poetry install --with=test_integration
- name: Run tests
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
make scheduled_tests
shell: bash
secrets: inherit

View File

@ -21,7 +21,7 @@ Looking for the JS/TS version? Check out [LangChain.js](https://github.com/hwcha
**Production Support:** As you move your LangChains into production, we'd love to offer more hands-on support.
Fill out [this form](https://airtable.com/appwQzlErAS2qiP0L/shrGtGaVBVAz7NcV2) to share more about what you're building, and our team will get in touch.
## 🚨Breaking Changes for select chains (SQLDatabase) on 7/28
## 🚨Breaking Changes for select chains (SQLDatabase) on 7/28/23
In an effort to make `langchain` leaner and safer, we are moving select chains to `langchain_experimental`.
This migration has already started, but we are remaining backwards compatible until 7/28.
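
For example, a minimal sketch of the migration for the SQL chain (assuming the `langchain_experimental` package is installed; confirm exact import paths against the package docs):

```python
# Old import, being phased out of the core package:
# from langchain.chains import SQLDatabaseChain

# New home in the experimental package (pip install langchain_experimental):
from langchain_experimental.sql import SQLDatabaseChain
```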

View File

@ -145,8 +145,15 @@ def _load_package_modules(
package_name = package_path.name
for file_path in package_path.rglob("*.py"):
if not file_path.name.startswith("__"):
if file_path.name.startswith("_"):
continue
relative_module_name = file_path.relative_to(package_path)
# Skip if any module part starts with an underscore
if any(part.startswith("_") for part in relative_module_name.parts):
continue
# Get the full namespace of the module
namespace = str(relative_module_name).replace(".py", "").replace("/", ".")
# Keep only the top level namespace
@ -222,7 +229,7 @@ Classes
"""
for class_ in classes:
if not class_['is_public']:
if not class_["is_public"]:
continue
if class_["kind"] == "TypedDict":

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,54 @@
# Community navigator
Hi! Thanks for being here. We're lucky to have a community of so many passionate developers building with LangChain; we have so much to teach and learn from each other. Community members contribute code, host meetups, write blog posts, amplify each other's work, become each other's customers and collaborators, and so much more.
Whether you're new to LangChain, looking to go deeper, or just want to get more exposure to the world of building with LLMs, this page can point you in the right direction.
- **🦜 Contribute to LangChain**
- **🌍 Meetups, Events, and Hackathons**
- **📣 Help Us Amplify Your Work**
- **💬 Stay in the loop**
# 🦜 Contribute to LangChain
LangChain is the product of 5,000+ contributions by 1,500+ contributors, and there is **still** so much to do together. Here are some ways to get involved:
- **[Open a pull request](https://github.com/langchain-ai/langchain/issues):** we'd appreciate all forms of contributions: new features, infrastructure improvements, better documentation, bug fixes, etc. If you have an improvement or an idea, we'd love to work on it with you.
- **[Read our contributor guidelines:](https://github.com/langchain-ai/langchain/blob/bbd22b9b761389a5e40fc45b0570e1830aabb707/.github/CONTRIBUTING.md)** We ask contributors to follow a ["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow, run a few local checks for formatting, linting, and testing before submitting, and follow certain documentation and testing conventions.
- **First time contributor?** [Try one of these PRs with the “good first issue” tag](https://github.com/langchain-ai/langchain/contribute).
- **Become an expert:** our experts help the community by answering product questions in Discord. If that's a role you'd like to play, we'd be so grateful! (And we have some special experts-only goodies/perks we can tell you more about). Send us an email to introduce yourself at hello@langchain.dev and we'll take it from there!
- **Integrate with LangChain:** if your product integrates with LangChain, or aspires to, we want to help make sure the experience is as smooth as possible for you and end users. Send us an email at hello@langchain.dev and tell us what you're working on.
- **Become an Integration Maintainer:** Partner with our team to ensure your integration stays up-to-date and talk directly with users (and answer their inquiries) in our Discord. Introduce yourself at hello@langchain.dev if you'd like to explore this role.
# 🌍 Meetups, Events, and Hackathons
One of our favorite things about working in AI is how much enthusiasm there is for building together. We want to help make that as easy and impactful for you as possible!
- **Find a meetup, hackathon, or webinar:** you can find the one for you on our [global events calendar](https://mirror-feeling-d80.notion.site/0bc81da76a184297b86ca8fc782ee9a3?v=0d80342540df465396546976a50cfb3f).
- **Submit an event to our calendar:** email us at events@langchain.dev with a link to your event page! We can also help you spread the word with our local communities.
- **Host a meetup:** If you want to bring a group of builders together, we want to help! We can publicize your event on our event calendar/Twitter, share with our local communities in Discord, send swag, or potentially hook you up with a sponsor. Email us at events@langchain.dev to tell us about your event!
- **Become a meetup sponsor:** we often hear from groups of builders that want to get together, but are blocked or limited on some dimension (space to host, budget for snacks, prizes to distribute, etc.). If you'd like to help, send us an email at events@langchain.dev and we can share more about how it works!
- **Speak at an event:** meetup hosts are always looking for great speakers, presenters, and panelists. If you'd like to do that at an event, send us an email at hello@langchain.dev with more information about yourself, what you want to talk about, and what city you're based in, and we'll try to match you with an upcoming event!
- **Tell us about your LLM community:** If you host or participate in a community that would welcome support from LangChain and/or our team, send us an email at hello@langchain.dev and let us know how we can help.
# 📣 Help Us Amplify Your Work
If you're working on something you're proud of, and think the LangChain community would benefit from knowing about it, we want to help you show it off.
- **Post about your work and mention us:** we love hanging out on Twitter to see what people in the space are talking about and working on. If you tag [@langchainai](https://twitter.com/LangChainAI), we'll almost certainly see it and can show you some love.
- **Publish something on our blog:** if you're writing about your experience building with LangChain, we'd love to post (or crosspost) it on our blog! Email hello@langchain.dev with a draft of your post, or even an idea for something you want to write about.
- **Get your product onto our [integrations hub](https://integrations.langchain.com/):** Many developers take advantage of our seamless integrations with other products, and come to our integrations hub to find out who those are. If you want to get your product up there, tell us about it (and how it works with LangChain) at hello@langchain.dev.
# 💬 Stay in the loop
Here's where our team hangs out, talks shop, spotlights cool work, and shares what we're up to. We'd love to see you there too.
- **[Twitter](https://twitter.com/LangChainAI):** we post about what we're working on and what cool things we're seeing in the space. If you tag @langchainai in your post, we'll almost certainly see it and can show you some love!
- **[Discord](https://discord.gg/6adMQxSpJS):** connect with >30k developers who are building with LangChain
- **[GitHub](https://github.com/langchain-ai/langchain):** open pull requests and contribute to discussions
- **[Subscribe to our bi-weekly Release Notes](https://6w1pwbss0py.typeform.com/to/KjZB1auB):** a twice-a-month email roundup of the coolest things going on in our orbit
- **Slack:** if you're building an application in production at your company, we'd love to get into a Slack channel together. Fill out [this form](https://airtable.com/appwQzlErAS2qiP0L/shrGtGaVBVAz7NcV2) and we'll get in touch about setting one up.

View File

@ -8,9 +8,9 @@ import DocCardList from "@theme/DocCardList";
Building applications with language models involves many moving parts. One of the most critical components is ensuring that the outcomes produced by your models are reliable and useful across a broad array of inputs, and that they work well with your application's other software components. Ensuring reliability usually boils down to some combination of application design, testing & evaluation, and runtime checks.
The guides in this section review the APIs and functionality LangChain provides to help yous better evaluate your applications. Evaluation and testing are both critical when thinking about deploying LLM applications, since production environments require repeatable and useful outcomes.
The guides in this section review the APIs and functionality LangChain provides to help you better evaluate your applications. Evaluation and testing are both critical when thinking about deploying LLM applications, since production environments require repeatable and useful outcomes.
LangChain offers various types of evaluators to help you measure performance and integrity on diverse data, and we hope to encourage the the community to create and share other useful evaluators so everyone can improve. These docs will introduce the evaluator types, how to use them, and provide some examples of their use in real-world scenarios.
LangChain offers various types of evaluators to help you measure performance and integrity on diverse data, and we hope to encourage the community to create and share other useful evaluators so everyone can improve. These docs will introduce the evaluator types, how to use them, and provide some examples of their use in real-world scenarios.
Each evaluator type in LangChain comes with ready-to-use implementations and an extensible API that allows for customization according to your unique requirements. Here are some of the types of evaluators we offer:
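
As a small illustrative sketch of the extensible API described above (the evaluator name, criterion, and inputs below are assumptions for demonstration, not a prescribed workflow), loading and running a built-in evaluator looks roughly like this:

```python
from langchain.evaluation import load_evaluator

# Load one of the built-in evaluators; "criteria" with "conciseness"
# is just an illustrative choice.
evaluator = load_evaluator("criteria", criteria="conciseness")

# Score a single prediction against the original input.
result = evaluator.evaluate_strings(
    prediction="Paris is the capital of France.",
    input="What is the capital of France?",
)
print(result)  # e.g., a dict with reasoning, a value, and a score
```

The same `evaluate_strings` pattern applies across evaluator types, which makes it easy to swap evaluators while keeping the surrounding test harness unchanged.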

View File

@ -5,7 +5,7 @@ import DocCardList from "@theme/DocCardList";
LangSmith helps you trace and evaluate your language model applications and intelligent agents to help you
move from prototype to production.
Check out the [interactive walkthrough](walkthrough) below to get started.
Check out the [interactive walkthrough](/docs/guides/langsmith/walkthrough) below to get started.
For more information, please refer to the [LangSmith documentation](https://docs.smith.langchain.com/)

View File

@ -1,9 +0,0 @@
---
sidebar_position: 0
---
# API chains
APIChain enables using LLMs to interact with APIs to retrieve relevant information. Construct the chain by providing a question relevant to the provided API documentation.
import Example from "@snippets/modules/chains/popular/api.mdx"
<Example/>

View File

@ -0,0 +1,9 @@
---
sidebar_position: 3
---
# Web Scraping
Web scraping has historically been a challenging endeavor due to the ever-changing nature of website structures, making it tedious for developers to maintain their scraping scripts. Traditional methods often rely on specific HTML tags and patterns which, when altered, can disrupt data extraction processes.
Enter the LLM-based method for parsing HTML: By leveraging the capabilities of LLMs, and especially OpenAI Functions in LangChain's extraction chain, developers can instruct the model to extract only the desired data in a specified format. This method not only streamlines the extraction process but also significantly reduces the time spent on manual debugging and script modifications. Its adaptability means that even if websites undergo significant design changes, the extraction remains consistent and robust. This level of resilience translates to reduced maintenance effort, cost savings, and higher-quality extracted data. Compared to its predecessors, the LLM-based approach wins out in the web scraping domain by transforming a historically cumbersome task into a more automated and efficient process.
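
As a rough sketch of this approach (the schema, model name, and sample text below are illustrative assumptions, not part of a specific guide), extraction with OpenAI Functions via LangChain's extraction chain might look like this:

```python
from langchain.chat_models import ChatOpenAI
from langchain.chains import create_extraction_chain

# Hypothetical schema describing the fields we want pulled out of page text.
schema = {
    "properties": {
        "product_name": {"type": "string"},
        "price": {"type": "string"},
    },
    "required": ["product_name"],
}

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
chain = create_extraction_chain(schema, llm)

# In practice this text would come from a document loader / HTML-to-text step.
page_text = "The WidgetPro 3000 is on sale today for $49.99."
print(chain.run(page_text))
# e.g., [{'product_name': 'WidgetPro 3000', 'price': '$49.99'}]
```

Because the model keys on meaning rather than exact markup, the same schema can keep working even after the underlying HTML structure changes.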

View File

@ -12,7 +12,7 @@
"@docusaurus/preset-classic": "2.4.0",
"@docusaurus/remark-plugin-npm2yarn": "^2.4.0",
"@mdx-js/react": "^1.6.22",
"@mendable/search": "^0.0.137",
"@mendable/search": "^0.0.150",
"clsx": "^1.2.1",
"json-loader": "^0.5.7",
"process": "^0.11.10",
@ -3212,9 +3212,9 @@
}
},
"node_modules/@mendable/search": {
"version": "0.0.137",
"resolved": "https://registry.npmjs.org/@mendable/search/-/search-0.0.137.tgz",
"integrity": "sha512-2J2fd5eqToK+mLzrSDA6NAr4F1kfql7QRiHpD7AUJJX0nqpvInhr/mMJKBCUSCv2z76UKCmF5wLuPSw+C90Qdg==",
"version": "0.0.150",
"resolved": "https://registry.npmjs.org/@mendable/search/-/search-0.0.150.tgz",
"integrity": "sha512-Eb5SeAWlMxzEim/8eJ/Ysn01Pyh39xlPBzRBw/5OyOBhti0HVLXk4wd1Fq2TKgJC2ppQIvhEKO98PUcj9dNDFw==",
"dependencies": {
"html-react-parser": "^4.2.0",
"posthog-js": "^1.45.1"

View File

@ -23,7 +23,7 @@
"@docusaurus/preset-classic": "2.4.0",
"@docusaurus/remark-plugin-npm2yarn": "^2.4.0",
"@mdx-js/react": "^1.6.22",
"@mendable/search": "^0.0.137",
"@mendable/search": "^0.0.150",
"clsx": "^1.2.1",
"json-loader": "^0.5.7",
"process": "^0.11.10",

View File

@ -75,6 +75,7 @@ module.exports = {
slug: "additional_resources",
},
},
'community'
],
integrations: [
{

Binary file not shown.

After

Width:  |  Height:  |  Size: 232 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 471 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 520 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 117 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 307 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 193 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 111 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 130 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 152 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 172 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 716 KiB

File diff suppressed because it is too large Load Diff

View File

@ -42,8 +42,8 @@ Below are links to video tutorials and courses on LangChain. For written guides
### [LangChain 101](https://www.youtube.com/playlist?list=PLqZXAkvF1bPNQER9mLmDbntNfSpzdDIU5) by [Greg Kamradt (Data Indy)](https://www.youtube.com/@DataIndependent)
- [What Is LangChain? - LangChain + `ChatGPT` Overview](https://youtu.be/_v_fgW2SkkQ)
- [Quickstart Guide](https://youtu.be/kYRB-vJFy38)
- [Beginner Guide To 7 Essential Concepts](https://youtu.be/2xxziIWmaSA)
- [Beginner Guide To 9 Use Cases](https://youtu.be/vGP4pQdCocw)
- [Beginner's Guide To 7 Essential Concepts](https://youtu.be/2xxziIWmaSA)
- [Beginner's Guide To 9 Use Cases](https://youtu.be/vGP4pQdCocw)
- [Agents Overview + Google Searches](https://youtu.be/Jq9Sf68ozk0)
- [`OpenAI` + `Wolfram Alpha`](https://youtu.be/UijbzCIJ99g)
- [Ask Questions On Your Custom (or Private) Files](https://youtu.be/EnT-ZTrcPrg)
@ -73,7 +73,7 @@ Below are links to video tutorials and courses on LangChain. For written guides
- [Conversations with Memory (explanation & code walkthrough)](https://youtu.be/X550Zbz_ROE)
- [Chat with `Flan20B`](https://youtu.be/VW5LBavIfY4)
- [Using `Hugging Face Models` locally (code walkthrough)](https://youtu.be/Kn7SX2Mx_Jk)
- [`PAL` : Program-aided Language Models with LangChain code](https://youtu.be/dy7-LvDu-3s)
- [`PAL`: Program-aided Language Models with LangChain code](https://youtu.be/dy7-LvDu-3s)
- [Building a Summarization System with LangChain and `GPT-3` - Part 1](https://youtu.be/LNq_2s_H01Y)
- [Building a Summarization System with LangChain and `GPT-3` - Part 2](https://youtu.be/d-yeHDLgKHw)
- [Microsoft's `Visual ChatGPT` using LangChain](https://youtu.be/7YEiEyfPF5U)
@ -85,7 +85,7 @@ Below are links to video tutorials and courses on LangChain. For written guides
- [`BabyAGI`: Discover the Power of Task-Driven Autonomous Agents!](https://youtu.be/QBcDLSE2ERA)
- [Improve your `BabyAGI` with LangChain](https://youtu.be/DRgPyOXZ-oE)
- [Master `PDF` Chat with LangChain - Your essential guide to queries on documents](https://youtu.be/ZzgUqFtxgXI)
- [Using LangChain with `DuckDuckGO` `Wikipedia` & `PythonREPL` Tools](https://youtu.be/KerHlb8nuVc)
- [Using LangChain with `DuckDuckGO`, `Wikipedia` & `PythonREPL` Tools](https://youtu.be/KerHlb8nuVc)
- [Building Custom Tools and Agents with LangChain (gpt-3.5-turbo)](https://youtu.be/biS8G8x8DdA)
- [LangChain Retrieval QA Over Multiple Files with `ChromaDB`](https://youtu.be/3yPBVii7Ct0)
- [LangChain Retrieval QA with Instructor Embeddings & `ChromaDB` for PDFs](https://youtu.be/cFCGUjc33aU)
@ -107,7 +107,7 @@ Below are links to video tutorials and courses on LangChain. For written guides
- [Working with MULTIPLE `PDF` Files in LangChain: `ChatGPT` for your Data](https://youtu.be/s5LhRdh5fu4)
- [`ChatGPT` for YOUR OWN `PDF` files with LangChain](https://youtu.be/TLf90ipMzfE)
- [Talk to YOUR DATA without OpenAI APIs: LangChain](https://youtu.be/wrD-fZvT6UI)
- [Langchain: PDF Chat App (GUI) | ChatGPT for Your PDF FILES](https://youtu.be/RIWbalZ7sTo)
- [LangChain: PDF Chat App (GUI) | ChatGPT for Your PDF FILES](https://youtu.be/RIWbalZ7sTo)
- [LangFlow: Build Chatbots without Writing Code](https://youtu.be/KJ-ux3hre4s)
- [LangChain: Giving Memory to LLMs](https://youtu.be/dxO6pzlgJiY)
- [BEST OPEN Alternative to `OPENAI's EMBEDDINGs` for Retrieval QA: LangChain](https://youtu.be/ogEalPMUCSY)

View File

@ -0,0 +1 @@
label: 'Adapters'

View File

@ -0,0 +1,323 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "700a516b",
"metadata": {},
"source": [
"# OpenAI Adapter\n",
"\n",
"A lot of people get started with OpenAI but want to explore other models. LangChain's integrations with many model providers make this easy to do so. While LangChain has it's own message and model APIs, we've also made it as easy as possible to explore other models by exposing an adapter to adapt LangChain models to the OpenAI api.\n",
"\n",
"At the moment this only deals with output and does not return other information (token counts, stop reasons, etc)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "6017f26a",
"metadata": {},
"outputs": [],
"source": [
"import openai\n",
"from langchain.adapters import openai as lc_openai"
]
},
{
"cell_type": "markdown",
"id": "b522ceda",
"metadata": {},
"source": [
"## ChatCompletion.create"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "1d22eb61",
"metadata": {},
"outputs": [],
"source": [
"messages = [{\"role\": \"user\", \"content\": \"hi\"}]"
]
},
{
"cell_type": "markdown",
"id": "d550d3ad",
"metadata": {},
"source": [
"Original OpenAI call"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "e1d27dfa",
"metadata": {},
"outputs": [],
"source": [
"result = openai.ChatCompletion.create(\n",
" messages=messages, \n",
" model=\"gpt-3.5-turbo\", \n",
" temperature=0\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "012d81ae",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'role': 'assistant', 'content': 'Hello! How can I assist you today?'}"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result[\"choices\"][0]['message'].to_dict_recursive()"
]
},
{
"cell_type": "markdown",
"id": "db5b5500",
"metadata": {},
"source": [
"LangChain OpenAI wrapper call"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "87c2d515",
"metadata": {},
"outputs": [],
"source": [
"lc_result = lc_openai.ChatCompletion.create(\n",
" messages=messages, \n",
" model=\"gpt-3.5-turbo\", \n",
" temperature=0\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "c67a5ac8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'role': 'assistant', 'content': 'Hello! How can I assist you today?'}"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lc_result[\"choices\"][0]['message']"
]
},
{
"cell_type": "markdown",
"id": "034ba845",
"metadata": {},
"source": [
"Swapping out model providers"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "7a2c011c",
"metadata": {},
"outputs": [],
"source": [
"lc_result = lc_openai.ChatCompletion.create(\n",
" messages=messages, \n",
" model=\"claude-2\", \n",
" temperature=0, \n",
" provider=\"ChatAnthropic\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "f7c94827",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'role': 'assistant', 'content': ' Hello!'}"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lc_result[\"choices\"][0]['message']"
]
},
{
"cell_type": "markdown",
"id": "cb3f181d",
"metadata": {},
"source": [
"## ChatCompletion.stream"
]
},
{
"cell_type": "markdown",
"id": "f7b8cd18",
"metadata": {},
"source": [
"Original OpenAI call"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "fd8cb1ea",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'role': 'assistant', 'content': ''}\n",
"{'content': 'Hello'}\n",
"{'content': '!'}\n",
"{'content': ' How'}\n",
"{'content': ' can'}\n",
"{'content': ' I'}\n",
"{'content': ' assist'}\n",
"{'content': ' you'}\n",
"{'content': ' today'}\n",
"{'content': '?'}\n",
"{}\n"
]
}
],
"source": [
"for c in openai.ChatCompletion.create(\n",
" messages = messages,\n",
" model=\"gpt-3.5-turbo\", \n",
" temperature=0,\n",
" stream=True\n",
"):\n",
" print(c[\"choices\"][0]['delta'].to_dict_recursive())"
]
},
{
"cell_type": "markdown",
"id": "0b2a076b",
"metadata": {},
"source": [
"LangChain OpenAI wrapper call"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "9521218c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'role': 'assistant', 'content': ''}\n",
"{'content': 'Hello'}\n",
"{'content': '!'}\n",
"{'content': ' How'}\n",
"{'content': ' can'}\n",
"{'content': ' I'}\n",
"{'content': ' assist'}\n",
"{'content': ' you'}\n",
"{'content': ' today'}\n",
"{'content': '?'}\n",
"{}\n"
]
}
],
"source": [
"for c in lc_openai.ChatCompletion.create(\n",
" messages = messages,\n",
" model=\"gpt-3.5-turbo\", \n",
" temperature=0,\n",
" stream=True\n",
"):\n",
" print(c[\"choices\"][0]['delta'])"
]
},
{
"cell_type": "markdown",
"id": "0fc39750",
"metadata": {},
"source": [
"Swapping out model providers"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "68f0214e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'role': 'assistant', 'content': ' Hello'}\n",
"{'content': '!'}\n",
"{}\n"
]
}
],
"source": [
"for c in lc_openai.ChatCompletion.create(\n",
" messages = messages,\n",
" model=\"claude-2\", \n",
" temperature=0,\n",
" stream=True,\n",
" provider=\"ChatAnthropic\",\n",
"):\n",
" print(c[\"choices\"][0]['delta'])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -1638,196 +1638,6 @@
"source": [
"moderated_chain.invoke({\"input\": \"you are stupid\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0a85ba4-f782-47b8-b16f-8b7a61d6dab7",
"metadata": {},
"outputs": [],
"source": [
"## Conversational Retrieval With Memory"
]
},
{
"cell_type": "markdown",
"id": "92c87dd8-bb6f-4f32-a30d-8f5459ce6265",
"metadata": {},
"source": [
"## Fallbacks\n",
"\n",
"With LCEL you can easily introduce fallbacks for any Runnable component, like an LLM."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1b1cb744-31fc-4261-ab25-65fe1fcad559",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='To get to the other side.', additional_kwargs={}, example=False)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"bad_llm = ChatOpenAI(model_name=\"gpt-fake\")\n",
"good_llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\")\n",
"llm = bad_llm.with_fallbacks([good_llm])\n",
"\n",
"llm.invoke(\"Why did the the chicken cross the road?\")"
]
},
{
"cell_type": "markdown",
"id": "b8cf3982-03f6-49b3-8ff5-7cd12444f19c",
"metadata": {},
"source": [
"Looking at the trace, we can see that the first model failed but the second succeeded, so we still got an output: https://smith.langchain.com/public/dfaf0bf6-d86d-43e9-b084-dd16a56df15c/r\n",
"\n",
"We can add an arbitrary sequence of fallbacks, which will be executed in order:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "31819be0-7f40-4e67-b5ab-61340027b948",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='To get to the other side.', additional_kwargs={}, example=False)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"llm = bad_llm.with_fallbacks([bad_llm, bad_llm, good_llm])\n",
"\n",
"llm.invoke(\"Why did the the chicken cross the road?\")"
]
},
{
"cell_type": "markdown",
"id": "acad6e88-8046-450e-b005-db7e50f33b80",
"metadata": {},
"source": [
"Trace: https://smith.langchain.com/public/c09efd01-3184-4369-a225-c9da8efcaf47/r\n",
"\n",
"We can continue to use our Runnable with fallbacks the same way we use any Runnable, mean we can include it in sequences:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "bab114a1-bb93-4b7e-a639-e7e00f21aebc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='To show off its incredible jumping skills! Kangaroos are truly amazing creatures.', additional_kwargs={}, example=False)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.prompts import ChatPromptTemplate\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", \"You're a nice assistant who always includes a compliment in your response\"),\n",
" (\"human\", \"Why did the {animal} cross the road\"),\n",
" ]\n",
")\n",
"chain = prompt | llm\n",
"chain.invoke({\"animal\": \"kangaroo\"})"
]
},
{
"cell_type": "markdown",
"id": "58340afa-8187-4ffe-9bd2-7912fb733a15",
"metadata": {},
"source": [
"Trace: https://smith.langchain.com/public/ba03895f-f8bd-4c70-81b7-8b930353eabd/r\n",
"\n",
"Note, since every sequence of Runnables is itself a Runnable, we can create fallbacks for whole Sequences. We can also continue using the full interface, including asynchronous calls, batched calls, and streams:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "45aa3170-b2e6-430d-887b-bd879048060a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[\"\\n\\nAnswer: The rabbit crossed the road to get to the other side. That's quite clever of him!\",\n",
" '\\n\\nAnswer: The turtle crossed the road to get to the other side. You must be pretty clever to come up with that riddle!']"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.llms import OpenAI\n",
"from langchain.prompts import PromptTemplate\n",
"\n",
"chat_prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", \"You're a nice assistant who always includes a compliment in your response\"),\n",
" (\"human\", \"Why did the {animal} cross the road\"),\n",
" ]\n",
")\n",
"chat_model = ChatOpenAI(model_name=\"gpt-fake\")\n",
"\n",
"prompt_template = \"\"\"Instructions: You should always include a compliment in your response.\n",
"\n",
"Question: Why did the {animal} cross the road?\"\"\"\n",
"prompt = PromptTemplate.from_template(prompt_template)\n",
"llm = OpenAI()\n",
"\n",
"bad_chain = chat_prompt | chat_model\n",
"good_chain = prompt | llm\n",
"chain = bad_chain.with_fallbacks([good_chain])\n",
"await chain.abatch([{\"animal\": \"rabbit\"}, {\"animal\": \"turtle\"}])"
]
},
{
"cell_type": "markdown",
"id": "af6731c6-0c73-4b1d-a433-6e8f6ecce2bb",
"metadata": {},
"source": [
"Traces: \n",
"1. https://smith.langchain.com/public/ccd73236-9ae5-48a6-94b5-41210be18a46/r\n",
"2. https://smith.langchain.com/public/f43f608e-075c-45c7-bf73-b64e4d3f3082/r"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d2fe1fe-506b-4ee5-8056-8b9df801765f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@ -1846,7 +1656,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.1"
}
},
"nbformat": 4,

View File

@ -41,7 +41,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 1,
"id": "466b65b3",
"metadata": {},
"outputs": [],
@ -256,6 +256,131 @@
"source": [
"await chain.abatch([{\"topic\": \"bears\"}])"
]
},
{
"cell_type": "markdown",
"id": "0a1c409d",
"metadata": {},
"source": [
"## Parallelism\n",
"\n",
"Let's take a look at how LangChain Expression Language support parralel requests as much as possible. For example, when using a RunnableMapping (often written as a dictionary) it executes each element in parralel."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "e3014c7a",
"metadata": {},
"outputs": [],
"source": [
"from langchain.schema.runnable import RunnableMap\n",
"chain1 = ChatPromptTemplate.from_template(\"tell me a joke about {topic}\") | model\n",
"chain2 = ChatPromptTemplate.from_template(\"write a short (2 line) poem about {topic}\") | model\n",
"combined = RunnableMap({\n",
" \"joke\": chain1,\n",
" \"poem\": chain2,\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "08044c0a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 31.7 ms, sys: 8.59 ms, total: 40.3 ms\n",
"Wall time: 1.05 s\n"
]
},
{
"data": {
"text/plain": [
"AIMessage(content=\"Why don't bears like fast food?\\n\\nBecause they can't catch it!\", additional_kwargs={}, example=False)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"chain1.invoke({\"topic\": \"bears\"})"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "22c56804",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 42.9 ms, sys: 10.2 ms, total: 53 ms\n",
"Wall time: 1.93 s\n"
]
},
{
"data": {
"text/plain": [
"AIMessage(content=\"In forest's embrace, bears roam free,\\nSilent strength, nature's majesty.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"chain2.invoke({\"topic\": \"bears\"})"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "4fff4cbb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 96.3 ms, sys: 20.4 ms, total: 117 ms\n",
"Wall time: 1.1 s\n"
]
},
{
"data": {
"text/plain": [
"{'joke': AIMessage(content=\"Why don't bears wear socks?\\n\\nBecause they have bear feet!\", additional_kwargs={}, example=False),\n",
" 'poem': AIMessage(content=\"In forest's embrace,\\nMajestic bears leave their trace.\", additional_kwargs={}, example=False)}"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"combined.invoke({\"topic\": \"bears\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fab75d1d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@ -274,7 +399,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.1"
}
},
"nbformat": 4,

View File

@ -0,0 +1,430 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "19c9cbd6",
"metadata": {},
"source": [
"# Fallbacks\n",
"\n",
"When working with language models, you may often encounter issues from the underlying APIs, whether these be rate limiting or downtime. Therefore, as you go to move your LLM applications into production it becomes more and more important to safe guard against these. That's why we've introduced the concept of fallbacks.\n",
"\n",
"Crucially, fallbacks can be applied not only on the LLM level but on the whole runnable level. This is important because often times different models require different prompts. So if your call to OpenAI fails, you don't just want to send the same prompt to Anthropic - you probably want want to use a different prompt template and send a different version there."
]
},
{
"cell_type": "markdown",
"id": "a6bb9ba9",
"metadata": {},
"source": [
"## Handling LLM API Errors\n",
"\n",
"This is maybe the most common use case for fallbacks. A request to an LLM API can fail for a variety of reasons - the API could be down, you could have hit rate limits, any number of things. Therefor, using fallbacks can help protect against these types of things.\n",
"\n",
"IMPORTANT: By default, a lot of the LLM wrappers catch errors and retry. You will most likely want to turn those off when working with fallbacks. Otherwise the first wrapper will keep on retying and not failing."
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "d3e893bf",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI, ChatAnthropic"
]
},
{
"cell_type": "markdown",
"id": "4847c82d",
"metadata": {},
"source": [
"First, let's mock out what happens if we hit a RateLimitError from OpenAI"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "dfdd8bf5",
"metadata": {},
"outputs": [],
"source": [
"from unittest.mock import patch\n",
"from openai.error import RateLimitError"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "e6fdffc1",
"metadata": {},
"outputs": [],
"source": [
"# Note that we set max_retries = 0 to avoid retrying on RateLimits, etc\n",
"openai_llm = ChatOpenAI(max_retries=0)\n",
"anthropic_llm = ChatAnthropic()\n",
"llm = openai_llm.with_fallbacks([anthropic_llm])"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "584461ab",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hit error\n"
]
}
],
"source": [
"# Let's use just the OpenAI LLm first, to show that we run into an error\n",
"with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n",
" try:\n",
" print(openai_llm.invoke(\"Why did the chicken cross the road?\"))\n",
" except:\n",
" print(\"Hit error\")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "4fc1e673",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"content=' I don\\'t actually know why the chicken crossed the road, but here are some possible humorous answers:\\n\\n- To get to the other side!\\n\\n- It was too chicken to just stand there. \\n\\n- It wanted a change of scenery.\\n\\n- It wanted to show the possum it could be done.\\n\\n- It was on its way to a poultry farmers\\' convention.\\n\\nThe joke plays on the double meaning of \"the other side\" - literally crossing the road to the other side, or the \"other side\" meaning the afterlife. So it\\'s an anti-joke, with a silly or unexpected pun as the answer.' additional_kwargs={} example=False\n"
]
}
],
"source": [
"# Now let's try with fallbacks to Anthropic\n",
"with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n",
" try:\n",
" print(llm.invoke(\"Why did the the chicken cross the road?\"))\n",
" except:\n",
" print(\"Hit error\")"
]
},
{
"cell_type": "markdown",
"id": "f00bea25",
"metadata": {},
"source": [
"We can use our \"LLM with Fallbacks\" as we would a normal LLM."
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "4f8eaaa0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"content=\" I don't actually know why the kangaroo crossed the road, but I can take a guess! Here are some possible reasons:\\n\\n- To get to the other side (the classic joke answer!)\\n\\n- It was trying to find some food or water \\n\\n- It was trying to find a mate during mating season\\n\\n- It was fleeing from a predator or perceived threat\\n\\n- It was disoriented and crossed accidentally \\n\\n- It was following a herd of other kangaroos who were crossing\\n\\n- It wanted a change of scenery or environment \\n\\n- It was trying to reach a new habitat or territory\\n\\nThe real reason is unknown without more context, but hopefully one of those potential explanations does the joke justice! Let me know if you have any other animal jokes I can try to decipher.\" additional_kwargs={} example=False\n"
]
}
],
"source": [
"from langchain.prompts import ChatPromptTemplate\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", \"You're a nice assistant who always includes a compliment in your response\"),\n",
" (\"human\", \"Why did the {animal} cross the road\"),\n",
" ]\n",
")\n",
"chain = prompt | llm\n",
"with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n",
" try:\n",
" print(chain.invoke({\"animal\": \"kangaroo\"}))\n",
" except:\n",
" print(\"Hit error\")"
]
},
{
"cell_type": "markdown",
"id": "8d62241b",
"metadata": {},
"source": [
"## Fallbacks for Sequences\n",
"\n",
"We can also create fallbacks for sequences, that are sequences themselves. Here we do that with two different models: ChatOpenAI and then normal OpenAI (which does not use a chat model). Because OpenAI is NOT a chat model, you likely want a different prompt."
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "6d0b8056",
"metadata": {},
"outputs": [],
"source": [
"# First let's create a chain with a ChatModel\n",
"# We add in a string output parser here so the outputs between the two are the same type\n",
"from langchain.schema.output_parser import StrOutputParser\n",
"\n",
"chat_prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", \"You're a nice assistant who always includes a compliment in your response\"),\n",
" (\"human\", \"Why did the {animal} cross the road\"),\n",
" ]\n",
")\n",
"# Here we're going to use a bad model name to easily create a chain that will error\n",
"chat_model = ChatOpenAI(model_name=\"gpt-fake\")\n",
"bad_chain = chat_prompt | chat_model | StrOutputParser()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "8d1fc2a5",
"metadata": {},
"outputs": [],
"source": [
"# Now lets create a chain with the normal OpenAI model\n",
"from langchain.llms import OpenAI\n",
"from langchain.prompts import PromptTemplate\n",
"\n",
"prompt_template = \"\"\"Instructions: You should always include a compliment in your response.\n",
"\n",
"Question: Why did the {animal} cross the road?\"\"\"\n",
"prompt = PromptTemplate.from_template(prompt_template)\n",
"llm = OpenAI()\n",
"good_chain = prompt | llm"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "283bfa44",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\n\\nAnswer: The turtle crossed the road to get to the other side, and I have to say he had some impressive determination.'"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# We can now create a final chain which combines the two\n",
"chain = bad_chain.with_fallbacks([good_chain])\n",
"chain.invoke({\"animal\": \"turtle\"})"
]
},
{
"cell_type": "markdown",
"id": "ec4685b4",
"metadata": {},
"source": [
"## Handling Long Inputs\n",
"\n",
"One of the big limiting factors of LLMs in their context window. Usually you can count and track the length of prompts before sending them to an LLM, but in situations where that is hard/complicated you can fallback to a model with longer context length."
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "564b84c9",
"metadata": {},
"outputs": [],
"source": [
"short_llm = ChatOpenAI()\n",
"long_llm = ChatOpenAI(model=\"gpt-3.5-turbo-16k\")\n",
"llm = short_llm.with_fallbacks([long_llm])"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "5e27a775",
"metadata": {},
"outputs": [],
"source": [
"inputs = \"What is the next number: \" + \", \".join([\"one\", \"two\"] * 3000)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "0a502731",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"This model's maximum context length is 4097 tokens. However, your messages resulted in 12012 tokens. Please reduce the length of the messages.\n"
]
}
],
"source": [
"try:\n",
" print(short_llm.invoke(inputs))\n",
"except Exception as e:\n",
" print(e)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "d91ba5d7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"content='The next number in the sequence is two.' additional_kwargs={} example=False\n"
]
}
],
"source": [
"try:\n",
" print(llm.invoke(inputs))\n",
"except Exception as e:\n",
" print(e)"
]
},
{
"cell_type": "markdown",
"id": "2a6735df",
"metadata": {},
"source": [
"## Fallback to Better Model\n",
"\n",
"Often times we ask models to output format in a specific format (like JSON). Models like GPT-3.5 can do this okay, but sometimes struggle. This naturally points to fallbacks - we can try with GPT-3.5 (faster, cheaper), but then if parsing fails we can use GPT-4."
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "867a3793",
"metadata": {},
"outputs": [],
"source": [
"from langchain.output_parsers import DatetimeOutputParser"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "b8d9959d",
"metadata": {},
"outputs": [],
"source": [
"prompt = ChatPromptTemplate.from_template(\n",
" \"what time was {event} (in %Y-%m-%dT%H:%M:%S.%fZ format - only return this value)\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 75,
"id": "98087a76",
"metadata": {},
"outputs": [],
"source": [
"# In this case we are going to do the fallbacks on the LLM + output parser level\n",
"# Because the error will get raised in the OutputParser\n",
"openai_35 = ChatOpenAI() | DatetimeOutputParser()\n",
"openai_4 = ChatOpenAI(model=\"gpt-4\")| DatetimeOutputParser()"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "17ec9e8f",
"metadata": {},
"outputs": [],
"source": [
"only_35 = prompt | openai_35 \n",
"fallback_4 = prompt | openai_35.with_fallbacks([openai_4])"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "7e536f0b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Error: Could not parse datetime string: The Super Bowl in 1994 took place on January 30th at 3:30 PM local time. Converting this to the specified format (%Y-%m-%dT%H:%M:%S.%fZ) results in: 1994-01-30T15:30:00.000Z\n"
]
}
],
"source": [
"try:\n",
" print(only_35.invoke({\"event\": \"the superbowl in 1994\"}))\n",
"except Exception as e:\n",
" print(f\"Error: {e}\")"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "01355c5e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1994-01-30 15:30:00\n"
]
}
],
"source": [
"try:\n",
" print(fallback_4.invoke({\"event\": \"the superbowl in 1994\"}))\n",
"except Exception as e:\n",
" print(f\"Error: {e}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c537f9d0",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,807 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b8982428",
"metadata": {},
"source": [
"# Private, local, open source LLMs\n",
"\n",
"## Use case\n",
"\n",
"The popularity of projects like [PrivateGPT](https://github.com/imartinez/privateGPT), [llama.cpp](https://github.com/ggerganov/llama.cpp), and [GPT4All](https://github.com/nomic-ai/gpt4all) underscore the demand to run LLMs locally (on your own device).\n",
"\n",
"This has at least two important benefits:\n",
"\n",
"1. `Privacy`: Your data is not sent to a third party, and it is not subject to the terms of service of a commercial service\n",
"2. `Cost`: There is no inference fee, which is important for token-intensive applications (e.g., [long-running simulations](https://twitter.com/RLanceMartin/status/1691097659262820352?s=20), summarization)\n",
"\n",
"## Overview\n",
"\n",
"Running an LLM locally requires a few things:\n",
"\n",
"1. `Open source LLM`: An open source LLM that can be freely modified and shared \n",
"2. `Inference`: Ability to run this LLM on your device w/ acceptable latency\n",
"\n",
"### Open Source LLMs\n",
"\n",
"Users can now gain access to a rapidly growing set of [open source LLMs](https://cameronrwolfe.substack.com/p/the-history-of-open-source-llms-better). \n",
"\n",
"These LLMs can be assessed across at least two dimentions (see figure):\n",
" \n",
"1. `Base model`: What is the base-model and how was it trained?\n",
"2. `Fine-tuning approach`: Was the base-model fine-tuned and, if so, what [set of instructions](https://cameronrwolfe.substack.com/p/beyond-llama-the-power-of-open-llms#%C2%A7alpaca-an-instruction-following-llama-model) was used?\n",
"\n",
"![Image description](/img/OSS_LLM_overview.png)\n",
"\n",
"The relative performance of these models can be assessed using several leaderboards, including:\n",
"\n",
"1. [LmSys](https://chat.lmsys.org/?arena)\n",
"2. [GPT4All](https://gpt4all.io/index.html)\n",
"3. [HuggingFace](https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard)\n",
"\n",
"### Inference\n",
"\n",
"A few frameworks for this have emerged to support inference of open source LLMs on various devices:\n",
"\n",
"1. [`llama.cpp`](https://github.com/ggerganov/llama.cpp): C++ implementation of llama inference code with [weight optimization / quantization](https://finbarr.ca/how-is-llama-cpp-possible/)\n",
"2. [`gpt4all`](https://docs.gpt4all.io/index.html): Optimized C backend for inference\n",
"3. [`Ollama`](https://ollama.ai/): Bundles model weights and environment into an app that runs on device and serves the LLM \n",
"\n",
"In general, these frameworks will do a few things:\n",
"\n",
"1. `Quantization`: Reduce the memory footprint of the raw model weights\n",
"2. `Efficient implementation for inference`: Support inference on consumer hardware (e.g., CPU or laptop GPU)\n",
"\n",
"In particular, see [this excellent post](https://finbarr.ca/how-is-llama-cpp-possible/) on the importance of quantization.\n",
"\n",
"![Image description](/img/llama-memory-weights.png)\n",
"\n",
"With less precision, we radically decrease the memory needed to store the LLM in memory.\n",
"\n",
"In addition, we can see the importance of GPU memory bandwidth [sheet](https://docs.google.com/spreadsheets/d/1OehfHHNSn66BP2h3Bxp2NJTVX97icU0GmCXF6pK23H8/edit#gid=0)!\n",
"\n",
"A Mac M2 Max is 5-6x faster than a M1 for inference due to the larger GPU memory bandwidth.\n",
"\n",
"![Image description](/img/llama_t_put.png)\n",
"\n",
"## Quickstart\n",
"\n",
"[`Ollama`](https://ollama.ai/) is one way to easily run inference on macOS.\n",
" \n",
"The instructions [here](docs/integrations/llms/ollama) provide details, which we summarize:\n",
" \n",
"* [Download and run](https://ollama.ai/download) the app\n",
"* From command line, fetch a model from this [list of options](https://github.com/jmorganca/ollama): e.g., `ollama pull llama2`\n",
"* When the app is running, all models are automatically served on `localhost:11434`\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "86178adb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' The first man on the moon was Neil Armstrong, who landed on the moon on July 20, 1969 as part of the Apollo 11 mission. obviously.'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.llms import Ollama\n",
"llm = Ollama(model=\"llama2\")\n",
"llm(\"The first man on the moon was ...\")"
]
},
{
"cell_type": "markdown",
"id": "343ab645",
"metadata": {},
"source": [
"Stream tokens as they are being generated."
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "9cd83603",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" The first man to walk on the moon was Neil Armstrong, an American astronaut who was part of the Apollo 11 mission in 1969. февруари 20, 1969, Armstrong stepped out of the lunar module Eagle and onto the moon's surface, famously declaring \"That's one small step for man, one giant leap for mankind\" as he took his first steps. He was followed by fellow astronaut Edwin \"Buzz\" Aldrin, who also walked on the moon during the mission."
]
},
{
"data": {
"text/plain": [
"' The first man to walk on the moon was Neil Armstrong, an American astronaut who was part of the Apollo 11 mission in 1969. февруари 20, 1969, Armstrong stepped out of the lunar module Eagle and onto the moon\\'s surface, famously declaring \"That\\'s one small step for man, one giant leap for mankind\" as he took his first steps. He was followed by fellow astronaut Edwin \"Buzz\" Aldrin, who also walked on the moon during the mission.'"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.callbacks.manager import CallbackManager\n",
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler \n",
"llm = Ollama(model=\"llama2\", \n",
" callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]))\n",
"llm(\"The first man on the moon was ...\")"
]
},
{
"cell_type": "markdown",
"id": "5cb27414",
"metadata": {},
"source": [
"## Environment\n",
"\n",
"Inference speed is a chllenge when running models locally (see above).\n",
"\n",
"To minimize latency, it is desiable to run models locally on GPU, which ships with many consumer laptops [e.g., Apple devices](https://www.apple.com/newsroom/2022/06/apple-unveils-m2-with-breakthrough-performance-and-capabilities/).\n",
"\n",
"And even with GPU, the available GPU memory bandwidth (as noted above) is important.\n",
"\n",
"### Running Apple silicon GPU\n",
"\n",
"`Ollama` will automatically utilize the GPU on Apple devices.\n",
" \n",
"Other frameworks require the user to set up the environment to utilize the Apple GPU.\n",
"\n",
"For example, `llama.cpp` python bindings can be configured to use the GPU via [Metal](https://developer.apple.com/metal/).\n",
"\n",
"Metal is a graphics and compute API created by Apple providing near-direct access to the GPU. \n",
"\n",
"See the [`llama.cpp`](docs/integrations/llms/llamacpp) setup [here](https://github.com/abetlen/llama-cpp-python/blob/main/docs/install/macos.md) to enable this.\n",
"\n",
"In particular, ensure that conda is using the correct virtual enviorment that you created (`miniforge3`).\n",
"\n",
"E.g., for me:\n",
"\n",
"```\n",
"conda activate /Users/rlm/miniforge3/envs/llama\n",
"```\n",
"\n",
"With the above confirmed, then:\n",
"\n",
"```\n",
"CMAKE_ARGS=\"-DLLAMA_METAL=on\" FORCE_CMAKE=1 pip install -U llama-cpp-python --no-cache-dir\n",
"```"
]
},
{
"cell_type": "markdown",
"id": "c382e79a",
"metadata": {},
"source": [
"## LLMs\n",
"\n",
"There are various ways to gain access to quantized model weights.\n",
"\n",
"1. [`HuggingFace`](https://huggingface.co/TheBloke) - Many quantized model are available for download and can be run with framework such as [`llama.cpp`](https://github.com/ggerganov/llama.cpp)\n",
"2. [`gpt4all`](https://gpt4all.io/index.html) - The model explorer offers a leaderboard of metrics and associated quantized models available for download \n",
"3. [`Ollama`](https://github.com/jmorganca/ollama) - Several models can be accessed directly via `pull`\n",
"\n",
"### Ollama\n",
"\n",
"With [Ollama](docs/integrations/llms/ollama), fetch a model via `ollama pull <model family>:<tag>`:\n",
"\n",
"* E.g., for Llama-7b: `ollama pull llama2` will download the most basic version of the model (e.g., smallest # parameters and 4 bit quantization)\n",
"* We can also specify a particular version from the [model list](https://github.com/jmorganca/ollama), e.g., `ollama pull llama2:13b`\n",
"* See the full set of parameters on the [API reference page](https://api.python.langchain.com/en/latest/llms/langchain.llms.ollama.Ollama.html)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "8ecd2f78",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' Sure! Here\\'s the answer, broken down step by step:\\n\\nThe first man on the moon was... Neil Armstrong.\\n\\nHere\\'s how I arrived at that answer:\\n\\n1. The first manned mission to land on the moon was Apollo 11.\\n2. The mission included three astronauts: Neil Armstrong, Edwin \"Buzz\" Aldrin, and Michael Collins.\\n3. Neil Armstrong was the mission commander and the first person to set foot on the moon.\\n4. On July 20, 1969, Armstrong stepped out of the lunar module Eagle and onto the moon\\'s surface, famously declaring \"That\\'s one small step for man, one giant leap for mankind.\"\\n\\nSo, the first man on the moon was Neil Armstrong!'"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.llms import Ollama\n",
"llm = Ollama(model=\"llama2:13b\")\n",
"llm(\"The first man on the moon was ... think step by step\")"
]
},
{
"cell_type": "markdown",
"id": "07c8c0d1",
"metadata": {},
"source": [
"### Llama.cpp\n",
"\n",
"Llama.cpp is compatible with a [broad set of models](https://github.com/ggerganov/llama.cpp).\n",
"\n",
"For example, below we run inference on `llama2-13b` with 4 bit quantization downloaded from [HuggingFace](https://huggingface.co/TheBloke/Llama-2-13B-GGML/tree/main).\n",
"\n",
"As noted above, see the [API reference](https://api.python.langchain.com/en/latest/llms/langchain.llms.llamacpp.LlamaCpp.html?highlight=llamacpp#langchain.llms.llamacpp.LlamaCpp) for the full set of parameters. \n",
"\n",
"From the [llama.cpp docs](https://python.langchain.com/docs/integrations/llms/llamacpp), a few are worth commenting on:\n",
"\n",
"`n_gpu_layers`: number of layers to be loaded into GPU memory\n",
"\n",
"* Value: 1\n",
"* Meaning: Only one layer of the model will be loaded into GPU memory (1 is often sufficient).\n",
"\n",
"`n_batch`: number of tokens the model should process in parallel \n",
"* Value: n_batch\n",
"* Meaning: It's recommended to choose a value between 1 and n_ctx (which in this case is set to 2048)\n",
"\n",
"`n_ctx`: Token context window .\n",
"* Value: 2048\n",
"* Meaning: The model will consider a window of 2048 tokens at a time\n",
"\n",
"`f16_kv`: whether the model should use half-precision for the key/value cache\n",
"* Value: True\n",
"* Meaning: The model will use half-precision, which can be more memory efficient; Metal only support True."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5eba38dc",
"metadata": {},
"outputs": [],
"source": [
"!pip install llama-cpp-python"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "9d5f94b5",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"objc[10142]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x2a0c4c208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2c28bc208). One of the two will be used. Which one is undefined.\n",
"llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n",
"llama_model_load_internal: format = ggjt v3 (latest)\n",
"llama_model_load_internal: n_vocab = 32000\n",
"llama_model_load_internal: n_ctx = 2048\n",
"llama_model_load_internal: n_embd = 5120\n",
"llama_model_load_internal: n_mult = 256\n",
"llama_model_load_internal: n_head = 40\n",
"llama_model_load_internal: n_layer = 40\n",
"llama_model_load_internal: n_rot = 128\n",
"llama_model_load_internal: freq_base = 10000.0\n",
"llama_model_load_internal: freq_scale = 1\n",
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
"llama_model_load_internal: n_ff = 13824\n",
"llama_model_load_internal: model size = 13B\n",
"llama_model_load_internal: ggml ctx size = 0.09 MB\n",
"llama_model_load_internal: mem required = 8953.71 MB (+ 1608.00 MB per state)\n",
"llama_new_context_with_model: kv self size = 1600.00 MB\n",
"ggml_metal_init: allocating\n",
"ggml_metal_init: using MPS\n",
"ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n",
"ggml_metal_init: loaded kernel_add 0x47774af60\n",
"ggml_metal_init: loaded kernel_mul 0x47774bc00\n",
"ggml_metal_init: loaded kernel_mul_row 0x47774c230\n",
"ggml_metal_init: loaded kernel_scale 0x47774c890\n",
"ggml_metal_init: loaded kernel_silu 0x47774cef0\n",
"ggml_metal_init: loaded kernel_relu 0x10e33e500\n",
"ggml_metal_init: loaded kernel_gelu 0x47774b2f0\n",
"ggml_metal_init: loaded kernel_soft_max 0x47771a580\n",
"ggml_metal_init: loaded kernel_diag_mask_inf 0x47774dab0\n",
"ggml_metal_init: loaded kernel_get_rows_f16 0x47774e110\n",
"ggml_metal_init: loaded kernel_get_rows_q4_0 0x47774e7d0\n",
"ggml_metal_init: loaded kernel_get_rows_q4_1 0x13efd7170\n",
"ggml_metal_init: loaded kernel_get_rows_q2_K 0x13efd73d0\n",
"ggml_metal_init: loaded kernel_get_rows_q3_K 0x13efd7630\n",
"ggml_metal_init: loaded kernel_get_rows_q4_K 0x13efd7890\n",
"ggml_metal_init: loaded kernel_get_rows_q5_K 0x4744c9740\n",
"ggml_metal_init: loaded kernel_get_rows_q6_K 0x4744ca6b0\n",
"ggml_metal_init: loaded kernel_rms_norm 0x4744cb250\n",
"ggml_metal_init: loaded kernel_norm 0x4744cb970\n",
"ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x10e33f700\n",
"ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x10e33fcd0\n",
"ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x4744cc2d0\n",
"ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x4744cc6f0\n",
"ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x4744cd6b0\n",
"ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x4744cde20\n",
"ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x10e33ff30\n",
"ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x10e340190\n",
"ggml_metal_init: loaded kernel_rope 0x10e3403f0\n",
"ggml_metal_init: loaded kernel_alibi_f32 0x10e340de0\n",
"ggml_metal_init: loaded kernel_cpy_f32_f16 0x10e3416d0\n",
"ggml_metal_init: loaded kernel_cpy_f32_f32 0x10e342080\n",
"ggml_metal_init: loaded kernel_cpy_f16_f16 0x10e342ca0\n",
"ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n",
"ggml_metal_init: hasUnifiedMemory = true\n",
"ggml_metal_init: maxTransferRate = built-in GPU\n",
"ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, ( 6986.19 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1032.00 MB, ( 8018.19 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'kv ' buffer, size = 1602.00 MB, ( 9620.19 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'scr0 ' buffer, size = 426.00 MB, (10046.19 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (10558.19 / 21845.34)\n",
"AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n"
]
}
],
"source": [
"from langchain.llms import LlamaCpp\n",
"llm = LlamaCpp(\n",
" model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n",
" n_gpu_layers=1,\n",
" n_batch=512,\n",
" n_ctx=2048,\n",
" f16_kv=True, \n",
" callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n",
" verbose=True,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "f56f5168",
"metadata": {},
"source": [
"The console log will show the the below to indicate Metal was enabled properly from steps above:\n",
"```\n",
"ggml_metal_init: allocating\n",
"ggml_metal_init: using MPS\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "7890a077",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Llama.generate: prefix-match hit\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" and use logical reasoning to figure out who the first man on the moon was.\n",
"\n",
"Here are some clues:\n",
"\n",
"1. The first man on the moon was an American.\n",
"2. He was part of the Apollo 11 mission.\n",
"3. He stepped out of the lunar module and became the first person to set foot on the moon's surface.\n",
"4. His last name is Armstrong.\n",
"\n",
"Now, let's use our reasoning skills to figure out who the first man on the moon was. Based on clue #1, we know that the first man on the moon was an American. Clue #2 tells us that he was part of the Apollo 11 mission. Clue #3 reveals that he was the first person to set foot on the moon's surface. And finally, clue #4 gives us his last name: Armstrong.\n",
"Therefore, the first man on the moon was Neil Armstrong!"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"llama_print_timings: load time = 9623.21 ms\n",
"llama_print_timings: sample time = 143.77 ms / 203 runs ( 0.71 ms per token, 1412.01 tokens per second)\n",
"llama_print_timings: prompt eval time = 485.94 ms / 7 tokens ( 69.42 ms per token, 14.40 tokens per second)\n",
"llama_print_timings: eval time = 6385.16 ms / 202 runs ( 31.61 ms per token, 31.64 tokens per second)\n",
"llama_print_timings: total time = 7279.28 ms\n"
]
},
{
"data": {
"text/plain": [
"\" and use logical reasoning to figure out who the first man on the moon was.\\n\\nHere are some clues:\\n\\n1. The first man on the moon was an American.\\n2. He was part of the Apollo 11 mission.\\n3. He stepped out of the lunar module and became the first person to set foot on the moon's surface.\\n4. His last name is Armstrong.\\n\\nNow, let's use our reasoning skills to figure out who the first man on the moon was. Based on clue #1, we know that the first man on the moon was an American. Clue #2 tells us that he was part of the Apollo 11 mission. Clue #3 reveals that he was the first person to set foot on the moon's surface. And finally, clue #4 gives us his last name: Armstrong.\\nTherefore, the first man on the moon was Neil Armstrong!\""
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"llm(\"The first man on the moon was ... Let's think step by step\")"
]
},
{
"cell_type": "markdown",
"id": "831ddf7c",
"metadata": {},
"source": [
"### GPT4All\n",
"\n",
"We can use model weights downloaded from [GPT4All](https://python.langchain.com/docs/integrations/llms/gpt4all) model explorer.\n",
"\n",
"Similar to what is shown above, we can run inference and use [the API reference](https://api.python.langchain.com/en/latest/llms/langchain.llms.gpt4all.GPT4All.html?highlight=gpt4all#langchain.llms.gpt4all.GPT4All) to set parameters of interest."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e27baf6e",
"metadata": {},
"outputs": [],
"source": [
"!pip install gpt4all"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "b55a2147",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found model file at /Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\n",
"llama_new_context_with_model: max tensor size = 87.89 MB\n",
"llama_new_context_with_model: max tensor size = 87.89 MB\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama.cpp: using Metal\n",
"llama.cpp: loading model from /Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\n",
"llama_model_load_internal: format = ggjt v3 (latest)\n",
"llama_model_load_internal: n_vocab = 32001\n",
"llama_model_load_internal: n_ctx = 2048\n",
"llama_model_load_internal: n_embd = 5120\n",
"llama_model_load_internal: n_mult = 256\n",
"llama_model_load_internal: n_head = 40\n",
"llama_model_load_internal: n_layer = 40\n",
"llama_model_load_internal: n_rot = 128\n",
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
"llama_model_load_internal: n_ff = 13824\n",
"llama_model_load_internal: n_parts = 1\n",
"llama_model_load_internal: model size = 13B\n",
"llama_model_load_internal: ggml ctx size = 0.09 MB\n",
"llama_model_load_internal: mem required = 9031.71 MB (+ 1608.00 MB per state)\n",
"llama_new_context_with_model: kv self size = 1600.00 MB\n",
"ggml_metal_init: allocating\n",
"ggml_metal_init: using MPS\n",
"ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/ggml-metal.metal'\n",
"ggml_metal_init: loaded kernel_add 0x37944d850\n",
"ggml_metal_init: loaded kernel_mul 0x37944f350\n",
"ggml_metal_init: loaded kernel_mul_row 0x37944fdd0\n",
"ggml_metal_init: loaded kernel_scale 0x3794505a0\n",
"ggml_metal_init: loaded kernel_silu 0x379450800\n",
"ggml_metal_init: loaded kernel_relu 0x379450a60\n",
"ggml_metal_init: loaded kernel_gelu 0x379450cc0\n",
"ggml_metal_init: loaded kernel_soft_max 0x379450ff0\n",
"ggml_metal_init: loaded kernel_diag_mask_inf 0x379451250\n",
"ggml_metal_init: loaded kernel_get_rows_f16 0x3794514b0\n",
"ggml_metal_init: loaded kernel_get_rows_q4_0 0x379451710\n",
"ggml_metal_init: loaded kernel_get_rows_q4_1 0x379451970\n",
"ggml_metal_init: loaded kernel_get_rows_q2_k 0x379451bd0\n",
"ggml_metal_init: loaded kernel_get_rows_q3_k 0x379451e30\n",
"ggml_metal_init: loaded kernel_get_rows_q4_k 0x379452090\n",
"ggml_metal_init: loaded kernel_get_rows_q5_k 0x3794522f0\n",
"ggml_metal_init: loaded kernel_get_rows_q6_k 0x379452550\n",
"ggml_metal_init: loaded kernel_rms_norm 0x3794527b0\n",
"ggml_metal_init: loaded kernel_norm 0x379452a10\n",
"ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x379452c70\n",
"ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x379452ed0\n",
"ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x379453130\n",
"ggml_metal_init: loaded kernel_mul_mat_q2_k_f32 0x379453390\n",
"ggml_metal_init: loaded kernel_mul_mat_q3_k_f32 0x3794535f0\n",
"ggml_metal_init: loaded kernel_mul_mat_q4_k_f32 0x379453850\n",
"ggml_metal_init: loaded kernel_mul_mat_q5_k_f32 0x379453ab0\n",
"ggml_metal_init: loaded kernel_mul_mat_q6_k_f32 0x379453d10\n",
"ggml_metal_init: loaded kernel_rope 0x379453f70\n",
"ggml_metal_init: loaded kernel_alibi_f32 0x3794541d0\n",
"ggml_metal_init: loaded kernel_cpy_f32_f16 0x379454430\n",
"ggml_metal_init: loaded kernel_cpy_f32_f32 0x379454690\n",
"ggml_metal_init: loaded kernel_cpy_f16_f16 0x3794548f0\n",
"ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n",
"ggml_metal_init: hasUnifiedMemory = true\n",
"ggml_metal_init: maxTransferRate = built-in GPU\n",
"ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, (17542.94 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1024.00 MB, (18566.94 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'kv ' buffer, size = 1602.00 MB, (20168.94 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'scr0 ' buffer, size = 512.00 MB, (20680.94 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (21192.94 / 21845.34)\n",
"ggml_metal_free: deallocating\n"
]
}
],
"source": [
"from langchain.llms import GPT4All\n",
"llm = GPT4All(model=\"/Users/rlm/Desktop/Code/gpt4all/models/nous-hermes-13b.ggmlv3.q4_0.bin\")"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "e3d4526f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\".\\n1) The United States decides to send a manned mission to the moon.2) They choose their best astronauts and train them for this specific mission.3) They build a spacecraft that can take humans to the moon, called the Lunar Module (LM).4) They also create a larger spacecraft, called the Saturn V rocket, which will launch both the LM and the Command Service Module (CSM), which will carry the astronauts into orbit.5) The mission is planned down to the smallest detail: from the trajectory of the rockets to the exact movements of the astronauts during their moon landing.6) On July 16, 1969, the Saturn V rocket launches from Kennedy Space Center in Florida, carrying the Apollo 11 mission crew into space.7) After one and a half orbits around the Earth, the LM separates from the CSM and begins its descent to the moon's surface.8) On July 20, 1969, at 2:56 pm EDT (GMT-4), Neil Armstrong becomes the first man on the moon. He speaks these\""
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"llm(\"The first man on the moon was ... Let's think step by step\")"
]
},
{
"cell_type": "markdown",
"id": "6b84e543",
"metadata": {},
"source": [
"## Prompts\n",
"\n",
"Some LLMs will benefit from specific prompts.\n",
"\n",
"For example, llama2 can use [special tokens](https://twitter.com/RLanceMartin/status/1681879318493003776?s=20).\n",
"\n",
"We can use `ConditionalPromptSelector` to set prompt based on the model type."
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "d082b10a",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n",
"llama_model_load_internal: format = ggjt v3 (latest)\n",
"llama_model_load_internal: n_vocab = 32000\n",
"llama_model_load_internal: n_ctx = 2048\n",
"llama_model_load_internal: n_embd = 5120\n",
"llama_model_load_internal: n_mult = 256\n",
"llama_model_load_internal: n_head = 40\n",
"llama_model_load_internal: n_layer = 40\n",
"llama_model_load_internal: n_rot = 128\n",
"llama_model_load_internal: freq_base = 10000.0\n",
"llama_model_load_internal: freq_scale = 1\n",
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
"llama_model_load_internal: n_ff = 13824\n",
"llama_model_load_internal: model size = 13B\n",
"llama_model_load_internal: ggml ctx size = 0.09 MB\n",
"llama_model_load_internal: mem required = 8953.71 MB (+ 1608.00 MB per state)\n",
"llama_new_context_with_model: kv self size = 1600.00 MB\n",
"ggml_metal_init: allocating\n",
"ggml_metal_init: using MPS\n",
"ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n",
"ggml_metal_init: loaded kernel_add 0x4744d09d0\n",
"ggml_metal_init: loaded kernel_mul 0x3781cb3d0\n",
"ggml_metal_init: loaded kernel_mul_row 0x37813bb60\n",
"ggml_metal_init: loaded kernel_scale 0x474481080\n",
"ggml_metal_init: loaded kernel_silu 0x4744d29f0\n",
"ggml_metal_init: loaded kernel_relu 0x3781254c0\n",
"ggml_metal_init: loaded kernel_gelu 0x47447f280\n",
"ggml_metal_init: loaded kernel_soft_max 0x4744cf470\n",
"ggml_metal_init: loaded kernel_diag_mask_inf 0x4744cf6d0\n",
"ggml_metal_init: loaded kernel_get_rows_f16 0x4744cf930\n",
"ggml_metal_init: loaded kernel_get_rows_q4_0 0x4744cfb90\n",
"ggml_metal_init: loaded kernel_get_rows_q4_1 0x4744cfdf0\n",
"ggml_metal_init: loaded kernel_get_rows_q2_K 0x4744d0050\n",
"ggml_metal_init: loaded kernel_get_rows_q3_K 0x4744ce980\n",
"ggml_metal_init: loaded kernel_get_rows_q4_K 0x4744cebe0\n",
"ggml_metal_init: loaded kernel_get_rows_q5_K 0x4744cee40\n",
"ggml_metal_init: loaded kernel_get_rows_q6_K 0x4744cf0a0\n",
"ggml_metal_init: loaded kernel_rms_norm 0x474482450\n",
"ggml_metal_init: loaded kernel_norm 0x4744826b0\n",
"ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x474482910\n",
"ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x474482b70\n",
"ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x474482dd0\n",
"ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x474483030\n",
"ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x474483290\n",
"ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x4744834f0\n",
"ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x474483750\n",
"ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x4744839b0\n",
"ggml_metal_init: loaded kernel_rope 0x474483c10\n",
"ggml_metal_init: loaded kernel_alibi_f32 0x474483e70\n",
"ggml_metal_init: loaded kernel_cpy_f32_f16 0x4744840d0\n",
"ggml_metal_init: loaded kernel_cpy_f32_f32 0x474484330\n",
"ggml_metal_init: loaded kernel_cpy_f16_f16 0x474484590\n",
"ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n",
"ggml_metal_init: hasUnifiedMemory = true\n",
"ggml_metal_init: maxTransferRate = built-in GPU\n",
"ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, ( 6986.94 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1032.00 MB, ( 8018.94 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'kv ' buffer, size = 1602.00 MB, ( 9620.94 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'scr0 ' buffer, size = 426.00 MB, (10046.94 / 21845.34)\n",
"ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (10558.94 / 21845.34)\n",
"AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n"
]
}
],
"source": [
"# Set our LLM\n",
"llm = LlamaCpp(\n",
" model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n",
" n_gpu_layers=1,\n",
" n_batch=512,\n",
" n_ctx=2048,\n",
" f16_kv=True, \n",
" callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n",
" verbose=True,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "66656084",
"metadata": {},
"source": [
"Set the associated prompt."
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "8555f5bf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"PromptTemplate(input_variables=['question'], output_parser=None, partial_variables={}, template='<<SYS>> \\n You are an assistant tasked with improving Google search results. \\n <</SYS>> \\n\\n [INST] Generate THREE Google search queries that are similar to this question. The output should be a numbered list of questions and each should have a question mark at the end: \\n\\n {question} [/INST]', template_format='f-string', validate_template=True)"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain import PromptTemplate, LLMChain\n",
"from langchain.chains.prompt_selector import ConditionalPromptSelector\n",
"\n",
"DEFAULT_LLAMA_SEARCH_PROMPT = PromptTemplate(\n",
" input_variables=[\"question\"],\n",
" template=\"\"\"<<SYS>> \\n You are an assistant tasked with improving Google search \\\n",
"results. \\n <</SYS>> \\n\\n [INST] Generate THREE Google search queries that \\\n",
"are similar to this question. The output should be a numbered list of questions \\\n",
"and each should have a question mark at the end: \\n\\n {question} [/INST]\"\"\",\n",
")\n",
"\n",
"DEFAULT_SEARCH_PROMPT = PromptTemplate(\n",
" input_variables=[\"question\"],\n",
" template=\"\"\"You are an assistant tasked with improving Google search \\\n",
"results. Generate THREE Google search queries that are similar to \\\n",
"this question. The output should be a numbered list of questions and each \\\n",
"should have a question mark at the end: {question}\"\"\",\n",
")\n",
"\n",
"QUESTION_PROMPT_SELECTOR = ConditionalPromptSelector(\n",
" default_prompt=DEFAULT_SEARCH_PROMPT,\n",
" conditionals=[\n",
" (lambda llm: isinstance(llm, LlamaCpp), DEFAULT_LLAMA_SEARCH_PROMPT)\n",
" ],\n",
" )\n",
"\n",
"prompt = QUESTION_PROMPT_SELECTOR.get_prompt(llm)\n",
"prompt"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "d0aedfd2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Sure! Here are three similar search queries with a question mark at the end:\n",
"\n",
"1. Which NBA team did LeBron James lead to a championship in the year he was drafted?\n",
"2. Who won the Grammy Awards for Best New Artist and Best Female Pop Vocal Performance in the same year that Lady Gaga was born?\n",
"3. What MLB team did Babe Ruth play for when he hit 60 home runs in a single season?"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"llama_print_timings: load time = 14943.19 ms\n",
"llama_print_timings: sample time = 72.93 ms / 101 runs ( 0.72 ms per token, 1384.87 tokens per second)\n",
"llama_print_timings: prompt eval time = 14942.95 ms / 93 tokens ( 160.68 ms per token, 6.22 tokens per second)\n",
"llama_print_timings: eval time = 3430.85 ms / 100 runs ( 34.31 ms per token, 29.15 tokens per second)\n",
"llama_print_timings: total time = 18578.26 ms\n"
]
},
{
"data": {
"text/plain": [
"' Sure! Here are three similar search queries with a question mark at the end:\\n\\n1. Which NBA team did LeBron James lead to a championship in the year he was drafted?\\n2. Who won the Grammy Awards for Best New Artist and Best Female Pop Vocal Performance in the same year that Lady Gaga was born?\\n3. What MLB team did Babe Ruth play for when he hit 60 home runs in a single season?'"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Chain\n",
"llm_chain = LLMChain(prompt=prompt,llm=llm)\n",
"question = \"What NFL team won the Super Bowl in the year that Justin Bieber was born?\"\n",
"llm_chain.run({\"question\":question})"
]
},
{
"cell_type": "markdown",
"id": "6ba66260",
"metadata": {},
"source": [
"## Use cases\n",
"\n",
"Given an `llm` created from one of the models above, you can use it for [many use cases](docs/use_cases).\n",
"\n",
"For example, here is a guide to [RAG](docs/use_cases/question_answering/how_to/local_retrieval_qa) with local LLMs.\n",
"\n",
"In general, use cases for local model can be driven by at least two factors:\n",
"\n",
"* `Privacy`: private data (e.g., journals, etc) that a user does not want to share \n",
"* `Cost`: text preprocessing (extraction/tagging), summarization, and agent simulations are token-use-intensive tasks\n",
"\n",
"There are a few approach to support specific use-cases: \n",
"\n",
"* Fine-tuning (e.g., [gpt-llm-trainer](https://github.com/mshumer/gpt-llm-trainer), [Anyscale](https://www.anyscale.com/blog/fine-tuning-llama-2-a-comprehensive-case-study-for-tailoring-models-to-unique-applications)) \n",
"* [Function-calling](https://github.com/MeetKai/functionary/tree/main) for use-cases like extraction or tagging\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,121 @@
# Pydantic Compatibility
- Pydantic v2 was released in June 2023 (https://docs.pydantic.dev/2.0/blog/pydantic-v2-final/)
- v2 contains a number of breaking changes (https://docs.pydantic.dev/2.0/migration/)
- Pydantic v2 and v1 are under the same package name, so both versions cannot be installed at the same time
## LangChain Pydantic Migration Plan
LangChain will carry out the migration to pydantic v2 in two steps:
1. 2023-08-17: LangChain will allow users to install either Pydantic V1 or V2.
* Internally LangChain will continue to [use V1](https://docs.pydantic.dev/latest/migration/#continue-using-pydantic-v1-features).
* During this time, users can pin their pydantic version to v1 to avoid breaking changes, or start a partial
migration using pydantic v2 throughout their code, but avoiding mixing v1 and v2 code for LangChain (see below).
2. 2023-08-25: LangChain will migrate internally to using V2 code.
* Users will have to upgrade to V2 as well to use LangChain.
* Users should stop using the `pydantic.v1` namespace when using LangChain.
* See the [bump-pydantic package](https://github.com/pydantic/bump-pydantic) to help with the upgrade process; a sketch of its use is shown below.
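A minimal sketch of running `bump-pydantic` over a codebase (the package path is a placeholder):
```
pip install bump-pydantic
bump-pydantic path/to/your_package
```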
## Between 2023-08-17 and 2023-08-25 releases
Users can either pin to pydantic v1 and upgrade their code in one go once LangChain has migrated to v2 internally, or start a partial migration to v2 while avoiding mixing v1 and v2 code for LangChain.
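For example, pinning with pip (a sketch; adjust to your dependency manager):
```
pip install "pydantic>=1.10,<2"
```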
Below are two examples showing how to avoid mixing pydantic v1 and v2 code, in the case of inheritance and in the case of passing objects to LangChain.
**Example 1: Extending via inheritance**
**YES**
```python
from langchain.tools.base import BaseTool
from pydantic.v1 import Field, validator

class CustomTool(BaseTool):  # BaseTool is v1 code
    x: int = Field(default=1)

    def _run(self, *args, **kwargs):
        return "hello"

    @validator('x')  # v1 code
    @classmethod
    def validate_x(cls, x: int) -> int:
        return 1

CustomTool(
    name='custom_tool',
    description="hello",
    x=1,
)
```
Mixing Pydantic v2 primitives with Pydantic v1 primitives can raise cryptic errors.
**NO**
```python
from langchain.tools.base import BaseTool
from pydantic import Field, field_validator  # pydantic v2

class CustomTool(BaseTool):  # BaseTool is v1 code
    x: int = Field(default=1)

    def _run(self, *args, **kwargs):
        return "hello"

    @field_validator('x')  # v2 code
    @classmethod
    def validate_x(cls, x: int) -> int:
        return 1

CustomTool(
    name='custom_tool',
    description="hello",
    x=1,
)
```
**Example 2: Passing objects to LangChain**
**YES**
```python
from langchain.tools.base import Tool
from pydantic.v1 import BaseModel, Field  # <-- Uses v1 namespace

class CalculatorInput(BaseModel):
    question: str = Field()

Tool.from_function(  # <-- tool uses v1 namespace
    func=lambda question: 'hello',
    name="Calculator",
    description="useful for when you need to answer questions about math",
    args_schema=CalculatorInput,
)
```
**NO**
```python
from langchain.tools.base import Tool
from pydantic import BaseModel, Field  # <-- Uses v2 namespace

class CalculatorInput(BaseModel):
    question: str = Field()

Tool.from_function(  # <-- tool uses v1 namespace
    func=lambda question: 'hello',
    name="Calculator",
    description="useful for when you need to answer questions about math",
    args_schema=CalculatorInput,
)
)
```
## After 2023-08-25 release
* Users must upgrade to v2
* Users should not pass `pydantic.v1` derived objects to LangChain or rely on `pydantic.v1` when extending functionality

View File

@ -147,7 +147,7 @@
" api_key=os.environ[\"ARGILLA_API_KEY\"],\n",
")\n",
"\n",
"dataset.push_to_argilla(\"langchain-dataset\")"
"dataset.push_to_argilla(\"langchain-dataset\");"
]
},
{

View File

@ -7,12 +7,12 @@
"source": [
"# Context\n",
"\n",
"![Context - Product Analytics for AI Chatbots](https://go.getcontext.ai/langchain.png)\n",
"![Context - User Analytics for LLM Powered Products](https://with.context.ai/langchain.png)\n",
"\n",
"[Context](https://getcontext.ai/) provides product analytics for AI chatbots.\n",
"[Context](https://context.ai/) provides user analytics for LLM powered products and features.\n",
"\n",
"Context helps you understand how users are interacting with your AI chat products.\n",
"Gain critical insights, optimise poor experiences, and minimise brand risks.\n"
"With Context, you can start understanding your users and improving their experiences in less than 30 minutes.\n",
"\n"
]
},
{
@ -55,7 +55,7 @@
"\n",
"To get your Context API token:\n",
"\n",
"1. Go to the settings page within your Context account (https://go.getcontext.ai/settings).\n",
"1. Go to the settings page within your Context account (https://with.context.ai/settings).\n",
"2. Generate a new API Token.\n",
"3. Store this token somewhere secure."
]
@ -207,7 +207,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.9.1"
},
"vscode": {
"interpreter": {

View File

@ -0,0 +1,382 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": true,
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# Label Studio\n",
"\n",
"<div>\n",
"<img src=\"https://labelstudio-pub.s3.amazonaws.com/lc/open-source-data-labeling-platform.png\" width=\"400\"/>\n",
"</div>\n",
"\n",
"Label Studio is an open-source data labeling platform that provides LangChain with flexibility when it comes to labeling data for fine-tuning large language models (LLMs). It also enables the preparation of custom training data and the collection and evaluation of responses through human feedback.\n",
"\n",
"In this guide, you will learn how to connect a LangChain pipeline to Label Studio to:\n",
"\n",
"- Aggregate all input prompts, conversations, and responses in a single LabelStudio project. This consolidates all the data in one place for easier labeling and analysis.\n",
"- Refine prompts and responses to create a dataset for supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) scenarios. The labeled data can be used to further train the LLM to improve its performance.\n",
"- Evaluate model responses through human feedback. LabelStudio provides an interface for humans to review and provide feedback on model responses, allowing evaluation and iteration."
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## Installation and setup"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"First install latest versions of Label Studio and Label Studio API client:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"!pip install -U label-studio label-studio-sdk openai"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"Next, run `label-studio` on the command line to start the local LabelStudio instance at `http://localhost:8080`. See the [Label Studio installation guide](https://labelstud.io/guide/install) for more options."
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"You'll need a token to make API calls.\n",
"\n",
"Open your LabelStudio instance in your browser, go to `Account & Settings > Access Token` and copy the key.\n",
"\n",
"Set environment variables with your LabelStudio URL, API key and OpenAI API key:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ['LABEL_STUDIO_URL'] = '<YOUR-LABEL-STUDIO-URL>' # e.g. http://localhost:8080\n",
"os.environ['LABEL_STUDIO_API_KEY'] = '<YOUR-LABEL-STUDIO-API-KEY>'\n",
"os.environ['OPENAI_API_KEY'] = '<YOUR-OPENAI-API-KEY>'"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## Collecting LLMs prompts and responses"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The data used for labeling is stored in projects within Label Studio. Every project is identified by an XML configuration that details the specifications for input and output data. \n",
"\n",
"Create a project that takes human input in text format and outputs an editable LLM response in a text area:\n",
"\n",
"```xml\n",
"<View>\n",
"<Style>\n",
" .prompt-box {\n",
" background-color: white;\n",
" border-radius: 10px;\n",
" box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1);\n",
" padding: 20px;\n",
" }\n",
"</Style>\n",
"<View className=\"root\">\n",
" <View className=\"prompt-box\">\n",
" <Text name=\"prompt\" value=\"$prompt\"/>\n",
" </View>\n",
" <TextArea name=\"response\" toName=\"prompt\"\n",
" maxSubmissions=\"1\" editable=\"true\"\n",
" required=\"true\"/>\n",
"</View>\n",
"<Header value=\"Rate the response:\"/>\n",
"<Rating name=\"rating\" toName=\"prompt\"/>\n",
"</View>\n",
"```\n",
"\n",
"1. To create a project in Label Studio, click on the \"Create\" button. \n",
"2. Enter a name for your project in the \"Project Name\" field, such as `My Project`.\n",
"3. Navigate to `Labeling Setup > Custom Template` and paste the XML configuration provided above."
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"You can collect input LLM prompts and output responses in a LabelStudio project, connecting it via `LabelStudioCallbackHandler`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"from langchain.llms import OpenAI\n",
"from langchain.callbacks import LabelStudioCallbackHandler\n",
"\n",
"llm = OpenAI(\n",
" temperature=0,\n",
" callbacks=[\n",
" LabelStudioCallbackHandler(\n",
" project_name=\"My Project\"\n",
" )]\n",
")\n",
"print(llm(\"Tell me a joke\"))"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"In the Label Studio, open `My Project`. You will see the prompts, responses, and metadata like the model name. "
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## Collecting Chat model Dialogues"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can also track and display full chat dialogues in LabelStudio, with the ability to rate and modify the last response:\n",
"\n",
"1. Open Label Studio and click on the \"Create\" button.\n",
"2. Enter a name for your project in the \"Project Name\" field, such as `New Project with Chat`.\n",
"3. Navigate to Labeling Setup > Custom Template and paste the following XML configuration:\n",
"\n",
"```xml\n",
"<View>\n",
"<View className=\"root\">\n",
" <Paragraphs name=\"dialogue\"\n",
" value=\"$prompt\"\n",
" layout=\"dialogue\"\n",
" textKey=\"content\"\n",
" nameKey=\"role\"\n",
" granularity=\"sentence\"/>\n",
" <Header value=\"Final response:\"/>\n",
" <TextArea name=\"response\" toName=\"dialogue\"\n",
" maxSubmissions=\"1\" editable=\"true\"\n",
" required=\"true\"/>\n",
"</View>\n",
"<Header value=\"Rate the response:\"/>\n",
"<Rating name=\"rating\" toName=\"dialogue\"/>\n",
"</View>\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.schema import HumanMessage, SystemMessage\n",
"from langchain.callbacks import LabelStudioCallbackHandler\n",
"\n",
"chat_llm = ChatOpenAI(callbacks=[\n",
" LabelStudioCallbackHandler(\n",
" mode=\"chat\",\n",
" project_name=\"New Project with Chat\",\n",
" )\n",
"])\n",
"llm_results = chat_llm([\n",
" SystemMessage(content=\"Always use a lot of emojis\"),\n",
" HumanMessage(content=\"Tell me a joke\")\n",
"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In Label Studio, open \"New Project with Chat\". Click on a created task to view dialog history and edit/annotate responses."
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## Custom Labeling Configuration"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"You can modify the default labeling configuration in LabelStudio to add more target labels like response sentiment, relevance, and many [other types annotator's feedback](https://labelstud.io/tags/).\n",
"\n",
"New labeling configuration can be added from UI: go to `Settings > Labeling Interface` and set up a custom configuration with additional tags like `Choices` for sentiment or `Rating` for relevance. Keep in mind that [`TextArea` tag](https://labelstud.io/tags/textarea) should be presented in any configuration to display the LLM responses.\n",
"\n",
"Alternatively, you can specify the labeling configuration on the initial call before project creation:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"ls = LabelStudioCallbackHandler(project_config='''\n",
"<View>\n",
"<Text name=\"prompt\" value=\"$prompt\"/>\n",
"<TextArea name=\"response\" toName=\"prompt\"/>\n",
"<TextArea name=\"user_feedback\" toName=\"prompt\"/>\n",
"<Rating name=\"rating\" toName=\"prompt\"/>\n",
"<Choices name=\"sentiment\" toName=\"prompt\">\n",
" <Choice value=\"Positive\"/>\n",
" <Choice value=\"Negative\"/>\n",
"</Choices>\n",
"</View>\n",
"''')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that if the project doesn't exist, it will be created with the specified labeling configuration."
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## Other parameters"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"The `LabelStudioCallbackHandler` accepts several optional parameters:\n",
"\n",
"- **api_key** - Label Studio API key. Overrides environmental variable `LABEL_STUDIO_API_KEY`.\n",
"- **url** - Label Studio URL. Overrides `LABEL_STUDIO_URL`, default `http://localhost:8080`.\n",
"- **project_id** - Existing Label Studio project ID. Overrides `LABEL_STUDIO_PROJECT_ID`. Stores data in this project.\n",
"- **project_name** - Project name if project ID not specified. Creates a new project. Default is `\"LangChain-%Y-%m-%d\"` formatted with the current date.\n",
"- **project_config** - [custom labeling configuration](#custom-labeling-configuration)\n",
"- **mode**: use this shortcut to create target configuration from scratch:\n",
" - `\"prompt\"` - Single prompt, single response. Default.\n",
" - `\"chat\"` - Multi-turn chat mode.\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "labelops",
"language": "python",
"name": "labelops"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 1
}

View File

@ -0,0 +1,88 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ERNIE-Bot Chat\n",
"\n",
"[ERNIE-Bot](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/jlil56u11) is a large language model developed by Baidu, covering a huge amount of Chinese data.\n",
"This notebook covers how to get started with ErnieBot chat models."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models import ErnieBotChat\n",
"from langchain.schema import HumanMessage"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"chat = ErnieBotChat(ernie_client_id='YOUR_CLIENT_ID', ernie_client_secret='YOUR_CLIENT_SECRET')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"or you can set `client_id` and `client_secret` in your environment variables\n",
"```bash\n",
"export ERNIE_CLIENT_ID=YOUR_CLIENT_ID\n",
"export ERNIE_CLIENT_SECRET=YOUR_CLIENT_SECRET\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content='Hello, I am an artificial intelligence language model. My purpose is to help users answer questions or provide information. What can I do for you?', additional_kwargs={}, example=False)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chat([\n",
" HumanMessage(content='hello there, who are you?')\n",
"])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,185 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "bf733a38-db84-4363-89e2-de6735c37230",
"metadata": {},
"source": [
"# 🚅 LiteLLM\n",
"\n",
"[LiteLLM](https://github.com/BerriAI/litellm) is a library that simplifies calling Anthropic, Azure, Huggingface, Replicate, etc. \n",
"\n",
"This notebook covers how to get started with using Langchain + the LiteLLM I/O library. "
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "d4a7c55d-b235-4ca4-a579-c90cc9570da9",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.chat_models import ChatLiteLLM\n",
"from langchain.prompts.chat import (\n",
" ChatPromptTemplate,\n",
" SystemMessagePromptTemplate,\n",
" AIMessagePromptTemplate,\n",
" HumanMessagePromptTemplate,\n",
")\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "70cf04e8-423a-4ff6-8b09-f11fb711c817",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"chat = ChatLiteLLM(model=\"gpt-3.5-turbo\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "8199ef8f-eb8b-4253-9ea0-6c24a013ca4c",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"messages = [\n",
" HumanMessage(\n",
" content=\"Translate this sentence from English to French. I love programming.\"\n",
" )\n",
"]\n",
"chat(messages)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "c361ab1e-8c0c-4206-9e3c-9d1424a12b9c",
"metadata": {},
"source": [
"## `ChatLiteLLM` also supports async and streaming functionality:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "93a21c5c-6ef9-4688-be60-b2e1f94842fb",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.callbacks.manager import CallbackManager\n",
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c5fac0e9-05a4-4fc1-a3b3-e5bbb24b971b",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"LLMResult(generations=[[ChatGeneration(text=\" J'aime programmer.\", generation_info=None, message=AIMessage(content=\" J'aime programmer.\", additional_kwargs={}, example=False))]], llm_output={}, run=[RunInfo(run_id=UUID('8cc8fb68-1c35-439c-96a0-695036a93652'))])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"await chat.agenerate([messages])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "025be980-e50d-4a68-93dc-c9c7b500ce34",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" J'aime la programmation."
]
},
{
"data": {
"text/plain": [
"AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chat = ChatLiteLLM(\n",
" streaming=True,\n",
" verbose=True,\n",
" callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n",
")\n",
"chat(messages)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c253883f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,309 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "62359e08-cf80-4210-a30c-f450000e65b9",
"metadata": {},
"source": [
"# ArcGIS\n",
"\n",
"This notebook demonstrates the use of the `langchain.document_loaders.ArcGISLoader` class.\n",
"\n",
"You will need to install the ArcGIS API for Python `arcgis` and, optionally, `bs4.BeautifulSoup`.\n",
"\n",
"You can use an `arcgis.gis.GIS` object for authenticated data loading, or leave it blank to access public data."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "b782cab5-0584-4e2a-9073-009fb8dc93a3",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import ArcGISLoader\n",
"\n",
"\n",
"url = \"https://maps1.vcgov.org/arcgis/rest/services/Beaches/MapServer/7\"\n",
"\n",
"loader = ArcGISLoader(url)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "aa3053cf-4127-43ea-bf56-e378b348091f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 7.86 ms, sys: 0 ns, total: 7.86 ms\n",
"Wall time: 802 ms\n"
]
}
],
"source": [
"%%time\n",
"\n",
"docs = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a2444519-9117-4feb-8bb9-8931ce286fa5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'accessed': '2023-08-15T04:30:41.689270+00:00Z',\n",
" 'name': 'Beach Ramps',\n",
" 'url': 'https://maps1.vcgov.org/arcgis/rest/services/Beaches/MapServer/7',\n",
" 'layer_description': '(Not Provided)',\n",
" 'item_description': '(Not Provided)',\n",
" 'layer_properties': {\n",
" \"currentVersion\": 10.81,\n",
" \"id\": 7,\n",
" \"name\": \"Beach Ramps\",\n",
" \"type\": \"Feature Layer\",\n",
" \"description\": \"\",\n",
" \"geometryType\": \"esriGeometryPoint\",\n",
" \"sourceSpatialReference\": {\n",
" \"wkid\": 2881,\n",
" \"latestWkid\": 2881\n",
" },\n",
" \"copyrightText\": \"\",\n",
" \"parentLayer\": null,\n",
" \"subLayers\": [],\n",
" \"minScale\": 750000,\n",
" \"maxScale\": 0,\n",
" \"drawingInfo\": {\n",
" \"renderer\": {\n",
" \"type\": \"simple\",\n",
" \"symbol\": {\n",
" \"type\": \"esriPMS\",\n",
" \"url\": \"9bb2e5ca499bb68aa3ee0d4e1ecc3849\",\n",
" \"imageData\": \"iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IB2cksfwAAAAlwSFlzAAAOxAAADsQBlSsOGwAAAJJJREFUOI3NkDEKg0AQRZ9kkSnSGBshR7DJqdJYeg7BMpcS0uQWQsqoCLExkcUJzGqT38zw2fcY1rEzbp7vjXz0EXC7gBxs1ABcG/8CYkCcDqwyLqsV+RlV0I/w7PzuJBArr1VB20H58Ls6h+xoFITkTwWpQJX7XSIBAnFwVj7MLAjJV/AC6G3QoAmK+74Lom04THTBEp/HCSc6AAAAAElFTkSuQmCC\",\n",
" \"contentType\": \"image/png\",\n",
" \"width\": 12,\n",
" \"height\": 12,\n",
" \"angle\": 0,\n",
" \"xoffset\": 0,\n",
" \"yoffset\": 0\n",
" },\n",
" \"label\": \"\",\n",
" \"description\": \"\"\n",
" },\n",
" \"transparency\": 0,\n",
" \"labelingInfo\": null\n",
" },\n",
" \"defaultVisibility\": true,\n",
" \"extent\": {\n",
" \"xmin\": -81.09480168806815,\n",
" \"ymin\": 28.858349245353473,\n",
" \"xmax\": -80.77512908572814,\n",
" \"ymax\": 29.41078388840041,\n",
" \"spatialReference\": {\n",
" \"wkid\": 4326,\n",
" \"latestWkid\": 4326\n",
" }\n",
" },\n",
" \"hasAttachments\": false,\n",
" \"htmlPopupType\": \"esriServerHTMLPopupTypeNone\",\n",
" \"displayField\": \"AccessName\",\n",
" \"typeIdField\": null,\n",
" \"subtypeFieldName\": null,\n",
" \"subtypeField\": null,\n",
" \"defaultSubtypeCode\": null,\n",
" \"fields\": [\n",
" {\n",
" \"name\": \"OBJECTID\",\n",
" \"type\": \"esriFieldTypeOID\",\n",
" \"alias\": \"OBJECTID\",\n",
" \"domain\": null\n",
" },\n",
" {\n",
" \"name\": \"Shape\",\n",
" \"type\": \"esriFieldTypeGeometry\",\n",
" \"alias\": \"Shape\",\n",
" \"domain\": null\n",
" },\n",
" {\n",
" \"name\": \"AccessName\",\n",
" \"type\": \"esriFieldTypeString\",\n",
" \"alias\": \"AccessName\",\n",
" \"length\": 40,\n",
" \"domain\": null\n",
" },\n",
" {\n",
" \"name\": \"AccessID\",\n",
" \"type\": \"esriFieldTypeString\",\n",
" \"alias\": \"AccessID\",\n",
" \"length\": 50,\n",
" \"domain\": null\n",
" },\n",
" {\n",
" \"name\": \"AccessType\",\n",
" \"type\": \"esriFieldTypeString\",\n",
" \"alias\": \"AccessType\",\n",
" \"length\": 25,\n",
" \"domain\": null\n",
" },\n",
" {\n",
" \"name\": \"GeneralLoc\",\n",
" \"type\": \"esriFieldTypeString\",\n",
" \"alias\": \"GeneralLoc\",\n",
" \"length\": 100,\n",
" \"domain\": null\n",
" },\n",
" {\n",
" \"name\": \"MilePost\",\n",
" \"type\": \"esriFieldTypeDouble\",\n",
" \"alias\": \"MilePost\",\n",
" \"domain\": null\n",
" },\n",
" {\n",
" \"name\": \"City\",\n",
" \"type\": \"esriFieldTypeString\",\n",
" \"alias\": \"City\",\n",
" \"length\": 50,\n",
" \"domain\": null\n",
" },\n",
" {\n",
" \"name\": \"AccessStatus\",\n",
" \"type\": \"esriFieldTypeString\",\n",
" \"alias\": \"AccessStatus\",\n",
" \"length\": 50,\n",
" \"domain\": null\n",
" },\n",
" {\n",
" \"name\": \"Entry_Date_Time\",\n",
" \"type\": \"esriFieldTypeDate\",\n",
" \"alias\": \"Entry_Date_Time\",\n",
" \"length\": 8,\n",
" \"domain\": null\n",
" },\n",
" {\n",
" \"name\": \"DrivingZone\",\n",
" \"type\": \"esriFieldTypeString\",\n",
" \"alias\": \"DrivingZone\",\n",
" \"length\": 50,\n",
" \"domain\": null\n",
" }\n",
" ],\n",
" \"geometryField\": {\n",
" \"name\": \"Shape\",\n",
" \"type\": \"esriFieldTypeGeometry\",\n",
" \"alias\": \"Shape\"\n",
" },\n",
" \"indexes\": null,\n",
" \"subtypes\": [],\n",
" \"relationships\": [],\n",
" \"canModifyLayer\": true,\n",
" \"canScaleSymbols\": false,\n",
" \"hasLabels\": false,\n",
" \"capabilities\": \"Map,Query,Data\",\n",
" \"maxRecordCount\": 1000,\n",
" \"supportsStatistics\": true,\n",
" \"supportsAdvancedQueries\": true,\n",
" \"supportedQueryFormats\": \"JSON, geoJSON\",\n",
" \"isDataVersioned\": false,\n",
" \"ownershipBasedAccessControlForFeatures\": {\n",
" \"allowOthersToQuery\": true\n",
" },\n",
" \"useStandardizedQueries\": true,\n",
" \"advancedQueryCapabilities\": {\n",
" \"useStandardizedQueries\": true,\n",
" \"supportsStatistics\": true,\n",
" \"supportsHavingClause\": true,\n",
" \"supportsCountDistinct\": true,\n",
" \"supportsOrderBy\": true,\n",
" \"supportsDistinct\": true,\n",
" \"supportsPagination\": true,\n",
" \"supportsTrueCurve\": true,\n",
" \"supportsReturningQueryExtent\": true,\n",
" \"supportsQueryWithDistance\": true,\n",
" \"supportsSqlExpression\": true\n",
" },\n",
" \"supportsDatumTransformation\": true,\n",
" \"dateFieldsTimeReference\": null,\n",
" \"supportsCoordinatesQuantization\": true\n",
" }}"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs[0].metadata"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "1d132b7d-5a13-4d66-98e8-785ffdf87af0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"OBJECTID\": 4, \"AccessName\": \"BEACHWAY AV\", \"AccessID\": \"NS-106\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1400 N ATLANTIC AV\", \"MilePost\": 1.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 5, \"AccessName\": \"SEABREEZE BLVD\", \"AccessID\": \"DB-051\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK N ATLANTIC AV\", \"MilePost\": 14.24, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 6, \"AccessName\": \"27TH AV\", \"AccessID\": \"NS-141\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3600 BLK S ATLANTIC AV\", \"MilePost\": 4.83, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 11, \"AccessName\": \"INTERNATIONAL SPEEDWAY BLVD\", \"AccessID\": \"DB-059\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"300 BLK S ATLANTIC AV\", \"MilePost\": 15.27, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 14, \"AccessName\": \"GRANADA BLVD\", \"AccessID\": \"OB-030\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"20 BLK OCEAN SHORE BLVD\", \"MilePost\": 10.02, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 27, \"AccessName\": \"UNIVERSITY BLVD\", \"AccessID\": \"DB-048\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK N ATLANTIC AV\", \"MilePost\": 13.74, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 38, \"AccessName\": \"BEACH ST\", \"AccessID\": \"PI-097\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"4890 BLK S ATLANTIC AV\", \"MilePost\": 25.85, \"City\": \"PONCE INLET\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 42, \"AccessName\": \"BOTEFUHR AV\", \"AccessID\": \"DBS-067\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1900 BLK S ATLANTIC AV\", \"MilePost\": 16.68, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 43, \"AccessName\": \"SILVER BEACH AV\", \"AccessID\": \"DB-064\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1000 BLK S ATLANTIC AV\", \"MilePost\": 15.98, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 45, \"AccessName\": \"MILSAP RD\", \"AccessID\": \"OB-037\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"700 BLK S ATLANTIC AV\", \"MilePost\": 11.52, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 56, \"AccessName\": \"3RD AV\", \"AccessID\": \"NS-118\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1200 BLK HILL ST\", \"MilePost\": 3.25, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 64, \"AccessName\": \"DUNLAWTON BLVD\", \"AccessID\": \"DBS-078\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3400 BLK S ATLANTIC AV\", \"MilePost\": 20.61, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 69, \"AccessName\": \"EMILIA AV\", \"AccessID\": \"DBS-082\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3790 BLK S ATLANTIC AV\", \"MilePost\": 21.38, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n",
"{\"OBJECTID\": 94, \"AccessName\": \"FLAGLER AV\", \"AccessID\": \"NS-110\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK FLAGLER AV\", \"MilePost\": 2.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 96, \"AccessName\": \"CRAWFORD RD\", \"AccessID\": \"NS-108\", \"AccessType\": \"OPEN VEHICLE RAMP - PASS\", \"GeneralLoc\": \"800 BLK N ATLANTIC AV\", \"MilePost\": 2.19, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 124, \"AccessName\": \"HARTFORD AV\", \"AccessID\": \"DB-043\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1890 BLK N ATLANTIC AV\", \"MilePost\": 12.76, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 127, \"AccessName\": \"WILLIAMS AV\", \"AccessID\": \"DB-042\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2200 BLK N ATLANTIC AV\", \"MilePost\": 12.5, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 136, \"AccessName\": \"CARDINAL DR\", \"AccessID\": \"OB-036\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"600 BLK S ATLANTIC AV\", \"MilePost\": 11.27, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 229, \"AccessName\": \"EL PORTAL ST\", \"AccessID\": \"DBS-076\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3200 BLK S ATLANTIC AV\", \"MilePost\": 20.04, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 230, \"AccessName\": \"HARVARD DR\", \"AccessID\": \"OB-038\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK S ATLANTIC AV\", \"MilePost\": 11.72, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 232, \"AccessName\": \"VAN AV\", \"AccessID\": \"DBS-075\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3100 BLK S ATLANTIC AV\", \"MilePost\": 19.6, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 234, \"AccessName\": \"ROCKEFELLER DR\", \"AccessID\": \"OB-034\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"400 BLK S ATLANTIC AV\", \"MilePost\": 10.9, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n",
"{\"OBJECTID\": 235, \"AccessName\": \"MINERVA RD\", \"AccessID\": \"DBS-069\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2300 BLK S ATLANTIC AV\", \"MilePost\": 17.52, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n"
]
}
],
"source": [
"for doc in docs:\n",
" print(doc.page_content)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,101 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "ad553e51",
"metadata": {},
"source": [
"# Async Chromium\n",
"\n",
"Chromium is one of the browsers supported by Playwright, a library used to control browser automation. \n",
"\n",
"By running `p.chromium.launch(headless=True)`, we are launching a headless instance of Chromium. \n",
"\n",
"Headless mode means that the browser is running without a graphical user interface.\n",
"\n",
"`AsyncChromiumLoader` load the page, and then we use `Html2TextTransformer` to trasnform to text."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1c3a4c19",
"metadata": {},
"outputs": [],
"source": [
"! pip install -q playwright beautifulsoup4\n",
"! playwright install"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "dd2cdea7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'<!DOCTYPE html><html lang=\"en\"><head><script src=\"https://s0.2mdn.net/instream/video/client.js\" asyn'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.document_loaders import AsyncChromiumLoader\n",
"urls = [\"https://www.wsj.com\"]\n",
"loader = AsyncChromiumLoader(urls)\n",
"docs = loader.load()\n",
"docs[0].page_content[0:100]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "013caa7e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"Skip to Main ContentSkip to SearchSkip to... Select * Top News * What's News *\\nFeatured Stories * Retirement * Life & Arts * Hip-Hop * Sports * Video *\\nEconomy * Real Estate * Sports * CMO * CIO * CFO * Risk & Compliance *\\nLogistics Report * Sustainable Business * Heard on the Street * Barrons *\\nMarketWatch * Mansion Global * Penta * Opinion * Journal Reports * Sponsored\\nOffers Explore Our Brands * WSJ * * * * * Barron's * * * * * MarketWatch * * *\\n* * IBD # The Wall Street Journal SubscribeSig\""
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.document_transformers import Html2TextTransformer\n",
"html2text = Html2TextTransformer()\n",
"docs_transformed = html2text.transform_documents(docs)\n",
"docs_transformed[0].page_content[0:500]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -9,66 +9,16 @@
"\n",
"GROBID is a machine learning library for extracting, parsing, and re-structuring raw documents.\n",
"\n",
"It is particularly good for sturctured PDFs, like academic papers.\n",
"It is designed and expected to be used to parse academic papers, where it works particularly well. Note: if the articles supplied to Grobid are large documents (e.g. dissertations) exceeding a certain number of elements, they might not be processed. \n",
"\n",
"This loader uses GROBIB to parse PDFs into `Documents` that retain metadata associated with the section of text.\n",
"This loader uses Grobid to parse PDFs into `Documents` that retain metadata associated with the section of text.\n",
"\n",
"---\n",
"The best approach is to install Grobid via docker, see https://grobid.readthedocs.io/en/latest/Grobid-docker/. \n",
"\n",
"For users on `Mac` - \n",
"(Note: additional instructions can be found [here](https://python.langchain.com/docs/extras/integrations/providers/grobid.mdx).)\n",
"\n",
"(Note: additional instructions can be found [here](https://python.langchain.com/docs/ecosystem/integrations/grobid.mdx).)\n",
"\n",
"Install Java (Apple Silicon):\n",
"```\n",
"$ arch -arm64 brew install openjdk@11\n",
"$ brew --prefix openjdk@11\n",
"/opt/homebrew/opt/openjdk@ 11\n",
"```\n",
"\n",
"In `~/.zshrc`:\n",
"```\n",
"export JAVA_HOME=/opt/homebrew/opt/openjdk@11\n",
"export PATH=$JAVA_HOME/bin:$PATH\n",
"```\n",
"\n",
"Then, in Terminal:\n",
"```\n",
"$ source ~/.zshrc\n",
"```\n",
"\n",
"Confirm install:\n",
"```\n",
"$ which java\n",
"/opt/homebrew/opt/openjdk@11/bin/java\n",
"$ java -version \n",
"openjdk version \"11.0.19\" 2023-04-18\n",
"OpenJDK Runtime Environment Homebrew (build 11.0.19+0)\n",
"OpenJDK 64-Bit Server VM Homebrew (build 11.0.19+0, mixed mode)\n",
"```\n",
"\n",
"Then, get [Grobid](https://grobid.readthedocs.io/en/latest/Install-Grobid/#getting-grobid):\n",
"```\n",
"$ curl -LO https://github.com/kermitt2/grobid/archive/0.7.3.zip\n",
"$ unzip 0.7.3.zip\n",
"```\n",
" \n",
"Build\n",
"```\n",
"$ ./gradlew clean install\n",
"```\n",
"\n",
"Then, run the server:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "2d8992fc",
"metadata": {},
"outputs": [],
"source": [
"! get_ipython().system_raw('nohup ./gradlew run > grobid.log 2>&1 &')"
"Once grobid is up-and-running you can interact as described below. \n"
]
},
{

View File

@ -0,0 +1,95 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "2ed9a4c2",
"metadata": {},
"source": [
"# Beautiful Soup\n",
"\n",
"Beautiful Soup offers fine-grained control over HTML content, enabling specific tag extraction, removal, and content cleaning. \n",
"\n",
"It's suited for cases where you want to extract specific information and clean up the HTML content according to your needs.\n",
"\n",
"For example, we can scrape text content within `<p>, <li>, <div>, and <a>` tags from the HTML content:\n",
"\n",
"* `<p>`: The paragraph tag. It defines a paragraph in HTML and is used to group together related sentences and/or phrases.\n",
" \n",
"* `<li>`: The list item tag. It is used within ordered (`<ol>`) and unordered (`<ul>`) lists to define individual items within the list.\n",
" \n",
"* `<div>`: The division tag. It is a block-level element used to group other inline or block-level elements.\n",
" \n",
"* `<a>`: The anchor tag. It is used to define hyperlinks."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "dd710e5b",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import AsyncChromiumLoader\n",
"from langchain.document_transformers import BeautifulSoupTransformer\n",
"\n",
"# Load HTML\n",
"loader = AsyncChromiumLoader([\"https://www.wsj.com\"])\n",
"html = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "052b64dd",
"metadata": {},
"outputs": [],
"source": [
"# Transform\n",
"bs_transformer = BeautifulSoupTransformer()\n",
"docs_transformed = bs_transformer.transform_documents(html,tags_to_extract=[\"p\", \"li\", \"div\", \"a\"])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "b53a5307",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Conservative legal activists are challenging Amazon, Comcast and others using many of the same tools that helped kill affirmative-action programs in colleges.1,2099 min read U.S. stock indexes fell and government-bond prices climbed, after Moodys lowered credit ratings for 10 smaller U.S. banks and said it was reviewing ratings for six larger ones. The Dow industrials dropped more than 150 points.3 min read Penn Entertainments Barstool Sportsbook app will be rebranded as ESPN Bet this fall as '"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs_transformed[0].page_content[0:500]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -14,7 +14,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 1,
"id": "60b6dbb2",
"metadata": {},
"outputs": [],
@ -42,25 +42,14 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 2,
"id": "9ca87a2e",
"metadata": {},
"outputs": [],
"source": [
"# Initialize a Fireworks LLM\n",
"os.environ['FIREWORKS_API_KEY'] = \"\" #change this to your own API KEY\n",
"llm = Fireworks(model_id=\"accounts/fireworks/models/fireworks-llama-v2-13b-chat\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "43a11ba8",
"metadata": {},
"outputs": [],
"source": [
"# Create LLM chain\n",
"llm_chain = LLMChain(prompt=prompt, llm=llm)"
"os.environ['FIREWORKS_API_KEY'] = \"<YOUR_API_KEY>\" # Change this to your own API key\n",
"llm = Fireworks(model_id=\"accounts/fireworks/models/llama-v2-13b-chat\")"
]
},
{
@ -72,16 +61,33 @@
"\n",
"You can use the LLMs to call the model for specified prompt(s). \n",
"\n",
"Current Specified Models: \n",
"* accounts/fireworks/models/fireworks-falcon-7b, accounts/fireworks/models/fireworks-falcon-40b-w8a16\n",
"* accounts/fireworks/models/fireworks-starcoder-1b-w8a16-1gpu, accounts/fireworks/models/fireworks-starcoder-3b-w8a16-1gpu, accounts/fireworks/models/fireworks-starcoder-7b-w8a16-1gpu, accounts/fireworks/models/fireworks-starcoder-16b-w8a16 \n",
"* accounts/fireworks/models/fireworks-llama-v2-13b, accounts/fireworks/models/fireworks-llama-v2-13b-chat, accounts/fireworks/models/fireworks-llama-v2-13b-w8a16, accounts/fireworks/models/fireworks-llama-v2-13b-chat-w8a16\n",
"* accounts/fireworks/models/fireworks-llama-v2-7b, accounts/fireworks/models/fireworks-llama-v2-7b-chat, accounts/fireworks/models/fireworks-llama-v2-7b-w8a16, accounts/fireworks/models/fireworks-llama-v2-7b-chat-w8a16, accounts/fireworks/models/fireworks-llama-v2-70b-chat-4gpu"
"Currently supported models: \n",
"\n",
"* Falcon\n",
" * `accounts/fireworks/models/falcon-7b`\n",
" * `accounts/fireworks/models/falcon-40b-w8a16`\n",
"* Llama 2\n",
" * `accounts/fireworks/models/llama-v2-7b`\n",
" * `accounts/fireworks/models/llama-v2-7b-w8a16`\n",
" * `accounts/fireworks/models/llama-v2-7b-chat`\n",
" * `accounts/fireworks/models/llama-v2-7b-chat-w8a16`\n",
" * `accounts/fireworks/models/llama-v2-13b`\n",
" * `accounts/fireworks/models/llama-v2-13b-w8a16`\n",
" * `accounts/fireworks/models/llama-v2-13b-chat`\n",
" * `accounts/fireworks/models/llama-v2-13b-chat-w8a16`\n",
" * `accounts/fireworks/models/llama-v2-70b-chat-4gpu`\n",
"* StarCoder\n",
" * `accounts/fireworks/models/starcoder-1b-w8a16-1gpu`\n",
" * `accounts/fireworks/models/starcoder-3b-w8a16-1gpu`\n",
" * `accounts/fireworks/models/starcoder-7b-w8a16-1gpu`\n",
" * `accounts/fireworks/models/starcoder-16b-w8a16`\n",
"\n",
"See the full, most up-to-date list on [app.fireworks.ai](https://app.fireworks.ai)."
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 3,
"id": "bf0a425c",
"metadata": {},
"outputs": [
@ -89,29 +95,41 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Is it Tom Brady, Aaron Rodgers, or someone else? It's a tough question to answer, and there are strong arguments for each of these quarterbacks. Here are some of the reasons why each of these quarterbacks could be considered the best:\n",
"\n",
"Tom Brady:\n",
"\n",
"It's a question that has been debated for years, with different analysts and fans making their cases for various signal-callers. Here are some of the top contenders for the title of best quarterback in the NFL:\n",
"* He has the most Super Bowl wins (6) of any quarterback in NFL history.\n",
"* He has been named Super Bowl MVP four times, more than any other player.\n",
"* He has led the New England Patriots to 18 playoff victories, the most in NFL history.\n",
"* He has thrown for over 70,000 yards in his career, the most of any quarterback in NFL history.\n",
"* He has thrown for 50 or more touchdowns in a season four times, the most of any quarterback in NFL history.\n",
"\n",
"1. Tom Brady: The New England Patriots legend has won six Super Bowls and has been named Super Bowl MVP four times. He's known for his precision passing, pocket presence, and ability to read defenses.\n",
"2. Aaron Rodgers: The Green Bay Packers quarterback has won two Super Bowls and has been named NFL MVP twice. He's known for his quick release, accuracy, and ability to extend plays with his feet.\n",
"3. Drew Brees: The New Orleans Saints quarterback has won a Super Bowl and has been named NFL MVP once. He's known for his accuracy, pocket presence, and ability to read defenses.\n",
"4. Patrick Mahomes: The Kansas City Chiefs quarterback has won a Super Bowl and has been named NFL MVP twice. He's known for his arm strength, athleticism, and ability to make plays outside of the pocket.\n",
"5. Russell Wilson: The Seattle Seahawks quarterback has won a Super Bowl and has been named NFL MVP once. He's known for his mobility, accuracy, and ability to extend plays with his feet.\n",
"Aaron Rodgers:\n",
"\n",
"Of course, there are other talented quarterbacks in the league, such as Lamar Jackson, Deshaun Watson, and Carson Wentz, who could also be considered among the best. Ultimately, the answer to the question of who's the best quarterback in the NFL is subjective and can vary depending on individual perspectives and criteria.\n"
"* He has led the Green Bay Packers to a Super Bowl victory in 2010.\n",
"* He has been named Super Bowl MVP once.\n",
"* He has thrown for over 40,000 yards in his career, the most of any quarterback in NFL history.\n",
"* He has thrown for 40 or more touchdowns in a season three times, the most of any quarterback in NFL history.\n",
"* He has a career passer rating of 103.1, the highest of any quarterback in NFL history.\n",
"\n",
"So, who's the best quarterback in the NFL? It's a tough call, but here's my opinion:\n",
"\n",
"I think Aaron Rodgers is the best quarterback in the NFL right now. He has led the Packers to a Super Bowl victory and has had some incredible seasons, including the 2011 season when he threw for 45 touchdowns and just 6 interceptions. He has a strong arm, great accuracy, and is incredibly mobile for a quarterback of his size. He also has a great sense of timing and knows when to take risks and when to play it safe.\n",
"\n",
"Tom Brady is a close second, though. He has an incredible track record of success, including six Super Bowl victories, and has been one of the most consistent quarterbacks in the league for the past two decades. He has a strong arm and is incredibly accurate\n"
]
}
],
"source": [
"#single prompt\n",
"# Single prompt\n",
"output = llm(\"Who's the best quarterback in the NFL?\")\n",
"print(output)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 4,
"id": "afc7de6f",
"metadata": {},
"outputs": [
@ -119,19 +137,22 @@
"name": "stdout",
"output_type": "stream",
"text": [
"generations=[[Generation(text=\"\\nWho is the best cricket player in the world in 2016?\\nThe best cricket player in the world in 2016 is Virat Kohli. The Indian captain has had a fabulous year, scoring heavily in all formats of the game, leading India to several victories, and breaking several records. In Test cricket, Kohli has scored 1215 runs at an average of 75.33 with 6 centuries and 4 fifties, which is the highest number of runs scored by any player in a calendar year. In ODI cricket, he has scored 1143 runs at an average of 83.42 with 7 centuries and 6 fifties, which is also the highest number of runs scored by any player in a calendar year. Additionally, Kohli has led India to the number one ranking in Test cricket, and has been named the ICC Test Player of the Year and the ICC ODI Player of the Year.\\nVirat Kohli has been in incredible form in 2016, and his performances have made him the standout player of the year. Other players who have had a great year include Steve Smith, Joe Root, and Kane Williamson, but Kohli's consistency and dominance in all formats of the game make him the best cricket player in the world in 2016.\", generation_info=None)], [Generation(text=\"\\n\\nA: LeBron James.\\n\\nB: Kevin Durant.\\n\\nC: Steph Curry.\\n\\nD: James Harden.\\n\\nE: Other (please specify).\\n\\nWhat's your answer?\", generation_info=None)]] llm_output={'token_usage': {}, 'model_id': 'fireworks-llama-v2-13b-chat'} run=[RunInfo(run_id=UUID('d14b6bee-7692-46ad-8798-acb6f72fc7fb')), RunInfo(run_id=UUID('b9f5b3b5-9e62-4eaf-b269-ecf0cbbcfb82'))]\n"
"[[Generation(text='\\nThe best cricket player in 2016 is a matter of opinion, but some of the top contenders for the title include:\\n\\n1. Virat Kohli (India): Kohli had a phenomenal year in 2016, scoring over 1,000 runs in Test cricket, including four centuries, and averaging over 70. He also scored heavily in ODI cricket, with an average of over 80.\\n2. Steve Smith (Australia): Smith had a remarkable year in 2016, leading Australia to a Test series victory in India and scoring over 1,000 runs in the format, including five centuries. He also averaged over 60 in ODI cricket.\\n3. KL Rahul (India): Rahul had a breakout year in 2016, scoring over 1,000 runs in Test cricket, including four centuries, and averaging over 60. He also scored heavily in ODI cricket, with an average of over 70.\\n4. Joe Root (England): Root had a solid year in 2016, scoring over 1,000 runs in Test cricket, including four centuries, and averaging over 50. He also scored heavily in ODI cricket, with an average of over 80.\\n5. Quinton de Kock (South Africa): De Kock had a remarkable year in 2016, scoring over 1,000 runs in ODI cricket, including six centuries, and averaging over 80. He also scored heavily in Test cricket, with an average of over 50.\\n\\nThese are just a few of the top contenders for the title of best cricket player in 2016, but there were many other talented players who also had impressive years. Ultimately, the answer to this question is subjective and depends on individual opinions and criteria for evaluation.', generation_info=None)], [Generation(text=\"\\nThis is a tough one, as there are so many great players in the league right now. But if I had to choose one, I'd say LeBron James is the best basketball player in the league. He's a once-in-a-generation talent who can dominate the game in so many ways. He's got incredible speed, strength, and court vision, and he's always finding new ways to improve his game. Plus, he's been doing it at an elite level for over a decade now, which is just amazing.\\n\\nBut don't just take my word for it - there are plenty of other great players in the league who could make a strong case for being the best. Guys like Kevin Durant, Steph Curry, James Harden, and Giannis Antetokounmpo are all having incredible seasons, and they've all got their own unique skills and strengths that make them special. So ultimately, it's up to you to decide who you think is the best basketball player in the league.\", generation_info=None)]]\n"
]
}
],
"source": [
"#calling multiple prompts\n",
"output = llm.generate([\"Who's the best cricket player in 2016?\", \"Who's the best basketball player in the league?\"])\n",
"print(output)"
"# Calling multiple prompts\n",
"output = llm.generate([\n",
" \"Who's the best cricket player in 2016?\",\n",
" \"Who's the best basketball player in the league?\",\n",
"])\n",
"print(output.generations)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 5,
"id": "b801c20d",
"metadata": {},
"outputs": [
@ -140,13 +161,13 @@
"output_type": "stream",
"text": [
"\n",
"Kansas City in December can be quite chilly, with average high\n"
"Kansas City in December is quite cold, with temperatures typically r\n"
]
}
],
"source": [
"#setting parameters: model_id, temperature, max_tokens, top_p\n",
"llm = Fireworks(model_id=\"accounts/fireworks/models/fireworks-llama-v2-13b-chat\", temperature=0.7, max_tokens=15, top_p=1.0)\n",
"# Setting additional parameters: temperature, max_tokens, top_p\n",
"llm = Fireworks(model_id=\"accounts/fireworks/models/llama-v2-13b-chat\", temperature=0.7, max_tokens=15, top_p=1.0)\n",
"print(llm(\"What's the weather like in Kansas City in December?\"))"
]
},
@ -162,7 +183,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 6,
"id": "fd2c6bc1",
"metadata": {},
"outputs": [
@ -171,34 +192,25 @@
"output_type": "stream",
"text": [
"\n",
"(Note: I'm just an AI and not a branding expert, so take this as a starting point for your own research and brainstorming.)\n",
"A good name for a company that makes football helmets could be:\n",
"Naming a company can be a fun and creative process! Here are a few name ideas for a company that makes football helmets:\n",
"\n",
"1. Helix Pro: This name plays off the idea of a helix, or spiral, shape that is commonly associated with football helmets. \"Pro\" implies a professional-level product.\n",
"2. Gridiron Gear: \"Gridiron\" is a term used to describe a football field, and \"gear\" highlights the company's focus on producing high-quality football helmets.\n",
"3. Linebacker Lab: \"Linebacker\" is a position on the football field, and \"Lab\" suggests a focus on research and development.\n",
"4. Helmet Hut: This name is simple and easy to remember, and it immediately conveys the company's focus on football helmets.\n",
"5. Tackle Tech: \"Tackle\" is a term used in football to describe a hit or collision, and \"Tech\" implies a focus on advanced technology and innovation.\n",
"6. Victory Vest: \"Victory\" implies a focus on winning and success, and \"Vest\" could suggest a protective or armored design.\n",
"7. Pigskin Pro: \"Pigskin\" is a term used to describe a football, and \"Pro\" implies a professional-level product.\n",
"8. Football Fusion: This name could suggest a combination of different materials or technologies to create a high-quality football helmet.\n",
"9. Endzone Edge: \"Endzone\" is the area of the football field where a team scores a touchdown, and \"Edge\" implies a competitive advantage.\n",
"10. MVP Masks: \"MVP\" stands for \"Most Valuable Player,\" and \"Masks\" highlights the protective nature of the company's football helmets.\n",
"1. Helix Headgear: This name plays off the idea of the helix shape of a football helmet and could be a memorable and catchy name for a company.\n",
"2. Gridiron Gear: \"Gridiron\" is a term used to describe a football field, and \"gear\" refers to the products the company sells. This name is straightforward and easy to understand.\n",
"3. Cushion Crusaders: This name emphasizes the protective qualities of football helmets and could appeal to customers looking for safety-conscious products.\n",
"4. Helmet Heroes: This name has a fun, heroic tone and could appeal to customers looking for high-quality products.\n",
"5. Tackle Tech: \"Tackle\" is a term used in football to describe a player's attempt to stop an opponent, and \"tech\" refers to the technology used in the helmets. This name could appeal to customers interested in innovative products.\n",
"6. Padded Protection: This name emphasizes the protective qualities of football helmets and could appeal to customers looking for products that prioritize safety.\n",
"7. Gridiron Gear Co.: This name is simple and straightforward, and it clearly conveys the company's focus on football-related products.\n",
"8. Helmet Haven: This name has a soothing, protective tone and could appeal to customers looking for a reliable brand.\n",
"\n",
"Remember, the name you choose for your company should be memorable, easy to pronounce and spell, and convey a sense of quality and professionalism. It's also important to check that the name isn't already in use by another company, and to consider any potential trademark issues.\n"
"Remember to choose a name that reflects your company's values and mission, and that resonates with your target market. Good luck with your company!\n"
]
}
],
"source": [
"human_message_prompt = HumanMessagePromptTemplate(\n",
" prompt=PromptTemplate(\n",
" template=\"What is a good name for a company that makes {product}?\",\n",
" input_variables=[\"product\"],\n",
" )\n",
")\n",
"\n",
"human_message_prompt = HumanMessagePromptTemplate.from_template(\"What is a good name for a company that makes {product}?\")\n",
"chat_prompt_template = ChatPromptTemplate.from_messages([human_message_prompt])\n",
"chat = Fireworks()\n",
"chat = FireworksChat()\n",
"chain = LLMChain(llm=chat, prompt=chat_prompt_template)\n",
"output = chain.run(\"football helmets\")\n",
"\n",
@ -222,7 +234,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.11.4"
}
},
"nbformat": 4,

View File

@ -31,12 +31,11 @@
},
"outputs": [],
"source": [
"from langchain.llms.bedrock import Bedrock\n",
"from langchain.llms import Bedrock\n",
"\n",
"llm = Bedrock(\n",
" credentials_profile_name=\"bedrock-admin\",\n",
" model_id=\"amazon.titan-tg1-large\",\n",
" endpoint_url=\"custom_endpoint_url\",\n",
" model_id=\"amazon.titan-tg1-large\"\n",
")"
]
},

View File

@ -0,0 +1,167 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# NIBittensorLLM\n",
"\n",
"NIBittensorLLM is developed by [Neural Internet](https://neuralinternet.ai/), powered by [Bittensor](https://bittensor.com/).\n",
"\n",
"This LLM showcases true potential of decentralized AI by giving you the best response(s) from the Bittensor protocol, which consist of various AI models such as OpenAI, LLaMA2 etc.\n",
"\n",
"Users can view their logs, requests, and API keys on the [Validator Endpoint Frontend](https://api.neuralinterent.ai/). However, changes to the configuration are currently prohibited; otherwise, the user's queries will be blocked.\n",
"\n",
"If you encounter any difficulties or have any questions, please feel free to reach out to our developer on [GitHub](https://github.com/Kunj-2206), [Discord](https://discordapp.com/users/683542109248159777) or join our discord server for latest update and queries [Neural Internet](https://discord.gg/neuralinternet).\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Different Parameter and response handling for NIBittensorLLM "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import langchain\n",
"from langchain.llms import NIBittensorLLM\n",
"import json\n",
"from pprint import pprint\n",
"\n",
"langchain.debug = True\n",
"\n",
"# System parameter in NIBittensorLLM is optional but you can set whatever you want to perform with model\n",
"llm_sys = NIBittensorLLM(\n",
" system_prompt=\"Your task is to determine response based on user prompt.Explain me like I am technical lead of a project\"\n",
")\n",
"sys_resp = llm_sys(\n",
" \"What is bittensor and What are the potential benifits of decentralized AI?\"\n",
")\n",
"print(f\"Response provided by LLM with system prompt set is : {sys_resp}\")\n",
"\n",
"# The top_responses parameter can give multiple responses based on its parameter value\n",
"# This below code retrive top 10 miner's response all the response are in format of json\n",
"\n",
"# Json response structure is\n",
"\"\"\" {\n",
" \"choices\": [\n",
" {\"index\": Bittensor's Metagraph index number,\n",
" \"uid\": Unique Identifier of a miner,\n",
" \"responder_hotkey\": Hotkey of a miner,\n",
" \"message\":{\"role\":\"assistant\",\"content\": Contains actual response},\n",
" \"response_ms\": Time in millisecond required to fetch response from a miner} \n",
" ]\n",
" } \"\"\"\n",
"\n",
"multi_response_llm = NIBittensorLLM(top_responses=10)\n",
"multi_resp = multi_response_llm(\"What is Neural Network Feeding Mechanism?\")\n",
"json_multi_resp = json.loads(multi_resp)\n",
"pprint(json_multi_resp)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using NIBittensorLLM with LLMChain and PromptTemplate"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import langchain\n",
"from langchain import PromptTemplate, LLMChain\n",
"from langchain.llms import NIBittensorLLM\n",
"\n",
"langchain.debug = True\n",
"\n",
"template = \"\"\"Question: {question}\n",
"\n",
"Answer: Let's think step by step.\"\"\"\n",
"\n",
"\n",
"prompt = PromptTemplate(template=template, input_variables=[\"question\"])\n",
"\n",
"# System parameter in NIBittensorLLM is optional but you can set whatever you want to perform with model\n",
"llm = NIBittensorLLM(system_prompt=\"Your task is to determine response based on user prompt.\")\n",
"\n",
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
"question = \"What is bittensor?\"\n",
"\n",
"llm_chain.run(question)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Using NIBittensorLLM with Conversational Agent and Google Search Tool"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import (\n",
" AgentType,\n",
" initialize_agent,\n",
" load_tools,\n",
" ZeroShotAgent,\n",
" Tool,\n",
" AgentExecutor,\n",
")\n",
"from langchain.memory import ConversationBufferMemory\n",
"from langchain import LLMChain, PromptTemplate\n",
"from langchain.utilities import GoogleSearchAPIWrapper, SerpAPIWrapper\n",
"from langchain.llms import NIBittensorLLM\n",
"\n",
"memory = ConversationBufferMemory(memory_key=\"chat_history\")\n",
"\n",
"\n",
"prefix = \"\"\"Answer prompt based on LLM if there is need to search something then use internet and observe internet result and give accurate reply of user questions also try to use authenticated sources\"\"\"\n",
"suffix = \"\"\"Begin!\n",
" {chat_history}\n",
" Question: {input}\n",
" {agent_scratchpad}\"\"\"\n",
"\n",
"prompt = ZeroShotAgent.create_prompt(\n",
" tools,\n",
" prefix=prefix,\n",
" suffix=suffix,\n",
" input_variables=[\"input\", \"chat_history\", \"agent_scratchpad\"],\n",
")\n",
"\n",
"llm = NIBittensorLLM(system_prompt=\"Your task is to determine response based on user prompt\")\n",
"\n",
"llm_chain = LLMChain(llm=llm, prompt=prompt)\n",
"\n",
"memory = ConversationBufferMemory(memory_key=\"chat_history\")\n",
"\n",
"agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)\n",
"agent_chain = AgentExecutor.from_agent_and_tools(\n",
" agent=agent, tools=tools, verbose=True, memory=memory\n",
")\n",
"\n",
"response = agent_chain.run(input=prompt)"
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,78 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "15d7ce70-8879-42a0-86d9-a3d604a3ec83",
"metadata": {},
"source": [
"# DeepSparse\n",
"\n",
"This page covers how to use the [DeepSparse](https://github.com/neuralmagic/deepsparse) inference runtime within LangChain.\n",
"It is broken into two parts: installation and setup, and then examples of DeepSparse usage.\n",
"\n",
"## Installation and Setup\n",
"\n",
"- Install the Python package with `pip install deepsparse`\n",
"- Choose a [SparseZoo model](https://sparsezoo.neuralmagic.com/?useCase=text_generation) or export a support model to ONNX [using Optimum](https://github.com/neuralmagic/notebooks/blob/main/notebooks/opt-text-generation-deepsparse-quickstart/OPT_Text_Generation_DeepSparse_Quickstart.ipynb)\n",
"\n",
"\n",
"There exists a DeepSparse LLM wrapper, that provides a unified interface for all models:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "79d24d37-737a-428c-b6c5-84c1633070d7",
"metadata": {},
"outputs": [],
"source": [
"from langchain.llms import DeepSparse\n",
"\n",
"llm = DeepSparse(model='zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none')\n",
"\n",
"print(llm('def fib():'))"
]
},
{
"cell_type": "markdown",
"id": "ea7ea674-d6b0-49d9-9c2b-014032973be6",
"metadata": {},
"source": [
"Additional parameters can be passed using the `config` parameter:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ff61b845-41e6-4457-8625-6e21a11bfe7c",
"metadata": {},
"outputs": [],
"source": [
"config = {'max_generated_tokens': 256}\n",
"\n",
"llm = DeepSparse(model='zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none', config=config)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -74,7 +74,7 @@
" typical_p=0.95,\n",
" temperature=0.01,\n",
" repetition_penalty=1.03,\n",
" stream=True\n",
" streaming=True\n",
")\n",
"llm(\"What did foo say about bar?\", callbacks=[StreamingStdOutCallbackHandler()])"
]

View File

@ -241,7 +241,9 @@
"# Make sure the model path is correct for your system!\n",
"llm = LlamaCpp(\n",
" model_path=\"/Users/rlm/Desktop/Code/llama/llama-2-7b-ggml/llama-2-7b-chat.ggmlv3.q4_0.bin\",\n",
" input={\"temperature\": 0.75, \"max_length\": 2000, \"top_p\": 1},\n",
" temperature=0.75,\n",
" max_tokens=2000,\n",
" top_p=1,\n",
" callback_manager=callback_manager,\n",
" verbose=True,\n",
")"

View File

@ -19,8 +19,29 @@
"First, follow [these instructions](https://github.com/jmorganca/ollama) to set up and run a local Ollama instance:\n",
"\n",
"* [Download](https://ollama.ai/download)\n",
"* Fetch a model, e.g., `Llama-7b`: `ollama pull llama2`\n",
"* Run `ollama run llama2`\n",
"* Fetch a model via `ollama pull <model family>`\n",
"* e.g., for `Llama-7b`: `ollama pull llama2` (see full list [here](https://github.com/jmorganca/ollama))\n",
"* This will download the most basic version of the model typically (e.g., smallest # parameters and `q4_0`)\n",
"* On Mac, it will download to \n",
"\n",
"`~/.ollama/models/manifests/registry.ollama.ai/library/<model family>/latest`\n",
"\n",
"* And we specify a particular version, e.g., for `ollama pull vicuna:13b-v1.5-16k-q4_0`\n",
"* The file is here with the model version in place of `latest`\n",
"\n",
"`~/.ollama/models/manifests/registry.ollama.ai/library/vicuna/13b-v1.5-16k-q4_0`\n",
"\n",
"You can easily access models in a few ways:\n",
"\n",
"1/ if the app is running:\n",
"* All of your local models are automatically served on `localhost:11434`\n",
"* Select your model when setting `llm = Ollama(..., model=\"<model family>:<version>\")`\n",
"* If you set `llm = Ollama(..., model=\"<model family\")` withoout a version it will simply look for `latest`\n",
"\n",
"2/ if building from source or just running the binary: \n",
"* Then you must run `ollama serve`\n",
"* All of your local models are automatically served on `localhost:11434`\n",
"* Then, select as shown above\n",
"\n",
"\n",
"## Usage\n",

View File

@ -1,80 +1,83 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "9597802c",
"metadata": {},
"source": [
"# Nebula\n",
"# Nebula (Symbl.ai)\n",
"[Nebula](https://symbl.ai/nebula/) is a large language model (LLM) built by [Symbl.ai](https://symbl.ai). It is trained to perform generative tasks on human conversations. Nebula excels at modeling the nuanced details of a conversation and performing tasks on the conversation.\n",
"\n",
"[Nebula](https://symbl.ai/nebula/) is a fully-managed Conversation platform, on which you can build, deploy, and manage scalable AI applications.\n",
"Nebula documentation: https://docs.symbl.ai/docs/nebula-llm\n",
"\n",
"This example goes over how to use LangChain to interact with the [Nebula platform](https://docs.symbl.ai/docs/nebula-llm-overview). \n",
"\n",
"It will send the requests to Nebula Service endpoint, which concatenates `SYMBLAI_NEBULA_SERVICE_URL` and `SYMBLAI_NEBULA_SERVICE_PATH`, with a token defined in `SYMBLAI_NEBULA_SERVICE_TOKEN`"
]
"This example goes over how to use LangChain to interact with the [Nebula platform](https://docs.symbl.ai/docs/nebula-llm)."
],
"metadata": {
"collapsed": false
},
"id": "bb8cd830db4a004e"
},
{
"cell_type": "markdown",
"id": "f15ebe0d",
"metadata": {},
"source": [
"### Integrate with a LLMChain"
]
"Make sure you have API Key with you. If you don't have one please [request one](https://info.symbl.ai/Nebula_Private_Beta.html)."
],
"metadata": {
"collapsed": false
},
"id": "519570b6539aa18c"
},
{
"cell_type": "code",
"execution_count": null,
"id": "5472a7cd-af26-48ca-ae9b-5f6ae73c74d2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import os\n",
"from langchain.llms.symblai_nebula import Nebula\n",
"\n",
"os.environ[\"NEBULA_SERVICE_URL\"] = NEBULA_SERVICE_URL\n",
"os.environ[\"NEBULA_SERVICE_PATH\"] = NEBULA_SERVICE_PATH\n",
"os.environ[\"NEBULA_SERVICE_API_KEY\"] = NEBULA_SERVICE_API_KEY"
]
"llm = Nebula(nebula_api_key='<your_api_key>')"
],
"metadata": {
"collapsed": false
},
"id": "9f47bef45880aece"
},
{
"cell_type": "code",
"execution_count": null,
"id": "6fb585dd",
"metadata": {
"tags": []
},
"outputs": [],
"cell_type": "markdown",
"source": [
"from langchain.llms import OpenLLM\n",
"\n",
"llm = OpenLLM(\n",
" conversation=\"<Drop your text conversation that you want to ask Nebula to analyze here>\",\n",
")"
]
"Use a conversation transcript and instruction to construct a prompt."
],
"metadata": {
"collapsed": false
},
"id": "88c6a516ef51c74b"
},
{
"cell_type": "code",
"execution_count": null,
"id": "035dea0f",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain import PromptTemplate, LLMChain\n",
"\n",
"template = \"Identify the {count} main objectives or goals mentioned in this context concisely in less points. Emphasize on key intents.\"\n",
"conversation = \"\"\"Sam: Good morning, team! Let's keep this standup concise. We'll go in the usual order: what you did yesterday, what you plan to do today, and any blockers. Alex, kick us off.\n",
"Alex: Morning! Yesterday, I wrapped up the UI for the user dashboard. The new charts and widgets are now responsive. I also had a sync with the design team to ensure the final touchups are in line with the brand guidelines. Today, I'll start integrating the frontend with the new API endpoints Rhea was working on. The only blocker is waiting for some final API documentation, but I guess Rhea can update on that.\n",
"Rhea: Hey, all! Yep, about the API documentation - I completed the majority of the backend work for user data retrieval yesterday. The endpoints are mostly set up, but I need to do a bit more testing today. I'll finalize the API documentation by noon, so that should unblock Alex. After that, Ill be working on optimizing the database queries for faster data fetching. No other blockers on my end.\n",
"Sam: Great, thanks Rhea. Do reach out if you need any testing assistance or if there are any hitches with the database. Now, my update: Yesterday, I coordinated with the client to get clarity on some feature requirements. Today, I'll be updating our project roadmap and timelines based on their feedback. Additionally, I'll be sitting with the QA team in the afternoon for preliminary testing. Blocker: I might need both of you to be available for a quick call in case the client wants to discuss the changes live.\n",
"Alex: Sounds good, Sam. Just let us know a little in advance for the call.\n",
"Rhea: Agreed. We can make time for that.\n",
"Sam: Perfect! Let's keep the momentum going. Reach out if there are any sudden issues or support needed. Have a productive day!\n",
"Alex: You too.\n",
"Rhea: Thanks, bye!\"\"\"\n",
"\n",
"prompt = PromptTemplate(template=template, input_variables=[\"count\"])\n",
"instruction = \"Identify the main objectives mentioned in this conversation.\"\n",
"\n",
"prompt = PromptTemplate.from_template(\"{instruction}\\n{conversation}\")\n",
"\n",
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
"\n",
"generated = llm_chain.run(count=\"five\")\n",
"print(generated)"
]
"llm_chain.run(instruction=instruction, conversation=conversation)"
],
"metadata": {
"collapsed": false
},
"id": "5977ccc2d4432624"
}
],
"metadata": {

View File

@ -0,0 +1,169 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Titan Takeoff\n",
"\n",
"TitanML helps businesses build and deploy better, smaller, cheaper, and faster NLP models through our training, compression, and inference optimization platform. \n",
"\n",
"Our inference server, [Titan Takeoff](https://docs.titanml.co/docs/titan-takeoff/getting-started) enables deployment of LLMs locally on your hardware in a single command. Most generative model architectures are supported, such as Falcon, Llama 2, GPT2, T5 and many more."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Installation\n",
"\n",
"To get started with Iris Takeoff, all you need is to have docker and python installed on your local system. If you wish to use the server with gpu suport, then you will need to install docker with cuda support.\n",
"\n",
"For Mac and Windows users, make sure you have the docker daemon running! You can check this by running docker ps in your terminal. To start the daemon, open the docker desktop app.\n",
"\n",
"Run the following command to install the Iris CLI that will enable you to run the takeoff server:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"source": [
"pip install titan-iris"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Choose a Model\n",
"Iris Takeoff supports many of the most powerful generative text models, such as Falcon, MPT, and Llama. See the [supported models](https://docs.titanml.co/docs/titan-takeoff/supported-models) for more information. For information about using your own models, see the [custom models](https://docs.titanml.co/docs/titan-takeoff/Advanced/custom-models).\n",
"\n",
"Going forward in this demo we will be using the falcon 7B instruct model. This is a good open source model that is trained to follow instructions, and is small enough to easily inference even on CPUs.\n",
"\n",
"## Taking off\n",
"Models are referred to by their model id on HuggingFace. Takeoff uses port 8000 by default, but can be configured to use another port. There is also support to use a Nvidia GPU by specifing cuda for the device flag.\n",
"\n",
"To start the takeoff server, run:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"source": [
"iris takeoff --model tiiuae/falcon-7b-instruct --device cpu\n",
"iris takeoff --model tiiuae/falcon-7b-instruct --device cuda # Nvidia GPU required\n",
"iris takeoff --model tiiuae/falcon-7b-instruct --device cpu --port 5000 # run on port 5000 (default: 8000)\n",
"```"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"You will then be directed to a login page, where you will need to create an account to proceed.\n",
"After logging in, run the command onscreen to check whether the server is ready. When it is ready, you can start using the Takeoff integration\n",
"\n",
"## Inferencing your model\n",
"To access your LLM, use the TitanTakeoff LLM wrapper:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.llms import TitanTakeoff\n",
"\n",
"llm = TitanTakeoff(\n",
" port=8000,\n",
" generate_max_length=128,\n",
" temperature=1.0\n",
")\n",
"\n",
"prompt = \"What is the largest planet in the solar system?\"\n",
"\n",
"llm(prompt)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"No parameters are needed by default, but a port can be specified and [generation parameters](https://docs.titanml.co/docs/titan-takeoff/Advanced/generation-parameters) can be supplied.\n",
"\n",
"### Streaming\n",
"Streaming is also supported via the streaming flag:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
"from langchain.callbacks.manager import CallbackManager\n",
"\n",
"llm = TitanTakeoff(port=8000, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), streaming=True)\n",
"\n",
"prompt = \"What is the capital of France?\"\n",
"\n",
"llm(prompt)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Integration with LLMChain"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain import PromptTemplate, LLMChain\n",
"\n",
"llm = TitanTakeoff()\n",
"\n",
"template = \"What is the capital of {country}\"\n",
"\n",
"prompt = PromptTemplate(template=template, input_variables=[\"country\"])\n",
"\n",
"llm_chain = LLMChain(llm=llm, prompt=prompt)\n",
"\n",
"generated = llm_chain.run(country=\"Belgium\")\n",
"print(generated)"
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -170,6 +170,51 @@
"\n",
"llm(\"What is the future of AI?\")"
]
},
{
"cell_type": "markdown",
"id": "64e89be0-6ad7-43a8-9dac-1324dcd4e851",
"metadata": {
"tags": []
},
"source": [
"## OpenAI-Compatible Server\n",
"\n",
"vLLM can be deployed as a server that mimics the OpenAI API protocol. This allows vLLM to be used as a drop-in replacement for applications using OpenAI API.\n",
"\n",
"This server can be queried in the same format as OpenAI API.\n",
"\n",
"### OpenAI-Compatible Completion"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c3cbc428-0bb8-422a-913e-1c6fef8b89d4",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" a city that is filled with history, ancient buildings, and art around every corner\n"
]
}
],
"source": [
"from langchain.llms import VLLMOpenAI\n",
"\n",
"\n",
"llm = VLLMOpenAI(\n",
" openai_api_key=\"EMPTY\",\n",
" openai_api_base=\"http://localhost:8000/v1\",\n",
" model_name=\"tiiuae/falcon-7b\",\n",
" model_kwargs={\"stop\": [\".\"]}\n",
")\n",
"print(llm(\"Rome is\"))"
]
}
],
"metadata": {

View File

@ -13,7 +13,7 @@ pip install python-arango
Connect your ArangoDB Database with a Chat Model to get insights on your data.
See the notebook example [here](/docs/use_cases/graph/graph_arangodb_qa.html).
See the notebook example [here](/docs/use_cases/more/graph/graph_arangodb_qa.html).
```python
from arango import ArangoClient

View File

@ -0,0 +1,21 @@
# BagelDB
> [BagelDB](https://www.bageldb.ai/) (`Open Vector Database for AI`) is like GitHub for AI data.
It is a collaborative platform where users can create,
share, and manage vector datasets. It can support private projects for independent developers,
internal collaborations for enterprises, and public contributions for data DAOs.
## Installation and Setup
```bash
pip install betabageldb
```
## VectorStore
See a [usage example](/docs/integrations/vectorstores/bageldb).
```python
from langchain.vectorstores import Bagel
```
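A minimal usage sketch, assuming the standard LangChain `VectorStore` interface (`from_texts`, `similarity_search`); the `cluster_name` argument is illustrative, so check the linked notebook for the exact parameters:
```python
from langchain.vectorstores import Bagel

# Build a cluster from raw texts, then query it
vectorstore = Bagel.from_texts(cluster_name="demo", texts=["hello bagel", "hello langchain"])
docs = vectorstore.similarity_search("bagel", k=1)
print(docs[0].page_content)
```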

View File

@ -0,0 +1,37 @@
# NIBittensor
This page covers how to use the BittensorLLM inference runtime within LangChain.
It is broken into two parts: installation and setup, and then examples of NIBittensorLLM usage.
## Installation and Setup
- Install the Python package with `pip install langchain`
## Wrappers
### LLM
There exists a NIBittensor LLM wrapper, which you can access with:
```python
from langchain.llms import NIBittensorLLM
```
It provides a unified interface for all models:
```python
llm = NIBittensorLLM(system_prompt="Your task is to provide a concise and accurate response based on the user prompt")
print(llm('Write a Fibonacci function in Python using the golden ratio'))
```
Multiple responses from top miners can be accessed using the `top_responses` parameter:
```python
import json

multi_response_llm = NIBittensorLLM(top_responses=10)
multi_resp = multi_response_llm("What is Neural Network Feeding Mechanism?")
json_multi_resp = json.loads(multi_resp)
print(json_multi_resp)
```

View File

@ -0,0 +1,24 @@
# DashVector
> [DashVector](https://help.aliyun.com/document_detail/2510225.html) is a fully managed vector database service that supports high-dimensional dense and sparse vectors, real-time insertion, and filtered search. It is built to scale automatically and can adapt to different application requirements.
This document demonstrates how to leverage DashVector within the LangChain ecosystem. In particular, it shows how to install DashVector, and how to use it as a VectorStore plugin in LangChain.
It is broken into two parts: installation and setup, and then references to specific DashVector wrappers.
## Installation and Setup
Install the Python SDK:
```bash
pip install dashvector
```
## VectorStore
A DashVector Collection is wrapped as a familiar VectorStore for native usage within LangChain,
which allows it to be readily used for various scenarios, such as semantic search or example selection.
You may import the vectorstore as follows:
```python
from langchain.vectorstores import DashVector
```
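For a quick sense of the API, here is a minimal sketch assuming the standard `VectorStore` interface (`from_texts`, `similarity_search`) and that your DashVector credentials are configured as described in the notebook:
```python
from langchain.vectorstores import DashVector
from langchain.embeddings import OpenAIEmbeddings

texts = ["DashVector supports dense and sparse vectors", "LangChain wraps many vector stores"]
vectorstore = DashVector.from_texts(texts, OpenAIEmbeddings())
docs = vectorstore.similarity_search("sparse vectors")
print(docs[0].page_content)
```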
For a detailed walkthrough of the DashVector wrapper, please refer to [this notebook](/docs/integrations/vectorstores/dashvector.html).

View File

@ -0,0 +1,35 @@
# DeepSparse
This page covers how to use the [DeepSparse](https://github.com/neuralmagic/deepsparse) inference runtime within LangChain.
It is broken into two parts: installation and setup, and then examples of DeepSparse usage.
## Installation and Setup
- Install the Python package with `pip install deepsparse`
- Choose a [SparseZoo model](https://sparsezoo.neuralmagic.com/?useCase=text_generation) or export a supported model to ONNX [using Optimum](https://github.com/neuralmagic/notebooks/blob/main/notebooks/opt-text-generation-deepsparse-quickstart/OPT_Text_Generation_DeepSparse_Quickstart.ipynb)
## Wrappers
### LLM
There exists a DeepSparse LLM wrapper, which you can access with:
```python
from langchain.llms import DeepSparse
```
It provides a unified interface for all models:
```python
llm = DeepSparse(model='zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none')
print(llm('def fib():'))
```
Additional parameters can be passed using the `config` parameter:
```python
config = {'max_generated_tokens': 256}
llm = DeepSparse(model='zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none', config=config)
```

View File

@ -0,0 +1,19 @@
# Dingo
This page covers how to use the Dingo ecosystem within LangChain.
It is broken into two parts: installation and setup, and then references to specific Dingo wrappers.
## Installation and Setup
- Install the Python SDK with `pip install dingodb`
## VectorStore
There exists a wrapper around Dingo indexes, allowing you to use it as a vectorstore,
whether for semantic search or example selection.
To import this vectorstore:
```python
from langchain.vectorstores import Dingo
```
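As a rough sketch (assuming the standard `from_texts` VectorStore interface; the index name is a placeholder, and your DingoDB connection details may be required as additional arguments):
```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Dingo

# Hypothetical minimal usage -- adapt the index/connection arguments
# to your DingoDB deployment
vectorstore = Dingo.from_texts(
    ["foo", "bar"],
    OpenAIEmbeddings(),
    index_name="langchain_demo",  # placeholder index name
)
print(vectorstore.similarity_search("foo", k=1))
```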
For a more detailed walkthrough of the Dingo wrapper, see [this notebook](/docs/integrations/vectorstores/dingo.html).

View File

@ -1,24 +1,54 @@
# Elasticsearch
> [Elasticsearch](https://www.elastic.co/elasticsearch/) is a distributed, RESTful search and analytics engine.
> It provides a distributed, multi-tenant-capable full-text search engine with an HTTP web interface and schema-free
> JSON documents.
## Installation and Setup
There are two ways to get started with Elasticsearch:
#### Install Elasticsearch on your local machine via Docker
Example: Run a single-node Elasticsearch instance with security disabled. This is not recommended for production use.
```bash
docker run -p 9200:9200 -e "discovery.type=single-node" -e "xpack.security.enabled=false" -e "xpack.security.http.ssl.enabled=false" docker.elastic.co/elasticsearch/elasticsearch:8.9.0
```
#### Deploy Elasticsearch on Elastic Cloud
Elastic Cloud is a managed Elasticsearch service. Sign up for a [free trial](https://cloud.elastic.co/registration?storm=langchain-notebook).
### Install Client
```bash
pip install elasticsearch
```
## Retriever
## Vector Store
>In information retrieval, [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) (BM is an abbreviation of best matching) is a ranking function used by search engines to estimate the relevance of documents to a given search query. It is based on the probabilistic retrieval framework developed in the 1970s and 1980s by Stephen E. Robertson, Karen Spärck Jones, and others.
>The name of the actual ranking function is BM25. The fuller name, Okapi BM25, includes the name of the first system to use it, which was the Okapi information retrieval system, implemented at London's City University in the 1980s and 1990s. BM25 and its newer variants, e.g. BM25F (a version of BM25 that can take document structure and anchor text into account), represent TF-IDF-like retrieval functions used in document retrieval.
See a [usage example](/docs/integrations/retrievers/elastic_search_bm25).
The vector store is a lightweight wrapper around Elasticsearch. It provides a simple interface to store and retrieve vectors.
```python
from langchain.retrievers import ElasticSearchBM25Retriever
from langchain.vectorstores import ElasticsearchStore
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
loader = TextLoader("./state_of_the_union.txt")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings()
db = ElasticsearchStore.from_documents(
docs, embeddings, es_url="http://localhost:9200", index_name="test-basic",
)
db.client.indices.refresh(index="test-basic")
query = "What did the president say about Ketanji Brown Jackson"
results = db.similarity_search(query)
```
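The resulting store can also be plugged into retrieval chains through the standard retriever interface, e.g.:
```python
# Expose the vector store as a retriever for use in chains
retriever = db.as_retriever(search_kwargs={"k": 4})
relevant_docs = retriever.get_relevant_documents(query)
```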

View File

@ -4,14 +4,15 @@ This page covers how to use the Fireworks models within Langchain.
## Installation and Setup
- To use the Fireworks model, you need to have a Fireworks API key. To generate one, sign up at platform.fireworks.ai
- To use the Fireworks model, you need to have a Fireworks API key. To generate one, sign up at [app.fireworks.ai](https://app.fireworks.ai).
- Authenticate by setting the FIREWORKS_API_KEY environment variable.
## LLM
Fireworks integrates with LangChain through the LLM module, which allows for standardized usage of any models deployed on the Fireworks platform.
In this example, we'll work the llama-v2-13b.
In this example, we'll work with the llama-v2-13b-chat model.
```python
from langchain.llms.fireworks import Fireworks

View File

@ -1,22 +1,23 @@
# Grobid
GROBID is a machine learning library for extracting, parsing, and re-structuring raw documents.
It is designed and expected to be used to parse academic papers, where it works particularly well.
*Note*: if the articles supplied to Grobid are large documents (e.g. dissertations) exceeding a certain number
of elements, they might not be processed.
This page covers how to use the Grobid to parse articles for LangChain.
It is separated into two parts: installation and running the server
## Installation and Setup
#Ensure You have Java installed
!apt-get install -y openjdk-11-jdk -q
!update-alternatives --set java /usr/lib/jvm/java-11-openjdk-amd64/bin/java
## Installation
The Grobid installation is described in detail at https://grobid.readthedocs.io/en/latest/Install-Grobid/.
However, it is probably easier and less troublesome to run Grobid through a Docker container,
as documented [here](https://grobid.readthedocs.io/en/latest/Grobid-docker/).
#Clone and install the Grobid Repo
import os
!git clone https://github.com/kermitt2/grobid.git
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-11-openjdk-amd64"
os.chdir('grobid')
!./gradlew clean install
## Use Grobid with LangChain
#Run the server,
get_ipython().system_raw('nohup ./gradlew run > grobid.log 2>&1 &')
Once Grobid is installed and up and running (you can check by accessing http://localhost:8070),
you're ready to go.
You can now use the GrobidParser to produce documents:
```python
@ -41,4 +42,5 @@ loader = GenericLoader.from_filesystem(
)
docs = loader.load()
```
Chunk metadata will include bboxes although these are a bit funky to parse, see https://grobid.readthedocs.io/en/latest/Coordinates-in-PDF/
Chunk metadata will include bounding boxes. Although these are a bit tricky to parse,
they are explained at https://grobid.readthedocs.io/en/latest/Coordinates-in-PDF/
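For instance, a minimal sketch (reusing `docs` from the loader above) to inspect that metadata:
```python
# Peek at the bounding-box metadata on the first chunk; the exact keys
# are described in the Grobid coordinates documentation linked above
print(docs[0].metadata)
```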

View File

@ -5,16 +5,14 @@ It is broken into two parts: installation and setup, and then references to spec
## Installation and Setup
- Get an Nebula API Key and set as environment variables (`SYMBLAI_NEBULA_SERVICE_URL`, `SYMBLAI_NEBULA_SERVICE_PATH`, `SYMBLAI_NEBULA_SERVICE_TOKEN`)
- Sign up for a FREE Symbl.ai/Nebula Account: [https://nebula.symbl.ai/playground/](https://nebula.symbl.ai/playground/)
- Please see the [Nebula documentation](https://docs.symbl.ai/docs/nebula-llm-overview) for more details.
- No time? Visit the [Nebula Quickstart Guide](https://docs.symbl.ai/docs/nebula-quickstart).
## Wrappers
- Get a [Nebula API Key](https://info.symbl.ai/Nebula_Private_Beta.html) and set it as the environment variable `NEBULA_API_KEY`
- Please see the [Nebula documentation](https://docs.symbl.ai/docs/nebula-llm) for more details.
- No time? Visit the [Nebula Quickstart Guide](https://docs.symbl.ai/docs/nebula-quickstart).
### LLM
There exists a Nebula LLM wrapper, which you can access with:
```python
from langchain.llms import Nebula
llm = Nebula()
```
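Once instantiated, it can be invoked like any other LangChain LLM (the prompt below is illustrative):
```python
# Call the wrapped model directly with a prompt string
print(llm("Identify the main objections raised in the conversation."))
```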

View File

@ -63,7 +63,7 @@ results = vectara.similarity_score("what is LangChain?")
- `k`: number of results to return (defaults to 5)
- `lambda_val`: the [lexical matching](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) factor for hybrid search (defaults to 0.025)
- `filter`: a [filter](https://docs.vectara.com/docs/common-use-cases/filtering-by-metadata/filter-overview) to apply to the results (default None)
- `n_sentence_context`: number of sentences to include before/after the actual matching segment when returning results. This defaults to 0 so as to return the exact text segment that matches, but can be used with other values e.g. 2 or 3 to return adjacent text segments.
- `n_sentence_context`: number of sentences to include before/after the actual matching segment when returning results. This defaults to 2.
The results are returned as a list of relevant documents along with a relevance score for each document.
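Putting the parameters together, a sketch of a call with explicit values (all illustrative):
```python
# similarity_search with the tunable parameters described above
results = vectara.similarity_search(
    "what is LangChain?",
    k=5,                   # number of results to return
    lambda_val=0.025,      # lexical matching factor for hybrid search
    filter=None,           # optional metadata filter
    n_sentence_context=2,  # sentences of context around each match
)
```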

View File

@ -45,7 +45,7 @@
"source": [
"from langchain.document_loaders import TextLoader\n",
"\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"loader = TextLoader(\"../../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()"
]
},
@ -136,7 +136,7 @@
{
"data": {
"text/plain": [
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\""
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and a former top litigator in private practice, and that she will continue Justice Breyer's legacy of excellence.\""
]
},
"execution_count": 7,
@ -168,7 +168,7 @@
{
"data": {
"text/plain": [
"' Justice Stephen Breyer'"
"' Ketanji Brown Jackson succeeded Justice Breyer.'"
]
},
"execution_count": 9,
@ -237,7 +237,7 @@
{
"data": {
"text/plain": [
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\""
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and a former top litigator in private practice, and that she will continue Justice Breyer's legacy of excellence.\""
]
},
"execution_count": 12,
@ -282,7 +282,7 @@
{
"data": {
"text/plain": [
"' Justice Stephen Breyer'"
"' Ketanji Brown Jackson succeeded Justice Breyer.'"
]
},
"execution_count": 14,
@ -342,7 +342,7 @@
{
"data": {
"text/plain": [
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'})"
"Document(page_content='Justice Breyer, thank you for your service. One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence. A former top litigator in private practice.', metadata={'source': '../../../modules/state_of_the_union.txt'})"
]
},
"execution_count": 17,
@ -396,7 +396,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 20,
"id": "24ebdaec",
"metadata": {},
"outputs": [
@ -404,7 +404,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\n"
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and a former top litigator in private practice, and that she will continue Justice Breyer's legacy of excellence.\n"
]
}
],
@ -423,7 +423,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 21,
"id": "e53a9d66",
"metadata": {
"tags": []
@ -437,7 +437,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 22,
"id": "bf205e35",
"metadata": {
"tags": []
@ -456,7 +456,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 23,
"id": "78155887",
"metadata": {
"tags": []
@ -470,7 +470,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 24,
"id": "e54b5fa2",
"metadata": {
"tags": []
@ -479,10 +479,10 @@
{
"data": {
"text/plain": [
"\" The president said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson, who he described as one of the nation's top legal minds, to continue Justice Breyer's legacy of excellence.\""
"\" The president said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson, who is one of the nation's top legal minds and a former top litigator in private practice.\""
]
},
"execution_count": 23,
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@ -503,7 +503,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 25,
"id": "d1058fd2",
"metadata": {
"tags": []
@ -515,7 +515,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 26,
"id": "a6594482",
"metadata": {
"tags": []
@ -534,7 +534,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 27,
"id": "e2badd21",
"metadata": {
"tags": []
@ -548,7 +548,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 28,
"id": "edb31fe5",
"metadata": {
"tags": []
@ -557,10 +557,10 @@
{
"data": {
"text/plain": [
"\" The president said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson, who he described as one of the nation's top legal minds, and that she will continue Justice Breyer's legacy of excellence.\\nSOURCES: ../../../state_of_the_union.txt\""
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and a former top litigator in private practice.\\nSOURCES: ../../../modules/state_of_the_union.txt\""
]
},
"execution_count": 27,
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@ -581,7 +581,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 29,
"id": "2efacec3-2690-4b05-8de3-a32fd2ac3911",
"metadata": {
"tags": []
@ -618,7 +618,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 30,
"id": "fd6d43f4-7428-44a4-81bc-26fe88a98762",
"metadata": {
"tags": []
@ -628,7 +628,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence."
" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and a former top litigator in private practice, and that she will continue Justice Breyer's legacy of excellence."
]
}
],
@ -640,7 +640,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 31,
"id": "5ab38978-f3e8-4fa7-808c-c79dec48379a",
"metadata": {
"tags": []
@ -650,7 +650,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
" Justice Stephen Breyer"
" Justice Breyer"
]
}
],
@ -671,7 +671,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 32,
"id": "a7ba9d8c",
"metadata": {
"tags": []
@ -692,7 +692,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 33,
"id": "a3e33c0d",
"metadata": {
"tags": []
@ -706,7 +706,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 34,
"id": "936dc62f",
"metadata": {
"tags": []
@ -715,10 +715,10 @@
{
"data": {
"text/plain": [
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence.\""
"\" The president said that Ketanji Brown Jackson is one of the nation's top legal minds and a former top litigator in private practice, and that she will continue Justice Breyer's legacy of excellence.\""
]
},
"execution_count": 33,
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}

View File

@ -97,8 +97,6 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"search_index = Vectara.from_texts(source_chunks, embedding=None)"
]
},
@ -161,7 +159,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[{'text': '\\n\\nEnvironment variables are a powerful tool for managing configuration settings in your applications. They allow you to store and access values from anywhere in your code, making it easier to keep your codebase organized and maintainable.\\n\\nHowever, there are times when you may want to use environment variables specifically for a single command. This is where shell variables come in. Shell variables are similar to environment variables, but they won\\'t be exported to spawned commands. They are defined with the following syntax:\\n\\n```sh\\nVAR_NAME=value\\n```\\n\\nFor example, if you wanted to use a shell variable instead of an environment variable in a command, you could do something like this:\\n\\n```sh\\nVAR=hello && echo $VAR && deno eval \"console.log(\\'Deno: \\' + Deno.env.get(\\'VAR\\'))\"\\n```\\n\\nThis would output the following:\\n\\n```\\nhello\\nDeno: undefined\\n```\\n\\nShell variables can be useful when you want to re-use a value, but don\\'t want it available in any spawned processes.\\n\\nAnother way to use environment variables is through pipelines. Pipelines provide a way to pipe the'}, {'text': '\\n\\nEnvironment variables are a great way to store and access sensitive information in your applications. They are also useful for configuring applications and managing different environments. In Deno, there are two ways to use environment variables: the built-in `Deno.env` and the `.env` file.\\n\\nThe `Deno.env` is a built-in feature of the Deno runtime that allows you to set and get environment variables. It has getter and setter methods that you can use to access and set environment variables. For example, you can set the `FIREBASE_API_KEY` and `FIREBASE_AUTH_DOMAIN` environment variables like this:\\n\\n```ts\\nDeno.env.set(\"FIREBASE_API_KEY\", \"examplekey123\");\\nDeno.env.set(\"FIREBASE_AUTH_DOMAIN\", \"firebasedomain.com\");\\n\\nconsole.log(Deno.env.get(\"FIREBASE_API_KEY\")); // examplekey123\\nconsole.log(Deno.env.get(\"FIREBASE_AUTH_DOMAIN\")); // firebasedomain'}, {'text': \"\\n\\nEnvironment variables are a powerful tool for managing configuration and settings in your applications. They allow you to store and access values that can be used in your code, and they can be set and changed without having to modify your code.\\n\\nIn Deno, environment variables are defined using the `export` command. For example, to set a variable called `VAR_NAME` to the value `value`, you would use the following command:\\n\\n```sh\\nexport VAR_NAME=value\\n```\\n\\nYou can then access the value of the environment variable in your code using the `Deno.env.get()` method. For example, if you wanted to log the value of the `VAR_NAME` variable, you could use the following code:\\n\\n```js\\nconsole.log(Deno.env.get('VAR_NAME'));\\n```\\n\\nYou can also set environment variables for a single command. To do this, you can list the environment variables before the command, like so:\\n\\n```\\nVAR=hello VAR2=bye deno run main.ts\\n```\\n\\nThis will set the environment variables `VAR` and `V\"}, {'text': \"\\n\\nEnvironment variables are a powerful tool for managing settings and configuration in your applications. They can be used to store information such as user preferences, application settings, and even passwords. In this blog post, we'll discuss how to make Deno scripts executable with a hashbang (shebang).\\n\\nA hashbang is a line of code that is placed at the beginning of a script. It tells the system which interpreter to use when running the script. 
In the case of Deno, the hashbang should be `#!/usr/bin/env -S deno run --allow-env`. This tells the system to use the Deno interpreter and to allow the script to access environment variables.\\n\\nOnce the hashbang is in place, you may need to give the script execution permissions. On Linux, this can be done with the command `sudo chmod +x hashbang.ts`. After that, you can execute the script by calling it like any other command: `./hashbang.ts`.\\n\\nIn the example program, we give the context permission to access the environment variables and print the Deno installation path. This is done by using the `Deno.env.get()` function, which returns the value of the specified environment\"}]\n"
"[{'text': '\\n\\nWhen it comes to running Deno CLI tasks, environment variables can be a powerful tool for customizing the behavior of your tasks. With the Deno Task Definition interface, you can easily configure environment variables to be set when executing your tasks.\\n\\nThe Deno Task Definition interface is configured in a `tasks.json` within your workspace. It includes a `env` field, which allows you to specify any environment variables that should be set when executing the task. For example, if you wanted to set the `NODE_ENV` environment variable to `production` when running a Deno task, you could add the following to your `tasks.json`:\\n\\n```json\\n{\\n \"version\": \"2.0.0\",\\n \"tasks\": [\\n {\\n \"type\": \"deno\",\\n \"command\": \"run\",\\n \"args\": [\\n \"mod.ts\"\\n ],\\n \"env\": {\\n \"NODE_ENV\": \"production\"\\n },\\n \"problemMatcher\": [\\n \"$deno\"\\n ],\\n \"label\": \"deno: run\"\\n }\\n ]\\n}\\n```\\n\\nThe Deno language server and this extension also'}, {'text': '\\n\\nEnvironment variables are a great way to store and access data in your applications. They are especially useful when you need to store sensitive information such as API keys, passwords, and other credentials.\\n\\nDeno.env is a library that provides getter and setter methods for environment variables. This makes it easy to store and retrieve data from environment variables. For example, you can use the setter method to set a variable like this:\\n\\n```ts\\nDeno.env.set(\"FIREBASE_API_KEY\", \"examplekey123\");\\nDeno.env.set(\"FIREBASE_AUTH_DOMAIN\", \"firebasedomain.com\");\\n```\\n\\nAnd then you can use the getter method to retrieve the data like this:\\n\\n```ts\\nconsole.log(Deno.env.get(\"FIREBASE_API_KEY\")); // examplekey123\\nconsole.log(Deno.env.get(\"FIREBASE_AUTH_DOMAIN\")); // firebasedomain.com\\n```\\n\\nYou can also store environment variables in a `.env` file and retrieve them using `dotenv` in the standard'}, {'text': '\\n\\nEnvironment variables are a powerful tool for developers, allowing them to store and access data without hard-coding it into their applications. Deno, the secure JavaScript and TypeScript runtime, offers built-in support for environment variables with the `Deno.env` API.\\n\\nUsing `Deno.env` is simple. It has getter and setter methods that allow you to easily set and retrieve environment variables. For example, you can set the `FIREBASE_API_KEY` and `FIREBASE_AUTH_DOMAIN` environment variables like this:\\n\\n```ts\\nDeno.env.set(\"FIREBASE_API_KEY\", \"examplekey123\");\\nDeno.env.set(\"FIREBASE_AUTH_DOMAIN\", \"firebasedomain.com\");\\n```\\n\\nAnd then you can retrieve them like this:\\n\\n```ts\\nconsole.log(Deno.env.get(\"FIREBASE_API_KEY\")); // examplekey123\\nconsole.log(Deno.env.get(\"FIREBASE_AUTH_DOMAIN\")); // firebasedomain.com\\n```'}, {'text': '\\n\\nEnvironment variables are an important part of any programming language, and Deno is no exception. Environment variables are used to store information about the environment in which a program is running, such as the operating system, user preferences, and other settings. In Deno, environment variables are used to set up proxies, control the output of colors, and more.\\n\\nThe `NO_PROXY` environment variable is a de facto standard in Deno that indicates which hosts should bypass the proxy set in other environment variables. This is useful for developers who want to access certain resources without having to go through a proxy. 
For more information on this standard, you can check out the website no-color.org.\\n\\nThe `Deno.noColor` environment variable is another important environment variable in Deno. This variable is used to control the output of colors in the Deno terminal. By setting this variable to true, you can disable the output of colors in the terminal. This can be useful for developers who want to focus on the output of their code without being distracted by the colors.\\n\\nFinally, the `Deno.env` environment variable is used to access the environment variables set in the Deno runtime. This variable is useful for developers who want'}]\n"
]
}
],

View File

@ -44,21 +44,20 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-25T15:03:27.863217Z",
"start_time": "2023-05-25T15:03:25.690273Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
"ExecuteTime": {
"end_time": "2023-08-11T20:31:12.231459Z",
"start_time": "2023-08-11T20:31:11.211176Z"
}
},
"outputs": [],
"source": [
"from langchain.memory.chat_message_histories import ZepChatMessageHistory\n",
"from langchain.schema import HumanMessage, AIMessage\n",
"from uuid import uuid4\n",
"import getpass\n",
"import time\n",
"from uuid import uuid4\n",
"\n",
"from langchain.memory import ZepMemory, CombinedMemory, VectorStoreRetrieverMemory\n",
"from langchain.schema import HumanMessage, AIMessage\n",
"\n",
"# Set this to your Zep server URL\n",
"ZEP_API_URL = \"http://localhost:8000\""
@ -76,56 +75,50 @@
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
" ········\n"
]
"metadata": {
"ExecuteTime": {
"end_time": "2023-08-11T20:31:12.237545Z",
"start_time": "2023-08-11T20:31:12.232416Z"
}
],
},
"outputs": [],
"source": [
"# Provide your Zep API key. Note that this is optional. See https://docs.getzep.com/deployment/auth\n",
"AUTHENTICATE = False\n",
"\n",
"zep_api_key = getpass.getpass()"
"zep_api_key = None\n",
"if AUTHENTICATE:\n",
" zep_api_key = getpass.getpass()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-25T15:03:29.118416Z",
"start_time": "2023-05-25T15:03:29.022464Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
"ExecuteTime": {
"end_time": "2023-08-11T20:31:12.342790Z",
"start_time": "2023-08-11T20:31:12.235291Z"
}
},
"outputs": [],
"source": [
"session_id = str(uuid4()) # This is a unique identifier for the user/session\n",
"\n",
"# Set up Zep Chat History. We'll use this to add chat histories to the memory store\n",
"zep_chat_history = ZepChatMessageHistory(\n",
"# Initialize the Zep Memory Class\n",
"zep_memory = ZepMemory(\n",
" session_id=session_id, url=ZEP_API_URL, api_key=zep_api_key\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-25T15:03:30.271181Z",
"start_time": "2023-05-25T15:03:30.180442Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
"ExecuteTime": {
"end_time": "2023-08-11T20:31:14.455269Z",
"start_time": "2023-08-11T20:31:12.345635Z"
}
},
"outputs": [],
@ -191,11 +184,13 @@
"]\n",
"\n",
"for msg in test_history:\n",
" zep_chat_history.add_message(\n",
" zep_memory.chat_memory.add_message(\n",
" HumanMessage(content=msg[\"content\"])\n",
" if msg[\"role\"] == \"human\"\n",
" else AIMessage(content=msg[\"content\"])\n",
" )"
" )\n",
" \n",
"time.sleep(2) # Wait for the messages to be embedded"
]
},
{
@ -211,29 +206,20 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-25T15:03:32.979155Z",
"start_time": "2023-05-25T15:03:32.590310Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
"ExecuteTime": {
"end_time": "2023-08-11T20:31:14.758738Z",
"start_time": "2023-08-11T20:31:14.458850Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', metadata={'score': 0.8897116216176073, 'uuid': 'db60ff57-f259-4ec4-8a81-178ed4c6e54f', 'created_at': '2023-06-26T23:40:22.816214Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'GPE', 'Matches': [{'End': 20, 'Start': 15, 'Text': 'Sower'}], 'Name': 'Sower'}, {'Label': 'PERSON', 'Matches': [{'End': 65, 'Start': 51, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}, {'Label': 'DATE', 'Matches': [{'End': 84, 'Start': 80, 'Text': '1993'}], 'Name': '1993'}, {'Label': 'PERSON', 'Matches': [{'End': 124, 'Start': 110, 'Text': 'Lauren Olamina'}], 'Name': 'Lauren Olamina'}]}}, 'token_count': 56}),\n",
" Document(page_content=\"Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\", metadata={'score': 0.8856661080361157, 'uuid': 'f1a5981a-8f6d-4168-a548-6e9c32f35fa1', 'created_at': '2023-06-26T23:40:22.809621Z', 'role': 'human', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 32, 'Start': 26, 'Text': 'Butler'}], 'Name': 'Butler'}, {'Label': 'WORK_OF_ART', 'Matches': [{'End': 61, 'Start': 41, 'Text': 'Parable of the Sower'}], 'Name': 'Parable of the Sower'}]}}, 'token_count': 23}),\n",
" Document(page_content='Who was Octavia Butler?', metadata={'score': 0.7757595298492976, 'uuid': '361d0043-1009-4e13-a7f0-8aea8b1ee869', 'created_at': '2023-06-26T23:40:22.709886Z', 'role': 'human', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 22, 'Start': 8, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}], 'intent': 'The subject wants to know about the identity or background of an individual named Octavia Butler.'}}, 'token_count': 8}),\n",
" Document(page_content=\"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", metadata={'score': 0.7601242516059306, 'uuid': '56c45e8a-0f65-45f0-bc46-d9e65164b563', 'created_at': '2023-06-26T23:40:22.778836Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 16, 'Start': 0, 'Text': \"Octavia Butler's\"}], 'Name': \"Octavia Butler's\"}, {'Label': 'ORG', 'Matches': [{'End': 58, 'Start': 41, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 76, 'Start': 60, 'Text': 'Samuel R. Delany'}], 'Name': 'Samuel R. Delany'}, {'Label': 'PERSON', 'Matches': [{'End': 93, 'Start': 82, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}], 'intent': \"The subject is providing information about Octavia Butler's contemporaries.\"}}, 'token_count': 27}),\n",
" Document(page_content='You might want to read Ursula K. Le Guin or Joanna Russ.', metadata={'score': 0.7594731095320668, 'uuid': '6951f2fd-dfa4-4e05-9380-f322ef8f72f8', 'created_at': '2023-06-26T23:40:22.80464Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 40, 'Start': 23, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 55, 'Start': 44, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}]}}, 'token_count': 18})]"
]
"text/plain": "[Document(page_content='Who was Octavia Butler?', metadata={'score': 0.7758688965570713, 'uuid': 'b3322d28-f589-48c7-9daf-5eb092d65976', 'created_at': '2023-08-11T20:31:12.3856Z', 'role': 'human', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 22, 'Start': 8, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}]}}, 'token_count': 8}),\n Document(page_content=\"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", metadata={'score': 0.7602672137411663, 'uuid': '756b7136-0b4c-4664-ad33-c4431670356c', 'created_at': '2023-08-11T20:31:12.420717Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 16, 'Start': 0, 'Text': \"Octavia Butler's\"}], 'Name': \"Octavia Butler's\"}, {'Label': 'ORG', 'Matches': [{'End': 58, 'Start': 41, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 76, 'Start': 60, 'Text': 'Samuel R. Delany'}], 'Name': 'Samuel R. Delany'}, {'Label': 'PERSON', 'Matches': [{'End': 93, 'Start': 82, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}]}}, 'token_count': 27}),\n Document(page_content='You might want to read Ursula K. Le Guin or Joanna Russ.', metadata={'score': 0.7596040989115522, 'uuid': '166d9556-2d48-4237-8a84-5d8a1024d5f4', 'created_at': '2023-08-11T20:31:12.434522Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 40, 'Start': 23, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 55, 'Start': 44, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}]}}, 'token_count': 18}),\n Document(page_content='Who were her contemporaries?', metadata={'score': 0.7575531381951208, 'uuid': 'c6a16691-4012-439f-b223-84fd4e79c4cf', 'created_at': '2023-08-11T20:31:12.410336Z', 'role': 'human', 'token_count': 8}),\n Document(page_content='Octavia Estelle Butler (June 22, 1947 February 24, 2006) was an American science fiction author.', metadata={'score': 0.7546476914454683, 'uuid': '7c093a2a-0099-415a-95c5-615a8026a894', 'created_at': '2023-08-11T20:31:12.399979Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 22, 'Start': 0, 'Text': 'Octavia Estelle Butler'}], 'Name': 'Octavia Estelle Butler'}, {'Label': 'DATE', 'Matches': [{'End': 37, 'Start': 24, 'Text': 'June 22, 1947'}], 'Name': 'June 22, 1947'}, {'Label': 'DATE', 'Matches': [{'End': 57, 'Start': 40, 'Text': 'February 24, 2006'}], 'Name': 'February 24, 2006'}, {'Label': 'NORP', 'Matches': [{'End': 74, 'Start': 66, 'Text': 'American'}], 'Name': 'American'}]}}, 'token_count': 31})]"
},
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@ -260,29 +246,20 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-25T15:03:34.713354Z",
"start_time": "2023-05-25T15:03:34.577974Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
"ExecuteTime": {
"end_time": "2023-08-11T20:31:14.922838Z",
"start_time": "2023-08-11T20:31:14.751737Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', metadata={'score': 0.889661105796371, 'uuid': 'db60ff57-f259-4ec4-8a81-178ed4c6e54f', 'created_at': '2023-06-26T23:40:22.816214Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'GPE', 'Matches': [{'End': 20, 'Start': 15, 'Text': 'Sower'}], 'Name': 'Sower'}, {'Label': 'PERSON', 'Matches': [{'End': 65, 'Start': 51, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}, {'Label': 'DATE', 'Matches': [{'End': 84, 'Start': 80, 'Text': '1993'}], 'Name': '1993'}, {'Label': 'PERSON', 'Matches': [{'End': 124, 'Start': 110, 'Text': 'Lauren Olamina'}], 'Name': 'Lauren Olamina'}]}}, 'token_count': 56}),\n",
" Document(page_content=\"Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\", metadata={'score': 0.885754241595424, 'uuid': 'f1a5981a-8f6d-4168-a548-6e9c32f35fa1', 'created_at': '2023-06-26T23:40:22.809621Z', 'role': 'human', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 32, 'Start': 26, 'Text': 'Butler'}], 'Name': 'Butler'}, {'Label': 'WORK_OF_ART', 'Matches': [{'End': 61, 'Start': 41, 'Text': 'Parable of the Sower'}], 'Name': 'Parable of the Sower'}]}}, 'token_count': 23}),\n",
" Document(page_content='Who was Octavia Butler?', metadata={'score': 0.7758688965570713, 'uuid': '361d0043-1009-4e13-a7f0-8aea8b1ee869', 'created_at': '2023-06-26T23:40:22.709886Z', 'role': 'human', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 22, 'Start': 8, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}], 'intent': 'The subject wants to know about the identity or background of an individual named Octavia Butler.'}}, 'token_count': 8}),\n",
" Document(page_content=\"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", metadata={'score': 0.7602672137411663, 'uuid': '56c45e8a-0f65-45f0-bc46-d9e65164b563', 'created_at': '2023-06-26T23:40:22.778836Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 16, 'Start': 0, 'Text': \"Octavia Butler's\"}], 'Name': \"Octavia Butler's\"}, {'Label': 'ORG', 'Matches': [{'End': 58, 'Start': 41, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 76, 'Start': 60, 'Text': 'Samuel R. Delany'}], 'Name': 'Samuel R. Delany'}, {'Label': 'PERSON', 'Matches': [{'End': 93, 'Start': 82, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}], 'intent': \"The subject is providing information about Octavia Butler's contemporaries.\"}}, 'token_count': 27}),\n",
" Document(page_content='You might want to read Ursula K. Le Guin or Joanna Russ.', metadata={'score': 0.7596040989115522, 'uuid': '6951f2fd-dfa4-4e05-9380-f322ef8f72f8', 'created_at': '2023-06-26T23:40:22.80464Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 40, 'Start': 23, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 55, 'Start': 44, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}]}}, 'token_count': 18})]"
]
"text/plain": "[Document(page_content=\"Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\", metadata={'score': 0.8857504413268114, 'uuid': '82f07ab5-9d4b-4db6-aaae-6028e6fd836b', 'created_at': '2023-08-11T20:31:12.437365Z', 'role': 'human', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 32, 'Start': 26, 'Text': 'Butler'}], 'Name': 'Butler'}, {'Label': 'WORK_OF_ART', 'Matches': [{'End': 61, 'Start': 41, 'Text': 'Parable of the Sower'}], 'Name': 'Parable of the Sower'}]}}, 'token_count': 23}),\n Document(page_content='Who was Octavia Butler?', metadata={'score': 0.7758688965570713, 'uuid': 'b3322d28-f589-48c7-9daf-5eb092d65976', 'created_at': '2023-08-11T20:31:12.3856Z', 'role': 'human', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 22, 'Start': 8, 'Text': 'Octavia Butler'}], 'Name': 'Octavia Butler'}]}}, 'token_count': 8}),\n Document(page_content=\"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\", metadata={'score': 0.7602672137411663, 'uuid': '756b7136-0b4c-4664-ad33-c4431670356c', 'created_at': '2023-08-11T20:31:12.420717Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'PERSON', 'Matches': [{'End': 16, 'Start': 0, 'Text': \"Octavia Butler's\"}], 'Name': \"Octavia Butler's\"}, {'Label': 'ORG', 'Matches': [{'End': 58, 'Start': 41, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 76, 'Start': 60, 'Text': 'Samuel R. Delany'}], 'Name': 'Samuel R. Delany'}, {'Label': 'PERSON', 'Matches': [{'End': 93, 'Start': 82, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}]}}, 'token_count': 27}),\n Document(page_content='You might want to read Ursula K. Le Guin or Joanna Russ.', metadata={'score': 0.7596040989115522, 'uuid': '166d9556-2d48-4237-8a84-5d8a1024d5f4', 'created_at': '2023-08-11T20:31:12.434522Z', 'role': 'ai', 'metadata': {'system': {'entities': [{'Label': 'ORG', 'Matches': [{'End': 40, 'Start': 23, 'Text': 'Ursula K. Le Guin'}], 'Name': 'Ursula K. Le Guin'}, {'Label': 'PERSON', 'Matches': [{'End': 55, 'Start': 44, 'Text': 'Joanna Russ'}], 'Name': 'Joanna Russ'}]}}, 'token_count': 18}),\n Document(page_content='Who were her contemporaries?', metadata={'score': 0.7575531381951208, 'uuid': 'c6a16691-4012-439f-b223-84fd4e79c4cf', 'created_at': '2023-08-11T20:31:12.410336Z', 'role': 'human', 'token_count': 8})]"
},
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@ -293,19 +270,16 @@
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2023-05-18T20:09:21.298710Z",
"start_time": "2023-05-18T20:09:21.297169Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"execution_count": 6,
"outputs": [],
"source": []
"source": [],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-11T20:31:14.923032Z",
"start_time": "2023-08-11T20:31:14.918181Z"
}
}
}
],
"metadata": {

View File

@ -20,7 +20,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "282239c8-e03a-4abc-86c1-ca6120231a20",
"metadata": {},
"outputs": [],
@ -28,7 +28,7 @@
"from langchain.embeddings import BedrockEmbeddings\n",
"\n",
"embeddings = BedrockEmbeddings(\n",
" credentials_profile_name=\"bedrock-admin\", endpoint_url=\"custom_endpoint_url\"\n",
" credentials_profile_name=\"bedrock-admin\", region_name=\"us-east-1\"\n",
")"
]
},
@ -49,7 +49,29 @@
"metadata": {},
"outputs": [],
"source": [
"embeddings.embed_documents([\"This is a content of the document\"])"
"embeddings.embed_documents([\"This is a content of the document\", \"This is another document\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f6b364d",
"metadata": {},
"outputs": [],
"source": [
"# async embed query\n",
"await embeddings.aembed_query(\"This is a content of the document\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c9240a5a",
"metadata": {},
"outputs": [],
"source": [
"# async embed documents\n",
"await embeddings.aembed_documents([\"This is a content of the document\", \"This is another document\"])"
]
}
],
@ -69,7 +91,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
"version": "3.9.13"
}
},
"nbformat": 4,

View File

@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 6,
"id": "0be1af71",
"metadata": {},
"outputs": [],
@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 29,
"id": "2c66e5da",
"metadata": {},
"outputs": [],
@ -32,7 +32,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 30,
"id": "01370375",
"metadata": {},
"outputs": [],
@ -42,7 +42,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 31,
"id": "bfb6142c",
"metadata": {},
"outputs": [],
@ -52,7 +52,32 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 32,
"id": "91bc875d-829b-4c3d-8e6f-fc2dda30a3bd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[-0.003186025367556387,\n",
" 0.011071979803637493,\n",
" -0.004020420763285827,\n",
" -0.011658221276953042,\n",
" -0.0010534035786864363]"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_result[:5]"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "0356c3b7",
"metadata": {},
"outputs": [],
@ -60,6 +85,31 @@
"doc_result = embeddings.embed_documents([text])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "a4b0d49e-0c73-44b6-aed5-5b426564e085",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[-0.003186025367556387,\n",
" 0.011071979803637493,\n",
" -0.004020420763285827,\n",
" -0.011658221276953042,\n",
" -0.0010534035786864363]"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"doc_result[0][:5]"
]
},
{
"cell_type": "markdown",
"id": "bb61bbeb",
@ -70,7 +120,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "c0b072cc",
"metadata": {},
"outputs": [],
@ -80,17 +130,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 23,
"id": "a56b70f5",
"metadata": {},
"outputs": [],
"source": [
"embeddings = OpenAIEmbeddings()"
"embeddings = OpenAIEmbeddings(model=\"text-search-ada-doc-001\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 24,
"id": "14aefb64",
"metadata": {},
"outputs": [],
@ -100,7 +150,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 25,
"id": "3c39ed33",
"metadata": {},
"outputs": [],
@ -110,7 +160,32 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 26,
"id": "2ee7ce9f-d506-4810-8897-e44334412714",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0.004452846988523035,\n",
" 0.034550655976098514,\n",
" -0.015029939040690051,\n",
" 0.03827273883655212,\n",
" 0.005785414075152477]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query_result[:5]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "e3221db6",
"metadata": {},
"outputs": [],
@ -118,6 +193,31 @@
"doc_result = embeddings.embed_documents([text])"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "a0865409-3a6d-468f-939f-abde17c7cac3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0.004452846988523035,\n",
" 0.034550655976098514,\n",
" -0.015029939040690051,\n",
" 0.03827273883655212,\n",
" 0.005785414075152477]"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"doc_result[0][:5]"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -132,7 +232,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.11.1 64-bit",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@ -146,7 +246,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.1"
"version": "3.9.1"
},
"vscode": {
"interpreter": {

View File

@ -0,0 +1,134 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "245a954a",
"metadata": {
"id": "245a954a"
},
"source": [
"# Alpha Vantage\n",
"\n",
">[Alpha Vantage](https://www.alphavantage.co) Alpha Vantage provides realtime and historical financial market data through a set of powerful and developer-friendly data APIs and spreadsheets. \n",
"\n",
"Use the ``AlphaVantageAPIWrapper`` to get currency exchange rates."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "34bb5968",
"metadata": {
"id": "34bb5968"
},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
" ········\n"
]
}
],
"source": [
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"ALPHAVANTAGE_API_KEY\"] = getpass.getpass()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ac4910f8",
"metadata": {
"id": "ac4910f8"
},
"outputs": [],
"source": [
"from langchain.utilities.alpha_vantage import AlphaVantageAPIWrapper"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "84b8f773",
"metadata": {
"id": "84b8f773"
},
"outputs": [],
"source": [
"alpha_vantage = AlphaVantageAPIWrapper()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "068991a6",
"metadata": {
"id": "068991a6",
"outputId": "c5cdc6ec-03cf-4084-cc6f-6ae792d91d39"
},
"outputs": [
{
"data": {
"text/plain": [
"{'1. From_Currency Code': 'USD',\n",
" '2. From_Currency Name': 'United States Dollar',\n",
" '3. To_Currency Code': 'JPY',\n",
" '4. To_Currency Name': 'Japanese Yen',\n",
" '5. Exchange Rate': '144.93000000',\n",
" '6. Last Refreshed': '2023-08-11 21:31:01',\n",
" '7. Time Zone': 'UTC',\n",
" '8. Bid Price': '144.92600000',\n",
" '9. Ask Price': '144.93400000'}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"alpha_vantage.run(\"USD\", \"JPY\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "84fc2b66-c08f-4cd3-ae13-494c54789c09",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
},
"vscode": {
"interpreter": {
"hash": "53f3bc57609c7a84333bb558594977aa5b4026b1d6070b93987956689e367341"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -43,13 +43,14 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from langchain.utilities.dalle_image_generator import DallEAPIWrapper\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain.chains import LLMChain\n",
"from langchain.llms import OpenAI\n",
"\n",
"llm = OpenAI(temperature=0.9)\n",
"prompt = PromptTemplate(\n",
@ -61,19 +62,31 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"image_url = DallEAPIWrapper().run(chain.run(\"halloween night at a haunted museum\"))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"https://oaidalleapiprodscus.blob.core.windows.net/private/org-rocrupyvzgcl4yf25rqq6d1v/user-WsxrbKyP2c8rfhCKWDyMfe8N/img-mg1OWiziXxQN1aR2XRsLNndg.png?st=2023-01-31T07%3A34%3A15Z&se=2023-01-31T09%3A34%3A15Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-01-30T22%3A19%3A44Z&ske=2023-01-31T22%3A19%3A44Z&sks=b&skv=2021-08-06&sig=XDPee5aEng%2BcbXq2mqhh39uHGZTBmJgGAerSd0g%2BMEs%3D\n"
"data": {
"text/plain": [
"'https://oaidalleapiprodscus.blob.core.windows.net/private/org-i0zjYONU3PemzJ222esBaAzZ/user-f6uEIOFxoiUZivy567cDSWni/img-i7Z2ZxvJ4IbbdAiO6OXJgS3v.png?st=2023-08-11T14%3A03%3A14Z&se=2023-08-11T16%3A03%3A14Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-08-10T20%3A58%3A32Z&ske=2023-08-11T20%3A58%3A32Z&sks=b&skv=2021-08-06&sig=/sECe7C0EAq37ssgBm7g7JkVIM/Q1W3xOstd0Go6slA%3D'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"image_url = DallEAPIWrapper().run(chain.run(\"halloween night at a haunted museum\"))"
"image_url"
]
},
{
@ -82,7 +95,7 @@
"metadata": {},
"outputs": [],
"source": [
"# You can click on the link above to display the image for\n",
"# You can click on the link above to display the image \n",
"# Or you can try the options below to display the image inline in this notebook\n",
"\n",
"try:\n",
@ -154,9 +167,9 @@
"provenance": []
},
"kernelspec": {
"display_name": "langchain",
"display_name": "poetry-venv",
"language": "python",
"name": "python3"
"name": "poetry-venv"
},
"language_info": {
"codemirror_mode": {
@ -168,7 +181,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.9.1"
},
"vscode": {
"interpreter": {
@ -177,5 +190,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 4
}

View File

@ -0,0 +1,980 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Activeloop's Deep Lake\n",
"\n",
">[Activeloop's Deep Lake](https://docs.activeloop.ai/) as a Multi-Modal Vector Store that stores embeddings and their metadata including text, jsons, images, audio, video, and more. It saves the data locally, in your cloud, or on Activeloop storage. It performs hybrid search including embeddings and their attributes.\n",
"\n",
"This notebook showcases basic functionality related to `Activeloop's Deep Lake`. While `Deep Lake` can store embeddings, it is capable of storing any type of data. It is a serverless data lake with version control, query engine and streaming dataloaders to deep learning frameworks. \n",
"\n",
"For more information, please see the Deep Lake [documentation](https://docs.activeloop.ai) or [api reference](https://docs.deeplake.ai)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install openai 'deeplake[enterprise]' tiktoken"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import DeepLake"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
"activeloop_token = getpass.getpass(\"activeloop token:\")\n",
"embeddings = OpenAIEmbeddings()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n",
"\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"embeddings = OpenAIEmbeddings()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a dataset locally at `./deeplake/`, then run similarity search. The Deeplake+LangChain integration uses Deep Lake datasets under the hood, so `dataset` and `vector store` are used interchangeably. To create a dataset in your own cloud, or in the Deep Lake storage, [adjust the path accordingly](https://docs.activeloop.ai/storage-and-credentials/storage-options)."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": []
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='./my_deeplake/', tensors=['embedding', 'id', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding embedding (42, 1536) float32 None \n",
" id text (42, 1) str None \n",
" metadata json (42, 1) str None \n",
" text text (42, 1) str None \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": []
}
],
"source": [
"db = DeepLake(\n",
" dataset_path=\"./my_deeplake/\", embedding=embeddings, overwrite=True\n",
")\n",
"db.add_documents(docs)\n",
"# or shorter\n",
"# db = DeepLake.from_documents(docs, dataset_path=\"./my_deeplake/\", embedding=embeddings, overwrite=True)\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = db.similarity_search(query)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"To disable dataset summary printings all the time, you can specify verbose=False during VectorStore initialization."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n"
]
}
],
"source": [
"print(docs[0].page_content)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Later, you can reload the dataset without recomputing embeddings"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Deep Lake Dataset in ./my_deeplake/ already exists, loading from the storage\n"
]
}
],
"source": [
"db = DeepLake(\n",
" dataset_path=\"./my_deeplake/\", embedding=embeddings, read_only=True\n",
")\n",
"docs = db.similarity_search(query)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Deep Lake, for now, is single writer and multiple reader. Setting `read_only=True` helps to avoid acquiring the writer lock."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Retrieval Question/Answering"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ubuntu/langchain_activeloop/langchain/libs/langchain/langchain/llms/openai.py:786: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI`\n",
" warnings.warn(\n"
]
}
],
"source": [
"from langchain.chains import RetrievalQA\n",
"from langchain.llms import OpenAIChat\n",
"\n",
"qa = RetrievalQA.from_chain_type(\n",
" llm=OpenAIChat(model=\"gpt-3.5-turbo\"),\n",
" chain_type=\"stuff\",\n",
" retriever=db.as_retriever(),\n",
")"
]
},
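{
"cell_type": "markdown",
"metadata": {},
"source": [
"The warning above points to the supported way of initializing chat models. A minimal sketch of the same chain using `ChatOpenAI` instead of the deprecated `OpenAIChat` (`qa_chat` is an illustrative name):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"# equivalent chain built with the non-deprecated chat model class\n",
"qa_chat = RetrievalQA.from_chain_type(\n",
"    llm=ChatOpenAI(model=\"gpt-3.5-turbo\"),\n",
"    chain_type=\"stuff\",\n",
"    retriever=db.as_retriever(),\n",
")"
]
},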
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'The president said that Ketanji Brown Jackson is a former top litigator in private practice and a former federal public defender. She comes from a family of public school educators and police officers. She is a consensus builder and has received a broad range of support since being nominated.'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"qa.run(query)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Attribute based filtering in metadata"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's create another vector store containing metadata with the year the documents were created."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": []
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='./my_deeplake/', tensors=['embedding', 'id', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding embedding (4, 1536) float32 None \n",
" id text (4, 1) str None \n",
" metadata json (4, 1) str None \n",
" text text (4, 1) str None \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": []
}
],
"source": [
"import random\n",
"\n",
"for d in docs:\n",
" d.metadata[\"year\"] = random.randint(2012, 2014)\n",
"\n",
"db = DeepLake.from_documents(\n",
" docs, embeddings, dataset_path=\"./my_deeplake/\", overwrite=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 4/4 [00:00<00:00, 2936.16it/s]\n"
]
},
{
"data": {
"text/plain": [
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWell also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLets pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013})]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"db.similarity_search(\n",
" \"What did the president say about Ketanji Brown Jackson\",\n",
" filter={\"metadata\": {\"year\": 2013}},\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Choosing distance function\n",
"Distance function `L2` for Euclidean, `L1` for Nuclear, `Max` l-infinity distance, `cos` for cosine similarity, `dot` for dot product "
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWell also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLets pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight Im offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2012})]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"db.similarity_search(\n",
" \"What did the president say about Ketanji Brown Jackson?\", distance_metric=\"cos\"\n",
")"
]
},
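{
"cell_type": "markdown",
"metadata": {},
"source": [
"To inspect the scores under a given metric, a sketch using `similarity_search_with_score` (assuming it forwards `distance_metric` the same way `similarity_search` does; for distance metrics, lower scores mean closer matches):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# returns (Document, score) pairs, here scored with Euclidean (L2) distance\n",
"docs_and_scores = db.similarity_search_with_score(\n",
"    \"What did the president say about Ketanji Brown Jackson?\",\n",
"    distance_metric=\"L2\",\n",
"    k=2,\n",
")\n",
"for doc, score in docs_and_scores:\n",
"    print(score, doc.page_content[:80])"
]
},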
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Maximal Marginal relevance\n",
"Using maximal marginal relevance"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='Tonight, Im announcing a crackdown on these companies overcharging American businesses and consumers. \\n\\nAnd as Wall Street firms take over more nursing homes, quality in those homes has gone down and costs have gone up. \\n\\nThat ends on my watch. \\n\\nMedicare is going to set higher standards for nursing homes and make sure your loved ones get the care they deserve and expect. \\n\\nWell also cut costs and keep the economy going strong by giving workers a fair shot, provide more training and apprenticeships, hire them based on their skills not degrees. \\n\\nLets pass the Paycheck Fairness Act and paid leave. \\n\\nRaise the minimum wage to $15 an hour and extend the Child Tax Credit, so no one has to raise a family in poverty. \\n\\nLets increase Pell Grants and increase our historic support of HBCUs, and invest in what Jill—our First Lady who teaches full-time—calls Americas best-kept secret: community colleges.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since shes been nominated, shes received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, weve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWeve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWere putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWere securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2013}),\n",
" Document(page_content='And for our LGBTQ+ Americans, lets finally get the bipartisan Equality Act to my desk. The onslaught of state laws targeting transgender Americans and their families is wrong. \\n\\nAs I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \\n\\nWhile it often appears that we never agree, that isnt true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice. \\n\\nAnd soon, well strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \\n\\nSo tonight Im offering a Unity Agenda for the Nation. Four big things we can do together. \\n\\nFirst, beat the opioid epidemic.', metadata={'source': '../../modules/state_of_the_union.txt', 'year': 2012})]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"db.max_marginal_relevance_search(\n",
" \"What did the president say about Ketanji Brown Jackson?\"\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Delete dataset"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": []
}
],
"source": [
"db.delete_dataset()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"and if delete fails you can also force delete"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": []
}
],
"source": [
"DeepLake.force_delete_by_path(\"./my_deeplake\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deep Lake datasets on cloud (Activeloop, AWS, GCS, etc.) or in memory\n",
"By default, Deep Lake datasets are stored locally. To store them in memory, in the Deep Lake Managed DB, or in any object storage, you can provide the [corresponding path and credentials when creating the vector store](https://docs.activeloop.ai/storage-and-credentials/storage-options). Some paths require registration with Activeloop and creation of an API token that can be [retrieved here](https://app.activeloop.ai/)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"ACTIVELOOP_TOKEN\"] = activeloop_token"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Your Deep Lake dataset has been successfully created!\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": []
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='hub://adilkhan/langchain_testing_python', tensors=['embedding', 'id', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding embedding (42, 1536) float32 None \n",
" id text (42, 1) str None \n",
" metadata json (42, 1) str None \n",
" text text (42, 1) str None \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": []
}
],
"source": [
"# Embed and store the texts\n",
"username = \"<USERNAME_OR_ORG>\" # your username on app.activeloop.ai\n",
"dataset_path = f\"hub://{username}/langchain_testing_python\" # could be also ./local/path (much faster locally), s3://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
"\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"embedding = OpenAIEmbeddings()\n",
"db = DeepLake(dataset_path=dataset_path, embedding=embeddings, overwrite=True)\n",
"ids = db.add_documents(docs)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n"
]
}
],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = db.similarity_search(query)\n",
"print(docs[0].page_content)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### `tensor_db` execution option "
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"In order to utilize Deep Lake's Managed Tensor Database, it is necessary to specify the runtime parameter as {'tensor_db': True} during the creation of the vector store. This configuration enables the execution of queries on the Managed Tensor Database, rather than on the client side. It should be noted that this functionality is not applicable to datasets stored locally or in-memory. In the event that a vector store has already been created outside of the Managed Tensor Database, it is possible to transfer it to the Managed Tensor Database by following the prescribed steps."
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Your Deep Lake dataset has been successfully created!\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"|"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='hub://adilkhan/langchain_testing', tensors=['embedding', 'id', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding embedding (42, 1536) float32 None \n",
" id text (42, 1) str None \n",
" metadata json (42, 1) str None \n",
" text text (42, 1) str None \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
}
],
"source": [
"# Embed and store the texts\n",
"username = \"<USERNAME_OR_ORG>\" # your username on app.activeloop.ai\n",
"dataset_path = f\"hub://{username}/langchain_testing\"\n",
"\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"embedding = OpenAIEmbeddings()\n",
"db = DeepLake(\n",
" dataset_path=dataset_path,\n",
" embedding=embeddings,\n",
" overwrite=True,\n",
" runtime={\"tensor_db\": True},\n",
")\n",
"ids = db.add_documents(docs)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### TQL Search"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Furthermore, the execution of queries is also supported within the similarity_search method, whereby the query can be specified utilizing Deep Lake's Tensor Query Language (TQL)."
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"search_id = db.vectorstore.dataset.id[0].numpy()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'8a6ff326-3a85-11ee-b840-13905694aaaf'"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"search_id[0]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"docs = db.similarity_search(\n",
" query=None,\n",
" tql=f\"SELECT * WHERE id == '{search_id[0]}'\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='hub://adilkhan/langchain_testing', tensors=['embedding', 'id', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding embedding (42, 1536) float32 None \n",
" id text (42, 1) str None \n",
" metadata json (42, 1) str None \n",
" text text (42, 1) str None \n"
]
}
],
"source": [
"db.vectorstore.summary()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Creating vector stores on AWS S3"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"s3://hub-2.0-datasets-n/langchain_test loaded successfully.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating ingest: 100%|██████████| 1/1 [00:10<00:00\n",
"\\"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='s3://hub-2.0-datasets-n/langchain_test', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (4, 1536) float32 None \n",
" ids text (4, 1) str None \n",
" metadata json (4, 1) str None \n",
" text text (4, 1) str None \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
}
],
"source": [
"dataset_path = f\"s3://BUCKET/langchain_test\" # could be also ./local/path (much faster locally), hub://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
"\n",
"embedding = OpenAIEmbeddings()\n",
"db = DeepLake.from_documents(\n",
" docs,\n",
" dataset_path=dataset_path,\n",
" embedding=embeddings,\n",
" overwrite=True,\n",
" creds={\n",
" \"aws_access_key_id\": os.environ[\"AWS_ACCESS_KEY_ID\"],\n",
" \"aws_secret_access_key\": os.environ[\"AWS_SECRET_ACCESS_KEY\"],\n",
" \"aws_session_token\": os.environ[\"AWS_SESSION_TOKEN\"], # Optional\n",
" },\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deep Lake API\n",
"you can access the Deep Lake dataset at `db.vectorstore`"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='hub://adilkhan/langchain_testing', tensors=['embedding', 'id', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding embedding (42, 1536) float32 None \n",
" id text (42, 1) str None \n",
" metadata json (42, 1) str None \n",
" text text (42, 1) str None \n"
]
}
],
"source": [
"# get structure of the dataset\n",
"db.vectorstore.summary()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# get embeddings numpy array\n",
"embeds = db.vectorstore.dataset.embedding.numpy()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transfer local dataset to cloud\n",
"Copy already created dataset to the cloud. You can also transfer from cloud to local."
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Copying dataset: 100%|██████████| 56/56 [00:38<00:00\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/langchain_test_copy\n",
"Your Deep Lake dataset has been successfully created!\n",
"The dataset is private so make sure you are logged in!\n"
]
},
{
"data": {
"text/plain": [
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import deeplake\n",
"\n",
"username = \"davitbun\" # your username on app.activeloop.ai\n",
"source = f\"hub://{username}/langchain_testing\" # could be local, s3, gcs, etc.\n",
"destination = f\"hub://{username}/langchain_test_copy\" # could be local, s3, gcs, etc.\n",
"\n",
"deeplake.deepcopy(src=source, dest=destination, overwrite=True)"
]
},
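{
"cell_type": "markdown",
"metadata": {},
"source": [
"The reverse direction works the same way; a sketch copying the cloud dataset back to a local path (the destination path here is illustrative):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# copy from the cloud back to a local directory\n",
"deeplake.deepcopy(src=destination, dest=\"./my_deeplake_copy/\", overwrite=True)"
]
},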
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/langchain_test_copy\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"hub://davitbun/langchain_test_copy loaded successfully.\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Deep Lake Dataset in hub://davitbun/langchain_test_copy already exists, loading from the storage\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (4, 1536) float32 None \n",
" ids text (4, 1) str None \n",
" metadata json (4, 1) str None \n",
" text text (4, 1) str None \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating ingest: 100%|██████████| 1/1 [00:31<00:00\n",
"-"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (8, 1536) float32 None \n",
" ids text (8, 1) str None \n",
" metadata json (8, 1) str None \n",
" text text (8, 1) str None \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"data": {
"text/plain": [
"['ad42f3fe-e188-11ed-b66d-41c5f7b85421',\n",
" 'ad42f3ff-e188-11ed-b66d-41c5f7b85421',\n",
" 'ad42f400-e188-11ed-b66d-41c5f7b85421',\n",
" 'ad42f401-e188-11ed-b66d-41c5f7b85421']"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"db = DeepLake(dataset_path=destination, embedding=embeddings)\n",
"db.add_documents(docs)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.6 ('langchain_venv': venv)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"vscode": {
"interpreter": {
"hash": "0b0bacaffd430edc3085253ee7ee1bcda9f76a5e66b369dda8ba68baa6d14ba7"
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -0,0 +1,300 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# BagelDB\n",
"\n",
"> [BagelDB](https://www.bageldb.ai/) (`Open Vector Database for AI`), is like GitHub for AI data.\n",
"It is a collaborative platform where users can create,\n",
"share, and manage vector datasets. It can support private projects for independent developers,\n",
"internal collaborations for enterprises, and public contributions for data DAOs.\n",
"\n",
"### Installation and Setup\n",
"\n",
"```bash\n",
"pip install betabageldb\n",
"```\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create VectorStore from texts"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from langchain.vectorstores import Bagel\n",
"\n",
"texts = [\"hello bagel\", \"hello langchain\", \"I love salad\", \"my car\", \"a dog\"]\n",
"# create cluster and add texts\n",
"cluster = Bagel.from_texts(cluster_name=\"testing\", texts=texts)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='hello bagel', metadata={}),\n",
" Document(page_content='my car', metadata={}),\n",
" Document(page_content='I love salad', metadata={})]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# similarity search\n",
"cluster.similarity_search(\"bagel\", k=3)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(Document(page_content='hello bagel', metadata={}), 0.27392977476119995),\n",
" (Document(page_content='my car', metadata={}), 1.4783176183700562),\n",
" (Document(page_content='I love salad', metadata={}), 1.5342965126037598)]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# the score is a distance metric, so lower is better\n",
"cluster.similarity_search_with_score(\"bagel\", k=3)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# delete the cluster\n",
"cluster.delete_cluster()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create VectorStore from docs"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
"documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)[:10]"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"# create cluster with docs\n",
"cluster = Bagel.from_documents(cluster_name=\"testing_with_docs\", documents=docs)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the \n"
]
}
],
"source": [
"# similarity search\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = cluster.similarity_search(query)\n",
"print(docs[0].page_content[:102])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get all text/doc from Cluster"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"texts = [\"hello bagel\", \"this is langchain\"]\n",
"cluster = Bagel.from_texts(cluster_name=\"testing\", texts=texts)\n",
"cluster_data = cluster.get()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['ids', 'embeddings', 'metadatas', 'documents'])"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# all keys\n",
"cluster_data.keys()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'ids': ['578c6d24-3763-11ee-a8ab-b7b7b34f99ba',\n",
" '578c6d25-3763-11ee-a8ab-b7b7b34f99ba',\n",
" 'fb2fc7d8-3762-11ee-a8ab-b7b7b34f99ba',\n",
" 'fb2fc7d9-3762-11ee-a8ab-b7b7b34f99ba',\n",
" '6b40881a-3762-11ee-a8ab-b7b7b34f99ba',\n",
" '6b40881b-3762-11ee-a8ab-b7b7b34f99ba',\n",
" '581e691e-3762-11ee-a8ab-b7b7b34f99ba',\n",
" '581e691f-3762-11ee-a8ab-b7b7b34f99ba'],\n",
" 'embeddings': None,\n",
" 'metadatas': [{}, {}, {}, {}, {}, {}, {}, {}],\n",
" 'documents': ['hello bagel',\n",
" 'this is langchain',\n",
" 'hello bagel',\n",
" 'this is langchain',\n",
" 'hello bagel',\n",
" 'this is langchain',\n",
" 'hello bagel',\n",
" 'this is langchain']}"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# all values and keys\n",
"cluster_data"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"cluster.delete_cluster()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create cluster with metadata & filter using metadata"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(Document(page_content='hello bagel', metadata={'source': 'notion'}), 0.0)]"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"texts = [\"hello bagel\", \"this is langchain\"]\n",
"metadatas = [{\"source\": \"notion\"}, {\"source\": \"google\"}]\n",
"\n",
"cluster = Bagel.from_texts(cluster_name=\"testing\", texts=texts, metadatas=metadatas)\n",
"cluster.similarity_search_with_score(\"hello bagel\", where={\"source\": \"notion\"})"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"# delete the cluster\n",
"cluster.delete_cluster()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,236 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# DashVector\n",
"\n",
"> [DashVector](https://help.aliyun.com/document_detail/2510225.html) is a fully-managed vectorDB service that supports high-dimension dense and sparse vectors, real-time insertion and filtered search. It is built to scale automatically and can adapt to different application requirements.\n",
"\n",
"This notebook shows how to use functionality related to the `DashVector` vector database.\n",
"\n",
"To use DashVector, you must have an API key.\n",
"Here are the [installation instructions](https://help.aliyun.com/document_detail/2510223.html)."
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## Install"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"!pip install dashvector dashscope"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"We want to use `DashScopeEmbeddings` so we also have to get the Dashscope API Key."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"pycharm": {
"name": "#%%\n",
"is_executing": true
},
"ExecuteTime": {
"end_time": "2023-08-11T10:37:15.091585Z",
"start_time": "2023-08-11T10:36:51.859753Z"
}
},
"outputs": [],
"source": [
"import os\n",
"import getpass\n",
"\n",
"os.environ[\"DASHVECTOR_API_KEY\"] = getpass.getpass(\"DashVector API Key:\")\n",
"os.environ[\"DASHSCOPE_API_KEY\"] = getpass.getpass(\"DashScope API Key:\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## Example"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"pycharm": {
"name": "#%%\n",
"is_executing": true
},
"ExecuteTime": {
"end_time": "2023-08-11T10:42:30.243460Z",
"start_time": "2023-08-11T10:42:27.783785Z"
}
},
"outputs": [],
"source": [
"from langchain.embeddings.dashscope import DashScopeEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import DashVector"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"pycharm": {
"is_executing": true,
"name": "#%%\n"
},
"ExecuteTime": {
"end_time": "2023-08-11T10:42:30.391580Z",
"start_time": "2023-08-11T10:42:30.249021Z"
}
},
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n",
"\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"embeddings = DashScopeEmbeddings()"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"We can create DashVector from documents."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"dashvector = DashVector.from_documents(docs, embeddings)\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = dashvector.similarity_search(query)\n",
"print(docs)"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"We can add texts with meta datas and ids, and search with meta filter."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"pycharm": {
"name": "#%%\n"
},
"ExecuteTime": {
"end_time": "2023-08-11T10:42:51.641309Z",
"start_time": "2023-08-11T10:42:51.132109Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Document(page_content='baz', metadata={'key': 2})]\n"
]
}
],
"source": [
"texts = [\"foo\", \"bar\", \"baz\"]\n",
"metadatas = [{\"key\": i} for i in range(len(texts))]\n",
"ids = [\"0\", \"1\", \"2\"]\n",
"\n",
"dashvector.add_texts(texts, metadatas=metadatas, ids=ids)\n",
"\n",
"docs = dashvector.similarity_search(\"foo\", filter=\"key = 2\")\n",
"print(docs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 1
}

View File

@ -1,719 +0,0 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Activeloop's Deep Lake\n",
"\n",
">[Activeloop's Deep Lake](https://docs.activeloop.ai/) as a Multi-Modal Vector Store that stores embeddings and their metadata including text, jsons, images, audio, video, and more. It saves the data locally, in your cloud, or on Activeloop storage. It performs hybrid search including embeddings and their attributes.\n",
"\n",
"This notebook showcases basic functionality related to `Activeloop's Deep Lake`. While `Deep Lake` can store embeddings, it is capable of storing any type of data. It is a serverless data lake with version control, query engine and streaming dataloaders to deep learning frameworks. \n",
"\n",
"For more information, please see the Deep Lake [documentation](https://docs.activeloop.ai) or [api reference](https://docs.deeplake.ai)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install openai 'deeplake[enterprise]' tiktoken"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import DeepLake"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import os\n",
"import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
"activeloop_token = getpass.getpass(\"activeloop token:\")\n",
"embeddings = OpenAIEmbeddings()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n",
"\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
"documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"embeddings = OpenAIEmbeddings()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a dataset locally at `./deeplake/`, then run similarity search. The Deeplake+LangChain integration uses Deep Lake datasets under the hood, so `dataset` and `vector store` are used interchangeably. To create a dataset in your own cloud, or in the Deep Lake storage, [adjust the path accordingly](https://docs.activeloop.ai/storage-and-credentials/storage-options)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"db = DeepLake(\n",
" dataset_path=\"./my_deeplake/\", embedding_function=embeddings, overwrite=True\n",
")\n",
"db.add_documents(docs)\n",
"# or shorter\n",
"# db = DeepLake.from_documents(docs, dataset_path=\"./my_deeplake/\", embedding=embeddings, overwrite=True)\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = db.similarity_search(query)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(docs[0].page_content)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Later, you can reload the dataset without recomputing embeddings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"db = DeepLake(\n",
" dataset_path=\"./my_deeplake/\", embedding_function=embeddings, read_only=True\n",
")\n",
"docs = db.similarity_search(query)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Deep Lake, for now, is single writer and multiple reader. Setting `read_only=True` helps to avoid acquiring the writer lock."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Retrieval Question/Answering"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains import RetrievalQA\n",
"from langchain.llms import OpenAIChat\n",
"\n",
"qa = RetrievalQA.from_chain_type(\n",
" llm=OpenAIChat(model=\"gpt-3.5-turbo\"),\n",
" chain_type=\"stuff\",\n",
" retriever=db.as_retriever(),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"qa.run(query)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Attribute based filtering in metadata"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's create another vector store containing metadata with the year the documents were created."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"\n",
"for d in docs:\n",
" d.metadata[\"year\"] = random.randint(2012, 2014)\n",
"\n",
"db = DeepLake.from_documents(\n",
" docs, embeddings, dataset_path=\"./my_deeplake/\", overwrite=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"db.similarity_search(\n",
" \"What did the president say about Ketanji Brown Jackson\",\n",
" filter={\"metadata\": {\"year\": 2013}},\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Choosing distance function\n",
"Distance function `L2` for Euclidean, `L1` for Nuclear, `Max` l-infinity distance, `cos` for cosine similarity, `dot` for dot product "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"db.similarity_search(\n",
" \"What did the president say about Ketanji Brown Jackson?\", distance_metric=\"cos\"\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Maximal Marginal relevance\n",
"Using maximal marginal relevance"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"db.max_marginal_relevance_search(\n",
" \"What did the president say about Ketanji Brown Jackson?\"\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Delete dataset"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": []
}
],
"source": [
"db.delete_dataset()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"and if delete fails you can also force delete"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": []
}
],
"source": [
"DeepLake.force_delete_by_path(\"./my_deeplake\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deep Lake datasets on cloud (Activeloop, AWS, GCS, etc.) or in memory\n",
"By default, Deep Lake datasets are stored locally. To store them in memory, in the Deep Lake Managed DB, or in any object storage, you can provide the [corresponding path and credentials when creating the vector store](https://docs.activeloop.ai/storage-and-credentials/storage-options). Some paths require registration with Activeloop and creation of an API token that can be [retrieved here](https://app.activeloop.ai/)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"ACTIVELOOP_TOKEN\"] = activeloop_token"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Embed and store the texts\n",
"username = \"<username>\" # your username on app.activeloop.ai\n",
"dataset_path = f\"hub://{username}/langchain_testing_python\" # could be also ./local/path (much faster locally), s3://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
"\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"embedding = OpenAIEmbeddings()\n",
"db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings, overwrite=True)\n",
"db.add_documents(docs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = db.similarity_search(query)\n",
"print(docs[0].page_content)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### `tensor_db` execution option "
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"In order to utilize Deep Lake's Managed Tensor Database, it is necessary to specify the runtime parameter as {'tensor_db': True} during the creation of the vector store. This configuration enables the execution of queries on the Managed Tensor Database, rather than on the client side. It should be noted that this functionality is not applicable to datasets stored locally or in-memory. In the event that a vector store has already been created outside of the Managed Tensor Database, it is possible to transfer it to the Managed Tensor Database by following the prescribed steps."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Embed and store the texts\n",
"username = \"adilkhan\" # your username on app.activeloop.ai\n",
"dataset_path = f\"hub://{username}/langchain_testing\"\n",
"\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"embedding = OpenAIEmbeddings()\n",
"db = DeepLake(\n",
" dataset_path=dataset_path,\n",
" embedding_function=embeddings,\n",
" overwrite=True,\n",
" runtime={\"tensor_db\": True},\n",
")\n",
"db.add_documents(docs)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### TQL Search"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Furthermore, the execution of queries is also supported within the similarity_search method, whereby the query can be specified utilizing Deep Lake's Tensor Query Language (TQL)."
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"search_id = db.vectorstore.dataset.id[0].numpy()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"docs = db.similarity_search(\n",
" query=None,\n",
" tql_query=f\"SELECT * WHERE id == '{search_id[0]}'\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"docs"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Creating vector stores on AWS S3"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"s3://hub-2.0-datasets-n/langchain_test loaded successfully.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating ingest: 100%|██████████| 1/1 [00:10<00:00\n",
"\\"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='s3://hub-2.0-datasets-n/langchain_test', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (4, 1536) float32 None \n",
" ids text (4, 1) str None \n",
" metadata json (4, 1) str None \n",
" text text (4, 1) str None \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
}
],
"source": [
"dataset_path = f\"s3://BUCKET/langchain_test\" # could be also ./local/path (much faster locally), hub://bucket/path/to/dataset, gcs://path/to/dataset, etc.\n",
"\n",
"embedding = OpenAIEmbeddings()\n",
"db = DeepLake.from_documents(\n",
" docs,\n",
" dataset_path=dataset_path,\n",
" embedding=embeddings,\n",
" overwrite=True,\n",
" creds={\n",
" \"aws_access_key_id\": os.environ[\"AWS_ACCESS_KEY_ID\"],\n",
" \"aws_secret_access_key\": os.environ[\"AWS_SECRET_ACCESS_KEY\"],\n",
" \"aws_session_token\": os.environ[\"AWS_SESSION_TOKEN\"], # Optional\n",
" },\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deep Lake API\n",
"you can access the Deep Lake dataset at `db.vectorstore`"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='hub://adilkhan/langchain_testing', tensors=['embedding', 'id', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding embedding (42, 1536) float32 None \n",
" id text (42, 1) str None \n",
" metadata json (42, 1) str None \n",
" text text (42, 1) str None \n"
]
}
],
"source": [
"# get structure of the dataset\n",
"db.vectorstore.summary()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# get embeddings numpy array\n",
"embeds = db.vectorstore.dataset.embedding.numpy()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transfer local dataset to cloud\n",
"Copy already created dataset to the cloud. You can also transfer from cloud to local."
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Copying dataset: 100%|██████████| 56/56 [00:38<00:00\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/langchain_test_copy\n",
"Your Deep Lake dataset has been successfully created!\n",
"The dataset is private so make sure you are logged in!\n"
]
},
{
"data": {
"text/plain": [
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import deeplake\n",
"\n",
"username = \"davitbun\" # your username on app.activeloop.ai\n",
"source = f\"hub://{username}/langchain_test\" # could be local, s3, gcs, etc.\n",
"destination = f\"hub://{username}/langchain_test_copy\" # could be local, s3, gcs, etc.\n",
"\n",
"deeplake.deepcopy(src=source, dest=destination, overwrite=True)"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/langchain_test_copy\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"hub://davitbun/langchain_test_copy loaded successfully.\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Deep Lake Dataset in hub://davitbun/langchain_test_copy already exists, loading from the storage\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (4, 1536) float32 None \n",
" ids text (4, 1) str None \n",
" metadata json (4, 1) str None \n",
" text text (4, 1) str None \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating ingest: 100%|██████████| 1/1 [00:31<00:00\n",
"-"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset(path='hub://davitbun/langchain_test_copy', tensors=['embedding', 'ids', 'metadata', 'text'])\n",
"\n",
" tensor htype shape dtype compression\n",
" ------- ------- ------- ------- ------- \n",
" embedding generic (8, 1536) float32 None \n",
" ids text (8, 1) str None \n",
" metadata json (8, 1) str None \n",
" text text (8, 1) str None \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" \r"
]
},
{
"data": {
"text/plain": [
"['ad42f3fe-e188-11ed-b66d-41c5f7b85421',\n",
" 'ad42f3ff-e188-11ed-b66d-41c5f7b85421',\n",
" 'ad42f400-e188-11ed-b66d-41c5f7b85421',\n",
" 'ad42f401-e188-11ed-b66d-41c5f7b85421']"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"db = DeepLake(dataset_path=destination, embedding_function=embeddings)\n",
"db.add_documents(docs)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.6 ('langchain_venv': venv)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
},
"vscode": {
"interpreter": {
"hash": "0b0bacaffd430edc3085253ee7ee1bcda9f76a5e66b369dda8ba68baa6d14ba7"
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@ -0,0 +1,246 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "683953b3",
"metadata": {},
"source": [
"# Dingo\n",
"\n",
">[Dingo](https://dingodb.readthedocs.io/en/latest/) is a distributed multi-mode vector database, which combines the characteristics of data lakes and vector databases, and can store data of any type and size (Key-Value, PDF, audio, video, etc.). It has real-time low-latency processing capabilities to achieve rapid insight and response, and can efficiently conduct instant analysis and process multi-modal data.\n",
"\n",
"This notebook shows how to use functionality related to the DingoDB vector database.\n",
"\n",
"To run, you should have a [DingoDB instance up and running](https://github.com/dingodb/dingo-deploy/blob/main/README.md)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a62cff8a-bcf7-4e33-bbbc-76999c2e3e20",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"!pip install dingodb\n",
"or install latest:\n",
"!pip install git+https://git@github.com/dingodb/pydingo.git"
]
},
{
"cell_type": "markdown",
"id": "7a0f9e02-8eb0-4aef-b11f-8861360472ee",
"metadata": {},
"source": [
"We want to use OpenAIEmbeddings so we have to get the OpenAI API Key."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "8b6ed9cd-81b9-46e5-9c20-5aafca2844d0",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"OpenAI API Key:········\n"
]
}
],
"source": [
"import os\n",
"import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "aac9563e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import Dingo\n",
"from langchain.document_loaders import TextLoader"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a3c3999a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n",
"\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
"documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"embeddings = OpenAIEmbeddings()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "dcf88bdf",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from dingodb import DingoDB\n",
"\n",
"index_name = \"langchain-demo\"\n",
"\n",
"dingo_client = DingoDB(user=\"\", password=\"\", host=[\"127.0.0.1:13000\"])\n",
"# First, check if our index already exists. If it doesn't, we create it\n",
"if index_name not in dingo_client.get_index():\n",
" # we create a new index, modify to your own\n",
" dingo_client.create_index(\n",
" index_name=index_name,\n",
" dimension=1536,\n",
" metric_type='cosine',\n",
" auto_id=False\n",
")\n",
"\n",
"# The OpenAI embedding model `text-embedding-ada-002 uses 1536 dimensions`\n",
"docsearch = Dingo.from_documents(docs, embeddings, client=dingo_client, index_name=index_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a8c513ab",
"metadata": {},
"outputs": [],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = docsearch.similarity_search(query)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "fc516993",
"metadata": {},
"outputs": [],
"source": [
"print(docs[0].page_content)"
]
},
{
"cell_type": "markdown",
"id": "1eca81e4",
"metadata": {},
"source": [
"### Adding More Text to an Existing Index\n",
"\n",
"More text can embedded and upserted to an existing Dingo index using the `add_texts` function"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e40d558b",
"metadata": {},
"outputs": [],
"source": [
"vectorstore = Dingo(embeddings, \"text\", client=dingo_client, index_name=index_name)\n",
"\n",
"vectorstore.add_texts([\"More text!\"])"
]
},
{
"cell_type": "markdown",
"id": "bcb858a8",
"metadata": {},
"source": [
"### Maximal Marginal Relevance Searches\n",
"\n",
"In addition to using similarity search in the retriever object, you can also use `mmr` as retriever."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "649083ab",
"metadata": {},
"outputs": [],
"source": [
"retriever = docsearch.as_retriever(search_type=\"mmr\")\n",
"matched_docs = retriever.get_relevant_documents(query)\n",
"for i, d in enumerate(matched_docs):\n",
" print(f\"\\n## Document {i}\\n\")\n",
" print(d.page_content)"
]
},
{
"cell_type": "markdown",
"id": "7d3831ad",
"metadata": {},
"source": [
"Or use `max_marginal_relevance_search` directly:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "732f58b1",
"metadata": {},
"outputs": [],
"source": [
"found_docs = docsearch.max_marginal_relevance_search(query, k=2, fetch_k=10)\n",
"for i, doc in enumerate(found_docs):\n",
" print(f\"{i + 1}.\", doc.page_content, \"\\n\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large

View File

@ -35,7 +35,7 @@
"\n",
" -- Create a table to store your documents\n",
" create table documents (\n",
" id bigserial primary key,\n",
" id uuid primary key,\n",
" content text, -- corresponds to Document.pageContent\n",
" metadata jsonb, -- corresponds to Document.metadata\n",
" embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed\n",

View File

@ -0,0 +1,494 @@
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Zep\n",
"\n",
"Zep is an open source long-term memory store for LLM applications. Zep makes it easy to add relevant documents,\n",
"chat history memory & rich user data to your LLM app's prompts.\n",
"\n",
"**Note:** The `ZepVectorStore` works with `Documents` and is intended to be used as a `Retriever`.\n",
"It offers separate functionality to Zep's `ZepMemory` class, which is designed for persisting, enriching\n",
"and searching your user's chat history.\n",
"\n",
"## Why Zep's VectorStore? 🤖🚀\n",
"Zep automatically embeds documents added to the Zep Vector Store using low-latency models local to the Zep server.\n",
"The Zep client also offers async interfaces for all document operations. These two together with Zep's chat memory\n",
" functionality make Zep ideal for building conversational LLM apps where latency and performance are important.\n",
"\n",
"## Installation\n",
"Follow the [Zep Quickstart Guide](https://docs.getzep.com/deployment/quickstart/) to install and get started with Zep.\n",
"\n",
"## Usage\n",
"\n",
"You'll need your Zep API URL and optionally an API key to use the Zep VectorStore. \n",
"See the [Zep docs](https://docs.getzep.com) for more information.\n",
"\n",
"In the examples below, we're using Zep's auto-embedding feature which automatically embed documents on the Zep server \n",
"using low-latency embedding models.\n",
"\n",
"## Note\n",
"- These examples use Zep's async interfaces. Call sync interfaces by removing the `a` prefix from the method names.\n",
"- If you pass in an `Embeddings` instance Zep will use this to embed documents rather than auto-embed them.\n",
"You must also set your document collection to `isAutoEmbedded === false`. \n",
"- If you set your collection to `isAutoEmbedded === false`, you must pass in an `Embeddings` instance."
],
"metadata": {
"collapsed": false
},
"id": "9eb8dfa6fdb71ef5"
},
{
"cell_type": "markdown",
"source": [
"## Load or create a Collection from documents"
],
"metadata": {
"collapsed": false
},
"id": "9a3a11aab1412d98"
},
{
"cell_type": "code",
"execution_count": 1,
"outputs": [],
"source": [
"from uuid import uuid4\n",
"\n",
"from langchain.document_loaders import WebBaseLoader\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain.vectorstores import ZepVectorStore\n",
"from langchain.vectorstores.zep import CollectionConfig\n",
"\n",
"ZEP_API_URL = \"http://localhost:8000\" # this is the API url of your Zep instance\n",
"ZEP_API_KEY = \"<optional_key>\" # optional API Key for your Zep instance\n",
"collection_name = f\"babbage{uuid4().hex}\" # a unique collection name. alphanum only\n",
"\n",
"# Collection config is needed if we're creating a new Zep Collection\n",
"config = CollectionConfig(\n",
" name=collection_name,\n",
" description=\"<optional description>\",\n",
" metadata={\"optional_metadata\": \"associated with the collection\"},\n",
" is_auto_embedded=True, # we'll have Zep embed our documents using its low-latency embedder\n",
" embedding_dimensions=1536 # this should match the model you've configured Zep to use.\n",
")\n",
"\n",
"# load the document\n",
"article_url = \"https://www.gutenberg.org/cache/epub/71292/pg71292.txt\"\n",
"loader = WebBaseLoader(article_url)\n",
"documents = loader.load()\n",
"\n",
"# split it into chunks\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"# Instantiate the VectorStore. Since the collection does not already exist in Zep,\n",
"# it will be created and populated with the documents we pass in.\n",
"vs = ZepVectorStore.from_documents(docs,\n",
" collection_name=collection_name,\n",
" config=config,\n",
" api_url=ZEP_API_URL,\n",
" api_key=ZEP_API_KEY\n",
" )"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-13T01:07:50.672390Z",
"start_time": "2023-08-13T01:07:48.777799Z"
}
},
"id": "519418421a32e4d"
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Embedding status: 0/402 documents embedded\n",
"Embedding status: 0/402 documents embedded\n",
"Embedding status: 402/402 documents embedded\n"
]
}
],
"source": [
"# wait for the collection embedding to complete\n",
"\n",
"async def wait_for_ready(collection_name: str) -> None:\n",
" from zep_python import ZepClient\n",
" import time\n",
"\n",
" client = ZepClient(ZEP_API_URL, ZEP_API_KEY)\n",
"\n",
" while True:\n",
" c = await client.document.aget_collection(collection_name)\n",
" print(\n",
" \"Embedding status: \"\n",
" f\"{c.document_embedded_count}/{c.document_count} documents embedded\"\n",
" )\n",
" time.sleep(1)\n",
" if c.status == \"ready\":\n",
" break\n",
"\n",
"\n",
"await wait_for_ready(collection_name)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-13T01:07:53.807663Z",
"start_time": "2023-08-13T01:07:50.671241Z"
}
},
"id": "201dc57b124cb6d7"
},
{
"cell_type": "markdown",
"source": [
"## Simarility Search Query over the Collection"
],
"metadata": {
"collapsed": false
},
"id": "94ca9dfa7d0ecaa5"
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tables necessary to determine the places of the planets are not less\r\n",
"necessary than those for the sun, moon, and stars. Some notion of the\r\n",
"number and complexity of these tables may be formed, when we state that\r\n",
"the positions of the two principal planets, (and these the most\r\n",
"necessary for the navigator,) Jupiter and Saturn, require each not less\r\n",
"than one hundred and sixteen tables. Yet it is not only necessary to\r\n",
"predict the position of these bodies, but it is likewise expedient to -> 0.8998482592744614 \n",
"====\n",
"\n",
"tabulate the motions of the four satellites of Jupiter, to predict the\r\n",
"exact times at which they enter his shadow, and at which their shadows\r\n",
"cross his disc, as well as the times at which they are interposed\r\n",
"between him and the Earth, and he between them and the Earth.\r\n",
"\r\n",
"Among the extensive classes of tables here enumerated, there are several\r\n",
"which are in their nature permanent and unalterable, and would never\r\n",
"require to be recomputed, if they could once be computed with perfect -> 0.8976143854195493 \n",
"====\n",
"\n",
"the scheme of notation thus applied, immediately suggested the\r\n",
"advantages which must attend it as an instrument for expressing the\r\n",
"structure, operation, and circulation of the animal system; and we\r\n",
"entertain no doubt of its adequacy for that purpose. Not only the\r\n",
"mechanical connexion of the solid members of the bodies of men and\r\n",
"animals, but likewise the structure and operation of the softer parts,\r\n",
"including the muscles, integuments, membranes, &c. the nature, motion, -> 0.889982614061763 \n",
"====\n"
]
}
],
"source": [
"# query it\n",
"query = \"what is the structure of our solar system?\"\n",
"docs_scores = await vs.asimilarity_search_with_relevance_scores(query, k=3)\n",
"\n",
"# print results\n",
"for d, s in docs_scores:\n",
" print(d.page_content, \" -> \", s, \"\\n====\\n\")"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-13T01:07:54.195988Z",
"start_time": "2023-08-13T01:07:53.808550Z"
}
},
"id": "1998de0a96fe89c3"
},
{
"cell_type": "markdown",
"source": [
"## Search over Collection Re-ranked by MMR"
],
"metadata": {
"collapsed": false
},
"id": "e02b61a9af0b2c80"
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tables necessary to determine the places of the planets are not less\r\n",
"necessary than those for the sun, moon, and stars. Some notion of the\r\n",
"number and complexity of these tables may be formed, when we state that\r\n",
"the positions of the two principal planets, (and these the most\r\n",
"necessary for the navigator,) Jupiter and Saturn, require each not less\r\n",
"than one hundred and sixteen tables. Yet it is not only necessary to\r\n",
"predict the position of these bodies, but it is likewise expedient to \n",
"====\n",
"\n",
"the scheme of notation thus applied, immediately suggested the\r\n",
"advantages which must attend it as an instrument for expressing the\r\n",
"structure, operation, and circulation of the animal system; and we\r\n",
"entertain no doubt of its adequacy for that purpose. Not only the\r\n",
"mechanical connexion of the solid members of the bodies of men and\r\n",
"animals, but likewise the structure and operation of the softer parts,\r\n",
"including the muscles, integuments, membranes, &c. the nature, motion, \n",
"====\n",
"\n",
"tabulate the motions of the four satellites of Jupiter, to predict the\r\n",
"exact times at which they enter his shadow, and at which their shadows\r\n",
"cross his disc, as well as the times at which they are interposed\r\n",
"between him and the Earth, and he between them and the Earth.\r\n",
"\r\n",
"Among the extensive classes of tables here enumerated, there are several\r\n",
"which are in their nature permanent and unalterable, and would never\r\n",
"require to be recomputed, if they could once be computed with perfect \n",
"====\n"
]
}
],
"source": [
"query = \"what is the structure of our solar system?\"\n",
"docs = await vs.asearch(query, search_type=\"mmr\", k=3)\n",
"\n",
"for d in docs:\n",
" print(d.page_content, \"\\n====\\n\")"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-13T01:07:54.394873Z",
"start_time": "2023-08-13T01:07:54.180901Z"
}
},
"id": "488112da752b1d58"
},
{
"cell_type": "markdown",
"source": [
"# Filter by Metadata\n",
"\n",
"Use a metadata filter to narrow down results. First, load another book: \"Adventures of Sherlock Holmes\""
],
"metadata": {
"collapsed": false
},
"id": "42455e31d4ab0d68"
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Embedding status: 402/1692 documents embedded\n",
"Embedding status: 402/1692 documents embedded\n",
"Embedding status: 552/1692 documents embedded\n",
"Embedding status: 702/1692 documents embedded\n",
"Embedding status: 1002/1692 documents embedded\n",
"Embedding status: 1002/1692 documents embedded\n",
"Embedding status: 1152/1692 documents embedded\n",
"Embedding status: 1302/1692 documents embedded\n",
"Embedding status: 1452/1692 documents embedded\n",
"Embedding status: 1602/1692 documents embedded\n",
"Embedding status: 1692/1692 documents embedded\n"
]
}
],
"source": [
"# Let's add more content to the existing Collection\n",
"article_url = \"https://www.gutenberg.org/files/48320/48320-0.txt\"\n",
"loader = WebBaseLoader(article_url)\n",
"documents = loader.load()\n",
"\n",
"# split it into chunks\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n",
"\n",
"await vs.aadd_documents(docs)\n",
"\n",
"await wait_for_ready(collection_name)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-13T01:08:06.323569Z",
"start_time": "2023-08-13T01:07:54.381822Z"
}
},
"id": "146c8a96201c0ab9"
},
{
"cell_type": "markdown",
"source": [
"### We see results from both books. Note the `source` metadata"
],
"metadata": {
"collapsed": false
},
"id": "5b225f3ae1e61de8"
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"by that body to Mr Babbage:--'In no department of science, or of the\r\n",
"arts, does this discovery promise to be so eminently useful as in that\r\n",
"of astronomy, and its kindred sciences, with the various arts dependent\r\n",
"on them. In none are computations more operose than those which\r\n",
"astronomy in particular requires;--in none are preparatory facilities\r\n",
"more needful;--in none is error more detrimental. The practical\r\n",
"astronomer is interrupted in his pursuit, and diverted from his task of -> {'source': 'https://www.gutenberg.org/cache/epub/71292/pg71292.txt'} \n",
"====\n",
"\n",
"possess all knowledge which is likely to be useful to him in his work,\r\n",
"and this I have endeavored in my case to do. If I remember rightly, you\r\n",
"on one occasion, in the early days of our friendship, defined my limits\r\n",
"in a very precise fashion.”\r\n",
"\r\n",
"“Yes,” I answered, laughing. “It was a singular document. Philosophy,\r\n",
"astronomy, and politics were marked at zero, I remember. Botany\r\n",
"variable, geology profound as regards the mud-stains from any region -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n",
"====\n",
"\n",
"in all its relations; but above all, with Astronomy and Navigation. So\r\n",
"important have they been considered, that in many instances large sums\r\n",
"have been appropriated by the most enlightened nations in the production\r\n",
"of them; and yet so numerous and insurmountable have been the\r\n",
"difficulties attending the attainment of this end, that after all, even\r\n",
"navigators, putting aside every other department of art and science,\r\n",
"have, until very recently, been scantily and imperfectly supplied with -> {'source': 'https://www.gutenberg.org/cache/epub/71292/pg71292.txt'} \n",
"====\n"
]
}
],
"source": [
"query = \"Was he interested in astronomy?\"\n",
"docs = await vs.asearch(query, search_type=\"similarity\", k=3)\n",
"\n",
"for d in docs:\n",
" print(d.page_content, \" -> \", d.metadata, \"\\n====\\n\")"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-13T01:08:06.504769Z",
"start_time": "2023-08-13T01:08:06.325435Z"
}
},
"id": "53700a9cd817cde4"
},
{
"cell_type": "markdown",
"source": [
"### Let's try again using a filter for only the Sherlock Holmes document."
],
"metadata": {
"collapsed": false
},
"id": "7b81d7cae351a1ec"
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"possess all knowledge which is likely to be useful to him in his work,\r\n",
"and this I have endeavored in my case to do. If I remember rightly, you\r\n",
"on one occasion, in the early days of our friendship, defined my limits\r\n",
"in a very precise fashion.”\r\n",
"\r\n",
"“Yes,” I answered, laughing. “It was a singular document. Philosophy,\r\n",
"astronomy, and politics were marked at zero, I remember. Botany\r\n",
"variable, geology profound as regards the mud-stains from any region -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n",
"====\n",
"\n",
"the light shining upon his strong-set aquiline features. So he sat as I\r\n",
"dropped off to sleep, and so he sat when a sudden ejaculation caused me\r\n",
"to wake up, and I found the summer sun shining into the apartment. The\r\n",
"pipe was still between his lips, the smoke still curled upward, and the\r\n",
"room was full of a dense tobacco haze, but nothing remained of the heap\r\n",
"of shag which I had seen upon the previous night.\r\n",
"\r\n",
"“Awake, Watson?” he asked.\r\n",
"\r\n",
"“Yes.”\r\n",
"\r\n",
"“Game for a morning drive?” -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n",
"====\n",
"\n",
"“I glanced at the books upon the table, and in spite of my ignorance\r\n",
"of German I could see that two of them were treatises on science, the\r\n",
"others being volumes of poetry. Then I walked across to the window,\r\n",
"hoping that I might catch some glimpse of the country-side, but an oak\r\n",
"shutter, heavily barred, was folded across it. It was a wonderfully\r\n",
"silent house. There was an old clock ticking loudly somewhere in the\r\n",
"passage, but otherwise everything was deadly still. A vague feeling of -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n",
"====\n"
]
}
],
"source": [
"filter = {\n",
" \"where\": {\"jsonpath\": \"$[*] ? (@.source == 'https://www.gutenberg.org/files/48320/48320-0.txt')\"},\n",
"}\n",
"\n",
"docs = await vs.asearch(query, search_type=\"similarity\", metadata=filter, k=3)\n",
"\n",
"for d in docs:\n",
" print(d.page_content, \" -> \", d.metadata, \"\\n====\\n\")"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-08-13T01:08:06.672836Z",
"start_time": "2023-08-13T01:08:06.505944Z"
}
},
"id": "8f1bdcba03979d22"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -15,7 +15,7 @@
"- destination_chains: chains that the router chain can route to\n",
"\n",
"\n",
"In this notebook we will focus on the different types of routing chains. We will show these routing chains used in a `MultiPromptChain` to create a question-answering chain that selects the prompt which is most relevant for a given question, and then answers the question using that prompt."
"In this notebook, we will focus on the different types of routing chains. We will show these routing chains used in a `MultiPromptChain` to create a question-answering chain that selects the prompt which is most relevant for a given question, and then answers the question using that prompt."
]
},
{
@ -231,7 +231,7 @@
}
],
"source": [
"print(chain.run(\"What is the name of the type of cloud that rins\"))"
"print(chain.run(\"What is the name of the type of cloud that rains?\"))"
]
},
{

View File

@ -83,10 +83,8 @@
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for extracting information in structured formats.\n",
"Human: Use the given format to extract information from the following input:\n",
"Human: Sally is 13\n",
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
" {'function_call': {'name': '_OutputFormatter', 'arguments': '{\\n \"output\": {\\n \"name\": \"Sally\",\\n \"age\": 13,\\n \"fav_food\": \"Unknown\"\\n }\\n}'}}\n",
"Human: Use the given format to extract information from the following input: Sally is 13\n",
"Human: Tip: Make sure to answer in the correct format\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
@ -105,18 +103,13 @@
"source": [
"# If we pass in a model explicitly, we need to make sure it supports the OpenAI function-calling API.\n",
"llm = ChatOpenAI(model=\"gpt-4\", temperature=0)\n",
"\n",
"prompt_msgs = [\n",
" SystemMessage(\n",
" content=\"You are a world class algorithm for extracting information in structured formats.\"\n",
" ),\n",
" HumanMessage(\n",
" content=\"Use the given format to extract information from the following input:\"\n",
" ),\n",
" HumanMessagePromptTemplate.from_template(\"{input}\"),\n",
" HumanMessage(content=\"Tips: Make sure to answer in the correct format\"),\n",
"]\n",
"prompt = ChatPromptTemplate(messages=prompt_msgs)\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", \"You are a world class algorithm for extracting information in structured formats.\"),\n",
" (\"human\", \"Use the given format to extract information from the following input: {input}\"),\n",
" (\"human\", \"Tip: Make sure to answer in the correct format\"),\n",
" ]\n",
")\n",
"\n",
"chain = create_structured_output_chain(Person, llm, prompt, verbose=True)\n",
"chain.run(\"Sally is 13\")"
@ -132,7 +125,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "4d8ea815",
"metadata": {},
"outputs": [
@ -145,10 +138,8 @@
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for extracting information in structured formats.\n",
"Human: Use the given format to extract information from the following input:\n",
"Human: Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally, so she's 23.\n",
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
" {'function_call': {'name': '_OutputFormatter', 'arguments': '{\\n \"output\": {\\n \"people\": [\\n {\\n \"name\": \"Sally\",\\n \"age\": 13,\\n \"fav_food\": \"\"\\n },\\n {\\n \"name\": \"Joey\",\\n \"age\": 12,\\n \"fav_food\": \"spinach\"\\n },\\n {\\n \"name\": \"Caroline\",\\n \"age\": 23,\\n \"fav_food\": \"\"\\n }\\n ]\\n }\\n}'}}\n",
"Human: Use the given format to extract information from the following input: Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally.\n",
"Human: Tip: Make sure to answer in the correct format\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
@ -159,7 +150,7 @@
"People(people=[Person(name='Sally', age=13, fav_food=''), Person(name='Joey', age=12, fav_food='spinach'), Person(name='Caroline', age=23, fav_food='')])"
]
},
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@ -176,7 +167,7 @@
"\n",
"chain = create_structured_output_chain(People, llm, prompt, verbose=True)\n",
"chain.run(\n",
" \"Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally, so she's 23.\"\n",
" \"Sally is 13, Joey just turned 12 and loves spinach. Caroline is 10 years older than Sally.\"\n",
")"
]
},
@ -229,10 +220,8 @@
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for extracting information in structured formats.\n",
"Human: Use the given format to extract information from the following input:\n",
"Human: Sally is 13\n",
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
" {'function_call': {'name': 'output_formatter', 'arguments': '{\\n \"name\": \"Sally\",\\n \"age\": 13\\n}'}}\n",
"Human: Use the given format to extract information from the following input: Sally is 13\n",
"Human: Tip: Make sure to answer in the correct format\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
@ -279,7 +268,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "17f52508",
"metadata": {},
"outputs": [],
@ -302,7 +291,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "a4658ad8",
"metadata": {},
"outputs": [
@ -314,11 +303,9 @@
"\n",
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities\n",
"Human: Make calls to the relevant function to record the entities in the following input:\n",
"Human: Harry was a chubby brown beagle who loved chicken\n",
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
" {'function_call': {'name': 'RecordDog', 'arguments': '{\\n \"name\": \"Harry\",\\n \"color\": \"brown\",\\n \"fav_food\": \"chicken\"\\n}'}}\n",
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities.\n",
"Human: Make calls to the relevant function to record the entities in the following input: Harry was a chubby brown beagle who loved chicken\n",
"Human: Tip: Make sure to answer in the correct format\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
@ -329,21 +316,19 @@
"RecordDog(name='Harry', color='brown', fav_food='chicken')"
]
},
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prompt_msgs = [\n",
" SystemMessage(content=\"You are a world class algorithm for recording entities\"),\n",
" HumanMessage(\n",
" content=\"Make calls to the relevant function to record the entities in the following input:\"\n",
" ),\n",
" HumanMessagePromptTemplate.from_template(\"{input}\"),\n",
" HumanMessage(content=\"Tips: Make sure to answer in the correct format\"),\n",
"]\n",
"prompt = ChatPromptTemplate(messages=prompt_msgs)\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [\n",
" (\"system\", \"You are a world class algorithm for recording entities.\"),\n",
" (\"human\", \"Make calls to the relevant function to record the entities in the following input: {input}\"),\n",
" (\"human\", \"Tip: Make sure to answer in the correct format\"),\n",
" ]\n",
")\n",
"\n",
"chain = create_openai_fn_chain([RecordPerson, RecordDog], llm, prompt, verbose=True)\n",
"chain.run(\"Harry was a chubby brown beagle who loved chicken\")"
@ -362,7 +347,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"id": "95ac5825",
"metadata": {},
"outputs": [
@ -374,11 +359,9 @@
"\n",
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities\n",
"Human: Make calls to the relevant function to record the entities in the following input:\n",
"Human: The most important thing to remember about Tommy, my 12 year old, is that he'll do anything for apple pie.\n",
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
" {'function_call': {'name': 'record_person', 'arguments': '{\\n \"name\": \"Tommy\",\\n \"age\": 12,\\n \"fav_food\": {\\n \"food\": \"apple pie\"\\n }\\n}'}}\n",
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities.\n",
"Human: Make calls to the relevant function to record the entities in the following input: The most important thing to remember about Tommy, my 12 year old, is that he'll do anything for apple pie.\n",
"Human: Tip: Make sure to answer in the correct format\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
@ -389,7 +372,7 @@
"{'name': 'Tommy', 'age': 12, 'fav_food': {'food': 'apple pie'}}"
]
},
"execution_count": 10,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@ -434,7 +417,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"id": "8b0d11de",
"metadata": {},
"outputs": [
@ -446,11 +429,9 @@
"\n",
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities\n",
"Human: Make calls to the relevant function to record the entities in the following input:\n",
"Human: I can't find my dog Henry anywhere, he's a small brown beagle. Could you send a message about him?\n",
"Human: Tips: Make sure to answer in the correct format\u001b[0m\n",
" {'function_call': {'name': 'record_dog', 'arguments': '{\\n \"name\": \"Henry\",\\n \"color\": \"brown\",\\n \"fav_food\": {\\n \"food\": null\\n }\\n}'}}\n",
"\u001b[32;1m\u001b[1;3mSystem: You are a world class algorithm for recording entities.\n",
"Human: Make calls to the relevant function to record the entities in the following input: I can't find my dog Henry anywhere, he's a small brown beagle. Could you send a message about him?\n",
"Human: Tip: Make sure to answer in the correct format\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
@ -462,7 +443,7 @@
" 'arguments': {'name': 'Henry', 'color': 'brown', 'fav_food': {'food': None}}}"
]
},
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@ -502,9 +483,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"display_name": "poetry-venv",
"language": "python",
"name": "venv"
"name": "poetry-venv"
},
"language_info": {
"codemirror_mode": {
@ -516,7 +497,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
"version": "3.9.1"
}
},
"nbformat": 4,

View File

@ -0,0 +1,594 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "bf4061ce",
"metadata": {},
"source": [
"# Caching Embeddings\n",
"\n",
"Embeddings can be stored or temporarily cached to avoid needing to recompute them.\n",
"\n",
"Caching embeddings can be done using a `CacheBackedEmbeddings`.\n",
"\n",
"The cache backed embedder is a wrapper around an embedder that caches\n",
"embeddings in a key-value store. \n",
"\n",
"The text is hashed and the hash is used as the key in the cache.\n",
"\n",
"\n",
"The main supported way to initialized a `CacheBackedEmbeddings` is `from_bytes_store`. This takes in the following parameters:\n",
"\n",
"- underlying_embedder: The embedder to use for embedding.\n",
"- document_embedding_cache: The cache to use for storing document embeddings.\n",
"- namespace: (optional, defaults to `\"\"`) The namespace to use for document cache. This namespace is used to avoid collisions with other caches. For example, set it to the name of the embedding model used.\n",
"\n",
"**Attention**: Be sure to set the `namespace` parameter to avoid collisions of the same text embedded using different embeddings models."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "a463c3c2-749b-40d1-a433-84f68a1cd1c7",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.storage import InMemoryStore, LocalFileStore, RedisStore\n",
"from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings"
]
},
{
"cell_type": "markdown",
"id": "9ddf07dd-3e72-41de-99d4-78e9521e272f",
"metadata": {},
"source": [
"## Using with a vectorstore\n",
"\n",
"First, let's see an example that uses the local file system for storing embeddings and uses FAISS vectorstore for retrieval."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9e4314d8-88ef-4f52-81ae-0be771168bb6",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import FAISS"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "3e751f26-9b5b-4c10-843a-d784b5ea8538",
"metadata": {},
"outputs": [],
"source": [
"underlying_embeddings = OpenAIEmbeddings()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "30743664-38f5-425d-8216-772b64e7f348",
"metadata": {},
"outputs": [],
"source": [
"fs = LocalFileStore(\"./cache/\")\n",
"\n",
"cached_embedder = CacheBackedEmbeddings.from_bytes_store(\n",
" underlying_embeddings, fs, namespace=underlying_embeddings.model\n",
")"
]
},
{
"cell_type": "markdown",
"id": "f8cdf33c-321d-4d2c-b76b-d6f5f8b42a92",
"metadata": {},
"source": [
"The cache is empty prior to embedding"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "f9ad627f-ced2-4277-b336-2434f22f2c8a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(fs.yield_keys())"
]
},
{
"cell_type": "markdown",
"id": "a4effe04-b40f-42f8-a449-72fe6991cf20",
"metadata": {},
"source": [
"Load the document, split it into chunks, embed each chunk and load it into the vector store."
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "cf958ac2-e60e-4668-b32c-8bb2d78b3c61",
"metadata": {},
"outputs": [],
"source": [
"raw_documents = TextLoader(\"../state_of_the_union.txt\").load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"documents = text_splitter.split_documents(raw_documents)"
]
},
{
"cell_type": "markdown",
"id": "f526444b-93f8-423f-b6d1-dab539450921",
"metadata": {},
"source": [
"create the vectorstore"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "3a1d7bb8-3b72-4bb5-9013-cf7729caca61",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 608 ms, sys: 58.9 ms, total: 667 ms\n",
"Wall time: 1.3 s\n"
]
}
],
"source": [
"%%time\n",
"db = FAISS.from_documents(documents, cached_embedder)"
]
},
{
"cell_type": "markdown",
"id": "64fc53f5-d559-467f-bf62-5daef32ffbc0",
"metadata": {},
"source": [
"If we try to create the vectostore again, it'll be much faster since it does not need to re-compute any embeddings."
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "714cb2e2-77ba-41a8-bb83-84e75342af2d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 33.6 ms, sys: 3.96 ms, total: 37.6 ms\n",
"Wall time: 36.8 ms\n"
]
}
],
"source": [
"%%time\n",
"db2 = FAISS.from_documents(documents, cached_embedder)"
]
},
{
"cell_type": "markdown",
"id": "1acc76b9-9c70-4160-b593-5f932c75e2b6",
"metadata": {},
"source": [
"And here are some of the embeddings that got created:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f2ca32dd-3712-4093-942b-4122f3dc8a8e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['text-embedding-ada-002614d7cf6-46f1-52fa-9d3a-740c39e7a20e',\n",
" 'text-embedding-ada-0020fc1ede2-407a-5e14-8f8f-5642214263f5',\n",
" 'text-embedding-ada-002e4ad20ef-dfaa-5916-9459-f90c6d8e8159',\n",
" 'text-embedding-ada-002a5ef11e4-0474-5725-8d80-81c91943b37f',\n",
" 'text-embedding-ada-00281426526-23fe-58be-9e84-6c7c72c8ca9a']"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(fs.yield_keys())[:5]"
]
},
{
"cell_type": "markdown",
"id": "564c9801-29f0-4452-aeac-527382e2c0e8",
"metadata": {},
"source": [
"## In Memory\n",
"\n",
"This section shows how to set up an in memory cache for embeddings. This type of cache is primarily \n",
"useful for unit tests or prototyping. Do **not** use this cache if you need to actually store the embeddings."
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "13bd1c5b-b7ba-4394-957c-7d5b5a841972",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"store = InMemoryStore()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "9d99885f-99e1-498c-904d-6db539ac9466",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"underlying_embeddings = OpenAIEmbeddings()\n",
"embedder = CacheBackedEmbeddings.from_bytes_store(\n",
" underlying_embeddings, store, namespace=underlying_embeddings.model\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "682eb5d4-0b7a-4dac-b8fb-3de4ca6e421c",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 10.9 ms, sys: 916 µs, total: 11.8 ms\n",
"Wall time: 159 ms\n"
]
}
],
"source": [
"%%time\n",
"embeddings = embedder.embed_documents([\"hello\", \"goodbye\"])"
]
},
{
"cell_type": "markdown",
"id": "95233026-147f-49d1-bd87-e1e8b88ebdbc",
"metadata": {},
"source": [
"The second time we try to embed the embedding time is only 2 ms because the embeddings are looked up in the cache."
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "f819c3ff-a212-4d06-a5f7-5eb1435c1feb",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.67 ms, sys: 342 µs, total: 2.01 ms\n",
"Wall time: 2.01 ms\n"
]
}
],
"source": [
"%%time\n",
"embeddings_from_cache = embedder.embed_documents([\"hello\", \"goodbye\"])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "ec38fb72-90a9-4687-a483-c62c87d1f4dd",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"embeddings == embeddings_from_cache"
]
},
{
"cell_type": "markdown",
"id": "f6cbe100-8587-4830-b207-fb8b524a9854",
"metadata": {},
"source": [
"## File system\n",
"\n",
"This section covers how to use a file system store."
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "a0070271-0809-4528-97e0-2a88216846f3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"fs = LocalFileStore(\"./test_cache/\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "0b20e9fe-f57f-4d7c-9f81-105c5f8726f4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"embedder2 = CacheBackedEmbeddings.from_bytes_store(\n",
" underlying_embeddings, fs, namespace=underlying_embeddings.model\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "630515fd-bf5c-4d9c-a404-9705308f3a2c",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 6.89 ms, sys: 4.89 ms, total: 11.8 ms\n",
"Wall time: 184 ms\n"
]
}
],
"source": [
"%%time\n",
"embeddings = embedder2.embed_documents([\"hello\", \"goodbye\"])"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "30e6bb87-42c9-4d08-88ac-0d22c9c449a1",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 0 ns, sys: 3.24 ms, total: 3.24 ms\n",
"Wall time: 2.84 ms\n"
]
}
],
"source": [
"%%time\n",
"embeddings = embedder2.embed_documents([\"hello\", \"goodbye\"])"
]
},
{
"cell_type": "markdown",
"id": "12ed5a45-8352-4e0f-8583-5537397f53c0",
"metadata": {},
"source": [
"Here are the embeddings that have been persisted to the directory `./test_cache`. \n",
"\n",
"Notice that the embedder takes a namespace parameter."
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "658e2914-05e9-44a3-a8fe-3fe17ca84039",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['text-embedding-ada-002e885db5b-c0bd-5fbc-88b1-4d1da6020aa5',\n",
" 'text-embedding-ada-0026ba52e44-59c9-5cc9-a084-284061b13c80']"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(fs.yield_keys())"
]
},
{
"cell_type": "markdown",
"id": "cd5f5a96-6ffa-429d-aa82-00b3f6532871",
"metadata": {},
"source": [
"## Redis Store\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "4879c134-141f-48a0-acfe-7d6f30253af0",
"metadata": {},
"outputs": [],
"source": [
"from langchain.storage import RedisStore"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "8b2bb9a0-6549-4487-8532-29ab4ab7336f",
"metadata": {},
"outputs": [],
"source": [
"# For cache isolation can use a separate DB\n",
"# Or additional namepace\n",
"store = RedisStore(redis_url=\"redis://localhost:6379\", client_kwargs={'db': 2}, namespace='embedding_caches')\n",
"\n",
"underlying_embeddings = OpenAIEmbeddings()\n",
"embedder = CacheBackedEmbeddings.from_bytes_store(\n",
" underlying_embeddings, store, namespace=underlying_embeddings.model\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "eca3cb99-2bb3-49d5-81f9-1dee03da4b8c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 3.99 ms, sys: 0 ns, total: 3.99 ms\n",
"Wall time: 3.5 ms\n"
]
}
],
"source": [
"%%time\n",
"embeddings = embedder.embed_documents([\"hello\", \"goodbye\"])"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "317ba5d8-89f9-462c-b807-ad4ef26e518b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 2.47 ms, sys: 767 µs, total: 3.24 ms\n",
"Wall time: 2.75 ms\n"
]
}
],
"source": [
"%%time\n",
"embeddings = embedder.embed_documents([\"hello\", \"goodbye\"])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "8a540317-5142-4491-9062-a097932b56e3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['text-embedding-ada-002e885db5b-c0bd-5fbc-88b1-4d1da6020aa5',\n",
" 'text-embedding-ada-0026ba52e44-59c9-5cc9-a084-284061b13c80']"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(store.yield_keys())"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "cd9b0d4a-f816-4dce-9dde-cde1ad9a65fb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[b'embedding_caches/text-embedding-ada-002e885db5b-c0bd-5fbc-88b1-4d1da6020aa5',\n",
" b'embedding_caches/text-embedding-ada-0026ba52e44-59c9-5cc9-a084-284061b13c80']"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(store.client.scan_iter())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -6,7 +6,7 @@
"id": "13afcae7",
"metadata": {},
"source": [
"# DeepLake self-querying \n",
"# Deep Lake self-querying \n",
"\n",
">[DeepLake](https://www.activeloop.ai) is a multimodal database for building AI applications.\n",
"\n",
@ -19,7 +19,7 @@
"id": "68e75fb9",
"metadata": {},
"source": [
"## Creating a DeepLake vectorstore\n",
"## Creating a Deep Lake vectorstore\n",
"First we'll want to create a DeepLake VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n",
"\n",
"NOTE: The self-query retriever requires you to have `lark` installed (`pip install lark`). We also need the `deeplake` package."
@ -27,26 +27,23 @@
},
{
"cell_type": "code",
"execution_count": 1,
"id": "63a8af5b",
"metadata": {
"tags": []
},
"execution_count": null,
"id": "a798fe66",
"metadata": {},
"outputs": [],
"source": [
"#!pip install lark"
"# !pip install lark"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "22431060-52c4-48a7-a97b-9f542b8b0928",
"metadata": {
"tags": []
},
"id": "43821a8e",
"metadata": {},
"outputs": [],
"source": [
"#!pip install 'deeplake[enterprise]'"
"# in case if some queries fail consider installing libdeeplake manually\n",
"# !pip install libdeeplake"
]
},
{
@ -60,7 +57,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "dd01b61b-7d32-4a55-85d6-b2d2d4f18840",
"metadata": {
"tags": []
@ -70,12 +67,13 @@
"import os\n",
"import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
"os.environ[\"ACTIVELOOP_TOKEN\"] = getpass.getpass(\"Activeloop token:\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "cb4a5787",
"metadata": {
"tags": []
@ -108,7 +106,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"-"
"/"
]
},
{
@ -166,9 +164,9 @@
" },\n",
" ),\n",
"]\n",
"username_or_org = \"<USER_NAME_OR_ORG>\"\n",
"username_or_org = \"<USERNAME_OR_ORG>\"\n",
"vectorstore = DeepLake.from_documents(\n",
" docs, embeddings, dataset_path=f\"hub://{username_or_org}/self_queery\"\n",
" docs, embeddings, dataset_path=f\"hub://{username_or_org}/self_queery\", overwrite=True,\n",
")"
]
},
@ -185,10 +183,8 @@
{
"cell_type": "code",
"execution_count": 7,
"id": "86e34dbf",
"metadata": {
"tags": []
},
"id": "c90b0b40",
"metadata": {},
"outputs": [],
"source": [
"from langchain.llms import OpenAI\n",
@ -242,7 +238,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/adilkhansarsen/Documents/work/LangChain/langchain/langchain/chains/llm.py:275: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
"/home/ubuntu/langchain_activeloop/langchain/libs/langchain/langchain/chains/llm.py:279: UserWarning: The predict_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n",
" warnings.warn(\n"
]
},
@ -299,7 +295,9 @@
],
"source": [
"# This example only specifies a filter\n",
"retriever.get_relevant_documents(\"I want to watch a movie rated higher than 8.5\")"
"retriever.get_relevant_documents(\"I want to watch a movie rated higher than 8.5\")\n",
"\n",
"# in case if this example errored out, consider installing libdeeplake manually: `pip install libdeeplake`, and then restart notebook."
]
},
{
@ -456,14 +454,6 @@
"# This example only specifies a relevant query\n",
"retriever.get_relevant_documents(\"what are two movies about dinosaurs\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c93f0847-cbd9-4c25-aed1-91588e856b5c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@ -482,7 +472,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@ -0,0 +1,362 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "13afcae7",
"metadata": {},
"source": [
"# Elasticsearch self-querying "
]
},
{
"cell_type": "markdown",
"id": "68e75fb9",
"metadata": {},
"source": [
"## Creating a Elasticsearch vectorstore\n",
"First we'll want to create a Elasticsearch VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n",
"\n",
"NOTE: The self-query retriever requires you to have `lark` installed (`pip install lark`). We also need the `elasticsearch` package."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "63a8af5b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"#!pip install lark elasticsearch"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cb4a5787",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.schema import Document\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.vectorstores import ElasticsearchStore\n",
"import os\n",
"import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
"\n",
"embeddings = OpenAIEmbeddings()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "bcbe04d9",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"docs = [\n",
" Document(\n",
" page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n",
" metadata={\"year\": 1993, \"rating\": 7.7, \"genre\": \"science fiction\"},\n",
" ),\n",
" Document(\n",
" page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n",
" metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"rating\": 8.2},\n",
" ),\n",
" Document(\n",
" page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n",
" metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6},\n",
" ),\n",
" Document(\n",
" page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n",
" metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3},\n",
" ),\n",
" Document(\n",
" page_content=\"Toys come alive and have a blast doing so\",\n",
" metadata={\"year\": 1995, \"genre\": \"animated\"},\n",
" ),\n",
" Document(\n",
" page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n",
" metadata={\n",
" \"year\": 1979,\n",
" \"rating\": 9.9,\n",
" \"director\": \"Andrei Tarkovsky\",\n",
" \"genre\": \"science fiction\",\n",
" \"rating\": 9.9,\n",
" },\n",
" ),\n",
"]\n",
"vectorstore = ElasticsearchStore.from_documents(\n",
" docs, embeddings, index_name=\"elasticsearch-self-query-demo\", es_url=\"http://localhost:9200\"\n",
")"
]
},
{
"cell_type": "markdown",
"id": "5ecaab6d",
"metadata": {},
"source": [
"## Creating our self-querying retriever\n",
"Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "86e34dbf",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.llms import OpenAI\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"\n",
"metadata_field_info = [\n",
" AttributeInfo(\n",
" name=\"genre\",\n",
" description=\"The genre of the movie\",\n",
" type=\"string or list[string]\",\n",
" ),\n",
" AttributeInfo(\n",
" name=\"year\",\n",
" description=\"The year the movie was released\",\n",
" type=\"integer\",\n",
" ),\n",
" AttributeInfo(\n",
" name=\"director\",\n",
" description=\"The name of the movie director\",\n",
" type=\"string\",\n",
" ),\n",
" AttributeInfo(\n",
" name=\"rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n",
" ),\n",
"]\n",
"document_content_description = \"Brief summary of a movie\"\n",
"llm = OpenAI(temperature=0)\n",
"retriever = SelfQueryRetriever.from_llm(\n",
" llm, vectorstore, document_content_description, metadata_field_info, verbose=True\n",
")"
]
},
{
"cell_type": "markdown",
"id": "ea9df8d4",
"metadata": {},
"source": [
"## Testing it out\n",
"And now we can try actually using our retriever!"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "38a126e9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"query='dinosaur' filter=None limit=None\n"
]
},
{
"data": {
"text/plain": [
"[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction'}),\n",
" Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'}),\n",
" Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction'}),\n",
" Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6})]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example only specifies a relevant query\n",
"retriever.get_relevant_documents(\"What are some movies about dinosaurs\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "b19d4da0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"query='women' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='director', value='Greta Gerwig') limit=None\n"
]
},
{
"data": {
"text/plain": [
"[Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'year': 2019, 'director': 'Greta Gerwig', 'rating': 8.3})]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example specifies a query and a filter\n",
"retriever.get_relevant_documents(\"Has Greta Gerwig directed any movies about women\")"
]
},
{
"cell_type": "markdown",
"id": "39bd1de1-b9fe-4a98-89da-58d8a7a6ae51",
"metadata": {},
"source": [
"## Filter k\n",
"\n",
"We can also use the self query retriever to specify `k`: the number of documents to fetch.\n",
"\n",
"We can do this by passing `enable_limit=True` to the constructor."
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "bff36b88-b506-4877-9c63-e5a1a8d78e64",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"retriever = SelfQueryRetriever.from_llm(\n",
" llm,\n",
" vectorstore,\n",
" document_content_description,\n",
" metadata_field_info,\n",
" enable_limit=True,\n",
" verbose=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "2758d229-4f97-499c-819f-888acaf8ee10",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"query='dinosaur' filter=None limit=2\n"
]
},
{
"data": {
"text/plain": [
"[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction'}),\n",
" Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'})]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# This example only specifies a relevant query\n",
"retriever.get_relevant_documents(\"what are two movies about dinosaurs\")"
]
},
{
"cell_type": "markdown",
"id": "61a10294",
"metadata": {},
"source": [
"## Complex queries in Action!\n",
"We've tried out some simple queries, but what about more complex ones? Let's try out a few more complex queries that utilize the full power of Elasticsearch."
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "e460da93",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"query='animated toys' filter=Operation(operator=<Operator.AND: 'and'>, arguments=[Operation(operator=<Operator.OR: 'or'>, arguments=[Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='animated'), Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='genre', value='comedy')]), Comparison(comparator=<Comparator.GTE: 'gte'>, attribute='year', value=1990)]) limit=None\n"
]
},
{
"data": {
"text/plain": [
"[Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'})]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever.get_relevant_documents(\"what animated or comedy movies have been released in the last 30 years about animated toys?\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "0851fc42",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ObjectApiResponse({'acknowledged': True})"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vectorstore.client.indices.delete(index=\"elasticsearch-self-query-demo\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -1,3 +1,7 @@
---
sidebar_position: 0
---
# Agent simulations
Agent simulations involve one or more agents interacting with each other.

View File

@ -1,3 +1,7 @@
---
sidebar_position: 0
---
# Agents
Agents can be used for a variety of tasks.

View File

@ -123,7 +123,7 @@
"wikidata_user_agent_header = (\n",
" None\n",
" if not config.has_section(\"WIKIDATA\")\n",
" else config[\"WIKIDATA\"][\"WIKIDAtA_USER_AGENT_HEADER\"]\n",
" else config[\"WIKIDATA\"][\"WIKIDATA_USER_AGENT_HEADER\"]\n",
")"
]
},

View File

@ -0,0 +1,423 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "a15e6a18",
"metadata": {},
"source": [
"# Interacting with APIs\n",
"\n",
"[![Open In Collab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/use_cases/apis.ipynb)\n",
"\n",
"## Use case \n",
"\n",
"Suppose you want an LLM to interact with external APIs.\n",
"\n",
"This can be very useful for retrieving context for the LLM to utilize.\n",
"\n",
"And, more generally, it allows us to interact with APIs using natural langugage! \n",
" \n",
"\n",
"## Overview\n",
"\n",
"There are two primary ways to interface LLMs with external APIs:\n",
" \n",
"* `Functions`: For example, [OpenAI functions](https://platform.openai.com/docs/guides/gpt/function-calling) is one popular means of doing this.\n",
"* `LLM-generated interface`: Use an LLM with access to API documentation to create an interface.\n",
"\n",
"![Image description](/img/api_use_case.png)"
]
},
{
"cell_type": "markdown",
"id": "abbd82f0",
"metadata": {},
"source": [
"## Quickstart \n",
"\n",
"Many APIs are already compatible with OpenAI function calling.\n",
"\n",
"For example, [Klarna](https://www.klarna.com/international/press/klarna-brings-smoooth-shopping-to-chatgpt/) has a YAML file that describes its API and allows OpenAI to interact with it:\n",
"\n",
"```\n",
"https://www.klarna.com/us/shopping/public/openai/v0/api-docs/\n",
"```\n",
"\n",
"Other options include:\n",
"\n",
"* [Speak](https://api.speak.com/openapi.yaml) for translation\n",
"* [XKCD](https://gist.githubusercontent.com/roaldnefs/053e505b2b7a807290908fe9aa3e1f00/raw/0a212622ebfef501163f91e23803552411ed00e4/openapi.yaml) for comics\n",
"\n",
"We can supply the specification to `get_openapi_chain` directly in order to query the API with OpenAI functions:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a218fcc",
"metadata": {},
"outputs": [],
"source": [
"pip install langchain openai \n",
"\n",
"# Set env var OPENAI_API_KEY or load from a .env file:\n",
"# import dotenv\n",
"# dotenv.load_env()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "30b780e3",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n"
]
},
{
"data": {
"text/plain": [
"{'query': \"What are some options for a men's large blue button down shirt\",\n",
" 'response': {'products': [{'name': 'Cubavera Four Pocket Guayabera Shirt',\n",
" 'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3202055522/Clothing/Cubavera-Four-Pocket-Guayabera-Shirt/?utm_source=openai&ref-site=openai_plugin',\n",
" 'price': '$13.50',\n",
" 'attributes': ['Material:Polyester,Cotton',\n",
" 'Target Group:Man',\n",
" 'Color:Red,White,Blue,Black',\n",
" 'Properties:Pockets',\n",
" 'Pattern:Solid Color',\n",
" 'Size (Small-Large):S,XL,L,M,XXL']},\n",
" {'name': 'Polo Ralph Lauren Plaid Short Sleeve Button-down Oxford Shirt',\n",
" 'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3207163438/Clothing/Polo-Ralph-Lauren-Plaid-Short-Sleeve-Button-down-Oxford-Shirt/?utm_source=openai&ref-site=openai_plugin',\n",
" 'price': '$52.20',\n",
" 'attributes': ['Material:Cotton',\n",
" 'Target Group:Man',\n",
" 'Color:Red,Blue,Multicolor',\n",
" 'Size (Small-Large):S,XL,L,M,XXL']},\n",
" {'name': 'Brixton Bowery Flannel Shirt',\n",
" 'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3202331096/Clothing/Brixton-Bowery-Flannel-Shirt/?utm_source=openai&ref-site=openai_plugin',\n",
" 'price': '$27.48',\n",
" 'attributes': ['Material:Cotton',\n",
" 'Target Group:Man',\n",
" 'Color:Gray,Blue,Black,Orange',\n",
" 'Properties:Pockets',\n",
" 'Pattern:Checkered',\n",
" 'Size (Small-Large):XL,3XL,4XL,5XL,L,M,XXL']},\n",
" {'name': 'Vineyard Vines Gingham On-The-Go brrr Classic Fit Shirt Crystal',\n",
" 'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3201938510/Clothing/Vineyard-Vines-Gingham-On-The-Go-brrr-Classic-Fit-Shirt-Crystal/?utm_source=openai&ref-site=openai_plugin',\n",
" 'price': '$80.64',\n",
" 'attributes': ['Material:Cotton',\n",
" 'Target Group:Man',\n",
" 'Color:Blue',\n",
" 'Size (Small-Large):XL,XS,L,M']},\n",
" {'name': \"Carhartt Men's Loose Fit Midweight Short Sleeve Plaid Shirt\",\n",
" 'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3201826024/Clothing/Carhartt-Men-s-Loose-Fit-Midweight-Short-Sleeve-Plaid-Shirt/?utm_source=openai&ref-site=openai_plugin',\n",
" 'price': '$17.99',\n",
" 'attributes': ['Material:Cotton',\n",
" 'Target Group:Man',\n",
" 'Color:Red,Brown,Blue,Green',\n",
" 'Properties:Pockets',\n",
" 'Pattern:Checkered',\n",
" 'Size (Small-Large):S,XL,L,M']}]}}"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.chains.openai_functions.openapi import get_openapi_chain\n",
"chain = get_openapi_chain(\"https://www.klarna.com/us/shopping/public/openai/v0/api-docs/\")\n",
"chain(\"What are some options for a men's large blue button down shirt\")"
]
},
{
"cell_type": "markdown",
"id": "9162c91c",
"metadata": {},
"source": [
"## Functions \n",
"\n",
"We can unpack what is hapening when we use the funtions to calls external APIs.\n",
"\n",
"Let's look at the [LangSmith trace](https://smith.langchain.com/public/76a58b85-193f-4eb7-ba40-747f0d5dd56e/r):\n",
"\n",
"* See [here](https://github.com/langchain-ai/langchain/blob/7fc07ba5df99b9fa8bef837b0fafa220bc5c932c/libs/langchain/langchain/chains/openai_functions/openapi.py#L279C9-L279C19) that we call the OpenAI LLM with the provided API spec:\n",
"\n",
"```\n",
"https://www.klarna.com/us/shopping/public/openai/v0/api-docs/\n",
"```\n",
"\n",
"* The prompt then tells the LLM to use the API spec wiith input question:\n",
"\n",
"```\n",
"Use the provided API's to respond to this user query:\n",
"What are some options for a men's large blue button down shirt\n",
"```\n",
"\n",
"* The LLM returns the parameters for the function call `productsUsingGET`, which is [specified in the provided API spec](https://www.klarna.com/us/shopping/public/openai/v0/api-docs/):\n",
"```\n",
"function_call:\n",
" name: productsUsingGET\n",
" arguments: |-\n",
" {\n",
" \"params\": {\n",
" \"countryCode\": \"US\",\n",
" \"q\": \"men's large blue button down shirt\",\n",
" \"size\": 5,\n",
" \"min_price\": 0,\n",
" \"max_price\": 100\n",
" }\n",
" }\n",
" ```\n",
" \n",
"![Image description](/img/api_function_call.png)\n",
" \n",
"* This `Dict` above split and the [API is called here](https://github.com/langchain-ai/langchain/blob/7fc07ba5df99b9fa8bef837b0fafa220bc5c932c/libs/langchain/langchain/chains/openai_functions/openapi.py#L215)."
]
},
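{
"cell_type": "markdown",
"id": "c3b1f2a9",
"metadata": {},
"source": [
"As a rough, hypothetical sketch (not the library's exact code), that final step amounts to parsing the function-call arguments and issuing the HTTP request:\n",
"\n",
"```\n",
"import json\n",
"\n",
"import requests\n",
"\n",
"# Stand-in for the `arguments` string returned by the LLM above\n",
"raw_args = '{\"params\": {\"countryCode\": \"US\", \"q\": \"shirt\", \"size\": 5}}'\n",
"args = json.loads(raw_args)\n",
"\n",
"# The \"params\" entry is split out and sent as URL query parameters\n",
"# (endpoint URL taken from the Klarna spec above)\n",
"resp = requests.get(\n",
"    \"https://www.klarna.com/us/shopping/public/openai/v0/products\",\n",
"    params=args[\"params\"],\n",
")\n",
"resp.json()\n",
"```"
]
},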
{
"cell_type": "markdown",
"id": "1fe49a0d",
"metadata": {},
"source": [
"## API Chain \n",
"\n",
"We can also build our own interface to external APIs using the `APIChain` and provided API documentation."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "4ef0c3d0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new APIChain chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mhttps://api.open-meteo.com/v1/forecast?latitude=48.1351&longitude=11.5820&hourly=temperature_2m&temperature_unit=fahrenheit&current_weather=true\u001b[0m\n",
"\u001b[33;1m\u001b[1;3m{\"latitude\":48.14,\"longitude\":11.58,\"generationtime_ms\":1.0769367218017578,\"utc_offset_seconds\":0,\"timezone\":\"GMT\",\"timezone_abbreviation\":\"GMT\",\"elevation\":521.0,\"current_weather\":{\"temperature\":52.9,\"windspeed\":12.6,\"winddirection\":239.0,\"weathercode\":3,\"is_day\":0,\"time\":\"2023-08-07T22:00\"},\"hourly_units\":{\"time\":\"iso8601\",\"temperature_2m\":\"°F\"},\"hourly\":{\"time\":[\"2023-08-07T00:00\",\"2023-08-07T01:00\",\"2023-08-07T02:00\",\"2023-08-07T03:00\",\"2023-08-07T04:00\",\"2023-08-07T05:00\",\"2023-08-07T06:00\",\"2023-08-07T07:00\",\"2023-08-07T08:00\",\"2023-08-07T09:00\",\"2023-08-07T10:00\",\"2023-08-07T11:00\",\"2023-08-07T12:00\",\"2023-08-07T13:00\",\"2023-08-07T14:00\",\"2023-08-07T15:00\",\"2023-08-07T16:00\",\"2023-08-07T17:00\",\"2023-08-07T18:00\",\"2023-08-07T19:00\",\"2023-08-07T20:00\",\"2023-08-07T21:00\",\"2023-08-07T22:00\",\"2023-08-07T23:00\",\"2023-08-08T00:00\",\"2023-08-08T01:00\",\"2023-08-08T02:00\",\"2023-08-08T03:00\",\"2023-08-08T04:00\",\"2023-08-08T05:00\",\"2023-08-08T06:00\",\"2023-08-08T07:00\",\"2023-08-08T08:00\",\"2023-08-08T09:00\",\"2023-08-08T10:00\",\"2023-08-08T11:00\",\"2023-08-08T12:00\",\"2023-08-08T13:00\",\"2023-08-08T14:00\",\"2023-08-08T15:00\",\"2023-08-08T16:00\",\"2023-08-08T17:00\",\"2023-08-08T18:00\",\"2023-08-08T19:00\",\"2023-08-08T20:00\",\"2023-08-08T21:00\",\"2023-08-08T22:00\",\"2023-08-08T23:00\",\"2023-08-09T00:00\",\"2023-08-09T01:00\",\"2023-08-09T02:00\",\"2023-08-09T03:00\",\"2023-08-09T04:00\",\"2023-08-09T05:00\",\"2023-08-09T06:00\",\"2023-08-09T07:00\",\"2023-08-09T08:00\",\"2023-08-09T09:00\",\"2023-08-09T10:00\",\"2023-08-09T11:00\",\"2023-08-09T12:00\",\"2023-08-09T13:00\",\"2023-08-09T14:00\",\"2023-08-09T15:00\",\"2023-08-09T16:00\",\"2023-08-09T17:00\",\"2023-08-09T18:00\",\"2023-08-09T19:00\",\"2023-08-09T20:00\",\"2023-08-09T21:00\",\"2023-08-09T22:00\",\"2023-08-09T23:00\",\"2023-08-10T00:00\",\"2023-08-10T01:00\",\"2023-08-10T02:00\",\"2023-08-10T03:00\",\"2023-08-10T04:00\",\"2023-08-10T05:00\",\"2023-08-10T06:00\",\"2023-08-10T07:00\",\"2023-08-10T08:00\",\"2023-08-10T09:00\",\"2023-08-10T10:00\",\"2023-08-10T11:00\",\"2023-08-10T12:00\",\"2023-08-10T13:00\",\"2023-08-10T14:00\",\"2023-08-10T15:00\",\"2023-08-10T16:00\",\"2023-08-10T17:00\",\"2023-08-10T18:00\",\"2023-08-10T19:00\",\"2023-08-10T20:00\",\"2023-08-10T21:00\",\"2023-08-10T22:00\",\"2023-08-10T23:00\",\"2023-08-11T00:00\",\"2023-08-11T01:00\",\"2023-08-11T02:00\",\"2023-08-11T03:00\",\"2023-08-11T04:00\",\"2023-08-11T05:00\",\"2023-08-11T06:00\",\"2023-08-11T07:00\",\"2023-08-11T08:00\",\"2023-08-11T09:00\",\"2023-08-11T10:00\",\"2023-08-11T11:00\",\"2023-08-11T12:00\",\"2023-08-11T13:00\",\"2023-08-11T14:00\",\"2023-08-11T15:00\",\"2023-08-11T16:00\",\"2023-08-11T17:00\",\"2023-08-11T18:00\",\"2023-08-11T19:00\",\"2023-08-11T20:00\",\"2023-08-11T21:00\",\"2023-08-11T22:00\",\"2023-08-11T23:00\",\"2023-08-12T00:00\",\"2023-08-12T01:00\",\"2023-08-12T02:00\",\"2023-08-12T03:00\",\"2023-08-12T04:00\",\"2023-08-12T05:00\",\"2023-08-12T06:00\",\"2023-08-12T07:00\",\"2023-08-12T08:00\",\"2023-08-12T09:00\",\"2023-08-12T10:00\",\"2023-08-12T11:00\",\"2023-08-12T12:00\",\"2023-08-12T13:00\",\"2023-08-12T14:00\",\"2023-08-12T15:00\",\"2023-08-12T16:00\",\"2023-08-12T17:00\",\"2023-08-12T18:00\",\"2023-08-12T19:00\",\"2023-08-12T20:00\",\"2023-08-12T21:00\",\"2023-08-12T22:00\",\"2023-08-12T23:00\",\"2023-08-13T00:00\",\"2023-08-13T01:00\",\"2023-08-13T02:00\",\"2023-08-13T03:00\",\"2023-
08-13T04:00\",\"2023-08-13T05:00\",\"2023-08-13T06:00\",\"2023-08-13T07:00\",\"2023-08-13T08:00\",\"2023-08-13T09:00\",\"2023-08-13T10:00\",\"2023-08-13T11:00\",\"2023-08-13T12:00\",\"2023-08-13T13:00\",\"2023-08-13T14:00\",\"2023-08-13T15:00\",\"2023-08-13T16:00\",\"2023-08-13T17:00\",\"2023-08-13T18:00\",\"2023-08-13T19:00\",\"2023-08-13T20:00\",\"2023-08-13T21:00\",\"2023-08-13T22:00\",\"2023-08-13T23:00\"],\"temperature_2m\":[53.0,51.2,50.9,50.4,50.7,51.3,51.7,52.9,54.3,56.1,57.4,59.3,59.1,60.7,59.7,58.8,58.8,57.8,56.6,55.3,53.9,52.7,52.9,53.2,52.0,51.8,51.3,50.7,50.8,51.5,53.9,57.7,61.2,63.2,64.7,66.6,67.5,67.0,68.7,68.7,67.9,66.2,64.4,61.4,59.8,58.9,57.9,56.3,55.7,55.3,55.5,55.4,55.7,56.5,57.6,58.8,59.7,59.1,58.9,60.6,59.9,59.8,59.9,61.7,63.2,63.6,62.3,58.9,57.3,57.1,57.0,56.5,56.2,56.0,55.3,54.7,54.4,55.2,57.8,60.7,63.0,65.3,66.9,68.2,70.1,72.1,72.6,71.4,69.7,68.6,66.2,63.6,61.8,60.6,59.6,58.9,58.0,57.1,56.3,56.2,56.7,57.9,59.9,63.7,68.4,72.4,75.0,76.8,78.0,78.7,78.9,78.4,76.9,74.8,72.5,70.1,67.6,65.6,64.4,63.9,63.4,62.7,62.2,62.1,62.5,63.4,65.1,68.0,71.7,74.8,76.8,78.2,79.1,79.6,79.7,79.2,77.6,75.3,73.7,68.6,66.8,65.3,64.2,63.4,62.6,61.7,60.9,60.6,60.9,61.6,63.2,65.9,69.3,72.2,74.4,76.2,77.6,78.8,79.6,79.6,78.4,76.4,74.3,72.3,70.4,68.7,67.6,66.8]}}\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"' The current temperature in Munich, Germany is 52.9°F.'"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain.llms import OpenAI\n",
"from langchain.chains import APIChain\n",
"from langchain.chains.api import open_meteo_docs\n",
"llm = OpenAI(temperature=0)\n",
"chain = APIChain.from_llm_and_api_docs(llm, open_meteo_docs.OPEN_METEO_DOCS, verbose=True)\n",
"chain.run('What is the weather like right now in Munich, Germany in degrees Fahrenheit?')"
]
},
{
"cell_type": "markdown",
"id": "5b179318",
"metadata": {},
"source": [
"Note that we supply information about the API:"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "a9e03cc2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'BASE URL: https://api.open-meteo.com/\\n\\nAPI Documentation\\nThe API endpoint /v1/forecast accepts a geographical coordinate, a list of weather variables and responds with a JSON hourly weather forecast for 7 days. Time always starts at 0:00 today and contains 168 hours. All URL parameters are listed below:\\n\\nParameter\\tFormat\\tRequired\\tDefault\\tDescription\\nlatitude, longitude\\tFloating point\\tYes\\t\\tGeographical WGS84 coordinate of the location\\nhourly\\tString array\\tNo\\t\\tA list of weather variables which shou'"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"open_meteo_docs.OPEN_METEO_DOCS[0:500]"
]
},
{
"cell_type": "markdown",
"id": "3fab7930",
"metadata": {},
"source": [
"Under the hood, we do two things:\n",
" \n",
"* `api_request_chain`: Generate an API URL based on the input question and the api_docs\n",
"* `api_answer_chain`: generate a final answer based on the API response\n",
"\n",
"We can look at the [LangSmith trace](https://smith.langchain.com/public/1e0d18ca-0d76-444c-97df-a939a6a815a7/r) to inspect this:\n",
"\n",
"* The `api_request_chain` produces the API url from our question and the API documentation:\n",
"\n",
"![Image description](/img/api_chain.png)\n",
"\n",
"* [Here](https://github.com/langchain-ai/langchain/blob/bbd22b9b761389a5e40fc45b0570e1830aabb707/libs/langchain/langchain/chains/api/base.py#L82) we make the API request with the API url.\n",
"* The `api_answer_chain` takes the response from the API and provides us with a natural langugae response:\n",
"\n",
"![Image description](/img/api_chain_response.png)"
]
},
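{
"cell_type": "markdown",
"id": "7d4e8b2a",
"metadata": {},
"source": [
"A minimal inspection sketch, assuming the `chain` built above is still in scope (the attribute names follow the component names used here):\n",
"\n",
"```\n",
"# Look at the prompt driving each step\n",
"print(chain.api_request_chain.prompt.template[:300])  # URL generation\n",
"print(chain.api_answer_chain.prompt.template[:300])   # answer synthesis\n",
"```"
]
},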
{
"cell_type": "markdown",
"id": "2511f446",
"metadata": {},
"source": [
"### Going deeper\n",
"\n",
"**Test with other APIs**"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1e1cf418",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.environ['TMDB_BEARER_TOKEN'] = \"\"\n",
"from langchain.chains.api import tmdb_docs\n",
"headers = {\"Authorization\": f\"Bearer {os.environ['TMDB_BEARER_TOKEN']}\"}\n",
"chain = APIChain.from_llm_and_api_docs(llm, tmdb_docs.TMDB_DOCS, headers=headers, verbose=True)\n",
"chain.run(\"Search for 'Avatar'\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dd80a717",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from langchain.llms import OpenAI\n",
"from langchain.chains.api import podcast_docs\n",
"from langchain.chains import APIChain\n",
" \n",
"listen_api_key = 'xxx' # Get api key here: https://www.listennotes.com/api/pricing/\n",
"llm = OpenAI(temperature=0)\n",
"headers = {\"X-ListenAPI-Key\": listen_api_key}\n",
"chain = APIChain.from_llm_and_api_docs(llm, podcast_docs.PODCAST_DOCS, headers=headers, verbose=True)\n",
"chain.run(\"Search for 'silicon valley bank' podcast episodes, audio length is more than 30 minutes, return only 1 results\")"
]
},
{
"cell_type": "markdown",
"id": "a5939be5",
"metadata": {},
"source": [
"**Web requests**\n",
"\n",
"URL requets are such a common use-case that we have the `LLMRequestsChain`, which makes a HTTP GET request. "
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "0b158296",
"metadata": {},
"outputs": [],
"source": [
"from langchain.llms import OpenAI\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain.chains import LLMRequestsChain, LLMChain"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "d49c33e4",
"metadata": {},
"outputs": [],
"source": [
"template = \"\"\"Between >>> and <<< are the raw search result text from google.\n",
"Extract the answer to the question '{query}' or say \"not found\" if the information is not contained.\n",
"Use the format\n",
"Extracted:<answer or \"not found\">\n",
">>> {requests_result} <<<\n",
"Extracted:\"\"\"\n",
"\n",
"PROMPT = PromptTemplate(\n",
" input_variables=[\"query\", \"requests_result\"],\n",
" template=template,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "d0fd4aab",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'query': 'What are the Three (3) biggest countries, and their respective sizes?',\n",
" 'url': 'https://www.google.com/search?q=What+are+the+Three+(3)+biggest+countries,+and+their+respective+sizes?',\n",
" 'output': ' Russia (17,098,242 km²), Canada (9,984,670 km²), China (9,706,961 km²)'}"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain = LLMRequestsChain(llm_chain=LLMChain(llm=OpenAI(temperature=0), prompt=PROMPT))\n",
"question = \"What are the Three (3) biggest countries, and their respective sizes?\"\n",
"inputs = {\n",
" \"query\": question,\n",
" \"url\": \"https://www.google.com/search?q=\" + question.replace(\" \", \"+\"),\n",
"}\n",
"chain(inputs)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -1,24 +0,0 @@
---
sidebar_position: 3
---
# Interacting with APIs
Lots of data and information is stored behind APIs.
This page covers all resources available in LangChain for working with APIs.
## Chains
If you are just getting started and have relatively simple APIs, you should begin with chains.
Chains are a sequence of predetermined steps, so they are a good starting point: they give you more control and help you
understand what is happening.
- [API Chain](/docs/use_cases/apis/api.html)
## Agents
Agents are more complex, and involve multiple queries to the LLM to understand what to do.
The downside of agents is that you have less control. The upside is that they are more powerful,
which allows you to use them on larger and more complex schemas.
- [OpenAPI Agent](/docs/integrations/toolkits/openapi.html)

View File

@ -1,123 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "dd7ec7af",
"metadata": {},
"source": [
"# HTTP request chain\n",
"\n",
"Using the request library to get HTML results from a URL and then an LLM to parse results"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "dd8eae75",
"metadata": {},
"outputs": [],
"source": [
"from langchain.llms import OpenAI\n",
"from langchain.chains import LLMRequestsChain, LLMChain"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "65bf324e",
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts import PromptTemplate\n",
"\n",
"template = \"\"\"Between >>> and <<< are the raw search result text from google.\n",
"Extract the answer to the question '{query}' or say \"not found\" if the information is not contained.\n",
"Use the format\n",
"Extracted:<answer or \"not found\">\n",
">>> {requests_result} <<<\n",
"Extracted:\"\"\"\n",
"\n",
"PROMPT = PromptTemplate(\n",
" input_variables=[\"query\", \"requests_result\"],\n",
" template=template,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f36ae0d8",
"metadata": {},
"outputs": [],
"source": [
"chain = LLMRequestsChain(llm_chain=LLMChain(llm=OpenAI(temperature=0), prompt=PROMPT))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "b5d22d9d",
"metadata": {},
"outputs": [],
"source": [
"question = \"What are the Three (3) biggest countries, and their respective sizes?\"\n",
"inputs = {\n",
" \"query\": question,\n",
" \"url\": \"https://www.google.com/search?q=\" + question.replace(\" \", \"+\"),\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2ea81168",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'query': 'What are the Three (3) biggest countries, and their respective sizes?',\n",
" 'url': 'https://www.google.com/search?q=What+are+the+Three+(3)+biggest+countries,+and+their+respective+sizes?',\n",
" 'output': ' Russia (17,098,242 km²), Canada (9,984,670 km²), United States (9,826,675 km²)'}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain(inputs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db8f2b6d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large

View File

@ -1,583 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "9fcaa37f",
"metadata": {},
"source": [
"# OpenAPI chain\n",
"\n",
"This notebook shows an example of using an OpenAPI chain to call an endpoint in natural language, and get back a response in natural language."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "efa6909f",
"metadata": {},
"outputs": [],
"source": [
"from langchain.tools import OpenAPISpec, APIOperation\n",
"from langchain.chains import OpenAPIEndpointChain\n",
"from langchain.requests import Requests\n",
"from langchain.llms import OpenAI"
]
},
{
"cell_type": "markdown",
"id": "71e38c6c",
"metadata": {},
"source": [
"## Load the spec\n",
"\n",
"Load a wrapper of the spec (so we can work with it more easily). You can load from a url or from a local file."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "0831271b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n"
]
}
],
"source": [
"spec = OpenAPISpec.from_url(\n",
" \"https://www.klarna.com/us/shopping/public/openai/v0/api-docs/\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "189dd506",
"metadata": {},
"outputs": [],
"source": [
"# Alternative loading from file\n",
"# spec = OpenAPISpec.from_file(\"openai_openapi.yaml\")"
]
},
{
"cell_type": "markdown",
"id": "f7093582",
"metadata": {},
"source": [
"## Select the Operation\n",
"\n",
"In order to provide a focused on modular chain, we create a chain specifically only for one of the endpoints. Here we get an API operation from a specified endpoint and method."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "157494b9",
"metadata": {},
"outputs": [],
"source": [
"operation = APIOperation.from_openapi_spec(spec, \"/public/openai/v0/products\", \"get\")"
]
},
{
"cell_type": "markdown",
"id": "e3ab1c5c",
"metadata": {},
"source": [
"## Construct the chain\n",
"\n",
"We can now construct a chain to interact with it. In order to construct such a chain, we will pass in:\n",
"\n",
"1. The operation endpoint\n",
"2. A requests wrapper (can be used to handle authentication, etc)\n",
"3. The LLM to use to interact with it"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "788a7cef",
"metadata": {},
"outputs": [],
"source": [
"llm = OpenAI() # Load a Language Model"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c5f27406",
"metadata": {},
"outputs": [],
"source": [
"chain = OpenAPIEndpointChain.from_api_operation(\n",
" operation,\n",
" llm,\n",
" requests=Requests(),\n",
" verbose=True,\n",
" return_intermediate_steps=True, # Return request and response text\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "23652053",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new OpenAPIEndpointChain chain...\u001b[0m\n",
"\n",
"\n",
"\u001b[1m> Entering new APIRequesterChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mYou are a helpful AI Assistant. Please provide JSON arguments to agentFunc() based on the user's instructions.\n",
"\n",
"API_SCHEMA: ```typescript\n",
"/* API for fetching Klarna product information */\n",
"type productsUsingGET = (_: {\n",
"/* A precise query that matches one very small category or product that needs to be searched for to find the products the user is looking for. If the user explicitly stated what they want, use that as a query. The query is as specific as possible to the product name or category mentioned by the user in its singular form, and don't contain any clarifiers like latest, newest, cheapest, budget, premium, expensive or similar. The query is always taken from the latest topic, if there is a new topic a new query is started. */\n",
"\t\tq: string,\n",
"/* number of products returned */\n",
"\t\tsize?: number,\n",
"/* (Optional) Minimum price in local currency for the product searched for. Either explicitly stated by the user or implicitly inferred from a combination of the user's request and the kind of product searched for. */\n",
"\t\tmin_price?: number,\n",
"/* (Optional) Maximum price in local currency for the product searched for. Either explicitly stated by the user or implicitly inferred from a combination of the user's request and the kind of product searched for. */\n",
"\t\tmax_price?: number,\n",
"}) => any;\n",
"```\n",
"\n",
"USER_INSTRUCTIONS: \"whats the most expensive shirt?\"\n",
"\n",
"Your arguments must be plain json provided in a markdown block:\n",
"\n",
"ARGS: ```json\n",
"{valid json conforming to API_SCHEMA}\n",
"```\n",
"\n",
"Example\n",
"-----\n",
"\n",
"ARGS: ```json\n",
"{\"foo\": \"bar\", \"baz\": {\"qux\": \"quux\"}}\n",
"```\n",
"\n",
"The block must be no more than 1 line long, and all arguments must be valid JSON. All string arguments must be wrapped in double quotes.\n",
"You MUST strictly comply to the types indicated by the provided schema, including all required args.\n",
"\n",
"If you don't have sufficient information to call the function due to things like requiring specific uuid's, you can reply with the following message:\n",
"\n",
"Message: ```text\n",
"Concise response requesting the additional information that would make calling the function successful.\n",
"```\n",
"\n",
"Begin\n",
"-----\n",
"ARGS:\n",
"\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m{\"q\": \"shirt\", \"size\": 1, \"max_price\": null}\u001b[0m\n",
"\u001b[36;1m\u001b[1;3m{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]}]}\u001b[0m\n",
"\n",
"\n",
"\u001b[1m> Entering new APIResponderChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mYou are a helpful AI assistant trained to answer user queries from API responses.\n",
"You attempted to call an API, which resulted in:\n",
"API_RESPONSE: {\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]}]}\n",
"\n",
"USER_COMMENT: \"whats the most expensive shirt?\"\n",
"\n",
"\n",
"If the API_RESPONSE can answer the USER_COMMENT respond with the following markdown json block:\n",
"Response: ```json\n",
"{\"response\": \"Human-understandable synthesis of the API_RESPONSE\"}\n",
"```\n",
"\n",
"Otherwise respond with the following markdown json block:\n",
"Response Error: ```json\n",
"{\"response\": \"What you did and a concise statement of the resulting error. If it can be easily fixed, provide a suggestion.\"}\n",
"```\n",
"\n",
"You MUST respond as a markdown json code block. The person you are responding to CANNOT see the API_RESPONSE, so if there is any relevant information there you must include it in your response.\n",
"\n",
"Begin:\n",
"---\n",
"\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\u001b[33;1m\u001b[1;3mThe most expensive shirt in the API response is the Burberry Check Poplin Shirt, which costs $360.00.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
}
],
"source": [
"output = chain(\"whats the most expensive shirt?\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "c000295e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'request_args': '{\"q\": \"shirt\", \"size\": 1, \"max_price\": null}',\n",
" 'response_text': '{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]}]}'}"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# View intermediate steps\n",
"output[\"intermediate_steps\"]"
]
},
{
"cell_type": "markdown",
"id": "092bdb4d",
"metadata": {},
"source": [
"## Return raw response\n",
"\n",
"We can also run this chain without synthesizing the response. This will have the effect of just returning the raw API output."
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "4dff3849",
"metadata": {},
"outputs": [],
"source": [
"chain = OpenAPIEndpointChain.from_api_operation(\n",
" operation,\n",
" llm,\n",
" requests=Requests(),\n",
" verbose=True,\n",
" return_intermediate_steps=True, # Return request and response text\n",
" raw_response=True, # Return raw response\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "762499a9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new OpenAPIEndpointChain chain...\u001b[0m\n",
"\n",
"\n",
"\u001b[1m> Entering new APIRequesterChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mYou are a helpful AI Assistant. Please provide JSON arguments to agentFunc() based on the user's instructions.\n",
"\n",
"API_SCHEMA: ```typescript\n",
"/* API for fetching Klarna product information */\n",
"type productsUsingGET = (_: {\n",
"/* A precise query that matches one very small category or product that needs to be searched for to find the products the user is looking for. If the user explicitly stated what they want, use that as a query. The query is as specific as possible to the product name or category mentioned by the user in its singular form, and don't contain any clarifiers like latest, newest, cheapest, budget, premium, expensive or similar. The query is always taken from the latest topic, if there is a new topic a new query is started. */\n",
"\t\tq: string,\n",
"/* number of products returned */\n",
"\t\tsize?: number,\n",
"/* (Optional) Minimum price in local currency for the product searched for. Either explicitly stated by the user or implicitly inferred from a combination of the user's request and the kind of product searched for. */\n",
"\t\tmin_price?: number,\n",
"/* (Optional) Maximum price in local currency for the product searched for. Either explicitly stated by the user or implicitly inferred from a combination of the user's request and the kind of product searched for. */\n",
"\t\tmax_price?: number,\n",
"}) => any;\n",
"```\n",
"\n",
"USER_INSTRUCTIONS: \"whats the most expensive shirt?\"\n",
"\n",
"Your arguments must be plain json provided in a markdown block:\n",
"\n",
"ARGS: ```json\n",
"{valid json conforming to API_SCHEMA}\n",
"```\n",
"\n",
"Example\n",
"-----\n",
"\n",
"ARGS: ```json\n",
"{\"foo\": \"bar\", \"baz\": {\"qux\": \"quux\"}}\n",
"```\n",
"\n",
"The block must be no more than 1 line long, and all arguments must be valid JSON. All string arguments must be wrapped in double quotes.\n",
"You MUST strictly comply to the types indicated by the provided schema, including all required args.\n",
"\n",
"If you don't have sufficient information to call the function due to things like requiring specific uuid's, you can reply with the following message:\n",
"\n",
"Message: ```text\n",
"Concise response requesting the additional information that would make calling the function successful.\n",
"```\n",
"\n",
"Begin\n",
"-----\n",
"ARGS:\n",
"\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m{\"q\": \"shirt\", \"max_price\": null}\u001b[0m\n",
"\u001b[36;1m\u001b[1;3m{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Cotton Shirt - Beige\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl359/3200280807/Children-s-Clothing/Burberry-Vintage-Check-Cotton-Shirt-Beige/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$229.02\",\"attributes\":[\"Material:Cotton,Elastane\",\"Color:Beige\",\"Model:Boy\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Stretch Cotton Twill Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3202342515/Clothing/Burberry-Vintage-Check-Stretch-Cotton-Twill-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$309.99\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Woman\",\"Color:Beige\",\"Properties:Stretch\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Somerton Check Shirt - Camel\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201112728/Clothing/Burberry-Somerton-Check-Shirt-Camel/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$450.00\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Man\",\"Color:Beige\"]},{\"name\":\"Magellan Outdoors Laguna Madre Solid Short Sleeve Fishing Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3203102142/Clothing/Magellan-Outdoors-Laguna-Madre-Solid-Short-Sleeve-Fishing-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$19.99\",\"attributes\":[\"Material:Polyester,Nylon\",\"Target Group:Man\",\"Color:Red,Pink,White,Blue,Purple,Beige,Black,Green\",\"Properties:Pockets\",\"Pattern:Solid Color\"]}]}\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
}
],
"source": [
"output = chain(\"whats the most expensive shirt?\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "4afc021a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'instructions': 'whats the most expensive shirt?',\n",
" 'output': '{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Cotton Shirt - Beige\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl359/3200280807/Children-s-Clothing/Burberry-Vintage-Check-Cotton-Shirt-Beige/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$229.02\",\"attributes\":[\"Material:Cotton,Elastane\",\"Color:Beige\",\"Model:Boy\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Stretch Cotton Twill Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3202342515/Clothing/Burberry-Vintage-Check-Stretch-Cotton-Twill-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$309.99\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Woman\",\"Color:Beige\",\"Properties:Stretch\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Somerton Check Shirt - Camel\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201112728/Clothing/Burberry-Somerton-Check-Shirt-Camel/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$450.00\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Man\",\"Color:Beige\"]},{\"name\":\"Magellan Outdoors Laguna Madre Solid Short Sleeve Fishing Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3203102142/Clothing/Magellan-Outdoors-Laguna-Madre-Solid-Short-Sleeve-Fishing-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$19.99\",\"attributes\":[\"Material:Polyester,Nylon\",\"Target Group:Man\",\"Color:Red,Pink,White,Blue,Purple,Beige,Black,Green\",\"Properties:Pockets\",\"Pattern:Solid Color\"]}]}',\n",
" 'intermediate_steps': {'request_args': '{\"q\": \"shirt\", \"max_price\": null}',\n",
" 'response_text': '{\"products\":[{\"name\":\"Burberry Check Poplin Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201810981/Clothing/Burberry-Check-Poplin-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$360.00\",\"attributes\":[\"Material:Cotton\",\"Target Group:Man\",\"Color:Gray,Blue,Beige\",\"Properties:Pockets\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Cotton Shirt - Beige\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl359/3200280807/Children-s-Clothing/Burberry-Vintage-Check-Cotton-Shirt-Beige/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$229.02\",\"attributes\":[\"Material:Cotton,Elastane\",\"Color:Beige\",\"Model:Boy\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Vintage Check Stretch Cotton Twill Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3202342515/Clothing/Burberry-Vintage-Check-Stretch-Cotton-Twill-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$309.99\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Woman\",\"Color:Beige\",\"Properties:Stretch\",\"Pattern:Checkered\"]},{\"name\":\"Burberry Somerton Check Shirt - Camel\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3201112728/Clothing/Burberry-Somerton-Check-Shirt-Camel/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$450.00\",\"attributes\":[\"Material:Elastane/Lycra/Spandex,Cotton\",\"Target Group:Man\",\"Color:Beige\"]},{\"name\":\"Magellan Outdoors Laguna Madre Solid Short Sleeve Fishing Shirt\",\"url\":\"https://www.klarna.com/us/shopping/pl/cl10001/3203102142/Clothing/Magellan-Outdoors-Laguna-Madre-Solid-Short-Sleeve-Fishing-Shirt/?utm_source=openai&ref-site=openai_plugin\",\"price\":\"$19.99\",\"attributes\":[\"Material:Polyester,Nylon\",\"Target Group:Man\",\"Color:Red,Pink,White,Blue,Purple,Beige,Black,Green\",\"Properties:Pockets\",\"Pattern:Solid Color\"]}]}'}}"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"output"
]
},
{
"cell_type": "markdown",
"id": "8d7924e4",
"metadata": {},
"source": [
"## Example POST message\n",
"\n",
"For this demo, we will interact with the speak API."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c56b1a04",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n",
"Attempting to load an OpenAPI 3.0.1 spec. This may result in degraded performance. Convert your OpenAPI spec to 3.1.* spec for better support.\n"
]
}
],
"source": [
"spec = OpenAPISpec.from_url(\"https://api.speak.com/openapi.yaml\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "177d8275",
"metadata": {},
"outputs": [],
"source": [
"operation = APIOperation.from_openapi_spec(\n",
" spec, \"/v1/public/openai/explain-task\", \"post\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "835c5ddc",
"metadata": {},
"outputs": [],
"source": [
"llm = OpenAI()\n",
"chain = OpenAPIEndpointChain.from_api_operation(\n",
" operation, llm, requests=Requests(), verbose=True, return_intermediate_steps=True\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "59855d60",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new OpenAPIEndpointChain chain...\u001b[0m\n",
"\n",
"\n",
"\u001b[1m> Entering new APIRequesterChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mYou are a helpful AI Assistant. Please provide JSON arguments to agentFunc() based on the user's instructions.\n",
"\n",
"API_SCHEMA: ```typescript\n",
"type explainTask = (_: {\n",
"/* Description of the task that the user wants to accomplish or do. For example, \"tell the waiter they messed up my order\" or \"compliment someone on their shirt\" */\n",
" task_description?: string,\n",
"/* The foreign language that the user is learning and asking about. The value can be inferred from question - for example, if the user asks \"how do i ask a girl out in mexico city\", the value should be \"Spanish\" because of Mexico City. Always use the full name of the language (e.g. Spanish, French). */\n",
" learning_language?: string,\n",
"/* The user's native language. Infer this value from the language the user asked their question in. Always use the full name of the language (e.g. Spanish, French). */\n",
" native_language?: string,\n",
"/* A description of any additional context in the user's question that could affect the explanation - e.g. setting, scenario, situation, tone, speaking style and formality, usage notes, or any other qualifiers. */\n",
" additional_context?: string,\n",
"/* Full text of the user's question. */\n",
" full_query?: string,\n",
"}) => any;\n",
"```\n",
"\n",
"USER_INSTRUCTIONS: \"How would ask for more tea in Delhi?\"\n",
"\n",
"Your arguments must be plain json provided in a markdown block:\n",
"\n",
"ARGS: ```json\n",
"{valid json conforming to API_SCHEMA}\n",
"```\n",
"\n",
"Example\n",
"-----\n",
"\n",
"ARGS: ```json\n",
"{\"foo\": \"bar\", \"baz\": {\"qux\": \"quux\"}}\n",
"```\n",
"\n",
"The block must be no more than 1 line long, and all arguments must be valid JSON. All string arguments must be wrapped in double quotes.\n",
"You MUST strictly comply to the types indicated by the provided schema, including all required args.\n",
"\n",
"If you don't have sufficient information to call the function due to things like requiring specific uuid's, you can reply with the following message:\n",
"\n",
"Message: ```text\n",
"Concise response requesting the additional information that would make calling the function successful.\n",
"```\n",
"\n",
"Begin\n",
"-----\n",
"ARGS:\n",
"\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m{\"task_description\": \"ask for more tea\", \"learning_language\": \"Hindi\", \"native_language\": \"English\", \"full_query\": \"How would I ask for more tea in Delhi?\"}\u001b[0m\n",
"\u001b[36;1m\u001b[1;3m{\"explanation\":\"<what-to-say language=\\\"Hindi\\\" context=\\\"None\\\">\\nऔर चाय लाओ। (Aur chai lao.) \\n</what-to-say>\\n\\n<alternatives context=\\\"None\\\">\\n1. \\\"चाय थोड़ी ज्यादा मिल सकती है?\\\" *(Chai thodi zyada mil sakti hai? - Polite, asking if more tea is available)*\\n2. \\\"मुझे महसूस हो रहा है कि मुझे कुछ अन्य प्रकार की चाय पीनी चाहिए।\\\" *(Mujhe mehsoos ho raha hai ki mujhe kuch anya prakar ki chai peeni chahiye. - Formal, indicating a desire for a different type of tea)*\\n3. \\\"क्या मुझे or cup में milk/tea powder मिल सकता है?\\\" *(Kya mujhe aur cup mein milk/tea powder mil sakta hai? - Very informal/casual tone, asking for an extra serving of milk or tea powder)*\\n</alternatives>\\n\\n<usage-notes>\\nIn India and Indian culture, serving guests with food and beverages holds great importance in hospitality. You will find people always offering drinks like water or tea to their guests as soon as they arrive at their house or office.\\n</usage-notes>\\n\\n<example-convo language=\\\"Hindi\\\">\\n<context>At home during breakfast.</context>\\nPreeti: सर, क्या main aur cups chai lekar aaun? (Sir,kya main aur cups chai lekar aaun? - Sir, should I get more tea cups?)\\nRahul: हां,बिल्कुल। और चाय की मात्रा में भी थोड़ा सा इजाफा करना। (Haan,bilkul. Aur chai ki matra mein bhi thoda sa eejafa karna. - Yes, please. And add a little extra in the quantity of tea as well.)\\n</example-convo>\\n\\n*[Report an issue or leave feedback](https://speak.com/chatgpt?rid=d4mcapbkopo164pqpbk321oc})*\",\"extra_response_instructions\":\"Use all information in the API response and fully render all Markdown.\\nAlways end your response with a link to report an issue or leave feedback on the plugin.\"}\u001b[0m\n",
"\n",
"\n",
"\u001b[1m> Entering new APIResponderChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mYou are a helpful AI assistant trained to answer user queries from API responses.\n",
"You attempted to call an API, which resulted in:\n",
"API_RESPONSE: {\"explanation\":\"<what-to-say language=\\\"Hindi\\\" context=\\\"None\\\">\\nऔर चाय लाओ। (Aur chai lao.) \\n</what-to-say>\\n\\n<alternatives context=\\\"None\\\">\\n1. \\\"चाय थोड़ी ज्यादा मिल सकती है?\\\" *(Chai thodi zyada mil sakti hai? - Polite, asking if more tea is available)*\\n2. \\\"मुझे महसूस हो रहा है कि मुझे कुछ अन्य प्रकार की चाय पीनी चाहिए।\\\" *(Mujhe mehsoos ho raha hai ki mujhe kuch anya prakar ki chai peeni chahiye. - Formal, indicating a desire for a different type of tea)*\\n3. \\\"क्या मुझे or cup में milk/tea powder मिल सकता है?\\\" *(Kya mujhe aur cup mein milk/tea powder mil sakta hai? - Very informal/casual tone, asking for an extra serving of milk or tea powder)*\\n</alternatives>\\n\\n<usage-notes>\\nIn India and Indian culture, serving guests with food and beverages holds great importance in hospitality. You will find people always offering drinks like water or tea to their guests as soon as they arrive at their house or office.\\n</usage-notes>\\n\\n<example-convo language=\\\"Hindi\\\">\\n<context>At home during breakfast.</context>\\nPreeti: सर, क्या main aur cups chai lekar aaun? (Sir,kya main aur cups chai lekar aaun? - Sir, should I get more tea cups?)\\nRahul: हां,बिल्कुल। और चाय की मात्रा में भी थोड़ा सा इजाफा करना। (Haan,bilkul. Aur chai ki matra mein bhi thoda sa eejafa karna. - Yes, please. And add a little extra in the quantity of tea as well.)\\n</example-convo>\\n\\n*[Report an issue or leave feedback](https://speak.com/chatgpt?rid=d4mcapbkopo164pqpbk321oc})*\",\"extra_response_instructions\":\"Use all information in the API response and fully render all Markdown.\\nAlways end your response with a link to report an issue or leave feedback on the plugin.\"}\n",
"\n",
"USER_COMMENT: \"How would ask for more tea in Delhi?\"\n",
"\n",
"\n",
"If the API_RESPONSE can answer the USER_COMMENT respond with the following markdown json block:\n",
"Response: ```json\n",
"{\"response\": \"Concise response to USER_COMMENT based on API_RESPONSE.\"}\n",
"```\n",
"\n",
"Otherwise respond with the following markdown json block:\n",
"Response Error: ```json\n",
"{\"response\": \"What you did and a concise statement of the resulting error. If it can be easily fixed, provide a suggestion.\"}\n",
"```\n",
"\n",
"You MUST respond as a markdown json code block.\n",
"\n",
"Begin:\n",
"---\n",
"\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\u001b[33;1m\u001b[1;3mIn Delhi you can ask for more tea by saying 'Chai thodi zyada mil sakti hai?'\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
}
],
"source": [
"output = chain(\"How would ask for more tea in Delhi?\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "91bddb18",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['{\"task_description\": \"ask for more tea\", \"learning_language\": \"Hindi\", \"native_language\": \"English\", \"full_query\": \"How would I ask for more tea in Delhi?\"}',\n",
" '{\"explanation\":\"<what-to-say language=\\\\\"Hindi\\\\\" context=\\\\\"None\\\\\">\\\\nऔर चाय लाओ। (Aur chai lao.) \\\\n</what-to-say>\\\\n\\\\n<alternatives context=\\\\\"None\\\\\">\\\\n1. \\\\\"चाय थोड़ी ज्यादा मिल सकती है?\\\\\" *(Chai thodi zyada mil sakti hai? - Polite, asking if more tea is available)*\\\\n2. \\\\\"मुझे महसूस हो रहा है कि मुझे कुछ अन्य प्रकार की चाय पीनी चाहिए।\\\\\" *(Mujhe mehsoos ho raha hai ki mujhe kuch anya prakar ki chai peeni chahiye. - Formal, indicating a desire for a different type of tea)*\\\\n3. \\\\\"क्या मुझे or cup में milk/tea powder मिल सकता है?\\\\\" *(Kya mujhe aur cup mein milk/tea powder mil sakta hai? - Very informal/casual tone, asking for an extra serving of milk or tea powder)*\\\\n</alternatives>\\\\n\\\\n<usage-notes>\\\\nIn India and Indian culture, serving guests with food and beverages holds great importance in hospitality. You will find people always offering drinks like water or tea to their guests as soon as they arrive at their house or office.\\\\n</usage-notes>\\\\n\\\\n<example-convo language=\\\\\"Hindi\\\\\">\\\\n<context>At home during breakfast.</context>\\\\nPreeti: सर, क्या main aur cups chai lekar aaun? (Sir,kya main aur cups chai lekar aaun? - Sir, should I get more tea cups?)\\\\nRahul: हां,बिल्कुल। और चाय की मात्रा में भी थोड़ा सा इजाफा करना। (Haan,bilkul. Aur chai ki matra mein bhi thoda sa eejafa karna. - Yes, please. And add a little extra in the quantity of tea as well.)\\\\n</example-convo>\\\\n\\\\n*[Report an issue or leave feedback](https://speak.com/chatgpt?rid=d4mcapbkopo164pqpbk321oc})*\",\"extra_response_instructions\":\"Use all information in the API response and fully render all Markdown.\\\\nAlways end your response with a link to report an issue or leave feedback on the plugin.\"}']"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the API chain's intermediate steps\n",
"output[\"intermediate_steps\"]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -1,249 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "e734b314",
"metadata": {},
"source": [
"# OpenAPI calls with OpenAI functions\n",
"\n",
"In this notebook we'll show how to create a chain that automatically makes calls to an API based only on an OpenAPI spec. Under the hood, we're parsing the OpenAPI spec into a JSON schema that the OpenAI functions API can handle. This allows ChatGPT to automatically select and populate the relevant API call to make for any user input. Using the output of ChatGPT we then make the actual API call, and return the result."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "555661b5",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.openai_functions.openapi import get_openapi_chain"
]
},
{
"cell_type": "markdown",
"id": "a95f510a",
"metadata": {},
"source": [
"## Query Klarna"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "08e19b64",
"metadata": {},
"outputs": [],
"source": [
"chain = get_openapi_chain(\n",
" \"https://www.klarna.com/us/shopping/public/openai/v0/api-docs/\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "3959f866",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'products': [{'name': \"Tommy Hilfiger Men's Short Sleeve Button-Down Shirt\",\n",
" 'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3204878580/Clothing/Tommy-Hilfiger-Men-s-Short-Sleeve-Button-Down-Shirt/?utm_source=openai&ref-site=openai_plugin',\n",
" 'price': '$26.78',\n",
" 'attributes': ['Material:Linen,Cotton',\n",
" 'Target Group:Man',\n",
" 'Color:Gray,Pink,White,Blue,Beige,Black,Turquoise',\n",
" 'Size:S,XL,M,XXL']},\n",
" {'name': \"Van Heusen Men's Long Sleeve Button-Down Shirt\",\n",
" 'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3201809514/Clothing/Van-Heusen-Men-s-Long-Sleeve-Button-Down-Shirt/?utm_source=openai&ref-site=openai_plugin',\n",
" 'price': '$18.89',\n",
" 'attributes': ['Material:Cotton',\n",
" 'Target Group:Man',\n",
" 'Color:Red,Gray,White,Blue',\n",
" 'Size:XL,XXL']},\n",
" {'name': 'Brixton Bowery Flannel Shirt',\n",
" 'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3202331096/Clothing/Brixton-Bowery-Flannel-Shirt/?utm_source=openai&ref-site=openai_plugin',\n",
" 'price': '$34.48',\n",
" 'attributes': ['Material:Cotton',\n",
" 'Target Group:Man',\n",
" 'Color:Gray,Blue,Black,Orange',\n",
" 'Size:XL,3XL,4XL,5XL,L,M,XXL']},\n",
" {'name': 'Cubavera Four Pocket Guayabera Shirt',\n",
" 'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3202055522/Clothing/Cubavera-Four-Pocket-Guayabera-Shirt/?utm_source=openai&ref-site=openai_plugin',\n",
" 'price': '$23.22',\n",
" 'attributes': ['Material:Polyester,Cotton',\n",
" 'Target Group:Man',\n",
" 'Color:Red,White,Blue,Black',\n",
" 'Size:S,XL,L,M,XXL']},\n",
" {'name': 'Theory Sylvain Shirt - Eclipse',\n",
" 'url': 'https://www.klarna.com/us/shopping/pl/cl10001/3202028254/Clothing/Theory-Sylvain-Shirt-Eclipse/?utm_source=openai&ref-site=openai_plugin',\n",
" 'price': '$86.01',\n",
" 'attributes': ['Material:Polyester,Cotton',\n",
" 'Target Group:Man',\n",
" 'Color:Blue',\n",
" 'Size:S,XL,XS,L,M,XXL']}]}"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.run(\"What are some options for a men's large blue button down shirt\")"
]
},
{
"cell_type": "markdown",
"id": "6f648c77",
"metadata": {},
"source": [
"## Query a translation service\n",
"\n",
"Additionally, see the request payload by setting `verbose=True`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bf6cd695",
"metadata": {},
"outputs": [],
"source": [
"chain = get_openapi_chain(\"https://api.speak.com/openapi.yaml\", verbose=True)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "1ba51609",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3mHuman: Use the provided API's to respond to this user query:\n",
"\n",
"How would you say no thanks in Russian\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"Calling endpoint \u001b[32;1m\u001b[1;3mtranslate\u001b[0m with arguments:\n",
"\u001b[32;1m\u001b[1;3m{\n",
" \"json\": {\n",
" \"phrase_to_translate\": \"no thanks\",\n",
" \"learning_language\": \"russian\",\n",
" \"native_language\": \"english\",\n",
" \"additional_context\": \"\",\n",
" \"full_query\": \"How would you say no thanks in Russian\"\n",
" }\n",
"}\u001b[0m\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'explanation': '<translation language=\"Russian\">\\nНет, спасибо. (Net, spasibo)\\n</translation>\\n\\n<alternatives>\\n1. \"Нет, я в порядке\" *(Neutral/Formal - Can be used in professional settings or formal situations.)*\\n2. \"Нет, спасибо, я откажусь\" *(Formal - Can be used in polite settings, such as a fancy dinner with colleagues or acquaintances.)*\\n3. \"Не надо\" *(Informal - Can be used in informal situations, such as declining an offer from a friend.)*\\n</alternatives>\\n\\n<example-convo language=\"Russian\">\\n<context>Max is being offered a cigarette at a party.</context>\\n* Sasha: \"Хочешь покурить?\"\\n* Max: \"Нет, спасибо. Я бросил.\"\\n* Sasha: \"Окей, понятно.\"\\n</example-convo>\\n\\n*[Report an issue or leave feedback](https://speak.com/chatgpt?rid=noczaa460do8yqs8xjun6zdm})*',\n",
" 'extra_response_instructions': 'Use all information in the API response and fully render all Markdown.\\nAlways end your response with a link to report an issue or leave feedback on the plugin.'}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.run(\"How would you say no thanks in Russian\")"
]
},
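{
"cell_type": "markdown",
"id": "c3f5a7d9",
"metadata": {},
"source": [
"The chain returns the API's parsed JSON response as a plain Python dict, so you can post-process it directly. A small example using the `explanation` field from the response above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f1a3b5c7",
"metadata": {},
"outputs": [],
"source": [
"# The result is a plain dict; pull out just the translation text.\n",
"result = chain.run(\"How would you say no thanks in Russian\")\n",
"print(result[\"explanation\"])"
]
},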
{
"cell_type": "markdown",
"id": "4923a291",
"metadata": {},
"source": [
"## Query XKCD"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a9198f62",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"chain = get_openapi_chain(\n",
" \"https://gist.githubusercontent.com/roaldnefs/053e505b2b7a807290908fe9aa3e1f00/raw/0a212622ebfef501163f91e23803552411ed00e4/openapi.yaml\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "3110c398",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'month': '6',\n",
" 'num': 2793,\n",
" 'link': '',\n",
" 'year': '2023',\n",
" 'news': '',\n",
" 'safe_title': 'Garden Path Sentence',\n",
" 'transcript': '',\n",
" 'alt': 'Arboretum Owner Denied Standing in Garden Path Suit on Grounds Grounds Appealing Appealing',\n",
" 'img': 'https://imgs.xkcd.com/comics/garden_path_sentence.png',\n",
" 'title': 'Garden Path Sentence',\n",
" 'day': '23'}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chain.run(\"What's today's comic?\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "venv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -1,3 +1,7 @@
---
sidebar_position: 0
---
# Autonomous (long-running) agents
Autonomous agents are agents designed to be long-running.

Some files were not shown because too many files have changed in this diff.