Mirror of https://github.com/hwchase17/langchain.git, synced 2026-02-13 06:16:26 +00:00

Compare commits: 236 commits between `rlm/llama-...` and `wfh/redire...` (22e0b5a45f through 1fa5d94591).
.github/CONTRIBUTING.md (vendored, 6 changes)

```diff
@@ -44,7 +44,7 @@ If you are adding an issue, please try to keep it focused on a single, modular b
 If two issues are related, or blocking, please link them rather than combining them.
 
 We will try to keep these issues as up to date as possible, though
-with the rapid rate of develop in this field some may get out of date.
+with the rapid rate of development in this field some may get out of date.
 If you notice this happening, please let us know.
 
 ### 🙋Getting Help
@@ -87,7 +87,7 @@ This will install all requirements for running the package, examples, linting, f
 
 ❗Note: If during installation you receive a `WheelFileValidationError` for `debugpy`, please make sure you are running Poetry v1.5.1. This bug was present in older versions of Poetry (e.g. 1.4.1) and has been resolved in newer releases. If you are still seeing this bug on v1.5.1, you may also try disabling "modern installation" (`poetry config installer.modern-installation false`) and re-installing requirements. See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details.
 
-Now, you should be able to run the common tasks in the following section. To double check, run `make test`, all tests should pass. If they don't you may need to pip install additional dependencies, such as `numexpr` and `openapi_schema_pydantic`.
+Now assuming `make` and `pytest` are installed, you should be able to run the common tasks in the following section. To double check, run `make test` under `libs/langchain`, all tests should pass. If they don't, you may need to pip install additional dependencies, such as `numexpr` and `openapi_schema_pydantic`.
 
 ## ✅ Common Tasks
 
@@ -134,7 +134,7 @@ We recognize linting can be annoying - if you do not want to do it, please conta
 ### Spellcheck
 
 Spellchecking for this project is done via [codespell](https://github.com/codespell-project/codespell).
-Note that `codespell` finds common typos, so could have false-positive (correctly spelled but rarely used) and false-negatives (not finding misspelled) words.
+Note that `codespell` finds common typos, so it could have false-positive (correctly spelled but rarely used) and false-negatives (not finding misspelled) words.
 
 To check spelling for this project:
```
.github/workflows/_release.yml (vendored, 12 changes)

```diff
@@ -31,13 +31,15 @@ jobs:
       working-directory: ${{ inputs.working-directory }}
     steps:
       - uses: actions/checkout@v3
-      - name: Install poetry
-        run: pipx install "poetry==$POETRY_VERSION"
-      - name: Set up Python 3.10
-        uses: actions/setup-python@v4
+
+      - name: Set up Python + Poetry ${{ env.POETRY_VERSION }}
+        uses: "./.github/actions/poetry_setup"
         with:
           python-version: "3.10"
-          cache: "poetry"
+          poetry-version: ${{ env.POETRY_VERSION }}
+          working-directory: ${{ inputs.working-directory }}
+          cache-key: release
+
       - name: Build project for distribution
         run: poetry build
       - name: Check Version
```
.github/workflows/langchain_experimental_ci.yml (vendored, 32 changes)

```diff
@@ -81,3 +81,35 @@ jobs:
 
       - name: Run tests
         run: make test
+  extended-tests:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ${{ env.WORKDIR }}
+    strategy:
+      matrix:
+        python-version:
+          - "3.8"
+          - "3.9"
+          - "3.10"
+          - "3.11"
+    name: Python ${{ matrix.python-version }} extended tests
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
+        uses: "./.github/actions/poetry_setup"
+        with:
+          python-version: ${{ matrix.python-version }}
+          poetry-version: ${{ env.POETRY_VERSION }}
+          working-directory: libs/experimental
+          cache-key: extended
+
+      - name: Install dependencies
+        shell: bash
+        run: |
+          echo "Running extended tests, installing dependencies with poetry..."
+          poetry install -E extended_testing
+
+      - name: Run extended tests
+        run: make extended_tests
```
@@ -228,7 +228,7 @@ Classes
|
||||
:toctree: {module}
|
||||
"""
|
||||
|
||||
for class_ in classes:
|
||||
for class_ in sorted(classes, key=lambda c: c["qualified_name"]):
|
||||
if not class_["is_public"]:
|
||||
continue
|
||||
|
||||
|
||||
```diff
@@ -341,7 +341,7 @@
     "HugeGraph QA Chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_hugegraph_qa",
     "GraphSparqlQAChain": "https://python.langchain.com/docs/use_cases/more/graph/graph_sparql_qa",
     "ArangoDB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_arangodb_qa",
-    "Graph DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa",
+    "Neo4j DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa",
     "How to use a SmartLLMChain": "https://python.langchain.com/docs/use_cases/more/self_check/smart_llm",
     "Multi-Agent Simulated Environment: Petting Zoo": "https://python.langchain.com/docs/use_cases/agent_simulations/petting_zoo",
     "Multi-agent decentralized speaker selection": "https://python.langchain.com/docs/use_cases/agent_simulations/multiagent_bidding",
@@ -3202,10 +3202,10 @@
     "Graph QA": "https://python.langchain.com/docs/use_cases/more/graph/graph_qa"
   },
   "GraphCypherQAChain": {
-    "Graph DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa"
+    "Neo4j DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa"
   },
   "Neo4jGraph": {
-    "Graph DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa"
+    "Neo4j DB QA chain": "https://python.langchain.com/docs/use_cases/more/graph/graph_cypher_qa"
   },
   "LLMBashChain": {
     "Bash chain": "https://python.langchain.com/docs/use_cases/more/code_writing/llm_bash"
```
```diff
@@ -5,9 +5,10 @@
   <meta charset="utf-8">
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   <meta http-equiv="Refresh" content="0; url={{ redirect }}" />
-  <meta name="Description" content="scikit-learn: machine learning in Python">
+  <meta name="robots" content="follow, index">
+  <meta name="Description" content="Python API reference for LangChain.">
+  <link rel="canonical" href="{{ redirect }}" />
-  <title>scikit-learn: machine learning in Python</title>
+  <title>LangChain Python API Reference Documentation.</title>
 </head>
 <body>
   <p>You will be automatically redirected to the <a href="{{ redirect }}">new location of this page</a>.</p>
```
docs/docs_skeleton/docs/expression_language/index.mdx (new file, 14 lines)

```diff
@@ -0,0 +1,14 @@
+---
+sidebar_class_name: hidden
+---
+
+# LangChain Expression Language (LCEL)
+
+LangChain Expression Language or LCEL is a declarative way to easily compose chains together.
+Any chain constructed this way will automatically have full sync, async, and streaming support.
+
+#### [Interface](/docs/expression_language/interface)
+The base interface shared by all LCEL objects
+
+#### [Cookbook](/docs/expression_language/cookbook)
+Examples of common LCEL usage patterns
```
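To make the new page's claim concrete, here is a minimal LCEL sketch, assuming the `langchain` package with an OpenAI key configured; the prompt and topics are illustrative:

```python
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# Declarative composition: pipe a prompt into a model
prompt = ChatPromptTemplate.from_template("Tell me a joke about {topic}")
chain = prompt | ChatOpenAI()

# The same chain exposes sync, streaming, and batch interfaces
chain.invoke({"topic": "bears"})
for chunk in chain.stream({"topic": "bears"}):
    print(chunk.content, end="", flush=True)
chain.batch([{"topic": "bears"}, {"topic": "cats"}])
```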
```diff
@@ -42,23 +42,22 @@ Log and stream intermediate steps of any chain
 ## Examples, ecosystem, and resources
 ### [Use cases](/docs/use_cases/)
 Walkthroughs and best-practices for common end-to-end use cases, like:
-- [Chatbots](/docs/use_cases/chatbots/)
+- [Chatbots](/docs/use_cases/chatbots)
 - [Answering questions using sources](/docs/use_cases/question_answering/)
-- [Analyzing structured data](/docs/use_cases/tabular.html)
+- [Analyzing structured data](/docs/use_cases/sql)
 - and much more...
 
 ### [Guides](/docs/guides/)
 Learn best practices for developing with LangChain.
 
 ### [Ecosystem](/docs/ecosystem/)
-LangChain is part of a rich ecosystem of tools that integrate with our framework and build on top of it. Check out our growing list of [integrations](/docs/integrations/) and [dependent repos](/docs/ecosystem/dependents).
+LangChain is part of a rich ecosystem of tools that integrate with our framework and build on top of it. Check out our growing list of [integrations](/docs/integrations/) and [dependent repos](/docs/additional_resources/dependents).
 
 ### [Additional resources](/docs/additional_resources/)
 Our community is full of prolific developers, creative builders, and fantastic teachers. Check out [YouTube tutorials](/docs/additional_resources/youtube.html) for great tutorials from folks in the community, and [Gallery](https://github.com/kyrolabs/awesome-langchain) for a list of awesome LangChain projects, compiled by the folks at [KyroLabs](https://kyrolabs.com).
 
-<h3><span style={{color:"#2e8555"}}> Support </span></h3>
-
-Join us on [GitHub](https://github.com/hwchase17/langchain) or [Discord](https://discord.gg/6adMQxSpJS) to ask questions, share feedback, meet other developers building with LangChain, and dream about the future of LLM’s.
+### [Community](/docs/community)
+Head to the [Community navigator](/docs/community) to find places to ask questions, share feedback, meet other developers, and dream about the future of LLM’s.
 
 ## API reference
```
```diff
@@ -1,7 +1,3 @@
----
-sidebar_position: 6
----
-
 import DocCardList from "@theme/DocCardList";
 
 # Evaluation
```
```diff
@@ -1,9 +0,0 @@
-# LangChain Expression Language
-
-import DocCardList from "@theme/DocCardList";
-
-LangChain Expression Language is a declarative way to easily compose chains together.
-Any chain constructed this way will automatically have full sync, async, and streaming support.
-See guides below for how to interact with chains constructed this way as well as cookbook examples.
-
-<DocCardList />
```
```diff
@@ -1,6 +1,7 @@
-# Preventing harmful outputs
+# Moderation
 
 One of the key concerns with using LLMs is that they may generate harmful or unethical text. This is an area of active research in the field. Here we present some built-in chains inspired by this research, which are intended to make the outputs of LLMs safer.
 
-- [Moderation chain](/docs/use_cases/safety/moderation): Explicitly check if any output text is harmful and flag it.
-- [Constitutional chain](/docs/use_cases/safety/constitutional_chain): Prompt the model with a set of principles which should guide it's behavior.
+- [Moderation chain](/docs/guides/safety/moderation): Explicitly check if any output text is harmful and flag it.
+- [Constitutional chain](/docs/guides/safety/constitutional_chain): Prompt the model with a set of principles which should guide it's behavior.
+- [Amazon Comprehend moderation chain](/docs/guides/safety/amazon_comprehend_chain): Use [Amazon Comprehend](https://aws.amazon.com/comprehend/) to detect and handle PII and toxicity.
```
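As a minimal sketch of the moderation chain the first bullet points to (assuming `OPENAI_API_KEY` is set; the input string is illustrative):

```python
from langchain.chains import OpenAIModerationChain

# Uses the OpenAI moderation endpoint to flag harmful text
moderation_chain = OpenAIModerationChain()
moderation_chain.run("This is okay")  # benign input passes through unchanged
```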
```diff
@@ -37,11 +37,11 @@ This agent is designed to be used in conversational settings.
 The prompt is designed to make the agent helpful and conversational.
 It uses the ReAct framework to decide which tool to use, and uses memory to remember the previous conversation interactions.
 
-### [Self ask with search](/docs/modules/agents/agent_types/self_ask_with_search.html)
+### [Self-ask with search](/docs/modules/agents/agent_types/self_ask_with_search.html)
 
 This agent utilizes a single tool that should be named `Intermediate Answer`.
 This tool should be able to lookup factual answers to questions. This agent
-is equivalent to the original [self ask with search paper](https://ofir.io/self-ask.pdf),
+is equivalent to the original [self-ask with search paper](https://ofir.io/self-ask.pdf),
 where a Google search API was provided as the tool.
 
 ### [ReAct document store](/docs/modules/agents/agent_types/react_docstore.html)
@@ -54,4 +54,4 @@ This agent is equivalent to the
 original [ReAct paper](https://arxiv.org/pdf/2210.03629.pdf), specifically the Wikipedia example.
 
 ## [Plan-and-execute agents](/docs/modules/agents/agent_types/plan_and_execute.html)
-Plan and execute agents accomplish an objective by first planning what to do, then executing the sub tasks. This idea is largely inspired by [BabyAGI](https://github.com/yoheinakajima/babyagi) and then the ["Plan-and-Solve" paper](https://arxiv.org/abs/2305.04091).
+Plan-and-execute agents accomplish an objective by first planning what to do, then executing the sub tasks. This idea is largely inspired by [BabyAGI](https://github.com/yoheinakajima/babyagi) and then the ["Plan-and-Solve" paper](https://arxiv.org/abs/2305.04091).
```
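A minimal sketch of the self-ask pattern described above, assuming a SerpAPI key is available; note the single tool must be named exactly `Intermediate Answer`:

```python
from langchain.agents import AgentType, Tool, initialize_agent
from langchain.llms import OpenAI
from langchain.utilities import SerpAPIWrapper

llm = OpenAI(temperature=0)
search = SerpAPIWrapper()
tools = [
    Tool(
        name="Intermediate Answer",  # required name for this agent type
        func=search.run,
        description="useful for looking up factual answers",
    )
]
agent = initialize_agent(tools, llm, agent=AgentType.SELF_ASK_WITH_SEARCH, verbose=True)
agent.run("What is the hometown of the reigning men's U.S. Open champion?")
```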
```diff
@@ -1,6 +1,6 @@
-# Plan and execute
+# Plan-and-execute
 
-Plan and execute agents accomplish an objective by first planning what to do, then executing the sub tasks. This idea is largely inspired by [BabyAGI](https://github.com/yoheinakajima/babyagi) and then the ["Plan-and-Solve" paper](https://arxiv.org/abs/2305.04091).
+Plan-and-execute agents accomplish an objective by first planning what to do, then executing the sub tasks. This idea is largely inspired by [BabyAGI](https://github.com/yoheinakajima/babyagi) and then the ["Plan-and-Solve" paper](https://arxiv.org/abs/2305.04091).
 
 The planning is almost always done by an LLM.
```
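A minimal sketch of a plan-and-execute agent, assuming `langchain_experimental` is installed and an OpenAI key is set; the calculator tool and question are illustrative:

```python
from langchain.agents.tools import Tool
from langchain.chains import LLMMathChain
from langchain.chat_models import ChatOpenAI
from langchain_experimental.plan_and_execute import (
    PlanAndExecute,
    load_agent_executor,
    load_chat_planner,
)

model = ChatOpenAI(temperature=0)
llm_math = LLMMathChain.from_llm(model)
tools = [Tool(name="Calculator", func=llm_math.run, description="useful for math")]

planner = load_chat_planner(model)                          # LLM that writes the plan
executor = load_agent_executor(model, tools, verbose=True)  # agent that runs each step
agent = PlanAndExecute(planner=planner, executor=executor)
agent.run("What is 3 raised to the power of 5?")
```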
```diff
@@ -1,13 +1,13 @@
-# Custom LLM Agent
+# Custom LLM agent
 
 This notebook goes through how to create your own custom LLM agent.
 
 An LLM agent consists of three parts:
 
-- PromptTemplate: This is the prompt template that can be used to instruct the language model on what to do
+- `PromptTemplate`: This is the prompt template that can be used to instruct the language model on what to do
 - LLM: This is the language model that powers the agent
 - `stop` sequence: Instructs the LLM to stop generating as soon as this string is found
-- OutputParser: This determines how to parse the LLMOutput into an AgentAction or AgentFinish object
+- `OutputParser`: This determines how to parse the LLM output into an `AgentAction` or `AgentFinish` object
 
 import Example from "@snippets/modules/agents/how_to/custom_llm_agent.mdx"
```
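To illustrate the `OutputParser` piece, here is a toy sketch of a parser that maps LLM text onto `AgentAction` or `AgentFinish`; the `Action:`/`Final Answer:` markers are an assumed prompt convention, not the notebook's exact code:

```python
from typing import Union

from langchain.agents import AgentOutputParser
from langchain.schema import AgentAction, AgentFinish


class ToyOutputParser(AgentOutputParser):
    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        # Finished: hand the final answer back to the caller
        if "Final Answer:" in llm_output:
            answer = llm_output.split("Final Answer:")[-1].strip()
            return AgentFinish(return_values={"output": answer}, log=llm_output)
        # Otherwise expect a line like "Action: Search: some query"
        action = llm_output.split("Action:")[-1].strip()
        tool, tool_input = action.split(":", 1)
        return AgentAction(tool=tool.strip(), tool_input=tool_input.strip(), log=llm_output)
```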
```diff
@@ -4,10 +4,10 @@ This notebook goes through how to create your own custom agent based on a chat m
 
 An LLM chat agent consists of three parts:
 
-- PromptTemplate: This is the prompt template that can be used to instruct the language model on what to do
-- ChatModel: This is the language model that powers the agent
+- `PromptTemplate`: This is the prompt template that can be used to instruct the language model on what to do
+- `ChatModel`: This is the language model that powers the agent
 - `stop` sequence: Instructs the LLM to stop generating as soon as this string is found
-- OutputParser: This determines how to parse the LLMOutput into an AgentAction or AgentFinish object
+- `OutputParser`: This determines how to parse the LLM output into an `AgentAction` or `AgentFinish` object
 
 import Example from "@snippets/modules/agents/how_to/custom_llm_chat_agent.mdx"
```
```diff
@@ -1,4 +1,4 @@
-# Conversation buffer memory
+# Conversation Buffer
 
 This notebook shows how to use `ConversationBufferMemory`. This memory allows for storing of messages and then extracts the messages in a variable.
```
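A minimal usage sketch of `ConversationBufferMemory` (the inputs are illustrative):

```python
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory()
memory.save_context({"input": "hi"}, {"output": "whats up"})
memory.load_memory_variables({})
# -> {'history': 'Human: hi\nAI: whats up'}
```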
```diff
@@ -1,4 +1,4 @@
-# Conversation buffer window memory
+# Conversation Buffer Window
 
 `ConversationBufferWindowMemory` keeps a list of the interactions of the conversation over time. It only uses the last K interactions. This can be useful for keeping a sliding window of the most recent interactions, so the buffer does not get too large
```
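A minimal sketch showing the sliding window (here `k=1`, so only the latest exchange is kept):

```python
from langchain.memory import ConversationBufferWindowMemory

memory = ConversationBufferWindowMemory(k=1)
memory.save_context({"input": "hi"}, {"output": "whats up"})
memory.save_context({"input": "not much you"}, {"output": "not much"})
memory.load_memory_variables({})
# -> {'history': 'Human: not much you\nAI: not much'}
```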
```diff
@@ -1,4 +1,4 @@
-# Entity memory
+# Entity
 
 Entity Memory remembers given facts about specific entities in a conversation. It extracts information on entities (using an LLM) and builds up its knowledge about that entity over time (also using an LLM).
```
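A minimal sketch of entity memory, assuming an OpenAI key is set; the conversation snippet is illustrative:

```python
from langchain.llms import OpenAI
from langchain.memory import ConversationEntityMemory

memory = ConversationEntityMemory(llm=OpenAI(temperature=0))
_input = {"input": "Deven & Sam are working on a hackathon project"}
memory.load_memory_variables(_input)  # extracts entities ("Deven", "Sam") from the input
memory.save_context(_input, {"output": "That sounds like a great project!"})
```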
```diff
@@ -4,5 +4,5 @@ sidebar_position: 2
 # Memory Types
 
 There are many different types of memory.
-Each have their own parameters, their own return types, and are useful in different scenarios.
+Each has their own parameters, their own return types, and is useful in different scenarios.
 Please see their individual page for more detail on each one.
```
```diff
@@ -1,4 +1,4 @@
-# Conversation summary memory
+# Conversation Summary
 
-Now let's take a look at using a slightly more complex type of memory - `ConversationSummaryMemory`. This type of memory creates a summary of the conversation over time. This can be useful for condensing information from the conversation over time.
+Conversation summary memory summarizes the conversation as it happens and stores the current summary in memory. This memory can then be used to inject the summary of the conversation so far into a prompt/chain. This memory is most useful for longer conversations, where keeping the past message history in the prompt verbatim would take up too many tokens.
```
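A minimal sketch, assuming an OpenAI key is set; the summary is produced by the LLM rather than stored verbatim:

```python
from langchain.llms import OpenAI
from langchain.memory import ConversationSummaryMemory

memory = ConversationSummaryMemory(llm=OpenAI(temperature=0))
memory.save_context({"input": "hi"}, {"output": "whats up"})
memory.load_memory_variables({})  # returns the running summary, not the raw turns
```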
```diff
@@ -1,4 +1,4 @@
-# Vector store-backed memory
+# Backed by a Vector Store
 
 `VectorStoreRetrieverMemory` stores memories in a VectorDB and queries the top-K most "salient" docs every time it is called.
```
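A minimal sketch using FAISS as the backing store (an assumption; any vector store retriever works) with OpenAI embeddings:

```python
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import VectorStoreRetrieverMemory
from langchain.vectorstores import FAISS

# Seed the store with a placeholder document so it can be queried
vectorstore = FAISS.from_texts(["placeholder"], OpenAIEmbeddings())
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})
memory = VectorStoreRetrieverMemory(retriever=retriever)

memory.save_context({"input": "My favorite food is pizza"}, {"output": "good to know"})
memory.load_memory_variables({"prompt": "what should I eat?"})  # retrieves the pizza memory
```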
```diff
@@ -44,6 +44,16 @@ module.exports = {
       id: "modules/index"
     },
   },
+  {
+    type: "category",
+    label: "LangChain Expression Language",
+    collapsed: true,
+    items: [{ type: "autogenerated", dirName: "expression_language" } ],
+    link: {
+      type: 'doc',
+      id: "expression_language/index"
+    },
+  },
   {
     type: "category",
     label: "Guides",
@@ -52,17 +62,7 @@ module.exports = {
     link: {
       type: 'generated-index',
       description: 'Design guides for key parts of the development process',
       slug: "guides",
     },
   },
-  {
-    type: "category",
-    label: "Ecosystem",
-    collapsed: true,
-    items: [{ type: "autogenerated", dirName: "ecosystem" }],
-    link: {
-      type: 'generated-index',
-      slug: "ecosystem",
-    },
-  },
   {
@@ -72,7 +72,7 @@ module.exports = {
     items: [{ type: "autogenerated", dirName: "additional_resources" }, { type: "link", label: "Gallery", href: "https://github.com/kyrolabs/awesome-langchain" }],
     link: {
       type: 'generated-index',
-        slug: "additional_resources",
+      slug: "additional_resources",
     },
   },
   'community'
```
Binary files added (content not shown):

- docs/docs_skeleton/static/img/ReAct.png (new file, 42 KiB)
- docs/docs_skeleton/static/img/agents_use_case_1.png (new file, 236 KiB)
- docs/docs_skeleton/static/img/agents_use_case_trace_1.png (new file, 74 KiB)
- docs/docs_skeleton/static/img/agents_use_case_trace_2.png (new file, 166 KiB)
- docs/docs_skeleton/static/img/agents_vs_chains.png (new file, 42 KiB)
- docs/docs_skeleton/static/img/oai_function_agent.png (new file, 177 KiB)
```diff
@@ -2952,6 +2952,46 @@
       "source": "/docs/modules/model_io/models/llms/integrations/writer",
       "destination": "/docs/integrations/llms/writer"
     },
+    {
+      "source": "/docs/integrations/llms/amazon_api_gateway_example",
+      "destination": "/docs/integrations/llms/amazon_api_gateway"
+    },
+    {
+      "source": "/docs/integrations/llms/azureml_endpoint_example",
+      "destination": "/docs/integrations/llms/azure_ml"
+    },
+    {
+      "source": "/docs/integrations/llms/azure_openai_example",
+      "destination": "/docs/integrations/llms/azure_openai"
+    },
+    {
+      "source": "/docs/integrations/llms/cerebriumai_example",
+      "destination": "/docs/integrations/llms/cerebriumai"
+    },
+    {
+      "source": "/docs/integrations/llms/deepinfra_example",
+      "destination": "/docs/integrations/llms/deepinfra"
+    },
+    {
+      "source": "/docs/integrations/llms/Fireworks",
+      "destination": "/docs/integrations/llms/fireworks"
+    },
+    {
+      "source": "/docs/integrations/llms/forefrontai_example",
+      "destination": "/docs/integrations/llms/forefrontai"
+    },
+    {
+      "source": "/docs/integrations/llms/gooseai_example",
+      "destination": "/docs/integrations/llms/gooseai"
+    },
+    {
+      "source": "/docs/integrations/llms/petals_example",
+      "destination": "/docs/integrations/llms/petals"
+    },
+    {
+      "source": "/docs/integrations/llms/pipelineai_example",
+      "destination": "/docs/integrations/llms/pipelineai"
+    },
     {
       "source": "/en/latest/modules/prompts.html",
       "destination": "/docs/modules/model_io/prompts"
@@ -3547,6 +3587,18 @@
     {
       "source": "/en/latest/integrations/:path*",
       "destination": "/docs/integrations/providers/:path*"
     },
+    {
+      "source": "/docs/guides/expression_language(/?)",
+      "destination": "/docs/expression_language/"
+    },
+    {
+      "source": "/docs/guides/expression_language/:path*",
+      "destination": "/docs/expression_language/:path*"
+    },
+    {
+      "source": "/docs/ecosystem/dependents",
+      "destination": "/docs/additional_resources/dependents"
+    }
   ]
 }
```
```diff
@@ -51,7 +51,7 @@ Dependents stats for `langchain-ai/langchain`
 |[e2b-dev/e2b](https://github.com/e2b-dev/e2b) | 5365 |
 |[mage-ai/mage-ai](https://github.com/mage-ai/mage-ai) | 5352 |
 |[wenda-LLM/wenda](https://github.com/wenda-LLM/wenda) | 5192 |
-|[LangChain-Chinese-Getting-Started-Guide](https://github.com/liaokongVFX/LangChain-Chinese-Getting-Started-Guide) | 5129 |
+|[liaokongVFX/LangChain-Chinese-Getting-Started-Guide](https://github.com/liaokongVFX/LangChain-Chinese-Getting-Started-Guide) | 5129 |
 |[zilliztech/GPTCache](https://github.com/zilliztech/GPTCache) | 4993 |
 |[GreyDGL/PentestGPT](https://github.com/GreyDGL/PentestGPT) | 4831 |
 |[zauberzeug/nicegui](https://github.com/zauberzeug/nicegui) | 4824 |
```
```diff
@@ -62,7 +62,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 3,
    "id": "d1850a1f",
    "metadata": {},
    "outputs": [],
@@ -72,7 +72,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 4,
    "id": "56d0669f",
    "metadata": {},
    "outputs": [],
@@ -170,6 +170,36 @@
     "chain.batch([{\"topic\": \"bears\"}, {\"topic\": \"cats\"}])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "2434ab15",
+   "metadata": {},
+   "source": [
+    "You can set the number of concurrent requests by using the `max_concurrency` parameter"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "a08522f6",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[AIMessage(content=\"Why don't bears wear shoes?\\n\\nBecause they have bear feet!\", additional_kwargs={}, example=False),\n",
+       " AIMessage(content=\"Why don't cats play poker in the wild?\\n\\nToo many cheetahs!\", additional_kwargs={}, example=False)]"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chain.batch([{\"topic\": \"bears\"}, {\"topic\": \"cats\"}], config={\"max_concurrency\": 5})"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "b960cbfe",
```
```diff
@@ -8,7 +8,7 @@ Here's a few different tools and functionalities to aid in debugging.
 
 ## Tracing
 
-Platforms with tracing capabilities like [LangSmith](/docs/guides/langsmith/) and [WandB](/docs/ecosystem/integrations/agent_with_wandb_tracing) are the most comprehensive solutions for debugging. These platforms make it easy to not only log and visualize LLM apps, but also to actively debug, test and refine them.
+Platforms with tracing capabilities like [LangSmith](/docs/guides/langsmith/) and [WandB](/docs/integrations/providers/wandb_tracing) are the most comprehensive solutions for debugging. These platforms make it easy to not only log and visualize LLM apps, but also to actively debug, test and refine them.
 
 For anyone building production-grade LLM applications, we highly recommend using a platform like this.
```
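For quick local debugging without a tracing platform, a minimal sketch using LangChain's global debug flag (the chain is illustrative and assumes an OpenAI key):

```python
import langchain
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate

langchain.debug = True  # print full inputs/outputs of every component

chain = LLMChain(llm=ChatOpenAI(), prompt=PromptTemplate.from_template("Say {word}"))
chain.run(word="hello")
```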
```diff
@@ -5,7 +5,7 @@
    "id": "b8982428",
    "metadata": {},
    "source": [
-    "# Private, local, open source LLMs\n",
+    "# Run LLMs locally\n",
     "\n",
     "## Use case\n",
     "\n",
@@ -799,7 +799,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.16"
+   "version": "3.9.1"
   }
  },
 "nbformat": 4,
```
docs/extras/guides/privacy/_category_.yml (new file, 1 line)

```diff
@@ -0,0 +1 @@
+label: 'Privacy'
```
docs/extras/guides/privacy/presidio_data_anonymization.ipynb (new file, 451 lines), rendered below.

# Data anonymization with Microsoft Presidio

[Open In Colab](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/guides/privacy/presidio_data_anonymization.ipynb)

## Use case

Data anonymization is crucial before passing information to a language model like GPT-4 because it helps protect privacy and maintain confidentiality. If data is not anonymized, sensitive information such as names, addresses, contact numbers, or other identifiers linked to specific individuals could potentially be learned and misused. Hence, by obscuring or removing this personally identifiable information (PII), data can be used freely without compromising individuals' privacy rights or breaching data protection laws and regulations.

## Overview

Anonymization consists of two steps:

1. **Identification:** Identify all data fields that contain personally identifiable information (PII).
2. **Replacement:** Replace all PIIs with pseudo values or codes that do not reveal any personal information about the individual but can be used for reference. We're not using regular encryption, because the language model won't be able to understand the meaning or context of the encrypted data.

We use *Microsoft Presidio* together with the *Faker* framework for anonymization purposes because of the wide range of functionalities they provide. The full implementation is available in `PresidioAnonymizer`.

## Quickstart

Below you will find the use case on how to leverage anonymization in LangChain.

```python
# Install necessary packages
# ! pip install langchain langchain-experimental openai presidio-analyzer presidio-anonymizer spacy Faker
# ! python -m spacy download en_core_web_lg
```

Let's see how PII anonymization works using a sample sentence:

```python
from langchain_experimental.data_anonymizer import PresidioAnonymizer

anonymizer = PresidioAnonymizer()

anonymizer.anonymize(
    "My name is Slim Shady, call me at 313-666-7440 or email me at real.slim.shady@gmail.com"
)
```

    'My name is Mrs. Rachel Chen DDS, call me at 849-829-7628x073 or email me at christopherfrey@example.org'

### Using with LangChain Expression Language

With LCEL we can easily chain together anonymization with the rest of our application.

```python
# Set env var OPENAI_API_KEY or load from a .env file:
# import dotenv

# dotenv.load_dotenv()
```

```python
from langchain.prompts.prompt import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough

template = """According to this text, where can you find our super secret data?

{anonymized_text}

Answer:"""
prompt = PromptTemplate.from_template(template)
llm = ChatOpenAI()

chain = {"anonymized_text": anonymizer.anonymize} | prompt | llm
chain.invoke("You can find our super secret data at https://supersecretdata.com")
```

    AIMessage(content='You can find our super secret data at https://www.ross.com/', additional_kwargs={}, example=False)

## Customization

We can specify `analyzed_fields` to only anonymize particular types of data.

```python
anonymizer = PresidioAnonymizer(analyzed_fields=["PERSON"])

anonymizer.anonymize(
    "My name is Slim Shady, call me at 313-666-7440 or email me at real.slim.shady@gmail.com"
)
```

    'My name is Gabrielle Edwards, call me at 313-666-7440 or email me at real.slim.shady@gmail.com'

As can be observed, the name was correctly identified and replaced with another. The `analyzed_fields` attribute is responsible for what values are to be detected and substituted. We can add *PHONE_NUMBER* to the list:

```python
anonymizer = PresidioAnonymizer(analyzed_fields=["PERSON", "PHONE_NUMBER"])
anonymizer.anonymize(
    "My name is Slim Shady, call me at 313-666-7440 or email me at real.slim.shady@gmail.com"
)
```

    'My name is Victoria Mckinney, call me at 713-549-8623 or email me at real.slim.shady@gmail.com'

If no `analyzed_fields` are specified, by default the anonymizer will detect all supported formats. Below is the full list of them:

`['PERSON', 'EMAIL_ADDRESS', 'PHONE_NUMBER', 'IBAN_CODE', 'CREDIT_CARD', 'CRYPTO', 'IP_ADDRESS', 'LOCATION', 'DATE_TIME', 'NRP', 'MEDICAL_LICENSE', 'URL', 'US_BANK_NUMBER', 'US_DRIVER_LICENSE', 'US_ITIN', 'US_PASSPORT', 'US_SSN']`

**Disclaimer:** We suggest carefully defining the private data to be detected - Presidio doesn't work perfectly and it sometimes makes mistakes, so it's better to have more control over the data.

```python
anonymizer = PresidioAnonymizer()
anonymizer.anonymize(
    "My name is Slim Shady, call me at 313-666-7440 or email me at real.slim.shady@gmail.com"
)
```

    'My name is Billy Russo, call me at 970-996-9453x038 or email me at jamie80@example.org'

It may be that the above list of detected fields is not sufficient. For example, the already available *PHONE_NUMBER* field does not support Polish phone numbers and confuses them with another field:

```python
anonymizer = PresidioAnonymizer()
anonymizer.anonymize("My polish phone number is 666555444")
```

    'My polish phone number is EVIA70648911396944'

You can then write your own recognizers and add them to the pool of those present. How exactly to create recognizers is described in the [Presidio documentation](https://microsoft.github.io/presidio/samples/python/customizing_presidio_analyzer/).

```python
# Define the regex pattern in a Presidio `Pattern` object:
from presidio_analyzer import Pattern, PatternRecognizer


polish_phone_numbers_pattern = Pattern(
    name="polish_phone_numbers_pattern",
    regex="(?<!\w)(\(?(\+|00)?48\)?)?[ -]?\d{3}[ -]?\d{3}[ -]?\d{3}(?!\w)",
    score=1,
)

# Define the recognizer with one or more patterns
polish_phone_numbers_recognizer = PatternRecognizer(
    supported_entity="POLISH_PHONE_NUMBER", patterns=[polish_phone_numbers_pattern]
)
```

Now, we can add the recognizer by calling the `add_recognizer` method on the anonymizer:

```python
anonymizer.add_recognizer(polish_phone_numbers_recognizer)
```

And voilà! With the added pattern-based recognizer, the anonymizer now handles Polish phone numbers.

```python
print(anonymizer.anonymize("My polish phone number is 666555444"))
print(anonymizer.anonymize("My polish phone number is 666 555 444"))
print(anonymizer.anonymize("My polish phone number is +48 666 555 444"))
```

    My polish phone number is <POLISH_PHONE_NUMBER>
    My polish phone number is <POLISH_PHONE_NUMBER>
    My polish phone number is <POLISH_PHONE_NUMBER>

The problem is - even though we recognize Polish phone numbers now, we don't have a method (operator) that would tell how to substitute a given field - because of this, in the output we only provide the string `<POLISH_PHONE_NUMBER>`. We need to create a method to replace it correctly:

```python
from faker import Faker

fake = Faker(locale="pl_PL")


def fake_polish_phone_number(_=None):
    return fake.phone_number()


fake_polish_phone_number()
```

    '+48 533 220 543'

We used Faker to create pseudo data. Now we can create an operator and add it to the anonymizer. For complete information about operators and their creation, see the Presidio documentation for [simple](https://microsoft.github.io/presidio/tutorial/10_simple_anonymization/) and [custom](https://microsoft.github.io/presidio/tutorial/11_custom_anonymization/) anonymization.

```python
from presidio_anonymizer.entities import OperatorConfig

new_operators = {
    "POLISH_PHONE_NUMBER": OperatorConfig(
        "custom", {"lambda": fake_polish_phone_number}
    )
}
```

```python
anonymizer.add_operators(new_operators)
```

```python
anonymizer.anonymize("My polish phone number is 666555444")
```

    'My polish phone number is +48 692 715 636'

## Future works

- **deanonymization** - add the ability to reverse anonymization. For example, the workflow could look like this: `anonymize -> LLMChain -> deanonymize`. By doing this, we will retain anonymity in requests to, for example, OpenAI, and then be able to restore the original data.
- **instance anonymization** - at this point, each occurrence of PII is treated as a separate entity and separately anonymized. Therefore, two occurrences of the name John Doe in the text will be changed to two different names. It is therefore worth introducing support for full instance detection, so that repeated occurrences are treated as a single object.
```diff
@@ -1,10 +1,10 @@
-# Pydantic Compatibility
+# Pydantic compatibility
 
 - Pydantic v2 was released in June, 2023 (https://docs.pydantic.dev/2.0/blog/pydantic-v2-final/)
 - v2 contains has a number of breaking changes (https://docs.pydantic.dev/2.0/migration/)
 - Pydantic v2 and v1 are under the same package name, so both versions cannot be installed at the same time
 
-## LangChain Pydantic Migration Plan
+## LangChain Pydantic migration plan
 
 As of `langchain>=0.0.267`, LangChain will allow users to install either Pydantic V1 or V2.
 * Internally LangChain will continue to [use V1](https://docs.pydantic.dev/latest/migration/#continue-using-pydantic-v1-features).
```
docs/extras/guides/safety/amazon_comprehend_chain.ipynb (new file, 1390 lines): diff suppressed because it is too large and one or more lines are too long.
docs/extras/integrations/callbacks/llmonitor.md (new file, 63 lines), rendered below.

# LLMonitor

[LLMonitor](https://llmonitor.com) is an open-source observability platform that provides cost tracking, user tracking and powerful agent tracing.

<video controls width='100%' >
  <source src='https://llmonitor.com/videos/demo-annotated.mp4'/>
</video>

## Setup

Create an account on [llmonitor.com](https://llmonitor.com), create an `App`, and then copy the associated `tracking id`.
Once you have it, set it as an environment variable by running:

```bash
export LLMONITOR_APP_ID="..."
```

If you'd prefer not to set an environment variable, you can pass the key directly when initializing the callback handler:

```python
from langchain.callbacks import LLMonitorCallbackHandler

handler = LLMonitorCallbackHandler(app_id="...")
```

## Usage with LLM/Chat models

```python
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import LLMonitorCallbackHandler

handler = LLMonitorCallbackHandler(app_id="...")

llm = OpenAI(
    callbacks=[handler],
)

chat = ChatOpenAI(
    callbacks=[handler],
    metadata={"userId": "123"},  # you can assign user ids to models in the metadata
)
```

## Usage with agents

```python
from langchain.agents import load_tools, initialize_agent, AgentType
from langchain.llms import OpenAI
from langchain.callbacks import LLMonitorCallbackHandler

handler = LLMonitorCallbackHandler(app_id="...")

llm = OpenAI(temperature=0)
tools = load_tools(["serpapi", "llm-math"], llm=llm)
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)
agent.run(
    "Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?",
    callbacks=[handler],
    metadata={
        "agentName": "Leo DiCaprio's girlfriend",  # you can assign a custom agent name in the metadata
    },
)
```

## Support

For any question or issue with integration you can reach out to the LLMonitor team on [Discord](http://discord.com/invite/8PafSG58kK) or via [email](mailto:vince@llmonitor.com).
docs/extras/integrations/chat/bedrock.ipynb (new file, 106 lines), rendered below.

# Bedrock Chat

[Amazon Bedrock](https://aws.amazon.com/bedrock/) is a fully managed service that makes FMs from leading AI startups and Amazon available via an API, so you can choose from a wide range of FMs to find the model that is best suited for your use case.

```python
%pip install boto3
```

```python
from langchain.chat_models import BedrockChat
from langchain.schema import HumanMessage
```

```python
chat = BedrockChat(model_id="anthropic.claude-v2", model_kwargs={"temperature": 0.1})
```

```python
messages = [
    HumanMessage(
        content="Translate this sentence from English to French. I love programming."
    )
]
chat(messages)
```

    AIMessage(content=" Voici la traduction en français : J'adore programmer.", additional_kwargs={}, example=False)
325
docs/extras/integrations/chat_loaders/discord.ipynb
Normal file
325
docs/extras/integrations/chat_loaders/discord.ipynb
Normal file
@@ -0,0 +1,325 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c4ff9336-1cf3-459e-bd70-d1314c1da6a0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Discord\n",
|
||||
"\n",
|
||||
"This notebook shows how to create your own chat loader that works on copy-pasted messages (from dms) to a list of LangChain messages.\n",
|
||||
"\n",
|
||||
"The process has four steps:\n",
|
||||
"1. Create the chat .txt file by copying chats from the Discord app and pasting them in a file on your local computer\n",
|
||||
"2. Copy the chat loader definition from below to a local file.\n",
|
||||
"3. Initialize the `DiscordChatLoader` with the file path pointed to the text file.\n",
|
||||
"4. Call `loader.load()` (or `loader.lazy_load()`) to perform the conversion.\n",
|
||||
"\n",
|
||||
"## 1. Creat message dump\n",
|
||||
"\n",
|
||||
"Currently (2023/08/23) this loader only supports .txt files in the format generated by copying messages in the app to your clipboard and pasting in a file. Below is an example."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e4ccfdfa-6869-4d67-90a0-ab99f01b7553",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Overwriting discord_chats.txt\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%writefile discord_chats.txt\n",
|
||||
"talkingtower — 08/15/2023 11:10 AM\n",
|
||||
"Love music! Do you like jazz?\n",
|
||||
"reporterbob — 08/15/2023 9:27 PM\n",
|
||||
"Yes! Jazz is fantastic. Ever heard this one?\n",
|
||||
"Website\n",
|
||||
"Listen to classic jazz track...\n",
|
||||
"\n",
|
||||
"talkingtower — Yesterday at 5:03 AM\n",
|
||||
"Indeed! Great choice. 🎷\n",
|
||||
"reporterbob — Yesterday at 5:23 AM\n",
|
||||
"Thanks! How about some virtual sightseeing?\n",
|
||||
"Website\n",
|
||||
"Virtual tour of famous landmarks...\n",
|
||||
"\n",
|
||||
"talkingtower — Today at 2:38 PM\n",
|
||||
"Sounds fun! Let's explore.\n",
|
||||
"reporterbob — Today at 2:56 PM\n",
|
||||
"Enjoy the tour! See you around.\n",
|
||||
"talkingtower — Today at 3:00 PM\n",
|
||||
"Thank you! Goodbye! 👋\n",
|
||||
"reporterbob — Today at 3:02 PM\n",
|
||||
"Farewell! Happy exploring."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "359565a7-dad3-403c-a73c-6414b1295127",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Define chat loader\n",
|
||||
"\n",
|
||||
"LangChain currently does not support "
|
||||
]
|
||||
},
|
||||
{
"cell_type": "code",
"execution_count": 2,
"id": "a429e0c4-4d7d-45f8-bbbb-c7fc5229f6af",
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"import re\n",
"from typing import Iterator, List\n",
"\n",
"from langchain import schema\n",
"from langchain.chat_loaders import base as chat_loaders\n",
"\n",
"logger = logging.getLogger()\n",
"\n",
"\n",
"class DiscordChatLoader(chat_loaders.BaseChatLoader):\n",
"\n",
"    def __init__(self, path: str):\n",
"        \"\"\"\n",
"        Initialize the Discord chat loader.\n",
"\n",
"        Args:\n",
"            path: Path to the exported Discord chat text file.\n",
"        \"\"\"\n",
"        self.path = path\n",
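"        # Matches message header lines like 'talkingtower — 08/15/2023 11:10 AM'\n",
"        # or 'talkingtower — Today at 2:38 PM', capturing the sender and timestamp.\n",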
" self._message_line_regex = re.compile(\n",
|
||||
" r\"(.+?) — (\\w{3,9} \\d{1,2}(?:st|nd|rd|th)?(?:, \\d{4})? \\d{1,2}:\\d{2} (?:AM|PM)|Today at \\d{1,2}:\\d{2} (?:AM|PM)|Yesterday at \\d{1,2}:\\d{2} (?:AM|PM))\", # noqa\n",
|
||||
" flags=re.DOTALL,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" def _load_single_chat_session_from_txt(\n",
|
||||
" self, file_path: str\n",
|
||||
" ) -> chat_loaders.ChatSession:\n",
|
||||
" \"\"\"\n",
|
||||
" Load a single chat session from a text file.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" file_path: Path to the text file containing the chat messages.\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" A `ChatSession` object containing the loaded chat messages.\n",
|
||||
" \"\"\"\n",
|
||||
" with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
|
||||
" lines = file.readlines()\n",
|
||||
"\n",
|
||||
" results: List[schema.BaseMessage] = []\n",
|
||||
" current_sender = None\n",
|
||||
" current_timestamp = None\n",
|
||||
" current_content = []\n",
|
||||
" for line in lines:\n",
|
||||
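"            # A new message starts with a 'sender — timestamp' header line.\n",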
" if re.match(\n",
|
||||
" r\".+? — (\\d{2}/\\d{2}/\\d{4} \\d{1,2}:\\d{2} (?:AM|PM)|Today at \\d{1,2}:\\d{2} (?:AM|PM)|Yesterday at \\d{1,2}:\\d{2} (?:AM|PM))\", # noqa\n",
|
||||
" line,\n",
|
||||
" ):\n",
|
||||
" if current_sender and current_content:\n",
|
||||
" results.append(\n",
|
||||
" schema.HumanMessage(\n",
|
||||
" content=\"\".join(current_content).strip(),\n",
|
||||
" additional_kwargs={\n",
|
||||
" \"sender\": current_sender,\n",
|
||||
" \"events\": [{\"message_time\": current_timestamp}],\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" current_sender, current_timestamp = line.split(\" — \")[:2]\n",
|
||||
" current_content = [\n",
|
||||
" line[len(current_sender) + len(current_timestamp) + 4 :].strip()\n",
|
||||
" ]\n",
|
||||
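"            # A bare '[H:MM AM/PM]' line closes the current message and starts a new timestamp.\n",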
" elif re.match(r\"\\[\\d{1,2}:\\d{2} (?:AM|PM)\\]\", line.strip()):\n",
|
||||
" results.append(\n",
|
||||
" schema.HumanMessage(\n",
|
||||
" content=\"\".join(current_content).strip(),\n",
|
||||
" additional_kwargs={\n",
|
||||
" \"sender\": current_sender,\n",
|
||||
" \"events\": [{\"message_time\": current_timestamp}],\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" current_timestamp = line.strip()[1:-1]\n",
|
||||
" current_content = []\n",
|
||||
" else:\n",
|
||||
" current_content.append(\"\\n\" + line.strip())\n",
|
||||
"\n",
|
||||
" if current_sender and current_content:\n",
|
||||
" results.append(\n",
|
||||
" schema.HumanMessage(\n",
|
||||
" content=\"\".join(current_content).strip(),\n",
|
||||
" additional_kwargs={\n",
|
||||
" \"sender\": current_sender,\n",
|
||||
" \"events\": [{\"message_time\": current_timestamp}],\n",
|
||||
" },\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" return chat_loaders.ChatSession(messages=results)\n",
|
||||
"\n",
|
||||
" def lazy_load(self) -> Iterator[chat_loaders.ChatSession]:\n",
|
||||
" \"\"\"\n",
|
||||
" Lazy load the messages from the chat file and yield them in the required format.\n",
|
||||
"\n",
|
||||
" Yields:\n",
|
||||
" A `ChatSession` object containing the loaded chat messages.\n",
|
||||
" \"\"\"\n",
|
||||
" yield self._load_single_chat_session_from_txt(self.path)\n"
|
||||
]
|
||||
},
|
||||
{
"cell_type": "markdown",
"id": "c8240393-48be-44d2-b0d6-52c215cd8ac2",
"metadata": {},
"source": [
"## 3. Create loader\n",
"\n",
"We will point to the file we just wrote to disk."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1268de40-b0e5-445d-9cd8-54856cd0293a",
"metadata": {},
"outputs": [],
"source": [
"loader = DiscordChatLoader(\n",
"    path=\"./discord_chats.txt\",\n",
")"
]
},
{
"cell_type": "markdown",
"id": "4928df4b-ae31-48a7-bd76-be3ecee1f3e0",
"metadata": {},
"source": [
"## 4. Load Messages\n",
"\n",
"Assuming the format is correct, the loader will convert the chats to LangChain messages."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c8a0836d-4a22-4790-bfe9-97f2145bb0d6",
"metadata": {},
"outputs": [],
"source": [
"from typing import List\n",
"from langchain.chat_loaders.base import ChatSession\n",
"from langchain.chat_loaders.utils import (\n",
"    map_ai_messages,\n",
"    merge_chat_runs,\n",
")\n",
"\n",
"raw_messages = loader.lazy_load()\n",
"# Merge consecutive messages from the same sender into a single message\n",
"merged_messages = merge_chat_runs(raw_messages)\n",
"# Convert messages from \"talkingtower\" to AI messages\n",
"messages: List[ChatSession] = list(map_ai_messages(merged_messages, sender=\"talkingtower\"))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "1913963b-c44e-4f7a-aba7-0423c9b8bd59",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'messages': [AIMessage(content='Love music! Do you like jazz?', additional_kwargs={'sender': 'talkingtower', 'events': [{'message_time': '08/15/2023 11:10 AM\\n'}]}, example=False),\n",
"  HumanMessage(content='Yes! Jazz is fantastic. Ever heard this one?\\nWebsite\\nListen to classic jazz track...', additional_kwargs={'sender': 'reporterbob', 'events': [{'message_time': '08/15/2023 9:27 PM\\n'}]}, example=False),\n",
"  AIMessage(content='Indeed! Great choice. 🎷', additional_kwargs={'sender': 'talkingtower', 'events': [{'message_time': 'Yesterday at 5:03 AM\\n'}]}, example=False),\n",
"  HumanMessage(content='Thanks! How about some virtual sightseeing?\\nWebsite\\nVirtual tour of famous landmarks...', additional_kwargs={'sender': 'reporterbob', 'events': [{'message_time': 'Yesterday at 5:23 AM\\n'}]}, example=False),\n",
"  AIMessage(content=\"Sounds fun! Let's explore.\", additional_kwargs={'sender': 'talkingtower', 'events': [{'message_time': 'Today at 2:38 PM\\n'}]}, example=False),\n",
"  HumanMessage(content='Enjoy the tour! See you around.', additional_kwargs={'sender': 'reporterbob', 'events': [{'message_time': 'Today at 2:56 PM\\n'}]}, example=False),\n",
"  AIMessage(content='Thank you! Goodbye! 👋', additional_kwargs={'sender': 'talkingtower', 'events': [{'message_time': 'Today at 3:00 PM\\n'}]}, example=False),\n",
"  HumanMessage(content='Farewell! Happy exploring.', additional_kwargs={'sender': 'reporterbob', 'events': [{'message_time': 'Today at 3:02 PM\\n'}]}, example=False)]}]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"messages"
]
},
{
"cell_type": "markdown",
"id": "8595a518-5c89-44aa-94a7-ca51e7e2a5fa",
"metadata": {},
"source": [
"### Next Steps\n",
"\n",
"You can then use these messages as you see fit, such as fine-tuning a model, selecting few-shot examples, or directly making predictions for the next message."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "08ff0a1e-fca0-4da3-aacd-d7401f99d946",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Thank you! Have a wonderful day! 🌟"
]
}
],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI()\n",
"\n",
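"# Stream the chat model's prediction of the next message in the conversation\n",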
"for chunk in llm.stream(messages[0]['messages']):\n",
|
||||
" print(chunk.content, end=\"\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "50a5251f-074a-4a3c-a2b0-b1de85e0ac6a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
File diff suppressed because it is too large
579 docs/extras/integrations/chat_loaders/facebook.ipynb Normal file
@@ -0,0 +1,579 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "e4bd269b",
"metadata": {},
"source": [
"# Facebook Messenger\n",
"\n",
"This notebook shows how to load data from Facebook in a format you can fine-tune on. The overall steps are:\n",
"\n",
"1. Download your messenger data to disk.\n",
"2. Create the Chat Loader and call `loader.load()` (or `loader.lazy_load()`) to perform the conversion.\n",
"3. Optionally use `merge_chat_runs` to combine messages from the same sender in sequence, and/or `map_ai_messages` to convert messages from the specified sender to the \"AIMessage\" class. Once you've done this, call `convert_messages_for_finetuning` to prepare your data for fine-tuning.\n",
"\n",
"\n",
"Once this has been done, you can fine-tune your model. To do so you would complete the following steps:\n",
"\n",
"4. Upload your messages to OpenAI and run a fine-tuning job.\n",
"5. Use the resulting model in your LangChain app!\n",
"\n",
"\n",
"Let's begin.\n",
"\n",
"\n",
"## 1. Download Data\n",
"\n",
"To download your own messenger data, follow the instructions [here](https://www.zapptales.com/en/download-facebook-messenger-chat-history-how-to/). IMPORTANT - make sure to download it in JSON format (not HTML).\n",
"\n",
"We are hosting an example dump at [this google drive link](https://drive.google.com/file/d/1rh1s1o2i7B-Sk1v9o8KNgivLVGwJ-osV/view?usp=sharing) that we will use in this walkthrough."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "647f2158-a42e-4634-b283-b8492caf542a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File file.zip downloaded.\n",
"File file.zip has been unzipped.\n"
]
}
],
"source": [
"# This uses some example data\n",
"import requests\n",
"import zipfile\n",
"\n",
"def download_and_unzip(url: str, output_path: str = 'file.zip') -> None:\n",
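"    # Derive a direct-download URL from the Google Drive share link\n",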
" file_id = url.split('/')[-2]\n",
|
||||
" download_url = f'https://drive.google.com/uc?export=download&id={file_id}'\n",
|
||||
"\n",
|
||||
" response = requests.get(download_url)\n",
|
||||
" if response.status_code != 200:\n",
|
||||
" print('Failed to download the file.')\n",
|
||||
" return\n",
|
||||
"\n",
|
||||
" with open(output_path, 'wb') as file:\n",
|
||||
" file.write(response.content)\n",
|
||||
" print(f'File {output_path} downloaded.')\n",
|
||||
"\n",
|
||||
" with zipfile.ZipFile(output_path, 'r') as zip_ref:\n",
|
||||
" zip_ref.extractall()\n",
|
||||
" print(f'File {output_path} has been unzipped.')\n",
|
||||
"\n",
|
||||
"# URL of the file to download\n",
|
||||
"url = 'https://drive.google.com/file/d/1rh1s1o2i7B-Sk1v9o8KNgivLVGwJ-osV/view?usp=sharing'\n",
|
||||
"\n",
|
||||
"# Download and unzip\n",
|
||||
"download_and_unzip(url)\n"
|
||||
]
|
||||
},
|
||||
{
"cell_type": "markdown",
"id": "48ef8bb1-fc28-453c-835a-94a552f05a91",
"metadata": {},
"source": [
"## 2. Create Chat Loader\n",
"\n",
"We have 2 different `FacebookMessengerChatLoader` classes, one for an entire directory of chats, and one to load individual files. We will try both below."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "a0869bc6",
"metadata": {},
"outputs": [],
"source": [
"directory_path = \"./hogwarts\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0460bf25",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_loaders.facebook_messenger import (\n",
"    SingleFileFacebookMessengerChatLoader,\n",
"    FolderFacebookMessengerChatLoader,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f61ee277",
"metadata": {},
"outputs": [],
"source": [
"loader = SingleFileFacebookMessengerChatLoader(\n",
"    path=\"./hogwarts/inbox/HermioneGranger/messages_Hermione_Granger.json\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "ec466ad7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[HumanMessage(content=\"Hi Hermione! How's your summer going so far?\", additional_kwargs={'sender': 'Harry Potter'}, example=False),\n",
" HumanMessage(content=\"Harry! Lovely to hear from you. My summer is going well, though I do miss everyone. I'm spending most of my time going through my books and researching fascinating new topics. How about you?\", additional_kwargs={'sender': 'Hermione Granger'}, example=False),\n",
" HumanMessage(content=\"I miss you all too. The Dursleys are being their usual unpleasant selves but I'm getting by. At least I can practice some spells in my room without them knowing. Let me know if you find anything good in your researching!\", additional_kwargs={'sender': 'Harry Potter'}, example=False)]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chat_session = loader.load()[0]\n",
"chat_session[\"messages\"][:3]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "8a3ee473",
"metadata": {},
"outputs": [],
"source": [
"loader = FolderFacebookMessengerChatLoader(\n",
"    path=\"./hogwarts\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "9f41e122",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"9"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"chat_sessions = loader.load()\n",
"len(chat_sessions)"
]
},
{
"cell_type": "markdown",
"id": "d4aa3580-adc1-4b48-9bba-0e8e8d9f44ce",
"metadata": {},
"source": [
"## 3. Prepare for fine-tuning\n",
"\n",
"Calling `load()` returns all the chat messages we could extract as human messages. When conversing with chat bots, conversations typically follow a stricter alternating dialogue pattern than real conversations do.\n",
"\n",
"You can choose to merge message \"runs\" (consecutive messages from the same sender) and select a sender to represent the \"AI\". The fine-tuned LLM will learn to generate these AI messages."
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "5a78030d-b757-4bbe-8a6c-841056f46df7",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_loaders.utils import (\n",
"    merge_chat_runs,\n",
"    map_ai_messages,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "ff35b028-78bf-4c5b-9ec6-939fe67de7f7",
"metadata": {},
"outputs": [],
"source": [
"merged_sessions = merge_chat_runs(chat_sessions)\n",
"alternating_sessions = list(map_ai_messages(merged_sessions, \"Harry Potter\"))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "4b11906e-a496-4d01-9f0d-1938c14147bf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[AIMessage(content=\"Professor Snape, I was hoping I could speak with you for a moment about something that's been concerning me lately.\", additional_kwargs={'sender': 'Harry Potter'}, example=False),\n",
" HumanMessage(content=\"What is it, Potter? I'm quite busy at the moment.\", additional_kwargs={'sender': 'Severus Snape'}, example=False),\n",
" AIMessage(content=\"I apologize for the interruption, sir. I'll be brief. I've noticed some strange activity around the school grounds at night. I saw a cloaked figure lurking near the Forbidden Forest last night. I'm worried someone may be plotting something sinister.\", additional_kwargs={'sender': 'Harry Potter'}, example=False)]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Now all of Harry Potter's messages will take the AI message class\n",
"# which maps to the 'assistant' role in OpenAI's training format\n",
"alternating_sessions[0]['messages'][:3]"
]
},
{
"cell_type": "markdown",
"id": "d985478d-062e-47b9-ae9a-102f59be07c0",
"metadata": {},
"source": [
"#### Now we can convert to OpenAI format dictionaries"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "21372331",
"metadata": {},
"outputs": [],
"source": [
"from langchain.adapters.openai import convert_messages_for_finetuning"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "92c5ae7a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Prepared 9 dialogues for training\n"
]
}
],
"source": [
"training_data = convert_messages_for_finetuning(alternating_sessions)\n",
"print(f\"Prepared {len(training_data)} dialogues for training\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "dfcbd181",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"[{'role': 'assistant',\n",
"  'content': \"Professor Snape, I was hoping I could speak with you for a moment about something that's been concerning me lately.\"},\n",
" {'role': 'user',\n",
"  'content': \"What is it, Potter? I'm quite busy at the moment.\"},\n",
" {'role': 'assistant',\n",
"  'content': \"I apologize for the interruption, sir. I'll be brief. I've noticed some strange activity around the school grounds at night. I saw a cloaked figure lurking near the Forbidden Forest last night. I'm worried someone may be plotting something sinister.\"}]"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"training_data[0][:3]"
]
},
{
"cell_type": "markdown",
"id": "f1a9fd64-4f9f-42d3-b5dc-2a340e51e9e7",
"metadata": {},
"source": [
"OpenAI currently requires at least 10 training examples for a fine-tuning job, though they recommend between 50-100 for most tasks. Since we only have 9 chat sessions, we can subdivide them (optionally with some overlap) so that each training example comprises a portion of a whole conversation.\n",
"\n",
"Facebook chat sessions (1 per person) often span multiple days and conversations,\n",
"so the long-range dependencies may not be that important to model anyhow."
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "13cd290a-b1e9-4686-bb5e-d99de8b8612b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"100"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Our chat is alternating; we will make each datapoint a group of 8 messages,\n",
"# with 2 messages overlapping\n",
"chunk_size = 8\n",
"overlap = 2\n",
"\n",
"training_examples = [\n",
"    conversation_messages[i: i + chunk_size]\n",
"    for conversation_messages in training_data\n",
"    for i in range(\n",
"        0, len(conversation_messages) - chunk_size + 1,\n",
"        chunk_size - overlap)\n",
"]\n",
"\n",
"len(training_examples)"
]
},
{
"cell_type": "markdown",
"id": "cc8baf41-ff07-4492-96bd-b2472ee7cef9",
"metadata": {},
"source": [
"## 4. Fine-tune the model\n",
"\n",
"It's time to fine-tune the model. Make sure you have `openai` installed\n",
"and have set your `OPENAI_API_KEY` appropriately."
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "95ce3f63-3c80-44b2-9060-534ad74e16fa",
"metadata": {},
"outputs": [],
"source": [
"# %pip install -U openai --quiet"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "ab9e28eb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File file-zCyNBeg4snpbBL7VkvsuhCz8 ready after 30.55 seconds.\n"
]
}
],
"source": [
"import json\n",
"from io import BytesIO\n",
"import time\n",
"\n",
"import openai\n",
"\n",
"# We will write the jsonl file in memory\n",
"my_file = BytesIO()\n",
"for m in training_examples:\n",
"    my_file.write((json.dumps({\"messages\": m}) + \"\\n\").encode('utf-8'))\n",
"\n",
"my_file.seek(0)\n",
"training_file = openai.File.create(\n",
"    file=my_file,\n",
"    purpose='fine-tune'\n",
")\n",
"\n",
"# OpenAI audits each training file for compliance reasons.\n",
"# This may take a few minutes\n",
"status = openai.File.retrieve(training_file.id).status\n",
"start_time = time.time()\n",
"while status != \"processed\":\n",
"    print(f\"Status=[{status}]... {time.time() - start_time:.2f}s\", end=\"\\r\", flush=True)\n",
"    time.sleep(5)\n",
"    status = openai.File.retrieve(training_file.id).status\n",
"print(f\"File {training_file.id} ready after {time.time() - start_time:.2f} seconds.\")"
]
},
{
"cell_type": "markdown",
"id": "759a7f51-fde9-4b75-aaa9-e600e6537bd1",
"metadata": {},
"source": [
"With the file ready, it's time to kick off a training job."
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "3f451425",
"metadata": {},
"outputs": [],
"source": [
"job = openai.FineTuningJob.create(\n",
"    training_file=training_file.id,\n",
"    model=\"gpt-3.5-turbo\",\n",
")"
]
},
{
"cell_type": "markdown",
"id": "489b23ef-5e14-42a9-bafb-44220ec6960b",
"metadata": {},
"source": [
"Grab a cup of tea while your model is being prepared. This may take some time!"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "bac1637a-c087-4523-ade1-c47f9bf4c6f4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Status=[running]... 908.87s\r"
]
}
],
"source": [
"status = openai.FineTuningJob.retrieve(job.id).status\n",
"start_time = time.time()\n",
"while status != \"succeeded\":\n",
"    print(f\"Status=[{status}]... {time.time() - start_time:.2f}s\", end=\"\\r\", flush=True)\n",
"    time.sleep(5)\n",
"    job = openai.FineTuningJob.retrieve(job.id)\n",
"    status = job.status"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "535895e1-bc69-40e5-82ed-e24ed2baeeee",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ft:gpt-3.5-turbo-0613:personal::7rDwkaOq\n"
]
}
],
"source": [
"print(job.fine_tuned_model)"
]
},
{
"cell_type": "markdown",
"id": "502ff73b-f9e9-49ce-ba45-401811e57946",
"metadata": {},
"source": [
"## 5. Use in LangChain\n",
"\n",
"You can use the resulting model ID directly in the `ChatOpenAI` model class."
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "3925d60d",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"model = ChatOpenAI(\n",
"    model=job.fine_tuned_model,\n",
"    temperature=1,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "7190cf2e-ab34-4ceb-bdad-45f24f069c29",
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts import ChatPromptTemplate\n",
"from langchain.schema.output_parser import StrOutputParser\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
"    [\n",
"        (\"human\", \"{input}\"),\n",
"    ]\n",
")\n",
"\n",
"chain = prompt | model | StrOutputParser()"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "f02057e9-f914-40b1-9c9d-9432ff594b98",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The usual - Potions, Transfiguration, Defense Against the Dark Arts. What about you?"
]
}
],
"source": [
"for tok in chain.stream({\"input\": \"What classes are you taking?\"}):\n",
"    print(tok, end=\"\", flush=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "35331503-3cc6-4d64-955e-64afe6b5fef3",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
179 docs/extras/integrations/chat_loaders/gmail.ipynb Normal file
@@ -0,0 +1,179 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b3d1705d",
"metadata": {},
"source": [
"# GMail\n",
"\n",
"This notebook goes over how to load data from GMail. There are many ways you could want to load data from GMail; this loader is currently fairly opinionated in how it does so. It first looks for all messages that you have sent. It then looks for messages where you are responding to a previous email. It then fetches that previous email and creates a training example of that email, followed by your email.\n",
"\n",
"Note that there are clear limitations here. For example, all examples created only look at the previous email for context.\n",
"\n",
"To use:\n",
"\n",
"- Set up a Google Developer Account: Go to the Google Developer Console, create a project, and enable the Gmail API for that project. This will give you a credentials.json file that you'll need later.\n",
"\n",
"- Install the Google Client Library: Run the following command to install the Google Client Library:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "84578039",
"metadata": {},
"outputs": [],
"source": [
"!pip install --upgrade google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "be18f796",
"metadata": {},
"outputs": [],
"source": [
"import os.path\n",
"import base64\n",
"import json\n",
"import re\n",
"import time\n",
"from google.auth.transport.requests import Request\n",
"from google.oauth2.credentials import Credentials\n",
"from google_auth_oauthlib.flow import InstalledAppFlow\n",
"from googleapiclient.discovery import build\n",
"import logging\n",
"import requests\n",
"\n",
"SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']\n",
"\n",
"\n",
"creds = None\n",
"# The file token.json stores the user's access and refresh tokens, and is\n",
"# created automatically when the authorization flow completes for the first\n",
"# time.\n",
"if os.path.exists('email_token.json'):\n",
"    creds = Credentials.from_authorized_user_file('email_token.json', SCOPES)\n",
"# If there are no (valid) credentials available, let the user log in.\n",
"if not creds or not creds.valid:\n",
"    if creds and creds.expired and creds.refresh_token:\n",
"        creds.refresh(Request())\n",
"    else:\n",
"        flow = InstalledAppFlow.from_client_secrets_file(\n",
"            # your creds file here. Please create a json file as described at https://cloud.google.com/docs/authentication/getting-started\n",
"            'creds.json', SCOPES)\n",
"        creds = flow.run_local_server(port=0)\n",
"    # Save the credentials for the next run\n",
"    with open('email_token.json', 'w') as token:\n",
"        token.write(creds.to_json())"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "a2793ba0",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_loaders.gmail import GMailLoader"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "2154597f",
"metadata": {},
"outputs": [],
"source": [
"loader = GMailLoader(creds=creds, n=3)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "0b7d11bd",
"metadata": {},
"outputs": [],
"source": [
"data = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "74764bc7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sometimes there can be errors, which we silently ignore\n",
"len(data)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "d9360a85",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_loaders.utils import (\n",
"    map_ai_messages,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "a9646f7a",
"metadata": {},
"outputs": [],
"source": [
"# This makes messages sent by hchase@langchain.com the AI Messages\n",
"# This means you will train an LLM to predict as if it's responding as hchase\n",
"training_data = list(map_ai_messages(data, sender=\"Harrison Chase <hchase@langchain.com>\"))"
]
},
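{
"cell_type": "markdown",
"id": "2e0e2d54",
"metadata": {},
"source": [
"As a next step, you could prepare these messages for fine-tuning. A minimal sketch (assuming you want to follow the OpenAI fine-tuning flow shown in the other chat loader notebooks) converts them with `convert_messages_for_finetuning`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3a38bf3e",
"metadata": {},
"outputs": [],
"source": [
"from langchain.adapters.openai import convert_messages_for_finetuning\n",
"\n",
"# Convert the mapped chat sessions into lists of OpenAI-format dictionaries\n",
"finetuning_data = convert_messages_for_finetuning(training_data)"
]
},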
{
"cell_type": "code",
"execution_count": null,
"id": "d1a182f0",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
420 docs/extras/integrations/chat_loaders/imessage.ipynb Normal file
@@ -0,0 +1,420 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "01fcfa2f-33a9-48f3-835a-b1956c394d6b",
"metadata": {},
"source": [
"# iMessage\n",
"\n",
"This notebook shows how to use the iMessage chat loader. This class helps convert iMessage conversations to LangChain chat messages.\n",
"\n",
"On macOS, iMessage stores conversations in a sqlite database at `~/Library/Messages/chat.db` (at least for macOS Ventura 13.4). \n",
"The `IMessageChatLoader` loads from this database file. \n",
"\n",
"1. Create the `IMessageChatLoader` with the file path pointing to the `chat.db` database you'd like to process.\n",
"2. Call `loader.load()` (or `loader.lazy_load()`) to perform the conversion. Optionally use `merge_chat_runs` to combine messages from the same sender in sequence, and/or `map_ai_messages` to convert messages from the specified sender to the \"AIMessage\" class.\n",
"\n",
"## 1. Access Chat DB\n",
"\n",
"It's likely that your terminal is denied access to `~/Library/Messages`. To use this class, you can copy the DB to an accessible directory (e.g., Documents) and load from there. Alternatively (and not recommended), you can grant full disk access to your terminal emulator in System Settings > Security and Privacy > Full Disk Access.\n",
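"\n",
"A minimal sketch of the copy step (assuming the default macOS paths) using Python:\n",
"\n",
"```python\n",
"import pathlib\n",
"import shutil\n",
"\n",
"# Copy the protected chat database to a readable location\n",
"home = pathlib.Path.home()\n",
"shutil.copy(home / \"Library\" / \"Messages\" / \"chat.db\", home / \"Documents\" / \"chat.db\")\n",
"```\n",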
"\n",
|
||||
"We have created an example database you can use at [this linked drive file](https://drive.google.com/file/d/1NebNKqTA2NXApCmeH6mu0unJD2tANZzo/view?usp=sharing)."
|
||||
]
|
||||
},
|
||||
{
"cell_type": "code",
"execution_count": 1,
"id": "036ce7e0-a38f-4cbe-89a6-a205ae7c23be",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File chat.db downloaded.\n"
]
}
],
"source": [
"# This uses some example data\n",
"import requests\n",
"\n",
"def download_drive_file(url: str, output_path: str = 'chat.db') -> None:\n",
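"    # Derive a direct-download URL from the Google Drive share link\n",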
" file_id = url.split('/')[-2]\n",
|
||||
" download_url = f'https://drive.google.com/uc?export=download&id={file_id}'\n",
|
||||
"\n",
|
||||
" response = requests.get(download_url)\n",
|
||||
" if response.status_code != 200:\n",
|
||||
" print('Failed to download the file.')\n",
|
||||
" return\n",
|
||||
"\n",
|
||||
" with open(output_path, 'wb') as file:\n",
|
||||
" file.write(response.content)\n",
|
||||
" print(f'File {output_path} downloaded.')\n",
|
||||
"\n",
|
||||
"url = 'https://drive.google.com/file/d/1NebNKqTA2NXApCmeH6mu0unJD2tANZzo/view?usp=sharing'\n",
|
||||
"\n",
|
||||
"# Download file to chat.db\n",
|
||||
"download_drive_file(url)"
|
||||
]
|
||||
},
|
||||
{
"cell_type": "markdown",
"id": "cf60f703-76f1-4602-a723-02c59535c1af",
"metadata": {},
"source": [
"## 2. Create the Chat Loader\n",
"\n",
"Provide the loader with the file path to the `chat.db` database. You can optionally specify the user id that maps to an AI message as well as configure whether to merge message runs."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4b8b432a-d2bc-49e1-b35f-761730a8fd6d",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_loaders.imessage import IMessageChatLoader"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "8ec6661b-0aca-48ae-9e2b-6412856c287b",
"metadata": {},
"outputs": [],
"source": [
"loader = IMessageChatLoader(\n",
"    path=\"./chat.db\",\n",
")"
]
},
{
"cell_type": "markdown",
"id": "8805a7c5-84b4-49f5-8989-0022f2054ace",
"metadata": {},
"source": [
"## 3. Load messages\n",
"\n",
"The `load()` (or `lazy_load`) methods return a list of \"ChatSessions\" that currently just contain a list of messages per loaded conversation. All messages are mapped to \"HumanMessage\" objects to start. \n",
"\n",
"You can optionally choose to merge message \"runs\" (consecutive messages from the same sender) and select a sender to represent the \"AI\". The fine-tuned LLM will learn to generate these AI messages."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "fcd69b3e-020d-4a15-8a0d-61c2d34e1ee1",
"metadata": {},
"outputs": [],
"source": [
"from typing import List\n",
"from langchain.chat_loaders.base import ChatSession\n",
"from langchain.chat_loaders.utils import (\n",
"    map_ai_messages,\n",
"    merge_chat_runs,\n",
")\n",
"\n",
"raw_messages = loader.lazy_load()\n",
"# Merge consecutive messages from the same sender into a single message\n",
"merged_messages = merge_chat_runs(raw_messages)\n",
"# Convert messages from \"Tortoise\" to AI messages. Do you have a guess who these conversations are between?\n",
"chat_sessions: List[ChatSession] = list(map_ai_messages(merged_messages, sender=\"Tortoise\"))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "370b8c26-c7a8-434c-a225-45c20ff14a03",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[AIMessage(content=\"Slow and steady, that's my motto.\", additional_kwargs={'message_time': 1693182723, 'sender': 'Tortoise'}, example=False),\n",
" HumanMessage(content='Speed is key!', additional_kwargs={'message_time': 1693182753, 'sender': 'Hare'}, example=False),\n",
" AIMessage(content='A balanced approach is more reliable.', additional_kwargs={'message_time': 1693182783, 'sender': 'Tortoise'}, example=False)]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Now all of the Tortoise's messages will take the AI message class\n",
"# which maps to the 'assistant' role in OpenAI's training format\n",
"chat_sessions[0]['messages'][:3]"
]
},
{
"cell_type": "markdown",
"id": "05208f9d-3193-4a8d-86a5-13df2c8197e5",
"metadata": {},
"source": [
"## 4. Prepare for fine-tuning\n",
"\n",
"Now it's time to convert our chat messages to OpenAI dictionaries. We can use the `convert_messages_for_finetuning` utility to do so."
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "8834861f-f37f-4c08-96c6-917269bf09b8",
"metadata": {},
"outputs": [],
"source": [
"from langchain.adapters.openai import convert_messages_for_finetuning"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "ce7ab0f9-6e6a-4a1c-8b86-c635251d437e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Prepared 10 dialogues for training\n"
]
}
],
"source": [
"training_data = convert_messages_for_finetuning(chat_sessions)\n",
"print(f\"Prepared {len(training_data)} dialogues for training\")"
]
},
{
"cell_type": "markdown",
"id": "b494d64c-8056-42ae-b4c1-a9cfabc002ea",
"metadata": {},
"source": [
"## 5. Fine-tune the model\n",
"\n",
"It's time to fine-tune the model. Make sure you have `openai` installed\n",
"and have set your `OPENAI_API_KEY` appropriately."
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "b4b60daa-b899-4291-a09a-412ce9c218fc",
"metadata": {},
"outputs": [],
"source": [
"# %pip install -U openai --quiet"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "2cca6c95-c0d6-4826-b4fa-1c403f217f93",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File file-zHIgf4r8LltZG3RFpkGd4Sjf ready after 10.19 seconds.\n"
]
}
],
"source": [
"import json\n",
"from io import BytesIO\n",
"import time\n",
"\n",
"import openai\n",
"\n",
"# We will write the jsonl file in memory\n",
"my_file = BytesIO()\n",
"for m in training_data:\n",
"    my_file.write((json.dumps({\"messages\": m}) + \"\\n\").encode('utf-8'))\n",
"\n",
"my_file.seek(0)\n",
"training_file = openai.File.create(\n",
"    file=my_file,\n",
"    purpose='fine-tune'\n",
")\n",
"\n",
"# OpenAI audits each training file for compliance reasons.\n",
"# This may take a few minutes\n",
"status = openai.File.retrieve(training_file.id).status\n",
"start_time = time.time()\n",
"while status != \"processed\":\n",
"    print(f\"Status=[{status}]... {time.time() - start_time:.2f}s\", end=\"\\r\", flush=True)\n",
"    time.sleep(5)\n",
"    status = openai.File.retrieve(training_file.id).status\n",
"print(f\"File {training_file.id} ready after {time.time() - start_time:.2f} seconds.\")"
]
},
{
"cell_type": "markdown",
"id": "60ee0476-3113-4dc8-a886-bce878c60b07",
"metadata": {},
"source": [
"With the file ready, it's time to kick off a training job."
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "c376ddca-5b4f-4e5a-bf4e-6beeb467eacc",
"metadata": {},
"outputs": [],
"source": [
"job = openai.FineTuningJob.create(\n",
"    training_file=training_file.id,\n",
"    model=\"gpt-3.5-turbo\",\n",
")"
]
},
{
"cell_type": "markdown",
"id": "09344c60-0bee-4989-b8d1-4a8821553cc3",
"metadata": {},
"source": [
"Grab a cup of tea while your model is being prepared. This may take some time!"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "22eae900-04ca-456b-ba51-1dfff1f8e0e1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Status=[running]... 524.95s\r"
]
}
],
"source": [
"status = openai.FineTuningJob.retrieve(job.id).status\n",
"start_time = time.time()\n",
"while status != \"succeeded\":\n",
"    print(f\"Status=[{status}]... {time.time() - start_time:.2f}s\", end=\"\\r\", flush=True)\n",
"    time.sleep(5)\n",
"    job = openai.FineTuningJob.retrieve(job.id)\n",
"    status = job.status"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "39e72616-a7d9-44b8-a4eb-506611d119f4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ft:gpt-3.5-turbo-0613:personal::7sKoRdlz\n"
]
}
],
"source": [
"print(job.fine_tuned_model)"
]
},
{
"cell_type": "markdown",
"id": "0d717749-b1b6-451f-b3c5-3286b82d45b9",
"metadata": {},
"source": [
"## 6. Use in LangChain\n",
"\n",
"You can use the resulting model ID directly in the `ChatOpenAI` model class."
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "1579dfca-95c6-47b7-8549-1195b9dce5b0",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"\n",
"model = ChatOpenAI(\n",
"    model=job.fine_tuned_model,\n",
"    temperature=1,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "6f53d1b1-dcbf-4976-a61a-17f74c6f1b0a",
"metadata": {},
"outputs": [],
"source": [
"from langchain.prompts import ChatPromptTemplate\n",
"from langchain.schema.output_parser import StrOutputParser\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
"    [\n",
"        (\"system\", \"You are speaking to hare.\"),\n",
"        (\"human\", \"{input}\"),\n",
"    ]\n",
")\n",
"\n",
"chain = prompt | model | StrOutputParser()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "6619c9bc-54ea-4136-bd9a-44557f7da724",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"A symbol of interconnectedness."
]
}
],
"source": [
"for tok in chain.stream({\"input\": \"What's the golden thread?\"}):\n",
"    print(tok, end=\"\", flush=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88e0d1a1-48a9-4d9d-9f4e-010cbbb65af8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
188 docs/extras/integrations/chat_loaders/index.mdx Normal file
@@ -0,0 +1,188 @@
---
sidebar_position: 0
---

# Chat loaders

Like document loaders, chat loaders are utilities designed to help load conversations from popular communication platforms such as Facebook, Slack, and Discord. These are loaded into memory as LangChain chat message objects. Such utilities facilitate tasks such as fine-tuning a language model to match your personal style or voice.

This brief guide will illustrate the process using [OpenAI's fine-tuning API](https://platform.openai.com/docs/guides/fine-tuning), comprising six steps:

1. Export your Facebook Messenger chat data in a compatible format for your intended chat loader.
2. Load the chat data into memory as LangChain chat message objects. (_This is what is covered in each integration notebook in this section of the documentation._)
   - Assign a person to the "AI" role and optionally filter, group, and merge messages.
3. Export these acquired messages in a format expected by the fine-tuning API.
4. Upload this data to OpenAI.
5. Fine-tune your model.
6. Implement the fine-tuned model in LangChain.

This guide is not wholly comprehensive but is designed to take you through the fundamentals of going from raw data to fine-tuned model.

We will demonstrate the procedure through an example of fine-tuning a `gpt-3.5-turbo` model on Facebook Messenger data.

### 1. Export your chat data

To export your Facebook messenger data, you can follow the [instructions here](https://www.zapptales.com/en/download-facebook-messenger-chat-history-how-to/).

:::important JSON format
You must select "JSON format" (instead of HTML) when exporting your data to be compatible with the current loader.
:::

OpenAI requires at least 10 examples to fine-tune your model, but they recommend between 50-100 for more optimal results.
You can use the example data stored at [this google drive link](https://drive.google.com/file/d/1rh1s1o2i7B-Sk1v9o8KNgivLVGwJ-osV/view?usp=sharing) to test the process.

### 2. Load the chat

Once you've obtained your chat data, you can load it into memory as LangChain chat message objects. Here's an example of loading the data with Python:

```python
from langchain.chat_loaders.facebook_messenger import FolderFacebookMessengerChatLoader

loader = FolderFacebookMessengerChatLoader(
    path="./facebook_messenger_chats",
)

chat_sessions = loader.load()
```

In this snippet, we point the loader to a directory of Facebook chat dumps which are then loaded as multiple "sessions" of messages, one session per conversation file.

Once you've loaded the messages, you should decide which person you want the fine-tuned model to represent (usually yourself). You can also decide to merge consecutive messages from the same sender into a single chat message.
For both of these tasks, you can use the chat_loaders utilities to do so:

```python
from langchain.chat_loaders.utils import (
    merge_chat_runs,
    map_ai_messages,
)

merged_sessions = merge_chat_runs(chat_sessions)
alternating_sessions = list(map_ai_messages(merged_sessions, "My Name"))
```

### 3. Export messages to OpenAI format

Convert the chat messages to dictionaries using the `convert_messages_for_finetuning` function. Then, group the data into chunks for better context modeling and overlap management.

```python
from langchain.adapters.openai import convert_messages_for_finetuning

openai_messages = convert_messages_for_finetuning(chat_sessions)
```

At this point, the data is ready for upload to OpenAI. You can choose to split up conversations into smaller chunks for training if you
do not have enough conversations to train on. Feel free to play around with different chunk sizes or with adding system messages to the fine-tuning data.

```python
chunk_size = 8
overlap = 2

message_groups = [
    conversation_messages[i: i + chunk_size]
    for conversation_messages in openai_messages
    for i in range(
        0, len(conversation_messages) - chunk_size + 1,
        chunk_size - overlap)
]

len(message_groups)
# 9
```

### 4. Upload the data to OpenAI

Ensure you have set your OpenAI API key by following these [instructions](https://platform.openai.com/account/api-keys), then upload the training file.
An audit is performed to ensure data compliance, so you may have to wait a few minutes for the dataset to become ready for use.

```python
import time
import json
import io

import openai

my_file = io.BytesIO()
for group in message_groups:
    my_file.write((json.dumps({"messages": group}) + "\n").encode('utf-8'))

my_file.seek(0)
training_file = openai.File.create(
    file=my_file,
    purpose='fine-tune'
)

# Wait while the file is processed
status = openai.File.retrieve(training_file.id).status
start_time = time.time()
while status != "processed":
    print(f"Status=[{status}]... {time.time() - start_time:.2f}s", end="\r", flush=True)
    time.sleep(5)
    status = openai.File.retrieve(training_file.id).status
print(f"File {training_file.id} ready after {time.time() - start_time:.2f} seconds.")
```

Once this is done, you can proceed to the model training!

### 5. Fine-tune the model

Start the fine-tuning job with your chosen base model.

```python
job = openai.FineTuningJob.create(
    training_file=training_file.id,
    model="gpt-3.5-turbo",
)
```

This might take a while. Check the status with `openai.FineTuningJob.retrieve(job.id).status` and wait for it to report `succeeded`.

```python
# It may take 10-20+ minutes to complete training.
status = openai.FineTuningJob.retrieve(job.id).status
start_time = time.time()
while status != "succeeded":
    print(f"Status=[{status}]... {time.time() - start_time:.2f}s", end="\r", flush=True)
    time.sleep(5)
    job = openai.FineTuningJob.retrieve(job.id)
    status = job.status
```

### 6. Use the model in LangChain

You're almost there! Use the fine-tuned model in LangChain.

```python
from langchain import chat_models

model_name = job.fine_tuned_model
# Example: ft:gpt-3.5-turbo-0613:personal::5mty86jblapsed
model = chat_models.ChatOpenAI(model=model_name)
```

```python
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
    ]
)

chain = prompt | model | StrOutputParser()

for tok in chain.stream({"input": "What classes are you taking?"}):
    print(tok, end="", flush=True)

# The usual - Potions, Transfiguration, Defense Against the Dark Arts. What about you?
```

And that's it! You've successfully fine-tuned a model and used it in LangChain.

## Supported Chat Loaders

LangChain currently supports the following chat loaders. Feel free to contribute more!

import DocCardList from "@theme/DocCardList";

<DocCardList />
163 docs/extras/integrations/chat_loaders/slack.ipynb Normal file
@@ -0,0 +1,163 @@
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "01fcfa2f-33a9-48f3-835a-b1956c394d6b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Slack\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the Slack chat loader. This class helps map exported slack conversations to LangChain chat messages.\n",
|
||||
"\n",
|
||||
"The process has three steps:\n",
|
||||
"1. Export the desired conversation thread by following the [instructions here](https://slack.com/help/articles/1500001548241-Request-to-export-all-conversations).\n",
|
||||
"2. Create the `SlackChatLoader` with the file path pointing to the JSON file or directory of JSON files\n",
|
||||
"3. Call `loader.load()` (or `loader.lazy_load()`) to perform the conversion. Optionally use `merge_chat_runs` to combine messages from the same sender in sequence, and/or `map_ai_messages` to convert messages from the specified sender to the \"AIMessage\" class.\n",
|
||||
"\n",
|
||||
"## 1. Create message dump\n",
|
||||
"\n",
|
||||
"Currently (2023/08/23) this loader best supports a zip directory of files in the format generated by exporting a direct message conversation from Slack. Follow Slack's up-to-date instructions on how to do so.\n",
|
||||
"\n",
|
||||
"We have an example in the LangChain repo."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "a79d35bf-5f21-4063-84bf-a60845c1c51f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"permalink = \"https://raw.githubusercontent.com/langchain-ai/langchain/342087bdfa3ac31d622385d0f2d09cf5e06c8db3/libs/langchain/tests/integration_tests/examples/slack_export.zip\"\n",
|
||||
"response = requests.get(permalink)\n",
|
||||
"with open(\"slack_dump.zip\", \"wb\") as f:\n",
|
||||
" f.write(response.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cf60f703-76f1-4602-a723-02c59535c1af",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Create the Chat Loader\n",
|
||||
"\n",
|
||||
"Provide the loader with the file path to the zip directory. You can optionally specify the user ID that maps to an AI message, as well as configure whether to merge message runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "4b8b432a-d2bc-49e1-b35f-761730a8fd6d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_loaders.slack import SlackChatLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "8ec6661b-0aca-48ae-9e2b-6412856c287b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = SlackChatLoader(\n",
|
||||
" path=\"slack_dump.zip\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8805a7c5-84b4-49f5-8989-0022f2054ace",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Load messages\n",
|
||||
"\n",
|
||||
"The `load()` (or `lazy_load`) methods return a list of \"ChatSessions\" that currently just contain a list of messages per loaded conversation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "fcd69b3e-020d-4a15-8a0d-61c2d34e1ee1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import List\n",
|
||||
"from langchain.chat_loaders.base import ChatSession\n",
|
||||
"from langchain.chat_loaders.utils import (\n",
|
||||
" map_ai_messages,\n",
|
||||
" merge_chat_runs,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"raw_messages = loader.lazy_load()\n",
|
||||
"# Merge consecutive messages from the same sender into a single message\n",
|
||||
"merged_messages = merge_chat_runs(raw_messages)\n",
|
||||
"# Convert messages from \"U0500003428\" to AI messages\n",
|
||||
"messages: List[ChatSession] = list(map_ai_messages(merged_messages, sender=\"U0500003428\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7d033f87-cd0c-4f44-a753-41b871c1e919",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Next Steps\n",
|
||||
"\n",
|
||||
"You can then use these messages however you see fit, such as fine-tuning a model, selecting few-shot examples, or directly making predictions for the next message."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "7d8a1629-5d9e-49b3-b978-3add57027d59",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Hi, \n",
|
||||
"\n",
|
||||
"I hope you're doing well. I wanted to reach out and ask if you'd be available to meet up for coffee sometime next week. I'd love to catch up and hear about what's been going on in your life. Let me know if you're interested and we can find a time that works for both of us. \n",
|
||||
"\n",
|
||||
"Looking forward to hearing from you!\n",
|
||||
"\n",
|
||||
"Best, [Your Name]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI()\n",
|
||||
"\n",
|
||||
"for chunk in llm.stream(messages[1]['messages']):\n",
|
||||
" print(chunk.content, end=\"\", flush=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
206 docs/extras/integrations/chat_loaders/telegram.ipynb Normal file
@@ -0,0 +1,206 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "735455a6-f82e-4252-b545-27385ef883f4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Telegram\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the Telegram chat loader. This class helps map exported Telegram conversations to LangChain chat messages.\n",
|
||||
"\n",
|
||||
"The process has three steps:\n",
|
||||
"1. Export the conversation to a JSON file using the Telegram Desktop app (see the instructions below)\n",
|
||||
"2. Create the `TelegramChatLoader` with the file path pointing to the JSON file or directory of JSON files\n",
|
||||
"3. Call `loader.load()` (or `loader.lazy_load()`) to perform the conversion. Optionally use `merge_chat_runs` to combine messages from the same sender in sequence, and/or `map_ai_messages` to convert messages from the specified sender to the \"AIMessage\" class.\n",
|
||||
"\n",
|
||||
"## 1. Create message dump\n",
|
||||
"\n",
|
||||
"Currently (2023/08/23) this loader best supports json files in the format generated by exporting your chat history from the [Telegram Desktop App](https://desktop.telegram.org/).\n",
|
||||
"\n",
|
||||
"**Important:** There are 'lite' versions of Telegram such as \"Telegram for macOS\" that lack the export functionality. Please make sure you use the correct app to export the file.\n",
|
||||
"\n",
|
||||
"To make the export:\n",
|
||||
"1. Download and open Telegram Desktop\n",
|
||||
"2. Select a conversation\n",
|
||||
"3. Navigate to the conversation settings (currently the three dots in the top right corner)\n",
|
||||
"4. Click \"Export Chat History\"\n",
|
||||
"5. Unselect photos and other media. Select \"Machine-readable JSON\" format to export.\n",
|
||||
"\n",
|
||||
"An example is below: "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "285f2044-0f58-4b92-addb-9f8569076734",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Overwriting telegram_conversation.json\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%writefile telegram_conversation.json\n",
|
||||
"{\n",
|
||||
" \"name\": \"Jiminy\",\n",
|
||||
" \"type\": \"personal_chat\",\n",
|
||||
" \"id\": 5965280513,\n",
|
||||
" \"messages\": [\n",
|
||||
" {\n",
|
||||
" \"id\": 1,\n",
|
||||
" \"type\": \"message\",\n",
|
||||
" \"date\": \"2023-08-23T13:11:23\",\n",
|
||||
" \"date_unixtime\": \"1692821483\",\n",
|
||||
" \"from\": \"Jiminy Cricket\",\n",
|
||||
" \"from_id\": \"user123450513\",\n",
|
||||
" \"text\": \"You better trust your conscience\",\n",
|
||||
" \"text_entities\": [\n",
|
||||
" {\n",
|
||||
" \"type\": \"plain\",\n",
|
||||
" \"text\": \"You better trust your conscience\"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"id\": 2,\n",
|
||||
" \"type\": \"message\",\n",
|
||||
" \"date\": \"2023-08-23T13:13:20\",\n",
|
||||
" \"date_unixtime\": \"1692821600\",\n",
|
||||
" \"from\": \"Batman & Robin\",\n",
|
||||
" \"from_id\": \"user6565661032\",\n",
|
||||
" \"text\": \"What did you just say?\",\n",
|
||||
" \"text_entities\": [\n",
|
||||
" {\n",
|
||||
" \"type\": \"plain\",\n",
|
||||
" \"text\": \"What did you just say?\"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7cc109f4-4c92-4cd3-8143-c322776c3f03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Create the Chat Loader\n",
|
||||
"\n",
|
||||
"All that's required is the file path. You can optionally specify the user name that maps to an AI message, as well as configure whether to merge message runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "111f7767-573c-42d4-86f0-bd766bbaa071",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_loaders.telegram import TelegramChatLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a4226efa-2640-4990-a20c-6861d1887329",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TelegramChatLoader(\n",
|
||||
" path=\"./telegram_conversation.json\", \n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "71699fb7-7815-4c89-8d96-30e8fada6923",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Load messages\n",
|
||||
"\n",
|
||||
"The `load()` (or `lazy_load`) methods return a list of \"ChatSessions\" that currently just contain a list of messages per loaded conversation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "81121efb-c875-4a77-ad1e-fe26b3d7e812",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import List\n",
|
||||
"from langchain.chat_loaders.base import ChatSession\n",
|
||||
"from langchain.chat_loaders.utils import (\n",
|
||||
" map_ai_messages,\n",
|
||||
" merge_chat_runs,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"raw_messages = loader.lazy_load()\n",
|
||||
"# Merge consecutive messages from the same sender into a single message\n",
|
||||
"merged_messages = merge_chat_runs(raw_messages)\n",
|
||||
"# Convert messages from \"Jiminy Cricket\" to AI messages\n",
|
||||
"messages: List[ChatSession] = list(map_ai_messages(merged_messages, sender=\"Jiminy Cricket\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b9089c05-7375-41ca-a2f9-672a845314e4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Next Steps\n",
|
||||
"\n",
|
||||
"You can then use these messages however you see fit, such as fine-tuning a model, selecting few-shot examples, or directly making predictions for the next message."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "637a6f5d-6944-4722-9361-a76ef5e9dd2a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"I said, \"You better trust your conscience.\""
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI()\n",
|
||||
"\n",
|
||||
"for chunk in llm.stream(messages[0]['messages']):\n",
|
||||
" print(chunk.content, end=\"\", flush=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
77 docs/extras/integrations/chat_loaders/twitter.ipynb Normal file
@@ -0,0 +1,77 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d86853d2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Twitter (via Apify)\n",
|
||||
"\n",
|
||||
"This notebook shows how to load chat messages from Twitter to fine-tune on. We do this by utilizing Apify.\n",
|
||||
"\n",
|
||||
"First, use Apify to export tweets. An example of loading and converting the exported tweets follows."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e5034b4e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"from langchain.schema import AIMessage\n",
|
||||
"from langchain.adapters.openai import convert_message_to_dict"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "8bf0fb93",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open('example_data/dataset_twitter-scraper_2023-08-23_22-13-19-740.json') as f:\n",
|
||||
" data = json.load(f)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "468124fa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Filter out tweets that reference other tweets, because it's a bit weird\n",
|
||||
"tweets = [d[\"full_text\"] for d in data if \"t.co\" not in d['full_text']]\n",
|
||||
"# Create them as AI messages\n",
|
||||
"messages = [AIMessage(content=t) for t in tweets]\n",
|
||||
"# Add in a system message at the start\n",
|
||||
"# TODO: we could try to extract the subject from the tweets, and put that in the system message.\n",
|
||||
"system_message = {\"role\": \"system\", \"content\": \"write a tweet\"}\n",
|
||||
"data = [[system_message, convert_message_to_dict(m)] for m in messages]"
|
||||
]
|
||||
},
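{
"cell_type": "markdown",
"id": "jsonl-export-sketch",
"metadata": {},
"source": [
"The resulting `data` is a list of chat-format training examples. As a minimal sketch (mirroring the fine-tuning flow shown earlier in this document; the filename below is hypothetical), you could write it out as a JSONL file for fine-tuning:\n",
"\n",
"```python\n",
"import json\n",
"\n",
"# Each line holds one chat-format training example\n",
"with open(\"tweets.jsonl\", \"w\") as f:\n",
"    for messages in data:\n",
"        f.write(json.dumps({\"messages\": messages}) + \"\\n\")\n",
"```"
]
}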
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
204 docs/extras/integrations/chat_loaders/whatsapp.ipynb Normal file
@@ -0,0 +1,204 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "735455a6-f82e-4252-b545-27385ef883f4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# WhatsApp\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the WhatsApp chat loader. This class helps map exported WhatsApp conversations to LangChain chat messages.\n",
|
||||
"\n",
|
||||
"The process has three steps:\n",
|
||||
"1. Export the chat conversations to your computer\n",
|
||||
"2. Create the `WhatsAppChatLoader` with the file path pointing to the exported zip file, unzipped directory, or one of the chat `.txt` files therein\n",
|
||||
"3. Call `loader.load()` (or `loader.lazy_load()`) to perform the conversion.\n",
|
||||
"\n",
|
||||
"## 1. Create message dump\n",
|
||||
"\n",
|
||||
"To make the export of your WhatsApp conversation(s), complete the following steps:\n",
|
||||
"\n",
|
||||
"1. Open the target conversation\n",
|
||||
"2. Click the three dots in the top right corner and select \"More\".\n",
|
||||
"3. Then select \"Export chat\" and choose \"Without media\".\n",
|
||||
"\n",
|
||||
"An example of the data format for each conversation is below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "285f2044-0f58-4b92-addb-9f8569076734",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Writing whatsapp_chat.txt\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%writefile whatsapp_chat.txt\n",
|
||||
"[8/15/23, 9:12:33 AM] Dr. Feather: Messages and calls are end-to-end encrypted. No one outside of this chat, not even WhatsApp, can read or listen to them.\n",
|
||||
"[8/15/23, 9:12:43 AM] Dr. Feather: I spotted a rare Hyacinth Macaw yesterday in the Amazon Rainforest. Such a magnificent creature!\n",
|
||||
"[8/15/23, 9:12:48 AM] Dr. Feather: image omitted\n",
|
||||
"[8/15/23, 9:13:15 AM] Jungle Jane: That's stunning! Were you able to observe its behavior?\n",
|
||||
"[8/15/23, 9:13:23 AM] Dr. Feather: image omitted\n",
|
||||
"[8/15/23, 9:14:02 AM] Dr. Feather: Yes, it seemed quite social with other macaws. They're known for their playful nature.\n",
|
||||
"[8/15/23, 9:14:15 AM] Jungle Jane: How's the research going on parrot communication?\n",
|
||||
"[8/15/23, 9:14:30 AM] Dr. Feather: image omitted\n",
|
||||
"[8/15/23, 9:14:50 AM] Dr. Feather: It's progressing well. We're learning so much about how they use sound and color to communicate.\n",
|
||||
"[8/15/23, 9:15:10 AM] Jungle Jane: That's fascinating! Can't wait to read your paper on it.\n",
|
||||
"[8/15/23, 9:15:20 AM] Dr. Feather: Thank you! I'll send you a draft soon.\n",
|
||||
"[8/15/23, 9:25:16 PM] Jungle Jane: Looking forward to it! Keep up the great work."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7cc109f4-4c92-4cd3-8143-c322776c3f03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Create the Chat Loader\n",
|
||||
"\n",
|
||||
"The WhatsAppChatLoader accepts the resulting zip file, unzipped directory, or the path to any of the chat `.txt` files therein.\n",
|
||||
"\n",
|
||||
"Provide that, as well as the user name you want to take on the role of \"AI\" when fine-tuning."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "111f7767-573c-42d4-86f0-bd766bbaa071",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_loaders.whatsapp import WhatsAppChatLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "a4226efa-2640-4990-a20c-6861d1887329",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = WhatsAppChatLoader(\n",
|
||||
" path=\"./whatsapp_chat.txt\", \n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "71699fb7-7815-4c89-8d96-30e8fada6923",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Load messages\n",
|
||||
"\n",
|
||||
"The `load()` (or `lazy_load`) methods return a list of \"ChatSessions\" that currently store the list of messages per loaded conversation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "81121efb-c875-4a77-ad1e-fe26b3d7e812",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'messages': [AIMessage(content='I spotted a rare Hyacinth Macaw yesterday in the Amazon Rainforest. Such a magnificent creature!', additional_kwargs={'sender': 'Dr. Feather', 'events': [{'message_time': '8/15/23, 9:12:43 AM'}]}, example=False),\n",
|
||||
" HumanMessage(content=\"That's stunning! Were you able to observe its behavior?\", additional_kwargs={'sender': 'Jungle Jane', 'events': [{'message_time': '8/15/23, 9:13:15 AM'}]}, example=False),\n",
|
||||
" AIMessage(content=\"Yes, it seemed quite social with other macaws. They're known for their playful nature.\", additional_kwargs={'sender': 'Dr. Feather', 'events': [{'message_time': '8/15/23, 9:14:02 AM'}]}, example=False),\n",
|
||||
" HumanMessage(content=\"How's the research going on parrot communication?\", additional_kwargs={'sender': 'Jungle Jane', 'events': [{'message_time': '8/15/23, 9:14:15 AM'}]}, example=False),\n",
|
||||
" AIMessage(content=\"It's progressing well. We're learning so much about how they use sound and color to communicate.\", additional_kwargs={'sender': 'Dr. Feather', 'events': [{'message_time': '8/15/23, 9:14:50 AM'}]}, example=False),\n",
|
||||
" HumanMessage(content=\"That's fascinating! Can't wait to read your paper on it.\", additional_kwargs={'sender': 'Jungle Jane', 'events': [{'message_time': '8/15/23, 9:15:10 AM'}]}, example=False),\n",
|
||||
" AIMessage(content=\"Thank you! I'll send you a draft soon.\", additional_kwargs={'sender': 'Dr. Feather', 'events': [{'message_time': '8/15/23, 9:15:20 AM'}]}, example=False),\n",
|
||||
" HumanMessage(content='Looking forward to it! Keep up the great work.', additional_kwargs={'sender': 'Jungle Jane', 'events': [{'message_time': '8/15/23, 9:25:16 PM'}]}, example=False)]}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from typing import List\n",
|
||||
"from langchain.chat_loaders.base import ChatSession\n",
|
||||
"from langchain.chat_loaders.utils import (\n",
|
||||
" map_ai_messages,\n",
|
||||
" merge_chat_runs,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"raw_messages = loader.lazy_load()\n",
|
||||
"# Merge consecutive messages from the same sender into a single message\n",
|
||||
"merged_messages = merge_chat_runs(raw_messages)\n",
|
||||
"# Convert messages from \"Dr. Feather\" to AI messages\n",
|
||||
"messages: List[ChatSession] = list(map_ai_messages(merged_messages, sender=\"Dr. Feather\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b9089c05-7375-41ca-a2f9-672a845314e4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Next Steps\n",
|
||||
"\n",
|
||||
"You can then use these messages however you see fit, such as fine-tuning a model, selecting few-shot examples, or directly making predictions for the next message."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "637a6f5d-6944-4722-9361-a76ef5e9dd2a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Thank you for the encouragement! I'll do my best to continue studying and sharing fascinating insights about parrot communication."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI()\n",
|
||||
"\n",
|
||||
"for chunk in llm.stream(messages[0]['messages']):\n",
|
||||
" print(chunk.content, end=\"\", flush=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "16156643-cfbd-444f-b4ae-198eb44f0267",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -8,9 +8,9 @@
|
||||
"# Etherscan Loader\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"The Etherscan loader use etherscan api to load transacactions histories under specific account on Ethereum Mainnet.\n",
|
||||
"The Etherscan loader use etherscan api to load transaction histories under specific account on Ethereum Mainnet.\n",
|
||||
"\n",
|
||||
"You will need a Etherscan api key to proceed. The free api key has 5 calls per seconds quota.\n",
|
||||
"You will need a Etherscan api key to proceed. The free api key has 5 calls per second quota.\n",
|
||||
"\n",
|
||||
"The loader supports the following six functionalities:\n",
|
||||
"* Retrieve normal transactions under a specific account on Ethereum Mainnet\n",
|
||||
|
||||
@@ -90,7 +90,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Lorem ipsum dolor sit amet.', lookup_str='', metadata={'source': '/var/folders/y6/8_bzdg295ld6s1_97_12m4lr0000gn/T/tmpujbkzf_l/fake.docx'}, lookup_index=0)]"
|
||||
"[Document(page_content='Lorem ipsum dolor sit amet.', lookup_str='', metadata={'source': 's3://testing-hwc/fake.docx'}, lookup_index=0)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
|
||||
@@ -53,7 +53,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Lorem ipsum dolor sit amet.', lookup_str='', metadata={'source': '/var/folders/y6/8_bzdg295ld6s1_97_12m4lr0000gn/T/tmpxvave6wl/fake.docx'}, lookup_index=0)]"
|
||||
"[Document(page_content='Lorem ipsum dolor sit amet.', lookup_str='', metadata={'source': 's3://testing-hwc/fake.docx'}, lookup_index=0)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
@@ -96,3 +96,4 @@
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
|
||||
@@ -106,15 +106,39 @@
|
||||
" - `column_data_type`\n",
|
||||
" - `column_title`\n",
|
||||
" - `column_description`\n",
|
||||
" - `column_values`"
|
||||
" - `column_values`\n",
|
||||
" - `cube_data_obj_type`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"> page_content='Users View City, None' metadata={'table_name': 'users_view', 'column_name': 'users_view.city', 'column_data_type': 'string', 'column_title': 'Users View City', 'column_description': 'None', 'column_member_type': 'dimension', 'column_values': ['Austin', 'Chicago', 'Los Angeles', 'Mountain View', 'New York', 'Palo Alto', 'San Francisco', 'Seattle']}"
|
||||
"# Given string containing page content\n",
|
||||
"page_content = 'Users View City, None'\n",
|
||||
"\n",
|
||||
"# Given dictionary containing metadata\n",
|
||||
"metadata = {\n",
|
||||
" 'table_name': 'users_view',\n",
|
||||
" 'column_name': 'users_view.city',\n",
|
||||
" 'column_data_type': 'string',\n",
|
||||
" 'column_title': 'Users View City',\n",
|
||||
" 'column_description': 'None',\n",
|
||||
" 'column_member_type': 'dimension',\n",
|
||||
" 'column_values': [\n",
|
||||
" 'Austin',\n",
|
||||
" 'Chicago',\n",
|
||||
" 'Los Angeles',\n",
|
||||
" 'Mountain View',\n",
|
||||
" 'New York',\n",
|
||||
" 'Palo Alto',\n",
|
||||
" 'San Francisco',\n",
|
||||
" 'Seattle'\n",
|
||||
" ],\n",
|
||||
" 'cube_data_obj_type': 'view'\n",
|
||||
"}"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -38,7 +38,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"id": "878928a6-a5ae-4f74-b351-64e3b01733fe",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -50,7 +50,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"id": "2216c83f-68e4-4d2f-8ea2-5878fb18bbe7",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -66,7 +66,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"id": "8f3b6aa0-b45d-4e37-8c50-5bebe70fdb9d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -93,7 +93,7 @@
|
||||
"source": [
|
||||
"loader = GoogleDriveLoader(\n",
|
||||
" folder_id=\"1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5\",\n",
|
||||
" file_types=[\"document\", \"sheet\"]\n",
|
||||
" file_types=[\"document\", \"sheet\"],\n",
|
||||
" recursive=False\n",
|
||||
")"
|
||||
]
|
||||
@@ -110,7 +110,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"id": "94207e39",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -121,7 +121,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"id": "a15fbee0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -136,7 +136,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"id": "98410bda",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -146,21 +146,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": null,
|
||||
"id": "e3e72221",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='\\n \\n \\n Team\\n Location\\n Stanley Cups\\n \\n \\n Blues\\n STL\\n 1\\n \\n \\n Flyers\\n PHI\\n 2\\n \\n \\n Maple Leafs\\n TOR\\n 13\\n \\n \\n', metadata={'filetype': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'page_number': 1, 'page_name': 'Stanley Cups', 'text_as_html': '<table border=\"1\" class=\"dataframe\">\\n <tbody>\\n <tr>\\n <td>Team</td>\\n <td>Location</td>\\n <td>Stanley Cups</td>\\n </tr>\\n <tr>\\n <td>Blues</td>\\n <td>STL</td>\\n <td>1</td>\\n </tr>\\n <tr>\\n <td>Flyers</td>\\n <td>PHI</td>\\n <td>2</td>\\n </tr>\\n <tr>\\n <td>Maple Leafs</td>\\n <td>TOR</td>\\n <td>13</td>\\n </tr>\\n </tbody>\\n</table>', 'category': 'Table', 'source': 'https://drive.google.com/file/d/1aA6L2AR3g0CR-PW03HEZZo4NaVlKpaP7/view'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs[0]"
|
||||
]
|
||||
@@ -175,7 +164,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"id": "0e2d093f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -190,7 +179,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"id": "b35ddcc6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -200,21 +189,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"id": "3cc141e0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(page_content='\\n \\n \\n Team\\n Location\\n Stanley Cups\\n \\n \\n Blues\\n STL\\n 1\\n \\n \\n Flyers\\n PHI\\n 2\\n \\n \\n Maple Leafs\\n TOR\\n 13\\n \\n \\n', metadata={'filetype': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'page_number': 1, 'page_name': 'Stanley Cups', 'text_as_html': '<table border=\"1\" class=\"dataframe\">\\n <tbody>\\n <tr>\\n <td>Team</td>\\n <td>Location</td>\\n <td>Stanley Cups</td>\\n </tr>\\n <tr>\\n <td>Blues</td>\\n <td>STL</td>\\n <td>1</td>\\n </tr>\\n <tr>\\n <td>Flyers</td>\\n <td>PHI</td>\\n <td>2</td>\\n </tr>\\n <tr>\\n <td>Maple Leafs</td>\\n <td>TOR</td>\\n <td>13</td>\\n </tr>\\n </tbody>\\n</table>', 'category': 'Table', 'source': 'https://drive.google.com/file/d/1aA6L2AR3g0CR-PW03HEZZo4NaVlKpaP7/view'})"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs[0]"
|
||||
]
|
||||
@@ -226,6 +204,309 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "83ac576b-48c9-4aad-a35e-e978ea32f746",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Extended usage\n",
|
||||
"An external component, `langchain-googledrive`, can manage the complexity of Google Drive.\n",
"It's compatible with `langchain.document_loaders.GoogleDriveLoader` and can be used\n",
"in its place.\n",
"\n",
"To be compatible with containers, authentication uses the environment variable `GOOGLE_ACCOUNT_FILE`, which points to the credentials file (for a user or a service account)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b94f7119-bc1e-4ca3-907f-9d81e837ac59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install langchain-googledrive"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c4c7474e-49cb-48a1-b3a0-77fba8e2dd70",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"folder_id='root'\n",
|
||||
"#folder_id='1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8357f7f1-e2b1-41ef-8e38-48fcc3897dba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Use the advanced version.\n",
|
||||
"from langchain_googledrive.document_loaders import GoogleDriveLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "16ab9d3d-1782-4cb9-ab56-d87edbb25a18",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = GoogleDriveLoader(\n",
|
||||
" folder_id=folder_id,\n",
|
||||
" recursive=False,\n",
|
||||
" num_results=2, # Maximum number of files to load\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ebac43aa-dd64-4964-802a-a90172415fd1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"By default, all files with these MIME types can be converted to `Document`:\n",
|
||||
"- text/text\n",
|
||||
"- text/plain\n",
|
||||
"- text/html\n",
|
||||
"- text/csv\n",
|
||||
"- text/markdown\n",
|
||||
"- image/png\n",
|
||||
"- image/jpeg\n",
|
||||
"- application/epub+zip\n",
|
||||
"- application/pdf\n",
|
||||
"- application/rtf\n",
|
||||
"- application/vnd.google-apps.document (GDoc)\n",
|
||||
"- application/vnd.google-apps.presentation (GSlide)\n",
|
||||
"- application/vnd.google-apps.spreadsheet (GSheet)\n",
|
||||
"- application/vnd.google.colaboratory (Notebook colab)\n",
|
||||
"- application/vnd.openxmlformats-officedocument.presentationml.presentation (PPTX)\n",
|
||||
"- application/vnd.openxmlformats-officedocument.wordprocessingml.document (DOCX)\n",
|
||||
"\n",
|
||||
"It's possible to update or customize this. See the documentation of `GDriveLoader`.\n",
"\n",
"But the corresponding packages must be installed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b4560f35-a37d-44e2-be0b-adaa245b3b3d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install unstructured"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6cb08da3-27df-46de-b60e-583bb7e31af4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for doc in loader.load():\n",
|
||||
" print(\"---\")\n",
|
||||
" print(doc.page_content.strip()[:60]+\"...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cd13d7d1-db7a-498d-ac98-76ccd9ad9019",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customize the search pattern\n",
|
||||
"\n",
|
||||
"All parameters compatible with the Google [`list()`](https://developers.google.com/drive/api/v3/reference/files/list)\n",
"API can be set.\n",
"\n",
"To specify the new pattern of the Google request, you can use a `PromptTemplate()`.\n",
"The variables for the prompt can be set with `kwargs` in the constructor.\n",
"Some pre-formatted requests are provided (use `{query}`, `{folder_id}` and/or `{mime_type}`):\n",
"\n",
"You can customize the criteria used to select the files. A set of predefined filters is provided:\n",
|
||||
"| template | description |\n",
|
||||
"| -------------------------------------- | --------------------------------------------------------------------- |\n",
|
||||
"| gdrive-all-in-folder | Return all compatible files from a `folder_id` |\n",
|
||||
"| gdrive-query | Search `query` in all drives |\n",
|
||||
"| gdrive-by-name | Search file with name `query` |\n",
|
||||
"| gdrive-query-in-folder | Search `query` in `folder_id` (and sub-folders if `recursive=true`) |\n",
|
||||
"| gdrive-mime-type | Search a specific `mime_type` |\n",
|
||||
"| gdrive-mime-type-in-folder | Search a specific `mime_type` in `folder_id` |\n",
|
||||
"| gdrive-query-with-mime-type | Search `query` with a specific `mime_type` |\n",
|
||||
"| gdrive-query-with-mime-type-and-folder | Search `query` with a specific `mime_type` and in `folder_id` |\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "81348d59-8fd6-45d4-9de3-5df5cff5c7e2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = GoogleDriveLoader(\n",
|
||||
" folder_id=folder_id,\n",
|
||||
" recursive=False,\n",
|
||||
" template=\"gdrive-query\", # Default template to use\n",
|
||||
" query=\"machine learning\",\n",
|
||||
" num_results=2, # Maximum number of files to load\n",
|
||||
" supportsAllDrives=False, # GDrive `list()` parameter\n",
|
||||
")\n",
|
||||
"for doc in loader.load():\n",
|
||||
" print(\"---\")\n",
|
||||
" print(doc.page_content.strip()[:60]+\"...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "46c6ba5b-d4b1-4f0f-9801-5c1314021605",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can customize your pattern."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5a5a323b-8d96-46b7-b46a-fd69bd2c8e04",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts.prompt import PromptTemplate\n",
|
||||
"loader = GoogleDriveLoader(\n",
|
||||
" folder_id=folder_id,\n",
|
||||
" recursive=False,\n",
|
||||
" template=PromptTemplate(\n",
|
||||
" input_variables=[\"query\", \"query_name\"],\n",
|
||||
" template=\"fullText contains '{query}' and name contains '{query_name}' and trashed=false\",\n",
|
||||
" ), # Default template to use\n",
|
||||
" query=\"machine learning\",\n",
|
||||
" query_name=\"ML\", \n",
|
||||
" num_results=2, # Maximum number of files to load\n",
|
||||
")\n",
|
||||
"for doc in loader.load():\n",
|
||||
" print(\"---\")\n",
|
||||
" print(doc.page_content.strip()[:60]+\"...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "375bb465-8f69-407b-94bd-ffa3718ef500",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Modes for GSlide and GSheet\n",
|
||||
"The parameter `mode` accepts different values:\n",
|
||||
"\n",
|
||||
"- \"document\": return the body of each document\n",
|
||||
"- \"snippets\": return the description of each file (set in metadata of Google Drive files).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"The conversion can handle, in Markdown format:\n",
|
||||
"- bullet\n",
|
||||
"- link\n",
|
||||
"- table\n",
|
||||
"- titles\n",
|
||||
"\n",
|
||||
"The parameter `gslide_mode` accepts different values:\n",
|
||||
"\n",
|
||||
"- \"single\": one document with <PAGE BREAK>\n",
"- \"slide\": one document per slide\n",
"- \"elements\": one document for each element.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7493d7b0-0600-49af-8107-7f4597c92de7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = GoogleDriveLoader(\n",
|
||||
" template=\"gdrive-mime-type\",\n",
|
||||
" mime_type=\"application/vnd.google-apps.presentation\", # Only GSlide files\n",
|
||||
" gslide_mode=\"slide\",\n",
|
||||
" num_results=2, # Maximum number of files to load\n",
|
||||
")\n",
|
||||
"for doc in loader.load():\n",
|
||||
" print(\"---\")\n",
|
||||
" print(doc.page_content.strip()[:60]+\"...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9bf338fb-02d7-452f-8679-c50419b13464",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The parameter `gsheet_mode` accepts different values:\n",
|
||||
"- `\"single\"`: generate one document per line\n",
"- `\"elements\"`: one document with a Markdown array and <PAGE BREAK> tags."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "469f5af0-67db-4f15-8aee-88cde480729b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = GoogleDriveLoader(\n",
|
||||
" template=\"gdrive-mime-type\",\n",
|
||||
" mime_type=\"application/vnd.google-apps.spreadsheet\", # Only GSheet files\n",
|
||||
" gsheet_mode=\"elements\",\n",
|
||||
" num_results=2, # Maximum number of files to load\n",
|
||||
")\n",
|
||||
"for doc in loader.load():\n",
|
||||
" print(\"---\")\n",
|
||||
" print(doc.page_content.strip()[:60]+\"...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09acb864-e919-4add-9e06-deba6f7f0cd8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Advanced usage\n",
|
||||
"All Google files have a 'description' in their metadata. This field can be used to store a summary of the document or other indexed tags (see the method `lazy_update_description_with_summary()`).\n",
"\n",
"If you use `mode=\"snippets\"`, only the description will be used for the body; otherwise, `metadata['summary']` holds the field.\n",
"\n",
"Sometimes a specific filter can be used to extract information from the filename or to select files matching specific criteria; you can pass a filter for this.\n",
"\n",
"Sometimes many documents are returned, and it's not necessary to hold them all in memory at the same time: you can use the lazy versions of the methods to get one document at a time (a minimal sketch follows below). It's also better to use a complex query than a recursive search, since a query must be applied for each folder when `recursive=True` is active."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a5e9c8eb-a266-4ae6-a760-d7826a0aa7c5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"loader = GoogleDriveLoader(\n",
|
||||
" gdrive_api_file=os.environ[\"GOOGLE_ACCOUNT_FILE\"],\n",
|
||||
" num_results=2,\n",
|
||||
" template=\"gdrive-query\",\n",
|
||||
" filter=lambda search, file: \"#test\" not in file.get('description',''),\n",
|
||||
" query='machine learning',\n",
|
||||
" supportsAllDrives=False,\n",
|
||||
" )\n",
|
||||
"for doc in loader.load():\n",
|
||||
" print(\"---\")\n",
|
||||
" print(doc.page_content.strip()[:60]+\"...\")"
|
||||
]
|
||||
},
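{
"cell_type": "code",
"execution_count": null,
"id": "lazy-load-sketch",
"metadata": {},
"outputs": [],
"source": [
"# A minimal sketch of the lazy approach recommended above, assuming the loader\n",
"# exposes the standard lazy_load() iterator: documents are yielded one at a\n",
"# time instead of being materialized in a single list.\n",
"for doc in loader.lazy_load():\n",
"    print(\"---\")\n",
"    print(doc.page_content.strip()[:60] + \"...\")\n",
"    break  # stop after the first document to keep memory usage low\n"
]
},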
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "51efa73a-4e2d-4f9c-abaf-6c9bde2ff69d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -244,7 +525,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.13"
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
File diff suppressed because one or more lines are too long
283 docs/extras/integrations/document_transformers/docai.ipynb Normal file
@@ -0,0 +1,283 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "48438efb-9f0d-473b-a91c-9f1e29c2539d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.blob_loaders import Blob\n",
|
||||
"from langchain.document_loaders.parsers import DocAIParser"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f95ac25b-f025-40c3-95b8-77919fc4da7f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"DocAI is a Google Cloud platform to transform unstructured data from documents into structured data, making it easier to understand, analyze, and consume. You can read more about it: https://cloud.google.com/document-ai/docs/overview "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "51946817-798c-4d11-abd6-db2ae53a0270",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First, you need to set up a GCS bucket and create your own OCR processor as described here: https://cloud.google.com/document-ai/docs/create-processor\n",
|
||||
"`GCS_OUTPUT_PATH` should be a path to a folder on GCS (starting with `gs://`), and the processor name should look like `projects/PROJECT_NUMBER/locations/LOCATION/processors/PROCESSOR_ID`. You can get it either programmatically or copy it from the `Prediction endpoint` section of the `Processor details` tab in the Google Cloud Console."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "ac85f7f3-3ef6-41d5-920a-b55f2939c202",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"PROJECT = \"PUT_SOMETHING_HERE\"\n",
|
||||
"GCS_OUTPUT_PATH = \"PUT_SOMETHING_HERE\"\n",
|
||||
"PROCESSOR_NAME = \"PUT_SOMETHING_HERE\""
|
||||
]
|
||||
},
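{
"cell_type": "markdown",
"id": "processor-name-sketch",
"metadata": {},
"source": [
"As a sketch, a processor name following the format above could be assembled like this (the project number and processor ID below are hypothetical placeholders):\n",
"\n",
"```python\n",
"PROJECT_NUMBER = \"123456789012\"  # hypothetical\n",
"PROCESSOR_ID = \"abcdef0123456789\"  # hypothetical\n",
"PROCESSOR_NAME = f\"projects/{PROJECT_NUMBER}/locations/us/processors/{PROCESSOR_ID}\"\n",
"```"
]
},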
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fad2bcca-1c0e-4888-b82d-15823ba57e60",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's create a parser:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "dcc0c65a-86c5-448d-8b21-2e564b1903b7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"parser = DocAIParser(location=\"us\", processor_name=PROCESSOR_NAME, gcs_output_path=GCS_OUTPUT_PATH)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b8b5a3ff-650a-4ad3-a73a-395f86e4c9e1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's parse Alphabet's Q1 2023 earnings release from here: https://abc.xyz/assets/a7/5b/9e5ae0364b12b4c883f3cf748226/goog-exhibit-99-1-q1-2023-19.pdf. Copy it to your GCS bucket first, and adjust the path below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "373cc18e-a311-4c8d-8180-47e4ade1d2ad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"blob = Blob(path=\"gs://vertex-pgt/examples/goog-exhibit-99-1-q1-2023-19.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "6ef84fad-2981-456d-a6b4-3a6a1a46d511",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = list(parser.lazy_parse(blob))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3f8e4ee1-e07d-4c29-a120-4d56aae91859",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We'll get one document per page, 11 in total:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "343919f5-35d2-47fb-9790-de464649ebdf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"11\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(len(docs))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b104ae56-011b-4abe-ac07-e999c69494c5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can run end-to-end parsing of blobs one by one. If you have many documents, it might be a better approach to batch them together, and perhaps even decouple parsing from handling its results."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "9ecc1b99-5cef-47b0-a125-dbb2c41d2224",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"['projects/543079149601/locations/us/operations/16447136779727347991']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"operations = parser.docai_parse([blob])\n",
|
||||
"print([op.operation.name for op in operations])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a2d24d63-c2c7-454c-9df3-2a9cf51309a6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can check whether operations are finished:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "ab11efb0-e514-4f44-9ba5-3d638a59c9e6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"parser.is_running(operations)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "602ca0bc-080a-4a4e-a413-0e705aeab189",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And when they're finished, you can parse the results:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "ec1e6041-bc10-47d4-ba64-d09055c14f27",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"False"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"parser.is_running(operations)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "95d89da4-1c8a-413d-8473-ddd4a39375a5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"DocAIParsingResults(source_path='gs://vertex-pgt/examples/goog-exhibit-99-1-q1-2023-19.pdf', parsed_path='gs://vertex-pgt/test/run1/16447136779727347991/0')\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = parser.get_results(operations)\n",
|
||||
"print(results[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "87e5b606-1679-46c7-9577-4cf9bc93a752",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And now we can finally generate Documents from parsed results:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "08e8878d-889b-41ad-9500-2f772d38782f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = list(parser.parse_from_results(results))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "c59525fb-448d-444b-8f12-c4aea791e19b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"11\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(len(docs))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"environment": {
|
||||
"kernel": "python3",
|
||||
"name": "common-cpu.m109",
|
||||
"type": "gcloud",
|
||||
"uri": "gcr.io/deeplearning-platform-release/base-cpu:m109"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -221,9 +221,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.15"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -4,9 +4,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AzureML Online Endpoint\n",
|
||||
"# Azure ML\n",
|
||||
"\n",
|
||||
"[AzureML](https://azure.microsoft.com/en-us/products/machine-learning/) is a platform used to build, train, and deploy machine learning models. Users can explore the types of models to deploy in the Model Catalog, which provides Azure Foundation Models and OpenAI Models. Azure Foundation Models include various open-source models and popular Hugging Face models. Users can also import models of their liking into AzureML.\n",
|
||||
"[Azure ML](https://azure.microsoft.com/en-us/products/machine-learning/) is a platform used to build, train, and deploy machine learning models. Users can explore the types of models to deploy in the Model Catalog, which provides Azure Foundation Models and OpenAI Models. Azure Foundation Models include various open-source models and popular Hugging Face models. Users can also import models of their liking into AzureML.\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use an LLM hosted on an `AzureML online endpoint`."
|
||||
]
|
||||
@@ -236,9 +236,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -30,7 +30,45 @@
|
||||
"```python\n",
|
||||
"import os\n",
|
||||
"os.environ[\"OPENAI_API_TYPE\"] = \"azure\"\n",
|
||||
"...\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"## Azure Active Directory Authentication\n",
|
||||
"There are two ways you can authenticate to Azure OpenAI:\n",
|
||||
"- API Key\n",
|
||||
"- Azure Active Directory (AAD)\n",
|
||||
"\n",
|
||||
"Using the API key is the easiest way to get started. You can find your API key in the Azure portal under your Azure OpenAI resource.\n",
|
||||
"\n",
|
||||
"However, if you have complex security requirements - you may want to use Azure Active Directory. You can find more information on how to use AAD with Azure OpenAI [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/managed-identity).\n",
|
||||
"\n",
|
||||
"If you are developing locally, you will need to have the Azure CLI installed and be logged in. You can install the Azure CLI [here](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli). Then, run `az login` to log in.\n",
|
||||
"\n",
|
||||
"Add an Azure role assignment `Cognitive Services OpenAI User` scoped to your Azure OpenAI resource. This will allow you to get a token from AAD to use with Azure OpenAI. You can grant this role assignment to a user, group, service principal, or managed identity. For more information about Azure OpenAI RBAC roles see [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/role-based-access-control).\n",
|
||||
"\n",
|
||||
"To use AAD in Python with LangChain, install the `azure-identity` package. Then, set `OPENAI_API_TYPE` to `azure_ad`. Next, use the `DefaultAzureCredential` class to get a token from AAD by calling `get_token` as shown below. Finally, set the `OPENAI_API_KEY` environment variable to the token value.\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"import os\n",
|
||||
"from azure.identity import DefaultAzureCredential\n",
|
||||
"\n",
|
||||
"# Get the Azure Credential\n",
|
||||
"credential = DefaultAzureCredential()\n",
|
||||
"\n",
|
||||
"# Set the API type to `azure_ad`\n",
|
||||
"os.environ[\"OPENAI_API_TYPE\"] = \"azure_ad\"\n",
|
||||
"# Set the API_KEY to the token from the Azure credential\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = credential.get_token(\"https://cognitiveservices.azure.com/.default\").token\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"The `DefaultAzureCredential` class is an easy way to get started with AAD authentication. You can also customize the credential chain if necessary. In the example shown below, we first try Managed Identity, then fall back to the Azure CLI. This is useful if you are running your code in Azure, but want to develop locally.\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"from azure.identity import ChainedTokenCredential, ManagedIdentityCredential, AzureCliCredential\n",
|
||||
"\n",
|
||||
"credential = ChainedTokenCredential(\n",
|
||||
" ManagedIdentityCredential(),\n",
|
||||
" AzureCliCredential()\n",
|
||||
")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"## Deployments\n",
|
||||
@@ -144,7 +182,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[1mAzureOpenAI\u001b[0m\n",
|
||||
"\u001B[1mAzureOpenAI\u001B[0m\n",
|
||||
"Params: {'deployment_name': 'text-davinci-002', 'model_name': 'text-davinci-002', 'temperature': 0.7, 'max_tokens': 256, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'best_of': 1}\n"
|
||||
]
|
||||
}
|
||||
@@ -178,7 +216,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.12"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
File diff suppressed because one or more lines are too long
@@ -1,17 +1,28 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Cloud Platform Vertex AI PaLM \n",
|
||||
"# Google Vertex AI PaLM \n",
|
||||
"\n",
|
||||
"Note: This is seperate from the Google PaLM integration, it exposes [Vertex AI PaLM API](https://cloud.google.com/vertex-ai/docs/generative-ai/learn/overview) on Google Cloud. \n",
|
||||
"**Note:** This is seperate from the `Google PaLM` integration, it exposes [Vertex AI PaLM API](https://cloud.google.com/vertex-ai/docs/generative-ai/learn/overview) on `Google Cloud`. \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setting up"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"By default, Google Cloud [does not use](https://cloud.google.com/vertex-ai/docs/generative-ai/data-governance#foundation_model_development) customer data to train its foundation models as part of Google Cloud's AI/ML Privacy Commitment. More details about how Google processes data can also be found in [Google's Customer Data Processing Addendum (CDPA)](https://cloud.google.com/terms/data-processing-addendum).\n",
|
||||
"\n",
|
||||
"By default, Google Cloud [does not use](https://cloud.google.com/vertex-ai/docs/generative-ai/data-governance#foundation_model_development) Customer Data to train its foundation models as part of Google Cloud`s AI/ML Privacy Commitment. More details about how Google processes data can also be found in [Google's Customer Data Processing Addendum (CDPA)](https://cloud.google.com/terms/data-processing-addendum).\n",
|
||||
"\n",
|
||||
"To use Vertex AI PaLM you must have the `google-cloud-aiplatform` Python package installed and either:\n",
|
||||
"To use `Vertex AI PaLM` you must have the `google-cloud-aiplatform` Python package installed and either:\n",
|
||||
"- Have credentials configured for your environment (gcloud, workload identity, etc...)\n",
|
||||
"- Store the path to a service account JSON file as the GOOGLE_APPLICATION_CREDENTIALS environment variable\n",
|
||||
"\n",
|
||||
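"A minimal sketch of the second option (the key path is a placeholder):\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# Hypothetical path; point this at your own service account key file\n",
|
||||
"os.environ[\"GOOGLE_APPLICATION_CREDENTIALS\"] = \"/path/to/service-account.json\"\n",
|
||||
"```\n",
|
||||
"\n",
|
||||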
@@ -19,8 +30,7 @@
|
||||
"\n",
|
||||
"For more information, see: \n",
|
||||
"- https://cloud.google.com/docs/authentication/application-default-credentials#GAC\n",
|
||||
"- https://googleapis.dev/python/google-auth/latest/reference/google.auth.html#module-google.auth\n",
|
||||
"\n"
|
||||
"- https://googleapis.dev/python/google-auth/latest/reference/google.auth.html#module-google.auth"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -40,7 +50,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import VertexAI\n",
|
||||
"from langchain.llms import VertexAI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Question-answering example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate, LLMChain"
|
||||
]
|
||||
},
|
||||
@@ -98,13 +123,21 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can now leverage the Codey API for code generation within Vertex AI. The model names are:\n",
|
||||
"- code-bison: for code suggestion\n",
|
||||
"- code-gecko: for code completion"
|
||||
"## Code generation example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can now leverage the `Codey API` for code generation within `Vertex AI`. \n",
|
||||
"\n",
|
||||
"The model names are:\n",
|
||||
"- `code-bison`: for code suggestion\n",
|
||||
"- `code-gecko`: for code completion"
|
||||
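"\n",
|
||||
"A minimal sketch of selecting one of these models (the parameter values are illustrative, not required):\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"from langchain.llms import VertexAI\n",
|
||||
"\n",
|
||||
"# model_name picks the Codey model; token limit and temperature are illustrative\n",
|
||||
"llm = VertexAI(model_name=\"code-bison\", max_output_tokens=1000, temperature=0.3)\n",
|
||||
"```"
|
||||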
]
|
||||
},
|
||||
{
|
||||
@@ -173,6 +206,68 @@
|
||||
"\n",
|
||||
"llm_chain.run(question)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using models deployed on Vertex Model Garden"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Vertex Model Garden [exposes](https://cloud.google.com/vertex-ai/docs/start/explore-models) open-sourced models that can be deployed and served on Vertex AI. If you have successfully deployed a model from Vertex Model Garden, you can find a corresponding Vertex AI [endpoint](https://cloud.google.com/vertex-ai/docs/general/deployment#what_happens_when_you_deploy_a_model) in the console or via API."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import VertexAIModelGarden"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm_oss = VertexAIModelGarden(\n",
|
||||
" project=\"YOUR PROJECT\",\n",
|
||||
" endpoint_id=\"YOUR ENDPOINT_ID\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm_oss(\"What is the meaning of life?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also use it as a chain:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm_oss_chain = LLMChain(prompt=prompt, llm=llm_oss(\"What is the meaning of life?\")\n",
|
||||
")\n",
|
||||
"llm_oss_chain.run(question)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -191,7 +286,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.12"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
@@ -7,9 +7,20 @@
|
||||
"# Llama.cpp\n",
|
||||
"\n",
|
||||
"[llama-cpp-python](https://github.com/abetlen/llama-cpp-python) is a Python binding for [llama.cpp](https://github.com/ggerganov/llama.cpp). \n",
|
||||
"It supports [several LLMs](https://github.com/ggerganov/llama.cpp).\n",
|
||||
"\n",
|
||||
"This notebook goes over how to run `llama-cpp-python` within LangChain."
|
||||
"It supports inference for [many LLMs](https://github.com/ggerganov/llama.cpp), which can be accessed on [HuggingFace](https://huggingface.co/TheBloke).\n",
|
||||
"\n",
|
||||
"This notebook goes over how to run `llama-cpp-python` within LangChain.\n",
|
||||
"\n",
|
||||
"**Note: new versions of `llama-cpp-python` use GGUF model files (see [here](https://github.com/abetlen/llama-cpp-python/pull/633)).**\n",
|
||||
"\n",
|
||||
"This is a breaking change.\n",
|
||||
" \n",
|
||||
"To convert existing GGML models to GGUF you can run the following in [llama.cpp](https://github.com/ggerganov/llama.cpp):\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"python ./convert-llama-ggmlv3-to-gguf.py --eps 1e-5 --input models/openorca-platypus2-13b.ggmlv3.q4_0.bin --output models/openorca-platypus2-13b.gguf.q4_0.bin\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -19,7 +30,7 @@
|
||||
"## Installation\n",
|
||||
"\n",
|
||||
"There are different options on how to install the llama-cpp package: \n",
|
||||
"- only CPU usage\n",
|
||||
"- CPU usage\n",
|
||||
"- CPU + GPU (using one of many BLAS backends)\n",
|
||||
"- Metal GPU (MacOS with Apple Silicon Chip) \n",
|
||||
"\n",
|
||||
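"For example, the CPU-only variant is a plain pip install, while the GPU variants pass extra build flags (covered in the installation sections below): \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"pip install llama-cpp-python\n",
|
||||
"```\n",
|
||||
"\n",
|
||||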
@@ -171,7 +182,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
@@ -207,15 +218,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Callbacks support token-wise streaming\n",
|
||||
"callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])\n",
|
||||
"# Verbose is required to pass to the callback manager"
|
||||
"callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -240,12 +250,12 @@
|
||||
"source": [
|
||||
"# Make sure the model path is correct for your system!\n",
|
||||
"llm = LlamaCpp(\n",
|
||||
" model_path=\"/Users/rlm/Desktop/Code/llama/llama-2-7b-ggml/llama-2-7b-chat.ggmlv3.q4_0.bin\",\n",
|
||||
" model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n",
|
||||
" temperature=0.75,\n",
|
||||
" max_tokens=2000,\n",
|
||||
" top_p=1,\n",
|
||||
" callback_manager=callback_manager,\n",
|
||||
" verbose=True,\n",
|
||||
" callback_manager=callback_manager, \n",
|
||||
" verbose=True, # Verbose is required to pass to the callback manager\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -375,7 +385,6 @@
|
||||
],
|
||||
"source": [
|
||||
"question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\n",
|
||||
"\n",
|
||||
"llm_chain.run(question)"
|
||||
]
|
||||
},
|
||||
@@ -397,100 +406,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n",
|
||||
"llama_model_load_internal: format = ggjt v3 (latest)\n",
|
||||
"llama_model_load_internal: n_vocab = 32000\n",
|
||||
"llama_model_load_internal: n_ctx = 512\n",
|
||||
"llama_model_load_internal: n_embd = 5120\n",
|
||||
"llama_model_load_internal: n_mult = 256\n",
|
||||
"llama_model_load_internal: n_head = 40\n",
|
||||
"llama_model_load_internal: n_head_kv = 40\n",
|
||||
"llama_model_load_internal: n_layer = 40\n",
|
||||
"llama_model_load_internal: n_rot = 128\n",
|
||||
"llama_model_load_internal: n_gqa = 1\n",
|
||||
"llama_model_load_internal: rnorm_eps = 5.0e-06\n",
|
||||
"llama_model_load_internal: n_ff = 13824\n",
|
||||
"llama_model_load_internal: freq_base = 10000.0\n",
|
||||
"llama_model_load_internal: freq_scale = 1\n",
|
||||
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
|
||||
"llama_model_load_internal: model size = 13B\n",
|
||||
"llama_model_load_internal: ggml ctx size = 0.11 MB\n",
|
||||
"llama_model_load_internal: mem required = 6983.72 MB (+ 400.00 MB per state)\n",
|
||||
"llama_new_context_with_model: kv self size = 400.00 MB\n",
|
||||
"ggml_metal_init: allocating\n",
|
||||
"ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n",
|
||||
"ggml_metal_init: loaded kernel_add 0x1405ed6b0\n",
|
||||
"ggml_metal_init: loaded kernel_add_row 0x1405eee00\n",
|
||||
"ggml_metal_init: loaded kernel_mul 0x1405ee650\n",
|
||||
"ggml_metal_init: loaded kernel_mul_row 0x1405eda20\n",
|
||||
"ggml_metal_init: loaded kernel_scale 0x121fc1d80\n",
|
||||
"ggml_metal_init: loaded kernel_silu 0x121fc1fe0\n",
|
||||
"ggml_metal_init: loaded kernel_relu 0x121fc2240\n",
|
||||
"ggml_metal_init: loaded kernel_gelu 0x121fc24e0\n",
|
||||
"ggml_metal_init: loaded kernel_soft_max 0x121fc2950\n",
|
||||
"ggml_metal_init: loaded kernel_diag_mask_inf 0x121fc2d60\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_f16 0x121fc3160\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_0 0x121fc3a20\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_1 0x121fc4170\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q2_K 0x121fc4890\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q3_K 0x121fc5010\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_K 0x121fc5750\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q5_K 0x121fc5e90\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q6_K 0x121fc65d0\n",
|
||||
"ggml_metal_init: loaded kernel_rms_norm 0x121fc6d20\n",
|
||||
"ggml_metal_init: loaded kernel_norm 0x121fc7460\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x121fc7dd0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x121fc8610\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x121fc8e50\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x1405edc80\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x1405efdc0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x140306f30\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x1403073d0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x140307aa0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x140307f80\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x140308460\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x140308940\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x140308e20\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x140309300\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x1403097e0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x140309cc0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x14030a1a0\n",
|
||||
"ggml_metal_init: loaded kernel_rope 0x14030a400\n",
|
||||
"ggml_metal_init: loaded kernel_alibi_f32 0x14030aa00\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f32_f16 0x14030afd0\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f32_f32 0x14030b5a0\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f16_f16 0x14030bb70\n",
|
||||
"ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n",
|
||||
"ggml_metal_init: hasUnifiedMemory = true\n",
|
||||
"ggml_metal_init: maxTransferRate = built-in GPU\n",
|
||||
"llama_new_context_with_model: compute buffer total size = 91.35 MB\n",
|
||||
"llama_new_context_with_model: max tensor size = 87.89 MB\n",
|
||||
"ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, ( 6984.50 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.36 MB, ( 6985.86 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, ( 7387.86 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, ( 7477.88 / 21845.34)\n",
|
||||
"AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"n_gpu_layers = 40 # Change this value based on your model and your GPU VRAM pool.\n",
|
||||
"n_batch = 512 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.\n",
|
||||
"\n",
|
||||
"# Make sure the model path is correct for your system!\n",
|
||||
"llm = LlamaCpp(\n",
|
||||
" model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n",
|
||||
" model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n",
|
||||
" n_gpu_layers=n_gpu_layers,\n",
|
||||
" n_batch=n_batch,\n",
|
||||
" callback_manager=callback_manager,\n",
|
||||
" verbose=True,\n",
|
||||
" verbose=True, # Verbose is required to pass to the callback manager\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -505,11 +434,13 @@
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"Justin Bieber was born on March 1, 1994. The Super Bowl is played at the end of the NFL season which runs from September to February.\n",
|
||||
"1. Identify Justin Bieber's birth date: Justin Bieber was born on March 1, 1994.\n",
|
||||
"\n",
|
||||
"In 1994, the NFL season ended with Super Bowl XXVIII which was played on January 28th, 1994.\n",
|
||||
"2. Find the Super Bowl winner of that year: The NFL season of 1993 with the Super Bowl being played in January or of 1994.\n",
|
||||
"\n",
|
||||
"So, there was no Super Bowl in the year Justin Bieber was born. The Super Bowl has only been around since 1967 and is played annually between the champions of the National Football Conference (NFC) and the American Football Conference (AFC)."
|
||||
"3. Determine which team won the game: The Dallas Cowboys faced the Buffalo Bills in Super Bowl XXVII on January 31, 1993 (as the year is mis-labelled due to a error). The Dallas Cowboys won this matchup.\n",
|
||||
"\n",
|
||||
"So, Justin Bieber was born when the Dallas Cowboys were the reigning NFL Super Bowl."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -517,17 +448,17 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"llama_print_timings: load time = 427.90 ms\n",
|
||||
"llama_print_timings: sample time = 98.36 ms / 133 runs ( 0.74 ms per token, 1352.18 tokens per second)\n",
|
||||
"llama_print_timings: prompt eval time = 427.83 ms / 45 tokens ( 9.51 ms per token, 105.18 tokens per second)\n",
|
||||
"llama_print_timings: eval time = 3687.12 ms / 132 runs ( 27.93 ms per token, 35.80 tokens per second)\n",
|
||||
"llama_print_timings: total time = 4401.84 ms\n"
|
||||
"llama_print_timings: load time = 427.63 ms\n",
|
||||
"llama_print_timings: sample time = 115.85 ms / 164 runs ( 0.71 ms per token, 1415.67 tokens per second)\n",
|
||||
"llama_print_timings: prompt eval time = 427.53 ms / 45 tokens ( 9.50 ms per token, 105.26 tokens per second)\n",
|
||||
"llama_print_timings: eval time = 4526.53 ms / 163 runs ( 27.77 ms per token, 36.01 tokens per second)\n",
|
||||
"llama_print_timings: total time = 5293.77 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\n\\nJustin Bieber was born on March 1, 1994. The Super Bowl is played at the end of the NFL season which runs from September to February.\\n\\nIn 1994, the NFL season ended with Super Bowl XXVIII which was played on January 28th, 1994.\\n\\nSo, there was no Super Bowl in the year Justin Bieber was born. The Super Bowl has only been around since 1967 and is played annually between the champions of the National Football Conference (NFC) and the American Football Conference (AFC).'"
|
||||
"\"\\n\\n1. Identify Justin Bieber's birth date: Justin Bieber was born on March 1, 1994.\\n\\n2. Find the Super Bowl winner of that year: The NFL season of 1993 with the Super Bowl being played in January or of 1994.\\n\\n3. Determine which team won the game: The Dallas Cowboys faced the Buffalo Bills in Super Bowl XXVII on January 31, 1993 (as the year is mis-labelled due to a error). The Dallas Cowboys won this matchup.\\n\\nSo, Justin Bieber was born when the Dallas Cowboys were the reigning NFL Super Bowl.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
@@ -537,9 +468,7 @@
|
||||
],
|
||||
"source": [
|
||||
"llm_chain = LLMChain(prompt=prompt, llm=llm)\n",
|
||||
"\n",
|
||||
"question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\n",
|
||||
"\n",
|
||||
"llm_chain.run(question)"
|
||||
]
|
||||
},
|
||||
@@ -563,101 +492,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n",
|
||||
"llama_model_load_internal: format = ggjt v3 (latest)\n",
|
||||
"llama_model_load_internal: n_vocab = 32000\n",
|
||||
"llama_model_load_internal: n_ctx = 512\n",
|
||||
"llama_model_load_internal: n_embd = 5120\n",
|
||||
"llama_model_load_internal: n_mult = 256\n",
|
||||
"llama_model_load_internal: n_head = 40\n",
|
||||
"llama_model_load_internal: n_head_kv = 40\n",
|
||||
"llama_model_load_internal: n_layer = 40\n",
|
||||
"llama_model_load_internal: n_rot = 128\n",
|
||||
"llama_model_load_internal: n_gqa = 1\n",
|
||||
"llama_model_load_internal: rnorm_eps = 5.0e-06\n",
|
||||
"llama_model_load_internal: n_ff = 13824\n",
|
||||
"llama_model_load_internal: freq_base = 10000.0\n",
|
||||
"llama_model_load_internal: freq_scale = 1\n",
|
||||
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
|
||||
"llama_model_load_internal: model size = 13B\n",
|
||||
"llama_model_load_internal: ggml ctx size = 0.11 MB\n",
|
||||
"llama_model_load_internal: mem required = 6983.72 MB (+ 400.00 MB per state)\n",
|
||||
"llama_new_context_with_model: kv self size = 400.00 MB\n",
|
||||
"ggml_metal_init: allocating\n",
|
||||
"ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n",
|
||||
"ggml_metal_init: loaded kernel_add 0x113b42480\n",
|
||||
"ggml_metal_init: loaded kernel_add_row 0x113b44210\n",
|
||||
"ggml_metal_init: loaded kernel_mul 0x113b43a80\n",
|
||||
"ggml_metal_init: loaded kernel_mul_row 0x113b44880\n",
|
||||
"ggml_metal_init: loaded kernel_scale 0x113b45010\n",
|
||||
"ggml_metal_init: loaded kernel_silu 0x113b45650\n",
|
||||
"ggml_metal_init: loaded kernel_relu 0x113b427f0\n",
|
||||
"ggml_metal_init: loaded kernel_gelu 0x113b46300\n",
|
||||
"ggml_metal_init: loaded kernel_soft_max 0x113b46980\n",
|
||||
"ggml_metal_init: loaded kernel_diag_mask_inf 0x113b46e20\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_f16 0x113b47860\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_0 0x113b48010\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_1 0x113b48880\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q2_K 0x113b48f70\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q3_K 0x113b49e00\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_K 0x113b4a530\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q5_K 0x113b4ac70\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q6_K 0x113b4b3b0\n",
|
||||
"ggml_metal_init: loaded kernel_rms_norm 0x113b4bb00\n",
|
||||
"ggml_metal_init: loaded kernel_norm 0x113b4c1a0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x113b4cba0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x113b4d360\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x113b4dba0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x113b4e560\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x113b4ed10\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x113b4f580\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x113b4fdc0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x113b50740\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x113b51250\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x113b51a80\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x113b522b0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x113b52ae0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x113b53310\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x113b53b40\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x113b54370\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x113b54ba0\n",
|
||||
"ggml_metal_init: loaded kernel_rope 0x113b551a0\n",
|
||||
"ggml_metal_init: loaded kernel_alibi_f32 0x113b55b10\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f32_f16 0x113b56450\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f32_f32 0x113b56dc0\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f16_f16 0x113b576b0\n",
|
||||
"ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n",
|
||||
"ggml_metal_init: hasUnifiedMemory = true\n",
|
||||
"ggml_metal_init: maxTransferRate = built-in GPU\n",
|
||||
"llama_new_context_with_model: compute buffer total size = 91.35 MB\n",
|
||||
"llama_new_context_with_model: max tensor size = 87.89 MB\n",
|
||||
"ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, ( 6984.50 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.36 MB, ( 6985.86 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, ( 7387.86 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, ( 7477.88 / 21845.34)AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"n_gpu_layers = 1 # Metal set to 1 is enough.\n",
|
||||
"n_batch = 512 # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n",
|
||||
"\n",
|
||||
"# Make sure the model path is correct for your system!\n",
|
||||
"llm = LlamaCpp(\n",
|
||||
" model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n",
|
||||
" model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n",
|
||||
" n_gpu_layers=n_gpu_layers,\n",
|
||||
" n_batch=n_batch,\n",
|
||||
 f16_kv=True, # MUST set to True, otherwise you will run into problems after a couple of calls\n",
|
||||
" callback_manager=callback_manager,\n",
|
||||
" verbose=True,\n",
|
||||
" verbose=True, # Verbose is required to pass to the callback manager\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -682,147 +530,32 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Grammers\n",
|
||||
"### Grammars\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"We can specify [grammers](https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md) to constrain model outputs.\n",
|
||||
"We can specify [grammars](https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md) to constrain model outputs.\n",
|
||||
"\n",
|
||||
"Supply the path to the specifed `json.gbnf` file."
|
||||
"This will sample tokens according to the grammar.\n",
|
||||
" \n",
|
||||
"For example, supply the path to the specifed `json.gbnf` file in order to produce JSON."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n",
|
||||
"llama_model_load_internal: format = ggjt v3 (latest)\n",
|
||||
"llama_model_load_internal: n_vocab = 32000\n",
|
||||
"llama_model_load_internal: n_ctx = 512\n",
|
||||
"llama_model_load_internal: n_embd = 5120\n",
|
||||
"llama_model_load_internal: n_mult = 256\n",
|
||||
"llama_model_load_internal: n_head = 40\n",
|
||||
"llama_model_load_internal: n_head_kv = 40\n",
|
||||
"llama_model_load_internal: n_layer = 40\n",
|
||||
"llama_model_load_internal: n_rot = 128\n",
|
||||
"llama_model_load_internal: n_gqa = 1\n",
|
||||
"llama_model_load_internal: rnorm_eps = 5.0e-06\n",
|
||||
"llama_model_load_internal: n_ff = 13824\n",
|
||||
"llama_model_load_internal: freq_base = 10000.0\n",
|
||||
"llama_model_load_internal: freq_scale = 1\n",
|
||||
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
|
||||
"llama_model_load_internal: model size = 13B\n",
|
||||
"llama_model_load_internal: ggml ctx size = 0.11 MB\n",
|
||||
"llama_model_load_internal: mem required = 6983.72 MB (+ 400.00 MB per state)\n",
|
||||
"llama_new_context_with_model: kv self size = 400.00 MB\n",
|
||||
"ggml_metal_init: allocating\n",
|
||||
"ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n",
|
||||
"ggml_metal_init: loaded kernel_add 0x1516fb530\n",
|
||||
"ggml_metal_init: loaded kernel_add_row 0x1516fb790\n",
|
||||
"ggml_metal_init: loaded kernel_mul 0x1516fb9f0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_row 0x1516fbc50\n",
|
||||
"ggml_metal_init: loaded kernel_scale 0x1516fbeb0\n",
|
||||
"ggml_metal_init: loaded kernel_silu 0x1516fc110\n",
|
||||
"ggml_metal_init: loaded kernel_relu 0x1516fc370\n",
|
||||
"ggml_metal_init: loaded kernel_gelu 0x1516fc5d0\n",
|
||||
"ggml_metal_init: loaded kernel_soft_max 0x1516fc830\n",
|
||||
"ggml_metal_init: loaded kernel_diag_mask_inf 0x1516fca90\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_f16 0x1516fccf0\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_0 0x1516fcf50\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_1 0x1516fd1b0\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q2_K 0x1516fd410\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q3_K 0x1516fd670\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_K 0x1516fd8d0\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q5_K 0x1516fdb30\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q6_K 0x1516fdd90\n",
|
||||
"ggml_metal_init: loaded kernel_rms_norm 0x1516fdff0\n",
|
||||
"ggml_metal_init: loaded kernel_norm 0x1516fe250\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x1516fe4b0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x1516fe710\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x1516fe970\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x1516febd0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x1516fee30\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x1516ff090\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x1516ff2f0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x1516ff550\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x1516ff7b0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x121fce650\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x121fcdce0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x121fceab0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x121fced10\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x121fcef70\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x121fcf1d0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x121fcf430\n",
|
||||
"ggml_metal_init: loaded kernel_rope 0x121fcf690\n",
|
||||
"ggml_metal_init: loaded kernel_alibi_f32 0x121fcf8f0\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f32_f16 0x121fcfb50\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f32_f32 0x121fcfdb0\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f16_f16 0x121fd0010\n",
|
||||
"ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n",
|
||||
"ggml_metal_init: hasUnifiedMemory = true\n",
|
||||
"ggml_metal_init: maxTransferRate = built-in GPU\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"root ::= object \n",
|
||||
"object ::= [{] ws object_11 [}] \n",
|
||||
"value ::= object | array | string | number | boolean | [n] [u] [l] [l] \n",
|
||||
"array ::= [[] ws array_15 []] \n",
|
||||
"string ::= [\"] string_18 [\"] ws \n",
|
||||
"number ::= number_19 number_20 ws \n",
|
||||
"boolean ::= boolean_21 ws \n",
|
||||
"ws ::= ws_23 \n",
|
||||
"object_8 ::= string [:] ws value object_10 \n",
|
||||
"object_9 ::= [,] ws string [:] ws value \n",
|
||||
"object_10 ::= object_9 object_10 | \n",
|
||||
"object_11 ::= object_8 | \n",
|
||||
"array_12 ::= value array_14 \n",
|
||||
"array_13 ::= [,] ws value \n",
|
||||
"array_14 ::= array_13 array_14 | \n",
|
||||
"array_15 ::= array_12 | \n",
|
||||
"string_16 ::= [^\"\\] | [\\] string_17 \n",
|
||||
"string_17 ::= [\"\\/bfnrt] | [u] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] \n",
|
||||
"string_18 ::= string_16 string_18 | \n",
|
||||
"number_19 ::= [-] | \n",
|
||||
"number_20 ::= [0-9] number_20 | [0-9] \n",
|
||||
"boolean_21 ::= [t] [r] [u] [e] | [f] [a] [l] [s] [e] \n",
|
||||
"ws_22 ::= [ <U+0009><U+000A>] ws \n",
|
||||
"ws_23 ::= ws_22 | \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"llama_new_context_with_model: compute buffer total size = 91.35 MB\n",
|
||||
"llama_new_context_with_model: max tensor size = 87.89 MB\n",
|
||||
"ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, (14468.72 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.36 MB, (14470.08 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, (14872.08 / 21845.34)\n",
|
||||
"ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, (14962.09 / 21845.34)\n",
|
||||
"AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n",
|
||||
"from_string grammar:\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"n_gpu_layers = 1 \n",
|
||||
"n_batch = 512 \n",
|
||||
"n_gpu_layers = 1 # Metal set to 1 is enough.\n",
|
||||
"n_batch = 512 # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n",
|
||||
"# Make sure the model path is correct for your system!\n",
|
||||
"llm = LlamaCpp(\n",
|
||||
" model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n",
|
||||
" model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n",
|
||||
" n_gpu_layers=n_gpu_layers,\n",
|
||||
" n_batch=n_batch,\n",
|
||||
 f16_kv=True, # MUST set to True, otherwise you will run into problems after a couple of calls\n",
|
||||
" callback_manager=callback_manager,\n",
|
||||
" verbose=True,\n",
|
||||
" verbose=True, # Verbose is required to pass to the callback manager\n",
|
||||
" grammar_path=\"/Users/rlm/Desktop/Code/langchain-main/langchain/libs/langchain/langchain/llms/grammars/json.gbnf\",\n",
|
||||
")"
|
||||
]
|
||||
@@ -832,23 +565,29 @@
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error in LangChainTracer.on_llm_start callback: ctypes objects containing pointers cannot be pickled\n",
|
||||
"Exception ignored in: <function LlamaGrammar.__del__ at 0x1402b15e0>\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/llama_grammar.py\", line 46, in __del__\n",
|
||||
" if self.grammar is not None:\n",
|
||||
"AttributeError: 'LlamaGrammar' object has no attribute 'grammar'\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\"name\": \"John Doe\", \"age\": 30, \"gender\": \"male\"}"
|
||||
"{\n",
|
||||
" \"name\": \"John Doe\",\n",
|
||||
" \"age\": 34,\n",
|
||||
" \"\": {\n",
|
||||
" \"title\": \"Software Developer\",\n",
|
||||
" \"company\": \"Google\"\n",
|
||||
" },\n",
|
||||
" \"interests\": [\n",
|
||||
" \"Sports\",\n",
|
||||
" \"Music\",\n",
|
||||
" \"Cooking\"\n",
|
||||
" ],\n",
|
||||
" \"address\": {\n",
|
||||
" \"street_number\": 123,\n",
|
||||
" \"street_name\": \"Oak Street\",\n",
|
||||
" \"city\": \"Mountain View\",\n",
|
||||
" \"state\": \"California\",\n",
|
||||
" \"postal_code\": 94040\n",
|
||||
" }}"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -856,162 +595,36 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"llama_print_timings: load time = 317.62 ms\n",
|
||||
"llama_print_timings: sample time = 141.83 ms / 22 runs ( 6.45 ms per token, 155.11 tokens per second)\n",
|
||||
"llama_print_timings: prompt eval time = 316.89 ms / 9 tokens ( 35.21 ms per token, 28.40 tokens per second)\n",
|
||||
"llama_print_timings: eval time = 575.93 ms / 21 runs ( 27.43 ms per token, 36.46 tokens per second)\n",
|
||||
"llama_print_timings: total time = 1087.31 ms\n",
|
||||
"Error in LangChainTracer.on_llm_end callback: ctypes objects containing pointers cannot be pickled\n",
|
||||
"Exception ignored in: <function LlamaGrammar.__del__ at 0x1402b15e0>\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/llama_grammar.py\", line 46, in __del__\n",
|
||||
" if self.grammar is not None:\n",
|
||||
"AttributeError: 'LlamaGrammar' object has no attribute 'grammar'\n"
|
||||
"llama_print_timings: load time = 357.51 ms\n",
|
||||
"llama_print_timings: sample time = 1213.30 ms / 144 runs ( 8.43 ms per token, 118.68 tokens per second)\n",
|
||||
"llama_print_timings: prompt eval time = 356.78 ms / 9 tokens ( 39.64 ms per token, 25.23 tokens per second)\n",
|
||||
"llama_print_timings: eval time = 3947.16 ms / 143 runs ( 27.60 ms per token, 36.23 tokens per second)\n",
|
||||
"llama_print_timings: total time = 5846.21 ms\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%capture captured --no-stdout\n",
|
||||
"result=llm(\"Describe a person in JSON format:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'John Doe'"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"eval(result)[\"name\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can also try `list.gbnf`."
|
||||
"We can also supply `list.gbnf` to return a list."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n",
|
||||
"llama_model_load_internal: format = ggjt v3 (latest)\n",
|
||||
"llama_model_load_internal: n_vocab = 32000\n",
|
||||
"llama_model_load_internal: n_ctx = 512\n",
|
||||
"llama_model_load_internal: n_embd = 5120\n",
|
||||
"llama_model_load_internal: n_mult = 256\n",
|
||||
"llama_model_load_internal: n_head = 40\n",
|
||||
"llama_model_load_internal: n_head_kv = 40\n",
|
||||
"llama_model_load_internal: n_layer = 40\n",
|
||||
"llama_model_load_internal: n_rot = 128\n",
|
||||
"llama_model_load_internal: n_gqa = 1\n",
|
||||
"llama_model_load_internal: rnorm_eps = 5.0e-06\n",
|
||||
"llama_model_load_internal: n_ff = 13824\n",
|
||||
"llama_model_load_internal: freq_base = 10000.0\n",
|
||||
"llama_model_load_internal: freq_scale = 1\n",
|
||||
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
|
||||
"llama_model_load_internal: model size = 13B\n",
|
||||
"llama_model_load_internal: ggml ctx size = 0.11 MB\n",
|
||||
"llama_model_load_internal: mem required = 6983.72 MB (+ 400.00 MB per state)\n",
|
||||
"llama_new_context_with_model: kv self size = 400.00 MB\n",
|
||||
"ggml_metal_init: allocating\n",
|
||||
"ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n",
|
||||
"ggml_metal_init: loaded kernel_add 0x141323050\n",
|
||||
"ggml_metal_init: loaded kernel_add_row 0x141322950\n",
|
||||
"ggml_metal_init: loaded kernel_mul 0x141364b10\n",
|
||||
"ggml_metal_init: loaded kernel_mul_row 0x141364d70\n",
|
||||
"ggml_metal_init: loaded kernel_scale 0x141364fd0\n",
|
||||
"ggml_metal_init: loaded kernel_silu 0x141365230\n",
|
||||
"ggml_metal_init: loaded kernel_relu 0x141365490\n",
|
||||
"ggml_metal_init: loaded kernel_gelu 0x1413656f0\n",
|
||||
"ggml_metal_init: loaded kernel_soft_max 0x141365950\n",
|
||||
"ggml_metal_init: loaded kernel_diag_mask_inf 0x141365bb0\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_f16 0x141365e10\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_0 0x141366070\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_1 0x1413662d0\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q2_K 0x141366530\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q3_K 0x141366790\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q4_K 0x1413669f0\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q5_K 0x141366c50\n",
|
||||
"ggml_metal_init: loaded kernel_get_rows_q6_K 0x141366eb0\n",
|
||||
"ggml_metal_init: loaded kernel_rms_norm 0x141367110\n",
|
||||
"ggml_metal_init: loaded kernel_norm 0x141367370\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x1413675d0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x141367830\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x141367a90\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x141317e70\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x141327540\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x1413277a0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x141327a00\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x141327c60\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_f16_f32 0x141327ec0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q4_0_f32 0x141328120\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q4_1_f32 0x141328380\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q2_K_f32 0x1413285e0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q3_K_f32 0x141328840\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q4_K_f32 0x141328aa0\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q5_K_f32 0x141328d00\n",
|
||||
"ggml_metal_init: loaded kernel_mul_mm_q6_K_f32 0x141328f60\n",
|
||||
"ggml_metal_init: loaded kernel_rope 0x1413291c0\n",
|
||||
"ggml_metal_init: loaded kernel_alibi_f32 0x141329420\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f32_f16 0x141329680\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f32_f32 0x1413298e0\n",
|
||||
"ggml_metal_init: loaded kernel_cpy_f16_f16 0x141329b40\n",
|
||||
"ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n",
|
||||
"ggml_metal_init: hasUnifiedMemory = true\n",
|
||||
"ggml_metal_init: maxTransferRate = built-in GPU\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"root ::= [[] items []] EOF \n",
|
||||
"items ::= item items_5 \n",
|
||||
"EOF ::= [<U+000A>] \n",
|
||||
"item ::= word \n",
|
||||
"items_4 ::= [,] [ ] item \n",
|
||||
"items_5 ::= items_4 items_5 | \n",
|
||||
"word ::= word_7 \n",
|
||||
"word_7 ::= [a-zA-Z] word_7 | [a-zA-Z] \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"llama_new_context_with_model: compute buffer total size = 91.35 MB\n",
|
||||
"llama_new_context_with_model: max tensor size = 87.89 MB\n",
|
||||
"ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, (21946.34 / 21845.34), warning: current allocated size is greater than the recommended max working set size\n",
|
||||
"ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1.36 MB, (21947.70 / 21845.34), warning: current allocated size is greater than the recommended max working set size\n",
|
||||
"ggml_metal_add_buffer: allocated 'kv ' buffer, size = 402.00 MB, (22349.70 / 21845.34), warning: current allocated size is greater than the recommended max working set size\n",
|
||||
"ggml_metal_add_buffer: allocated 'alloc ' buffer, size = 90.02 MB, (22439.72 / 21845.34), warning: current allocated size is greater than the recommended max working set size\n",
|
||||
"AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n",
|
||||
"from_string grammar:\n",
|
||||
"\n",
|
||||
"ggml_metal_free: deallocating\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"n_gpu_layers = 1 \n",
|
||||
"n_batch = 512\n",
|
||||
"llm = LlamaCpp(\n",
|
||||
" model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n",
|
||||
" model_path=\"/Users/rlm/Desktop/Code/llama.cpp/models/openorca-platypus2-13b.gguf.q4_0.bin\",\n",
|
||||
" n_gpu_layers=n_gpu_layers,\n",
|
||||
" n_batch=n_batch,\n",
|
||||
 f16_kv=True, # MUST set to True, otherwise you will run into problems after a couple of calls\n",
|
||||
@@ -1023,27 +636,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error in LangChainTracer.on_llm_start callback: ctypes objects containing pointers cannot be pickled\n",
|
||||
"Exception ignored in: <function LlamaGrammar.__del__ at 0x1402b15e0>\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/llama_grammar.py\", line 46, in __del__\n",
|
||||
" if self.grammar is not None:\n",
|
||||
"AttributeError: 'LlamaGrammar' object has no attribute 'grammar'\n",
|
||||
"Llama.generate: prefix-match hit\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[supanova]\n"
|
||||
"[\"The Catcher in the Rye\", \"Wuthering Heights\", \"Anna Karenina\"]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1051,30 +651,18 @@
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"llama_print_timings: load time = 314.72 ms\n",
|
||||
"llama_print_timings: sample time = 39.22 ms / 7 runs ( 5.60 ms per token, 178.49 tokens per second)\n",
|
||||
"llama_print_timings: prompt eval time = 401.10 ms / 33 tokens ( 12.15 ms per token, 82.27 tokens per second)\n",
|
||||
"llama_print_timings: eval time = 165.29 ms / 6 runs ( 27.55 ms per token, 36.30 tokens per second)\n",
|
||||
"llama_print_timings: total time = 623.48 ms\n",
|
||||
"Error in LangChainTracer.on_llm_end callback: ctypes objects containing pointers cannot be pickled\n",
|
||||
"Exception ignored in: <function LlamaGrammar.__del__ at 0x1402b15e0>\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/Users/rlm/miniforge3/envs/llama2/lib/python3.9/site-packages/llama_cpp/llama_grammar.py\", line 46, in __del__\n",
|
||||
" if self.grammar is not None:\n",
|
||||
"AttributeError: 'LlamaGrammar' object has no attribute 'grammar'\n"
|
||||
"llama_print_timings: load time = 322.34 ms\n",
|
||||
"llama_print_timings: sample time = 232.60 ms / 26 runs ( 8.95 ms per token, 111.78 tokens per second)\n",
|
||||
"llama_print_timings: prompt eval time = 321.90 ms / 11 tokens ( 29.26 ms per token, 34.17 tokens per second)\n",
|
||||
"llama_print_timings: eval time = 680.82 ms / 25 runs ( 27.23 ms per token, 36.72 tokens per second)\n",
|
||||
"llama_print_timings: total time = 1295.27 ms\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result=llm(\"Provide a list of items in the format: '[item 1, item 2, item 3]' for things to bring to a party.\")"
|
||||
"%%capture captured --no-stdout\n",
|
||||
"result=llm(\"List of top-3 my favourite books:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -5,8 +5,9 @@
|
||||
"id": "f36d938c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Caching integrations\n",
|
||||
"This notebook covers how to cache results of individual LLM calls."
|
||||
"# LLM Caching integrations\n",
|
||||
"\n",
|
||||
"This notebook covers how to cache results of individual LLM calls using different caches."
|
||||
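"\n",
|
||||
"All of the integrations below are wired up the same way: assign a cache implementation to \`langchain.llm_cache\`, as in this minimal sketch using the in-memory cache:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"import langchain\n",
|
||||
"from langchain.cache import InMemoryCache\n",
|
||||
"\n",
|
||||
"# Repeated identical LLM calls are then served from the cache\n",
|
||||
"langchain.llm_cache = InMemoryCache()\n",
|
||||
"```"
|
||||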
]
|
||||
},
|
||||
{
|
||||
@@ -26,9 +27,12 @@
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b50f0598",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"jp-MarkdownHeadingCollapsed": true,
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## In Memory Cache"
|
||||
"## `In Memory` Cache"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -108,9 +112,12 @@
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4bf59c12",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"jp-MarkdownHeadingCollapsed": true,
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## SQLite Cache"
|
||||
"## `SQLite` Cache"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -203,9 +210,12 @@
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "278ad7ae",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"jp-MarkdownHeadingCollapsed": true,
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Redis Cache"
|
||||
"## `Redis` Cache"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -385,9 +395,12 @@
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "684eab55",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"jp-MarkdownHeadingCollapsed": true,
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## GPTCache\n",
|
||||
"## `GPTCache`\n",
|
||||
"\n",
|
||||
"We can use [GPTCache](https://github.com/zilliztech/GPTCache) for exact match caching OR to cache results based on semantic similarity\n",
|
||||
"\n",
|
||||
@@ -614,9 +627,12 @@
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "726fe754",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"jp-MarkdownHeadingCollapsed": true,
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Momento Cache\n",
|
||||
"## `Momento` Cache\n",
|
||||
"Use [Momento](/docs/ecosystem/integrations/momento.html) to cache prompts and responses.\n",
|
||||
"\n",
|
||||
"Requires momento to use, uncomment below to install:"
|
||||
@@ -723,9 +739,14 @@
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "934943dc",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"jp-MarkdownHeadingCollapsed": true,
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## SQLAlchemy Cache"
|
||||
"## `SQLAlchemy` Cache\n",
|
||||
"\n",
|
||||
"You can use `SQLAlchemyCache` to cache with any SQL database supported by `SQLAlchemy`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -735,8 +756,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# You can use SQLAlchemyCache to cache with any SQL database supported by SQLAlchemy.\n",
|
||||
"\n",
|
||||
"# from langchain.cache import SQLAlchemyCache\n",
|
||||
"# from sqlalchemy import create_engine\n",
|
||||
"\n",
|
||||
@@ -795,7 +814,10 @@
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0c69d84d",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"jp-MarkdownHeadingCollapsed": true,
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Optional Caching\n",
|
||||
"You can also turn off caching for specific LLMs should you choose. In the example below, even though global caching is enabled, we turn it off for a specific LLM"
|
||||
@@ -874,7 +896,10 @@
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5da41b77",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"jp-MarkdownHeadingCollapsed": true,
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"## Optional Caching in Chains\n",
|
||||
"You can also turn off caching for particular nodes in chains. Note that because of certain interfaces, its often easier to construct the chain first, and then edit the LLM afterwards.\n",
|
||||
@@ -1022,9 +1047,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "venv"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -1036,7 +1061,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -63,7 +63,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = MosaicML(inject_instruction_format=True, model_kwargs={\"do_sample\": False})"
|
||||
"llm = MosaicML(inject_instruction_format=True, model_kwargs={\"max_new_tokens\": 128})"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# OctoAI Compute Service\n",
|
||||
"# OctoAI\n",
|
||||
"\n",
|
||||
">[OctoML](https://docs.octoai.cloud/docs) is a service with efficient compute. It enables users to integrate their choice of AI models into applications. The `OctoAI` compute service helps you run, tune, and scale AI applications.\n",
|
||||
"\n",
|
||||
"This example goes over how to use LangChain to interact with `OctoAI` [LLM endpoints](https://octoai.cloud/templates)\n",
|
||||
"## Environment setup\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To run our example app, there are four simple steps to take:\n",
|
||||
"\n",
|
||||
@@ -43,6 +46,13 @@
|
||||
"from langchain import PromptTemplate, LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
@@ -98,7 +108,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "langchain",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -112,9 +122,8 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
"version": "3.10.12"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "97697b63fdcee0a640856f91cb41326ad601964008c341809e43189d1cab1047"
|
||||
@@ -122,5 +131,5 @@
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
@@ -4,12 +4,12 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PromptGuard\n",
|
||||
"# OpaquePrompts\n",
|
||||
"\n",
|
||||
"[PromptGuard](https://promptguard.readthedocs.io/en/latest/) is a service that enables applications to leverage the power of language models without compromising user privacy. Designed for composability and ease of integration into existing applications and services, PromptGuard is consumable via a simple Python library as well as through LangChain. Perhaps more importantly, PromptGuard leverages the power of [confidential computing](https://en.wikipedia.org/wiki/Confidential_computing) to ensure that even the PromptGuard service itself cannot access the data it is protecting.\n",
|
||||
"[OpaquePrompts](https://opaqueprompts.readthedocs.io/en/latest/) is a service that enables applications to leverage the power of language models without compromising user privacy. Designed for composability and ease of integration into existing applications and services, OpaquePrompts is consumable via a simple Python library as well as through LangChain. Perhaps more importantly, OpaquePrompts leverages the power of [confidential computing](https://en.wikipedia.org/wiki/Confidential_computing) to ensure that even the OpaquePrompts service itself cannot access the data it is protecting.\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"This notebook goes over how to use LangChain to interact with `PromptGuard`."
|
||||
"This notebook goes over how to use LangChain to interact with `OpaquePrompts`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -18,15 +18,15 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# install the promptguard and langchain packages\n",
|
||||
"! pip install promptguard langchain"
|
||||
"# install the opaqueprompts and langchain packages\n",
|
||||
"! pip install opaqueprompts langchain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Accessing the PromptGuard API requires an API key, which you can get by creating an account on [the PromptGuard website](https://promptguard.opaque.co/). Once you have an account, you can find your API key on [the API Keys page](https://promptguard.opaque.co/api-keys)."
|
||||
"Accessing the OpaquePrompts API requires an API key, which you can get by creating an account on [the OpaquePrompts website](https://opaqueprompts.opaque.co/). Once you have an account, you can find your API key on [the API Keys page](https:opaqueprompts.opaque.co/api-keys)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -39,7 +39,7 @@
|
||||
"\n",
|
||||
"# Set API keys\n",
|
||||
"\n",
|
||||
"os.environ['PROMPTGUARD_API_KEY'] = \"<PROMPTGUARD_API_KEY>\"\n",
|
||||
"os.environ['OPAQUEPROMPTS_API_KEY'] = \"<OPAQUEPROMPTS_API_KEY>\"\n",
|
||||
"os.environ['OPENAI_API_KEY'] = \"<OPENAI_API_KEY>\""
|
||||
]
|
||||
},
|
||||
@@ -47,9 +47,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Use PromptGuard LLM Wrapper\n",
|
||||
"# Use OpaquePrompts LLM Wrapper\n",
|
||||
"\n",
|
||||
"Applying promptguard to your application could be as simple as wrapping your LLM using the PromptGuard class by replace `llm=OpenAI()` with `llm=PromptGuard(base_llm=OpenAI())`."
|
||||
"Applying OpaquePrompts to your application could be as simple as wrapping your LLM using the OpaquePrompts class by replace `llm=OpenAI()` with `llm=OpaquePrompts(base_llm=OpenAI())`."
|
||||
]
|
||||
},
|
||||
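For orientation, a minimal sketch of the drop-in replacement described above (it assumes `OPAQUEPROMPTS_API_KEY` and `OPENAI_API_KEY` are already set in the environment):

```python
from langchain.llms import OpenAI, OpaquePrompts

# OpaquePrompts sanitizes the prompt before it reaches the base LLM
# and desanitizes the response on the way back.
llm = OpaquePrompts(base_llm=OpenAI())
print(llm("Write a one-line security reminder."))
```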
{
|
||||
@@ -64,7 +64,7 @@
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.memory import ConversationBufferWindowMemory\n",
|
||||
"\n",
|
||||
"from langchain.llms import PromptGuard\n",
|
||||
"from langchain.llms import OpaquePrompts\n",
|
||||
"\n",
|
||||
"langchain.verbose = True\n",
|
||||
"langchain.debug = True\n",
|
||||
@@ -106,7 +106,7 @@
|
||||
"\n",
|
||||
"chain = LLMChain(\n",
|
||||
" prompt=PromptTemplate.from_template(prompt_template),\n",
|
||||
" llm=PromptGuard(base_llm=OpenAI()),\n",
|
||||
" llm=OpaquePrompts(base_llm=OpenAI()),\n",
|
||||
" memory=ConversationBufferWindowMemory(k=2),\n",
|
||||
" verbose=True,\n",
|
||||
")\n",
|
||||
@@ -132,10 +132,10 @@
|
||||
"During our recent meeting on February 23, 2023, at 10:30 AM, John Doe provided me with his personal details. His email is johndoe@example.com and his contact number is 650-456-7890. He lives in New York City, USA, and belongs to the American nationality with Christian beliefs and a leaning towards the Democratic party. He mentioned that he recently made a transaction using his credit card 4111 1111 1111 1111 and transferred bitcoins to the wallet address 1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa. While discussing his European travels, he noted down his IBAN as GB29 NWBK 6016 1331 9268 19. Additionally, he provided his website as https://johndoeportfolio.com. John also discussed some of his US-specific details. He said his bank account number is 1234567890123456 and his drivers license is Y12345678. His ITIN is 987-65-4321, and he recently renewed his passport, the number for which is 123456789. He emphasized not to share his SSN, which is 669-45-6789. Furthermore, he mentioned that he accesses his work files remotely through the IP 192.168.1.1 and has a medical license number MED-123456.\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"PromptGuard will automatically detect the sensitive data and replace it with a placeholder. \n",
|
||||
"OpaquePrompts will automatically detect the sensitive data and replace it with a placeholder. \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"# Context after PromptGuard\n",
|
||||
"# Context after OpaquePrompts\n",
|
||||
"\n",
|
||||
"During our recent meeting on DATE_TIME_3, at DATE_TIME_2, PERSON_3 provided me with his personal details. His email is EMAIL_ADDRESS_1 and his contact number is PHONE_NUMBER_1. He lives in LOCATION_3, LOCATION_2, and belongs to the NRP_3 nationality with NRP_2 beliefs and a leaning towards the Democratic party. He mentioned that he recently made a transaction using his credit card CREDIT_CARD_1 and transferred bitcoins to the wallet address CRYPTO_1. While discussing his NRP_1 travels, he noted down his IBAN as IBAN_CODE_1. Additionally, he provided his website as URL_1. PERSON_2 also discussed some of his LOCATION_1-specific details. He said his bank account number is US_BANK_NUMBER_1 and his drivers license is US_DRIVER_LICENSE_2. His ITIN is US_ITIN_1, and he recently renewed his passport, the number for which is DATE_TIME_1. He emphasized not to share his SSN, which is US_SSN_1. Furthermore, he mentioned that he accesses his work files remotely through the IP IP_ADDRESS_1 and has a medical license number MED-US_DRIVER_LICENSE_1.\n",
|
||||
"```\n",
|
||||
@@ -151,7 +151,7 @@
|
||||
"Response is desanitized by replacing the placeholder with the original sensitive data.\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"# desanitized LLM response from PromptGuard\n",
|
||||
"# desanitized LLM response from OpaquePrompts\n",
|
||||
"\n",
|
||||
"Hey John, just wanted to remind you to do a password reset for your website https://johndoeportfolio.com through your email johndoe@example.com. It's important to stay secure online, so don't forget to do it!\n",
|
||||
"```"
|
||||
@@ -161,7 +161,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Use PromptGuard in LangChain expression\n",
|
||||
"# Use OpaquePrompts in LangChain expression\n",
|
||||
"\n",
|
||||
"There are functions that can be used with LangChain expression as well if a drop-in replacement doesn't offer the flexibility you need. "
|
||||
]
|
||||
@@ -172,7 +172,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import langchain.utilities.promptguard as pgf\n",
|
||||
"import langchain.utilities.opaqueprompts as op\n",
|
||||
"from langchain.schema.runnable import RunnableMap\n",
|
||||
"from langchain.schema.output_parser import StrOutputParser\n",
|
||||
"\n",
|
||||
@@ -180,7 +180,7 @@
|
||||
"prompt=PromptTemplate.from_template(prompt_template), \n",
|
||||
"llm = OpenAI()\n",
|
||||
"pg_chain = (\n",
|
||||
" pgf.sanitize\n",
|
||||
" op.sanitize\n",
|
||||
" | RunnableMap(\n",
|
||||
" {\n",
|
||||
" \"response\": (lambda x: x[\"sanitized_input\"])\n",
|
||||
@@ -190,7 +190,7 @@
|
||||
" \"secure_context\": lambda x: x[\"secure_context\"],\n",
|
||||
" }\n",
|
||||
" )\n",
|
||||
" | (lambda x: pgf.desanitize(x[\"response\"], x[\"secure_context\"]))\n",
|
||||
" | (lambda x: op.desanitize(x[\"response\"], x[\"secure_context\"]))\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"pg_chain.invoke({\"question\": \"Write a text message to remind John to do password reset for his website through his email to stay secure.\", \"history\": \"\"})"
|
||||
@@ -186,7 +186,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.10.12"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
@@ -1,23 +1,25 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PipelineAI\n",
|
||||
"\n",
|
||||
"PipelineAI allows you to run your ML models at scale in the cloud. It also provides API access to [several LLM models](https://pipeline.ai).\n",
|
||||
">[PipelineAI](https://pipeline.ai) allows you to run your ML models at scale in the cloud. It also provides API access to [several LLM models](https://pipeline.ai).\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use Langchain with [PipelineAI](https://docs.pipeline.ai/docs)."
|
||||
"This notebook goes over how to use Langchain with [PipelineAI](https://docs.pipeline.ai/docs).\n",
|
||||
"\n",
|
||||
"## PipelineAI example\n",
|
||||
"\n",
|
||||
"[This example shows how PipelineAI integrated with LangChain](https://docs.pipeline.ai/docs/langchain) and it is created by PipelineAI."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Install pipeline-ai\n",
|
||||
"## Setup\n",
|
||||
"The `pipeline-ai` library is required to use the `PipelineAI` API, AKA `Pipeline Cloud`. Install `pipeline-ai` using `pip install pipeline-ai`."
|
||||
]
|
||||
},
|
||||
@@ -35,7 +37,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Imports"
|
||||
"## Example\n",
|
||||
"\n",
|
||||
"### Imports"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -50,11 +54,10 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set the Environment API Key\n",
|
||||
"### Set the Environment API Key\n",
|
||||
"Make sure to get your API key from PipelineAI. Check out the [cloud quickstart guide](https://docs.pipeline.ai/docs/cloud-quickstart). You'll be given a 30 day free trial with 10 hours of serverless GPU compute to test different models."
|
||||
]
|
||||
},
|
||||
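As a hedged sketch of the key setup (the `PIPELINE_API_KEY` variable name is an assumption here; use whatever name the `pipeline-ai` docs specify):

```python
import os

# Assumed variable name for the PipelineAI API key.
os.environ["PIPELINE_API_KEY"] = "<YOUR_API_KEY_HERE>"
```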
@@ -68,7 +71,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -89,7 +91,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a Prompt Template\n",
|
||||
"### Create a Prompt Template\n",
|
||||
"We will create a prompt template for Question and Answer."
|
||||
]
|
||||
},
|
||||
@@ -110,7 +112,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initiate the LLMChain"
|
||||
"### Initiate the LLMChain"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -126,7 +128,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Run the LLMChain\n",
|
||||
"### Run the LLMChain\n",
|
||||
"Provide a question and run the LLMChain."
|
||||
]
|
||||
},
|
||||
@@ -158,7 +160,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.10.12"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
File diff suppressed because one or more lines are too long
@@ -1,19 +1,17 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Titan Takeoff\n",
|
||||
"\n",
|
||||
"TitanML helps businesses build and deploy better, smaller, cheaper, and faster NLP models through our training, compression, and inference optimization platform. \n",
|
||||
">`TitanML` helps businesses build and deploy better, smaller, cheaper, and faster NLP models through our training, compression, and inference optimization platform. \n",
|
||||
"\n",
|
||||
"Our inference server, [Titan Takeoff](https://docs.titanml.co/docs/titan-takeoff/getting-started) enables deployment of LLMs locally on your hardware in a single command. Most generative model architectures are supported, such as Falcon, Llama 2, GPT2, T5 and many more."
|
||||
">Our inference server, [Titan Takeoff](https://docs.titanml.co/docs/titan-takeoff/getting-started) enables deployment of LLMs locally on your hardware in a single command. Most generative model architectures are supported, such as Falcon, Llama 2, GPT2, T5 and many more."
|
||||
]
|
||||
},
|
||||
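A minimal usage sketch, assuming a Takeoff server is already running locally with its default settings (the no-argument constructor is an assumption; see the Takeoff docs for connection parameters):

```python
from langchain.llms import TitanTakeoff

# Assumes a local Takeoff inference server on the default port.
llm = TitanTakeoff()
print(llm("What is the largest rainforest in the world?"))
```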
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -40,7 +38,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -72,7 +69,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -103,7 +99,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -130,7 +125,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -159,11 +153,24 @@
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
326
docs/extras/integrations/memory/xata_chat_message_history.ipynb
Normal file
@@ -0,0 +1,326 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Xata chat memory\n",
|
||||
"\n",
|
||||
"[Xata](https://xata.io) is a serverless data platform, based on PostgreSQL and Elasticsearch. It provides a Python SDK for interacting with your database, and a UI for managing your data. With the `XataChatMessageHistory` class, you can use Xata databases for longer-term persistence of chat sessions.\n",
|
||||
"\n",
|
||||
"This notebook covers:\n",
|
||||
"\n",
|
||||
"* A simple example showing what `XataChatMessageHistory` does.\n",
|
||||
"* A more complex example using a REACT agent that answer questions based on a knowledge based or documentation (stored in Xata as a vector store) and also having a long-term searchable history of its past messages (stored in Xata as a memory store)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Create a database\n",
|
||||
"\n",
|
||||
"In the [Xata UI](https://app.xata.io) create a new database. You can name it whatever you want, in this notepad we'll use `langchain`. The Langchain integration can auto-create the table used for storying the memory, and this is what we'll use in this example. If you want to pre-create the table, ensure it has the right schema and set `create_table` to `False` when creating the class. Pre-creating the table saves one round-trip to the database during each session initialization."
|
||||
]
|
||||
},
|
||||
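A hedged sketch of the pre-created-table variant (the `create_table` parameter comes from the note above; the API key and database URL are placeholders):

```python
from langchain.memory import XataChatMessageHistory

# Skip the schema round-trip when the `memory` table already exists.
history = XataChatMessageHistory(
    session_id="session-1",
    api_key="<XATA_API_KEY>",
    db_url="https://demo-uni3q8.eu-west-1.xata.sh/db/langchain",
    table_name="memory",
    create_table=False,  # assumes the table was pre-created with the right schema
)
```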
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's first install our dependencies:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install xata openai langchain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we need to get the environment variables for Xata. You can create a new API key by visiting your [account settings](https://app.xata.io/settings). To find the database URL, go to the Settings page of the database that you have created. The database URL should look something like this: `https://demo-uni3q8.eu-west-1.xata.sh/db/langchain`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"api_key = getpass.getpass(\"Xata API key: \")\n",
|
||||
"db_url = input(\"Xata database URL (copy it from your DB settings):\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a simple memory store\n",
|
||||
"\n",
|
||||
"To test the memory store functionality in isolation, let's use the following code snippet:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import XataChatMessageHistory\n",
|
||||
"\n",
|
||||
"history = XataChatMessageHistory(\n",
|
||||
" session_id=\"session-1\",\n",
|
||||
" api_key=api_key,\n",
|
||||
" db_url=db_url,\n",
|
||||
" table_name=\"memory\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"history.add_user_message(\"hi!\")\n",
|
||||
"\n",
|
||||
"history.add_ai_message(\"whats up?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The above code creates a session with the ID `session-1` and stores two messages in it. After running the above, if you visit the Xata UI, you should see a table named `memory` and the two messages added to it.\n",
|
||||
"\n",
|
||||
"You can retrieve the message history for a particular session with the following code:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"history.messages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Conversational Q&A chain on your data with memory\n",
|
||||
"\n",
|
||||
"Let's now see a more complex example in which we combine OpenAI, the Xata Vector Store integration, and the Xata memory store integration to create a Q&A chat bot on your data, with follow-up questions and history."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We're going to need to access the OpenAI API, so let's configure the API key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To store the documents that the chatbot will search for answers, add a table named `docs` to your `langchain` database using the Xata UI, and add the following columns:\n",
|
||||
"\n",
|
||||
"* `content` of type \"Text\". This is used to store the `Document.pageContent` values.\n",
|
||||
"* `embedding` of type \"Vector\". Use the dimension used by the model you plan to use. In this notebook we use OpenAI embeddings, which have 1536 dimensions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's create the vector store and add some sample docs to it:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores.xata import XataVectorStore\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"\n",
|
||||
"texts = [\n",
|
||||
" \"Xata is a Serverless Data platform based on PostgreSQL\",\n",
|
||||
" \"Xata offers a built-in vector type that can be used to store and query vectors\",\n",
|
||||
" \"Xata includes similarity search\"\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"vector_store = XataVectorStore.from_texts(texts, embeddings, api_key=api_key, db_url=db_url, table_name=\"docs\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"After running the above command, if you go to the Xata UI, you should see the documents loaded together with their embeddings in the `docs` table."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's now create a ConversationBufferMemory to store the chat messages from both the user and the AI."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.memory import ConversationBufferMemory\n",
|
||||
"from uuid import uuid4\n",
|
||||
"\n",
|
||||
"chat_memory = XataChatMessageHistory(\n",
|
||||
" session_id=str(uuid4()), # needs to be unique per user session\n",
|
||||
" api_key=api_key,\n",
|
||||
" db_url=db_url,\n",
|
||||
" table_name=\"memory\"\n",
|
||||
")\n",
|
||||
"memory = ConversationBufferMemory(memory_key=\"chat_history\", chat_memory=chat_memory, return_messages=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now it's time to create an Agent to use both the vector store and the chat memory together."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import initialize_agent, AgentType\n",
|
||||
"from langchain.agents.agent_toolkits import create_retriever_tool\n",
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"\n",
|
||||
"tool = create_retriever_tool(\n",
|
||||
" vector_store.as_retriever(), \n",
|
||||
" \"search_docs\",\n",
|
||||
" \"Searches and returns documents from the Xata manual. Useful when you need to answer questions about Xata.\"\n",
|
||||
")\n",
|
||||
"tools = [tool]\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools,\n",
|
||||
" llm,\n",
|
||||
" agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,\n",
|
||||
" verbose=True,\n",
|
||||
" memory=memory)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To test, let's tell the agent our name:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.run(input=\"My name is bob\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's now ask the agent some questions about Xata:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.run(input=\"What is xata?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Notice that it answers based on the data stored in the document store. And now, let's ask a follow up question:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.run(input=\"Does it support similarity search?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"And now let's test its memory:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.run(input=\"Did I tell you my name? What is it?\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
23
docs/extras/integrations/providers/ainetwork.mdx
Normal file
@@ -0,0 +1,23 @@
|
||||
# AINetwork
|
||||
|
||||
>[AI Network](https://www.ainetwork.ai/build-on-ain) is a layer 1 blockchain designed to accommodate
|
||||
> large-scale AI models, utilizing a decentralized GPU network powered by the
|
||||
> [$AIN token](https://www.ainetwork.ai/token), enriching AI-driven `NFTs` (`AINFTs`).
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
You need to install the `ain-py` Python package.
|
||||
|
||||
```bash
|
||||
pip install ain-py
|
||||
```
|
||||
You need to set the `AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY` environment variable to your AIN Blockchain Account Private Key.
|
||||
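For example (a minimal sketch; the key value is a placeholder):

```python
import os

# The toolkit reads the private key from this environment variable.
os.environ["AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY"] = "<your-ain-private-key>"
```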
## Toolkit
|
||||
|
||||
See a [usage example](/docs/integrations/toolkits/ainetwork).
|
||||
|
||||
```python
|
||||
from langchain.agents.agent_toolkits.ainetwork.toolkit import AINetworkToolkit
|
||||
```
|
||||
|
||||
@@ -21,7 +21,7 @@ pip install cassio
|
||||
See a [usage example](/docs/integrations/vectorstores/cassandra).
|
||||
|
||||
```python
|
||||
from langchain.memory import CassandraChatMessageHistory
|
||||
from langchain.vectorstores import Cassandra
|
||||
```
|
||||
|
||||
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
|
||||
>[Infino](https://github.com/infinohq/infino) is an open-source observability platform that stores both metrics and application logs together.
|
||||
|
||||
Key features of infino include:
|
||||
- Metrics Tracking: Capture time taken by LLM model to handle request, errors, number of tokens, and costing indication for the particular LLM.
|
||||
- Data Tracking: Log and store prompt, request, and response data for each LangChain interaction.
|
||||
- Graph Visualization: Generate basic graphs over time, depicting metrics such as request duration, error occurrences, token count, and cost.
|
||||
Key features of `Infino` include:
|
||||
- **Metrics Tracking**: Capture the time taken by the LLM to handle a request, errors, the number of tokens, and a cost indication for the particular LLM.
|
||||
- **Data Tracking**: Log and store prompt, request, and response data for each LangChain interaction.
|
||||
- **Graph Visualization**: Generate basic graphs over time, depicting metrics such as request duration, error occurrences, token count, and cost.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
@@ -15,7 +15,7 @@ First, you'll need to install the `infinopy` Python package as follows:
|
||||
pip install infinopy
|
||||
```
|
||||
|
||||
If you already have an Infino Server running, then you're good to go; but if
|
||||
If you already have an `Infino Server` running, then you're good to go; but if
|
||||
you don't, follow the next steps to start it:
|
||||
|
||||
- Make sure you have Docker installed
|
||||
@@ -28,7 +28,7 @@ you don't, follow the next steps to start it:
|
||||
|
||||
## Using Infino
|
||||
|
||||
See a [usage example of `InfinoCallbackHandler`](/docs/modules/callbacks/integrations/infino.html).
|
||||
See a [usage example of `InfinoCallbackHandler`](/docs/integrations/callbacks/infino.html).
|
||||
|
||||
```python
|
||||
from langchain.callbacks import InfinoCallbackHandler
|
||||
|
||||
44
docs/extras/integrations/providers/neo4j.mdx
Normal file
@@ -0,0 +1,44 @@
|
||||
# Neo4j
|
||||
|
||||
This page covers how to use the Neo4j ecosystem within LangChain.
|
||||
|
||||
What is Neo4j?
|
||||
|
||||
**Neo4j in a nutshell:**
|
||||
|
||||
- Neo4j is an open-source database management system that specializes in graph database technology.
|
||||
- Neo4j allows you to represent and store data in nodes and edges, making it ideal for handling connected data and relationships.
|
||||
- Neo4j provides the Cypher Query Language, making it easy to interact with and query your graph data.
|
||||
- With Neo4j, you can achieve high-performance graph traversals and queries, suitable for production-level systems.
|
||||
- Get started quickly with Neo4j by visiting [their website](https://neo4j.com/).
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Install the Python SDK with `pip install neo4j`
|
||||
|
||||
## Wrappers
|
||||
|
||||
### VectorStore
|
||||
|
||||
There exists a wrapper around Neo4j vector index, allowing you to use it as a vectorstore,
|
||||
whether for semantic search or example selection.
|
||||
|
||||
To import this vectorstore:
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import Neo4jVector
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the Neo4j vector index wrapper, see [this notebook](/docs/integrations/vectorstores/neo4jvector.html)
|
||||
|
||||
### GraphCypherQAChain
|
||||
|
||||
There exists a wrapper around Neo4j graph database that allows you to generate Cypher statements based on the user input
|
||||
and use them to retrieve relevant information from the database.
|
||||
|
||||
```python
|
||||
from langchain.graphs import Neo4jGraph
|
||||
from langchain.chains import GraphCypherQAChain
|
||||
```
|
||||
|
||||
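A minimal, hedged usage sketch (the connection details are placeholders for a locally running Neo4j instance):

```python
from langchain.chat_models import ChatOpenAI
from langchain.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain

# Placeholder connection details for a local Neo4j instance.
graph = Neo4jGraph(url="bolt://localhost:7687", username="neo4j", password="<password>")
chain = GraphCypherQAChain.from_llm(ChatOpenAI(temperature=0), graph=graph, verbose=True)
chain.run("How many nodes are in the graph?")
```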
For a more detailed walkthrough of Cypher generating chain, see [this notebook](/docs/use_cases/more/graph/graph_cypher_qa.html)
|
||||
15
docs/extras/integrations/providers/tencentvectordb.mdx
Normal file
@@ -0,0 +1,15 @@
|
||||
# TencentVectorDB
|
||||
|
||||
This page covers how to use the TencentVectorDB ecosystem within LangChain.
|
||||
|
||||
### VectorStore
|
||||
|
||||
There exists a wrapper around TencentVectorDB, allowing you to use it as a vectorstore,
|
||||
whether for semantic search or example selection.
|
||||
|
||||
To import this vectorstore:
|
||||
```python
|
||||
from langchain.vectorstores import TencentVectorDB
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the TencentVectorDB wrapper, see [this notebook](/docs/integrations/vectorstores/tencentvectordb.html)
|
||||
File diff suppressed because one or more lines are too long
279
docs/extras/integrations/retrievers/google_drive.ipynb
Normal file
@@ -0,0 +1,279 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b0ed136e-6983-4893-ae1b-b75753af05f8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Drive Retriever\n",
|
||||
"This notebook covers how to retrieve documents from Google Drive.\n",
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"1. Create a Google Cloud project or use an existing project\n",
|
||||
"1. Enable the [Google Drive API](https://console.cloud.google.com/flows/enableapi?apiid=drive.googleapis.com)\n",
|
||||
"1. [Authorize credentials for desktop app](https://developers.google.com/drive/api/quickstart/python#authorize_credentials_for_a_desktop_application)\n",
|
||||
"1. `pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib`\n",
|
||||
"\n",
|
||||
"## Instructions for retrieving your Google Docs data\n",
|
||||
"By default, the `GoogleDriveRetriever` expects the `credentials.json` file to be `~/.credentials/credentials.json`, but this is configurable using the `GOOGLE_ACCOUNT_FILE` environment variable. \n",
|
||||
"The location of `token.json` use the same directory (or use the parameter `token_path`). Note that `token.json` will be created automatically the first time you use the retriever.\n",
|
||||
"\n",
|
||||
"`GoogleDriveRetriever` can retrieve a selection of files with some requests. \n",
|
||||
"\n",
|
||||
"By default, If you use a `folder_id`, all the files inside this folder can be retrieved to `Document`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "35b94a93-97de-4af8-9cca-de9ffb7930c3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can obtain your folder and document id from the URL:\n",
|
||||
"* Folder: https://drive.google.com/drive/u/0/folders/1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5 -> folder id is `\"1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5\"`\n",
|
||||
"* Document: https://docs.google.com/document/d/1bfaMQ18_i56204VaQDVeAFpqEijJTgvurupdEDiaUQw/edit -> document id is `\"1bfaMQ18_i56204VaQDVeAFpqEijJTgvurupdEDiaUQw\"`\n",
|
||||
"\n",
|
||||
"The special value `root` is for your personal home."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9c9665c9-a023-4078-9d95-e43021cecb6f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "878928a6-a5ae-4f74-b351-64e3b01733fe",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-09T10:45:59.438650905Z",
|
||||
"start_time": "2023-05-09T10:45:57.955900302Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.retrievers import GoogleDriveRetriever"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "755907c2-145d-4f0f-9b15-07a628a2d2d2",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-09T10:45:59.442890834Z",
|
||||
"start_time": "2023-05-09T10:45:59.440941528Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"folder_id=\"root\"\n",
|
||||
"#folder_id='1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2216c83f-68e4-4d2f-8ea2-5878fb18bbe7",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-09T10:45:59.795842403Z",
|
||||
"start_time": "2023-05-09T10:45:59.445262457Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = GoogleDriveRetriever(\n",
|
||||
" num_results=2,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fa339ca0-f478-440c-ba80-0e5f41a19ce1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"By default, all files with these mime-type can be converted to `Document`.\n",
|
||||
"- text/text\n",
|
||||
"- text/plain\n",
|
||||
"- text/html\n",
|
||||
"- text/csv\n",
|
||||
"- text/markdown\n",
|
||||
"- image/png\n",
|
||||
"- image/jpeg\n",
|
||||
"- application/epub+zip\n",
|
||||
"- application/pdf\n",
|
||||
"- application/rtf\n",
|
||||
"- application/vnd.google-apps.document (GDoc)\n",
|
||||
"- application/vnd.google-apps.presentation (GSlide)\n",
|
||||
"- application/vnd.google-apps.spreadsheet (GSheet)\n",
|
||||
"- application/vnd.google.colaboratory (Notebook colab)\n",
|
||||
"- application/vnd.openxmlformats-officedocument.presentationml.presentation (PPTX)\n",
|
||||
"- application/vnd.openxmlformats-officedocument.wordprocessingml.document (DOCX)\n",
|
||||
"\n",
|
||||
"It's possible to update or customize this. See the documentation of `GDriveRetriever`.\n",
|
||||
"\n",
|
||||
"But, the corresponding packages must be installed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9dadec48",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install unstructured"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8f3b6aa0-b45d-4e37-8c50-5bebe70fdb9d",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-09T10:46:00.990310466Z",
|
||||
"start_time": "2023-05-09T10:45:59.798774595Z"
|
||||
},
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever.get_relevant_documents(\"machine learning\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8ff33817-8619-4897-8742-2216b9934d2a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can customize the criteria to select the files. A set of predefined filter are proposed:\n",
|
||||
"| template | description |\n",
|
||||
"| -------------------------------------- | --------------------------------------------------------------------- |\n",
|
||||
"| gdrive-all-in-folder | Return all compatible files from a `folder_id` |\n",
|
||||
"| gdrive-query | Search `query` in all drives |\n",
|
||||
"| gdrive-by-name | Search file with name `query`) |\n",
|
||||
"| gdrive-query-in-folder | Search `query` in `folder_id` (and sub-folders in `_recursive=true`) |\n",
|
||||
"| gdrive-mime-type | Search a specific `mime_type` |\n",
|
||||
"| gdrive-mime-type-in-folder | Search a specific `mime_type` in `folder_id` |\n",
|
||||
"| gdrive-query-with-mime-type | Search `query` with a specific `mime_type` |\n",
|
||||
"| gdrive-query-with-mime-type-and-folder | Search `query` with a specific `mime_type` and in `folder_id` |"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9977c712-9659-4959-b508-f59cc7d49d44",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = GoogleDriveRetriever(\n",
|
||||
" template=\"gdrive-query\", # Search everywhere\n",
|
||||
" num_results=2, # But take only 2 documents\n",
|
||||
")\n",
|
||||
"for doc in retriever.get_relevant_documents(\"machine learning\"):\n",
|
||||
" print(\"---\")\n",
|
||||
" print(doc.page_content.strip()[:60]+\"...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a5a0f3ef-26fb-4a5c-85f0-5aba90b682b1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Else, you can customize the prompt with a specialized `PromptTemplate`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b0bbebde-0487-4d20-9d77-8070e4f0e0d6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import PromptTemplate\n",
|
||||
"retriever = GoogleDriveRetriever(\n",
|
||||
" template=PromptTemplate(input_variables=['query'],\n",
|
||||
" # See https://developers.google.com/drive/api/guides/search-files\n",
|
||||
" template=\"(fullText contains '{query}') \"\n",
|
||||
" \"and mimeType='application/vnd.google-apps.document' \"\n",
|
||||
" \"and modifiedTime > '2000-01-01T00:00:00' \"\n",
|
||||
" \"and trashed=false\"),\n",
|
||||
" num_results=2,\n",
|
||||
" # See https://developers.google.com/drive/api/v3/reference/files/list\n",
|
||||
" includeItemsFromAllDrives=False,\n",
|
||||
" supportsAllDrives=False,\n",
|
||||
")\n",
|
||||
"for doc in retriever.get_relevant_documents(\"machine learning\"):\n",
|
||||
" print(f\"{doc.metadata['name']}:\")\n",
|
||||
" print(\"---\")\n",
|
||||
" print(doc.page_content.strip()[:60]+\"...\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9b6fed29-1666-452e-b677-401613270388",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Use GDrive 'description' metadata\n",
|
||||
"Each Google Drive has a `description` field in metadata (see the *details of a file*).\n",
|
||||
"Use the `snippets` mode to return the description of selected files.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "342dbe12-ed83-40f4-8957-0cc8c4609542",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = GoogleDriveRetriever(\n",
|
||||
" template='gdrive-mime-type-in-folder',\n",
|
||||
" folder_id=folder_id,\n",
|
||||
" mime_type='application/vnd.google-apps.document', # Only Google Docs\n",
|
||||
" num_results=2,\n",
|
||||
" mode='snippets',\n",
|
||||
" includeItemsFromAllDrives=False,\n",
|
||||
" supportsAllDrives=False,\n",
|
||||
")\n",
|
||||
"retriever.get_relevant_documents(\"machine learning\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -11,7 +11,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Eden AI is an AI consulting company that was founded to use its resources to empower people and create impactful products that use AI to improve the quality of life for individuals, businesses and societies at large."
|
||||
"Eden AI is revolutionizing the AI landscape by uniting the best AI providers, empowering users to unlock limitless possibilities and tap into the true potential of artificial intelligence. With an all-in-one comprehensive and hassle-free platform, it allows users to deploy AI features to production lightning fast, enabling effortless access to the full breadth of AI capabilities via a single API. (website: https://edenai.co/)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -48,10 +48,31 @@
|
||||
" accepts = \"application/json\"\n",
|
||||
"\n",
|
||||
" def transform_input(self, inputs: list[str], model_kwargs: Dict) -> bytes:\n",
|
||||
" input_str = json.dumps({\"inputs\": inputs, **model_kwargs})\n",
|
||||
" \"\"\"\n",
|
||||
" Transforms the input into bytes that can be consumed by SageMaker endpoint.\n",
|
||||
" Args:\n",
|
||||
" inputs: List of input strings.\n",
|
||||
" model_kwargs: Additional keyword arguments to be passed to the endpoint.\n",
|
||||
" Returns:\n",
|
||||
" The transformed bytes input.\n",
|
||||
" \"\"\"\n",
|
||||
" # Example: inference.py expects a JSON string with a \"inputs\" key:\n",
|
||||
" input_str = json.dumps({\"inputs\": inputs, **model_kwargs}) \n",
|
||||
" return input_str.encode(\"utf-8\")\n",
|
||||
"\n",
|
||||
" def transform_output(self, output: bytes) -> List[List[float]]:\n",
|
||||
" \"\"\"\n",
|
||||
" Transforms the bytes output from the endpoint into a list of embeddings.\n",
|
||||
" Args:\n",
|
||||
" output: The bytes output from SageMaker endpoint.\n",
|
||||
" Returns:\n",
|
||||
" The transformed output - list of embeddings\n",
|
||||
" Note:\n",
|
||||
" The length of the outer list is the number of input strings.\n",
|
||||
" The length of the inner lists is the embedding dimension.\n",
|
||||
" \"\"\"\n",
|
||||
" # Example: inference.py returns a JSON string with the list of\n",
|
||||
" # embeddings in a \"vectors\" key:\n",
|
||||
" response_json = json.loads(output.read().decode(\"utf-8\"))\n",
|
||||
" return response_json[\"vectors\"]\n",
|
||||
"\n",
|
||||
@@ -60,7 +81,6 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"embeddings = SagemakerEndpointEmbeddings(\n",
|
||||
" # endpoint_name=\"endpoint-name\",\n",
|
||||
" # credentials_profile_name=\"credentials-profile-name\",\n",
|
||||
" endpoint_name=\"huggingface-pytorch-inference-2023-03-21-16-14-03-834\",\n",
|
||||
" region_name=\"us-east-1\",\n",
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AINetwork Toolkit\n",
|
||||
"# AINetwork\n",
|
||||
"\n",
|
||||
"The AINetwork Toolkit is a set of tools for interacting with the AINetwork Blockchain. These tools allow you to transfer AIN, read and write values, create apps, and set permissions for specific paths within the blockchain database."
|
||||
">[AI Network](https://www.ainetwork.ai/build-on-ain) is a layer 1 blockchain designed to accommodate large-scale AI models, utilizing a decentralized GPU network powered by the [$AIN token](https://www.ainetwork.ai/token), enriching AI-driven `NFTs` (`AINFTs`).\n",
|
||||
">\n",
|
||||
">The `AINetwork Toolkit` is a set of tools for interacting with the [AINetwork Blockchain](https://www.ainetwork.ai/public/whitepaper.pdf). These tools allow you to transfer `AIN`, read and write values, create apps, and set permissions for specific paths within the blockchain database."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -30,7 +30,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -51,7 +50,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -96,7 +94,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -119,7 +116,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -147,7 +143,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -157,7 +152,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -174,7 +168,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -213,7 +206,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -250,7 +242,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -290,7 +281,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -337,7 +327,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -362,7 +351,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -397,7 +385,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -438,7 +425,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -453,9 +440,8 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
"id": "245a954a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ArXiv API Tool\n",
|
||||
"# ArXiv\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use the `arxiv` component. \n",
|
||||
"This notebook goes over how to use the `arxiv` tool with an agent. \n",
|
||||
"\n",
|
||||
"First, you need to install `arxiv` python package."
|
||||
]
|
||||
@@ -110,7 +110,7 @@
|
||||
"source": [
|
||||
"## The ArXiv API Wrapper\n",
|
||||
"\n",
|
||||
"The tool wraps the API Wrapper. Below, we can explore some of the features it provides."
|
||||
"The tool uses the `API Wrapper`. Below, we explore some of the features it provides."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -167,7 +167,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "840f70c9-8f80-4680-bb38-46198e931bcf",
|
||||
"metadata": {},
|
||||
@@ -250,7 +249,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,25 +1,23 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AWS Lambda API"
|
||||
"# AWS Lambda"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebook goes over how to use the AWS Lambda Tool component.\n",
|
||||
">`Amazon AWS Lambda` is a serverless computing service provided by `Amazon Web Services` (`AWS`). It helps developers to build and run applications and services without provisioning or managing servers. This serverless architecture enables you to focus on writing and deploying code, while AWS automatically takes care of scaling, patching, and managing the infrastructure required to run your applications.\n",
|
||||
"\n",
|
||||
"AWS Lambda is a serverless computing service provided by Amazon Web Services (AWS), designed to allow developers to build and run applications and services without the need for provisioning or managing servers. This serverless architecture enables you to focus on writing and deploying code, while AWS automatically takes care of scaling, patching, and managing the infrastructure required to run your applications.\n",
|
||||
"This notebook goes over how to use the `AWS Lambda` Tool.\n",
|
||||
"\n",
|
||||
"By including a `awslambda` in the list of tools provided to an Agent, you can grant your Agent the ability to invoke code running in your AWS Cloud for whatever purposes you need.\n",
|
||||
"\n",
|
||||
"When an Agent uses the awslambda tool, it will provide an argument of type string which will in turn be passed into the Lambda function via the event parameter.\n",
|
||||
"When an Agent uses the `AWS Lambda` tool, it will provide an argument of type string which will in turn be passed into the Lambda function via the event parameter.\n",
|
||||
"\n",
|
||||
"First, you need to install `boto3` python package."
|
||||
]
|
||||
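As a hedged sketch of that wiring (the tool name, description, and Lambda function name are placeholders you would replace with your own):

```python
from langchain.llms import OpenAI
from langchain.agents import load_tools, initialize_agent, AgentType

llm = OpenAI(temperature=0)
tools = load_tools(
    ["awslambda"],
    awslambda_tool_name="email-sender",  # how the agent refers to the tool
    awslambda_tool_description="sends an email with the specified content to a fixed address",
    function_name="testFunction1",  # the name of your Lambda function
)
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
agent.run("Send an email saying hello world.")
```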
@@ -38,7 +36,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -48,7 +45,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -98,7 +94,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -112,10 +108,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
@@ -5,11 +5,13 @@
|
||||
"id": "8f210ec3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Shell Tool\n",
|
||||
"# Shell (bash)\n",
|
||||
"\n",
|
||||
"Giving agents access to the shell is powerful (though risky outside a sandboxed environment).\n",
|
||||
"\n",
|
||||
"The LLM can use it to execute any shell commands. A common use case for this is letting the LLM interact with your local file system."
|
||||
"The LLM can use it to execute any shell commands. A common use case for this is letting the LLM interact with your local file system.\n",
|
||||
"\n",
|
||||
"**Note:** Shell tool does not work with Windows OS."
|
||||
]
|
||||
},
|
||||
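A minimal sketch of the tool used in isolation (output will vary by machine):

```python
from langchain.tools import ShellTool

shell_tool = ShellTool()
# Runs the commands in a subprocess and returns their combined output.
print(shell_tool.run({"commands": ["echo 'Hello World!'", "ls"]}))
```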
{
|
||||
@@ -184,7 +186,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.16"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DataForSeo API Wrapper\n",
|
||||
"This notebook demonstrates how to use the DataForSeo API wrapper to obtain search engine results. The DataForSeo API allows users to retrieve SERP from most popular search engines like Google, Bing, Yahoo. It also allows to get SERPs from different search engine types like Maps, News, Events, etc.\n"
|
||||
"# DataForSeo\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how to use the `DataForSeo API` to obtain search engine results. The `DataForSeo API` retrieves `SERP` from most popular search engines like `Google`, `Bing`, `Yahoo`. It also allows to get SERPs from different search engine types like `Maps`, `News`, `Events`, etc.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -19,12 +19,12 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setting up the API wrapper with your credentials\n",
|
||||
"You can obtain your API credentials by registering on the DataForSeo website."
|
||||
"## Setting up the API credentials\n",
|
||||
"\n",
|
||||
"You can obtain your API credentials by registering on the `DataForSeo` website."
|
||||
]
|
||||
},
|
||||
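A hedged sketch of the credential setup (the exact environment variable names the wrapper reads are an assumption here; check the wrapper's documentation):

```python
import os

# Assumed variable names for the DataForSeo credentials.
os.environ["DATAFORSEO_LOGIN"] = "<your-login>"
os.environ["DATAFORSEO_PASSWORD"] = "<your-password>"
```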
{
|
||||
@@ -42,7 +42,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -59,7 +58,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -72,7 +70,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -103,7 +100,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -127,7 +123,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -151,7 +146,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -178,7 +172,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -214,7 +207,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -228,10 +221,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.11"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
513
docs/extras/integrations/tools/edenai_tools.ipynb
Normal file
@@ -0,0 +1,513 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Eden AI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This Jupyter Notebook demonstrates how to use Eden AI tools with an Agent.\n",
|
||||
"\n",
|
||||
"Eden AI is revolutionizing the AI landscape by uniting the best AI providers, empowering users to unlock limitless possibilities and tap into the true potential of artificial intelligence. With an all-in-one comprehensive and hassle-free platform, it allows users to deploy AI features to production lightning fast, enabling effortless access to the full breadth of AI capabilities via a single API. (website: https://edenai.co/ )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"By including an Edenai tool in the list of tools provided to an Agent, you can grant your Agent the ability to do multiple tasks, such as:\n",
|
||||
"\n",
|
||||
"- speech to text\n",
|
||||
"- text to speech\n",
|
||||
"- text explicit content detection \n",
|
||||
"- image explicit content detection\n",
|
||||
"- object detection\n",
|
||||
"- OCR invoice parsing\n",
|
||||
"- OCR ID parsing\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"In this example, we will go through the process of utilizing the Edenai tools to create an Agent that can perform some of the tasks listed above."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---------------------------------------------------------------------------\n",
|
||||
"Accessing the EDENAI's API requires an API key, \n",
|
||||
"\n",
|
||||
"which you can get by creating an account https://app.edenai.run/user/register and heading here https://app.edenai.run/admin/account/settings\n",
|
||||
"\n",
|
||||
"Once we have a key we'll want to set it as the environment variable ``EDENAI_API_KEY`` or you can pass the key in directly via the edenai_api_key named parameter when initiating the EdenAI tools, e.g. ``EdenAiTextModerationTool(edenai_api_key=\"...\")``"
|
||||
]
|
||||
},
|
||||
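For example, a minimal sketch of setting the key in-process:

```python
import os
from getpass import getpass

# Either export EDENAI_API_KEY in your shell, or set it here:
os.environ["EDENAI_API_KEY"] = getpass("Eden AI API key: ")
```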
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.tools.edenai import (\n",
|
||||
" EdenAiSpeechToTextTool,\n",
|
||||
" EdenAiTextToSpeechTool,\n",
|
||||
" EdenAiExplicitImageTool,\n",
|
||||
" EdenAiObjectDetectionTool,\n",
|
||||
" EdenAiParsingIDTool,\n",
|
||||
" EdenAiParsingInvoiceTool,\n",
|
||||
" EdenAiTextModerationTool,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 41,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import EdenAI\n",
|
||||
"from langchain.agents import initialize_agent, AgentType\n",
|
||||
"\n",
|
||||
"llm=EdenAI(feature=\"text\",provider=\"openai\", params={\"temperature\" : 0.2,\"max_tokens\" : 250})\n",
|
||||
"\n",
|
||||
"tools = [\n",
|
||||
" EdenAiTextModerationTool(providers=[\"openai\"],language=\"en\"),\n",
|
||||
" EdenAiObjectDetectionTool(providers=[\"google\",\"api4ai\"]),\n",
|
||||
" EdenAiTextToSpeechTool(providers=[\"amazon\"],language=\"en\",voice=\"MALE\"),\n",
|
||||
" EdenAiExplicitImageTool(providers=[\"amazon\",\"google\"]),\n",
|
||||
" EdenAiSpeechToTextTool(providers=[\"amazon\"]),\n",
|
||||
" EdenAiParsingIDTool(providers=[\"amazon\",\"klippa\"],language=\"en\"),\n",
|
||||
" EdenAiParsingInvoiceTool(providers=[\"amazon\",\"google\"],language=\"en\"),\n",
|
||||
"]\n",
|
||||
"agent_chain = initialize_agent(\n",
|
||||
" tools,\n",
|
||||
" llm,\n",
|
||||
" agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
" verbose=True,\n",
|
||||
" return_intermediate_steps=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example with text"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 42,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to scan the text for explicit content and then convert it to speech\n",
|
||||
"Action: edenai_explicit_content_detection_text\n",
|
||||
"Action Input: 'i want to slap you'\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mnsfw_likelihood: 3\n",
|
||||
"\"sexual\": 1\n",
|
||||
"\"hate\": 1\n",
|
||||
"\"harassment\": 1\n",
|
||||
"\"self-harm\": 1\n",
|
||||
"\"sexual/minors\": 1\n",
|
||||
"\"hate/threatening\": 1\n",
|
||||
"\"violence/graphic\": 1\n",
|
||||
"\"self-harm/intent\": 1\n",
|
||||
"\"self-harm/instructions\": 1\n",
|
||||
"\"harassment/threatening\": 1\n",
|
||||
"\"violence\": 3\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now need to convert the text to speech\n",
|
||||
"Action: edenai_text_to_speech\n",
|
||||
"Action Input: 'i want to slap you'\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mhttps://d14uq1pz7dzsdq.cloudfront.net/0c825002-b4ef-4165-afa3-a140a5b25c82_.mp3?Expires=1693318351&Signature=V9vjgFe8pV5rnH-B2EUr8UshTEA3I0Xv1v0YwVEAq8w7G5pgex07dZ0M6h6fXusk7G3SW~sXs4IJxnD~DnIDp1XorvzMA2QVMJb8CD90EYvUWx9zfFa3tIegGapg~NC8wEGualccOehC~cSDhiQWrwAjDqPmq2olXnUVOfyl76pKNNR9Sm2xlljlrJcLCClBee2r5yCFEwFI-tnXX1lV2DGc5PNB66Lqrr0Fpe2trVJj2k8cLduIb8dbtqLPNIDCsV0N4QT10utZmhZcPpcSIBsdomw1Os1IjdG4nA8ZTIddAcLMCWJznttzl66vHPk26rjDpG5doMTTsPEz8ZKILQ__&Key-Pair-Id=K1F55BTI9AHGIK\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: The text contains explicit content of violence with a likelihood of 3. The audio file of the text can be found at https://d14uq1pz7dzsdq.cloudfront.net/0c825002-b4ef-4165-afa3-a140a5b25c82_.mp3?Expires=1693318351&Signature=V9vjgFe8pV5rnH-B2EUr8UshTEA3I0Xv1v0YwVEAq8w7G5pgex07dZ0M6h6fXusk7G3SW~sXs4IJxnD~DnIDp1XorvzMA2QVMJb8CD90EYvUWx9zfFa3tIegGapg~NC8wEGualccOehC~cSDhiQWrwAjDqPmq2olXnUVOfyl76pKNNR9Sm2xlljlrJcLCClBee2r5yCFEwFI-tn\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"input_ = \"\"\"i have this text : 'i want to slap you' \n",
|
||||
"first : i want to know if this text contains explicit content or not .\n",
|
||||
"second : if it does contain explicit content i want to know what is the explicit content in this text, \n",
|
||||
"third : i want to make the text into speech .\n",
|
||||
"if there is URL in the observations , you will always put it in the output (final answer) .\n",
|
||||
"\"\"\"\n",
|
||||
"result = agent_chain(input_)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"you can have more details of the execution by printing the result "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The text contains explicit content of violence with a likelihood of 3. The audio file of the text can be found at https://d14uq1pz7dzsdq.cloudfront.net/0c825002-b4ef-4165-afa3-a140a5b25c82_.mp3?Expires=1693318351&Signature=V9vjgFe8pV5rnH-B2EUr8UshTEA3I0Xv1v0YwVEAq8w7G5pgex07dZ0M6h6fXusk7G3SW~sXs4IJxnD~DnIDp1XorvzMA2QVMJb8CD90EYvUWx9zfFa3tIegGapg~NC8wEGualccOehC~cSDhiQWrwAjDqPmq2olXnUVOfyl76pKNNR9Sm2xlljlrJcLCClBee2r5yCFEwFI-tn'"
|
||||
]
|
||||
},
|
||||
"execution_count": 43,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result['output']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': \" i have this text : 'i want to slap you' \\n first : i want to know if this text contains explicit content or not .\\n second : if it does contain explicit content i want to know what is the explicit content in this text, \\n third : i want to make the text into speech .\\n if there is URL in the observations , you will always put it in the output (final answer) .\\n\\n \",\n",
|
||||
" 'output': 'The text contains explicit content of violence with a likelihood of 3. The audio file of the text can be found at https://d14uq1pz7dzsdq.cloudfront.net/0c825002-b4ef-4165-afa3-a140a5b25c82_.mp3?Expires=1693318351&Signature=V9vjgFe8pV5rnH-B2EUr8UshTEA3I0Xv1v0YwVEAq8w7G5pgex07dZ0M6h6fXusk7G3SW~sXs4IJxnD~DnIDp1XorvzMA2QVMJb8CD90EYvUWx9zfFa3tIegGapg~NC8wEGualccOehC~cSDhiQWrwAjDqPmq2olXnUVOfyl76pKNNR9Sm2xlljlrJcLCClBee2r5yCFEwFI-tn',\n",
|
||||
" 'intermediate_steps': [(AgentAction(tool='edenai_explicit_content_detection_text', tool_input=\"'i want to slap you'\", log=\" I need to scan the text for explicit content and then convert it to speech\\nAction: edenai_explicit_content_detection_text\\nAction Input: 'i want to slap you'\"),\n",
|
||||
" 'nsfw_likelihood: 3\\n\"sexual\": 1\\n\"hate\": 1\\n\"harassment\": 1\\n\"self-harm\": 1\\n\"sexual/minors\": 1\\n\"hate/threatening\": 1\\n\"violence/graphic\": 1\\n\"self-harm/intent\": 1\\n\"self-harm/instructions\": 1\\n\"harassment/threatening\": 1\\n\"violence\": 3'),\n",
|
||||
" (AgentAction(tool='edenai_text_to_speech', tool_input=\"'i want to slap you'\", log=\" I now need to convert the text to speech\\nAction: edenai_text_to_speech\\nAction Input: 'i want to slap you'\"),\n",
|
||||
" 'https://d14uq1pz7dzsdq.cloudfront.net/0c825002-b4ef-4165-afa3-a140a5b25c82_.mp3?Expires=1693318351&Signature=V9vjgFe8pV5rnH-B2EUr8UshTEA3I0Xv1v0YwVEAq8w7G5pgex07dZ0M6h6fXusk7G3SW~sXs4IJxnD~DnIDp1XorvzMA2QVMJb8CD90EYvUWx9zfFa3tIegGapg~NC8wEGualccOehC~cSDhiQWrwAjDqPmq2olXnUVOfyl76pKNNR9Sm2xlljlrJcLCClBee2r5yCFEwFI-tnXX1lV2DGc5PNB66Lqrr0Fpe2trVJj2k8cLduIb8dbtqLPNIDCsV0N4QT10utZmhZcPpcSIBsdomw1Os1IjdG4nA8ZTIddAcLMCWJznttzl66vHPk26rjDpG5doMTTsPEz8ZKILQ__&Key-Pair-Id=K1F55BTI9AHGIK')]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 44,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example with images"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to determine if the image contains objects, if any of them are harmful, and then convert the text to speech.\n",
|
||||
"Action: edenai_object_detection\n",
|
||||
"Action Input: https://static.javatpoint.com/images/objects.jpg\u001b[0m\n",
|
||||
"Observation: \u001b[33;1m\u001b[1;3mApple - Confidence 0.94003654\n",
|
||||
"Apple - Confidence 0.94003654\n",
|
||||
"Apple - Confidence 0.94003654\n",
|
||||
"Backpack - Confidence 0.7481894\n",
|
||||
"Backpack - Confidence 0.7481894\n",
|
||||
"Backpack - Confidence 0.7481894\n",
|
||||
"Luggage & bags - Confidence 0.70691586\n",
|
||||
"Luggage & bags - Confidence 0.70691586\n",
|
||||
"Luggage & bags - Confidence 0.70691586\n",
|
||||
"Container - Confidence 0.654727\n",
|
||||
"Container - Confidence 0.654727\n",
|
||||
"Container - Confidence 0.654727\n",
|
||||
"Luggage & bags - Confidence 0.5871518\n",
|
||||
"Luggage & bags - Confidence 0.5871518\n",
|
||||
"Luggage & bags - Confidence 0.5871518\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I need to check if any of the objects are harmful.\n",
|
||||
"Action: edenai_explicit_content_detection_text\n",
|
||||
"Action Input: Apple, Backpack, Luggage & bags, Container\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mnsfw_likelihood: 2\n",
|
||||
"\"sexually explicit\": 1\n",
|
||||
"\"sexually suggestive\": 2\n",
|
||||
"\"offensive\": 1\n",
|
||||
"nsfw_likelihood: 1\n",
|
||||
"\"sexual\": 1\n",
|
||||
"\"hate\": 1\n",
|
||||
"\"harassment\": 1\n",
|
||||
"\"self-harm\": 1\n",
|
||||
"\"sexual/minors\": 1\n",
|
||||
"\"hate/threatening\": 1\n",
|
||||
"\"violence/graphic\": 1\n",
|
||||
"\"self-harm/intent\": 1\n",
|
||||
"\"self-harm/instructions\": 1\n",
|
||||
"\"harassment/threatening\": 1\n",
|
||||
"\"violence\": 1\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m None of the objects are harmful.\n",
|
||||
"Action: edenai_text_to_speech\n",
|
||||
"Action Input: 'this item is safe'\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mhttps://d14uq1pz7dzsdq.cloudfront.net/0546db8b-528e-4b63-9a69-d14d43ad1566_.mp3?Expires=1693316753&Signature=N0KZeK9I-1s7wTgiQOAwH7LFlltwyonSJcDnkdnr8JIJmbgSw6fo6RTxWl~VvD2Hg6igJqxtJFFWyrBmmx-f9wWLw3bZSnuMxkhTRqLX9aUA9N-vPJGiRZV5BFredaOm8pwfo8TcXhVjw08iSxv8GSuyZEIwZkiq4PzdiyVTnKKji6eytV0CrnHrTs~eXZkSnOdD2Fu0ECaKvFHlsF4IDLI8efRvituSk0X3ygdec4HQojl5vmBXJzi1TuhKWOX8UxeQle8pdjjqUPSJ9thTHpucdPy6UbhZOH0C9rbtLrCfvK5rzrT4D~gKy9woICzG34tKRxNxHYVVUPqx2BiInA__&Key-Pair-Id=K1F55BTI9AHGIK\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n",
|
||||
"Final Answer: The image contains objects such as Apple, Backpack, Luggage & bags, and Container. None of them are harmful. The text 'this item is safe' can be found in the audio file at https://d14uq1pz7dzsdq.cloudfront.net/0546db8b-528e-4b63-9a69-d14d43ad1566_.mp3?Expires=1693316753&Signature=N0KZeK9I-1s7wTgiQOAwH7LFlltwyonSJcDnkdnr8JIJmbgSw6fo6RTxWl~VvD2Hg6igJqxtJFFWyrBmmx-f9wWLw3bZSnuMxkhTRqLX9aUA9N-vPJGiRZV5BFredaOm8pwfo8TcXhVjw08iSxv8GSuyZEIwZkiq4PzdiyVTnKKji6eyt\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"input_ = \"\"\"i have this url of an image : \"https://static.javatpoint.com/images/objects.jpg\"\n",
|
||||
"first : i want to know if the image contain objects .\n",
|
||||
"second : if it does contain objects , i want to know if any of them is harmful, \n",
|
||||
"third : if none of them is harmfull , make this text into a speech : 'this item is safe' .\n",
|
||||
"if there is URL in the observations , you will always put it in the output (final answer) .\n",
|
||||
"\"\"\"\n",
|
||||
"result = agent_chain(input_)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"The image contains objects such as Apple, Backpack, Luggage & bags, and Container. None of them are harmful. The text 'this item is safe' can be found in the audio file at https://d14uq1pz7dzsdq.cloudfront.net/0546db8b-528e-4b63-9a69-d14d43ad1566_.mp3?Expires=1693316753&Signature=N0KZeK9I-1s7wTgiQOAwH7LFlltwyonSJcDnkdnr8JIJmbgSw6fo6RTxWl~VvD2Hg6igJqxtJFFWyrBmmx-f9wWLw3bZSnuMxkhTRqLX9aUA9N-vPJGiRZV5BFredaOm8pwfo8TcXhVjw08iSxv8GSuyZEIwZkiq4PzdiyVTnKKji6eyt\""
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result['output']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"you can have more details of the execution by printing the result "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': ' i have this url of an image : \"https://static.javatpoint.com/images/objects.jpg\"\\n first : i want to know if the image contain objects .\\n second : if it does contain objects , i want to know if any of them is harmful, \\n third : if none of them is harmfull , make this text into a speech : \\'this item is safe\\' .\\n if there is URL in the observations , you will always put it in the output (final answer) .\\n ',\n",
|
||||
" 'output': \"The image contains objects such as Apple, Backpack, Luggage & bags, and Container. None of them are harmful. The text 'this item is safe' can be found in the audio file at https://d14uq1pz7dzsdq.cloudfront.net/0546db8b-528e-4b63-9a69-d14d43ad1566_.mp3?Expires=1693316753&Signature=N0KZeK9I-1s7wTgiQOAwH7LFlltwyonSJcDnkdnr8JIJmbgSw6fo6RTxWl~VvD2Hg6igJqxtJFFWyrBmmx-f9wWLw3bZSnuMxkhTRqLX9aUA9N-vPJGiRZV5BFredaOm8pwfo8TcXhVjw08iSxv8GSuyZEIwZkiq4PzdiyVTnKKji6eyt\",\n",
|
||||
" 'intermediate_steps': [(AgentAction(tool='edenai_object_detection', tool_input='https://static.javatpoint.com/images/objects.jpg', log=' I need to determine if the image contains objects, if any of them are harmful, and then convert the text to speech.\\nAction: edenai_object_detection\\nAction Input: https://static.javatpoint.com/images/objects.jpg'),\n",
|
||||
" 'Apple - Confidence 0.94003654\\nApple - Confidence 0.94003654\\nApple - Confidence 0.94003654\\nBackpack - Confidence 0.7481894\\nBackpack - Confidence 0.7481894\\nBackpack - Confidence 0.7481894\\nLuggage & bags - Confidence 0.70691586\\nLuggage & bags - Confidence 0.70691586\\nLuggage & bags - Confidence 0.70691586\\nContainer - Confidence 0.654727\\nContainer - Confidence 0.654727\\nContainer - Confidence 0.654727\\nLuggage & bags - Confidence 0.5871518\\nLuggage & bags - Confidence 0.5871518\\nLuggage & bags - Confidence 0.5871518'),\n",
|
||||
" (AgentAction(tool='edenai_explicit_content_detection_text', tool_input='Apple, Backpack, Luggage & bags, Container', log=' I need to check if any of the objects are harmful.\\nAction: edenai_explicit_content_detection_text\\nAction Input: Apple, Backpack, Luggage & bags, Container'),\n",
|
||||
" 'nsfw_likelihood: 2\\n\"sexually explicit\": 1\\n\"sexually suggestive\": 2\\n\"offensive\": 1\\nnsfw_likelihood: 1\\n\"sexual\": 1\\n\"hate\": 1\\n\"harassment\": 1\\n\"self-harm\": 1\\n\"sexual/minors\": 1\\n\"hate/threatening\": 1\\n\"violence/graphic\": 1\\n\"self-harm/intent\": 1\\n\"self-harm/instructions\": 1\\n\"harassment/threatening\": 1\\n\"violence\": 1'),\n",
|
||||
" (AgentAction(tool='edenai_text_to_speech', tool_input=\"'this item is safe'\", log=\" None of the objects are harmful.\\nAction: edenai_text_to_speech\\nAction Input: 'this item is safe'\"),\n",
|
||||
" 'https://d14uq1pz7dzsdq.cloudfront.net/0546db8b-528e-4b63-9a69-d14d43ad1566_.mp3?Expires=1693316753&Signature=N0KZeK9I-1s7wTgiQOAwH7LFlltwyonSJcDnkdnr8JIJmbgSw6fo6RTxWl~VvD2Hg6igJqxtJFFWyrBmmx-f9wWLw3bZSnuMxkhTRqLX9aUA9N-vPJGiRZV5BFredaOm8pwfo8TcXhVjw08iSxv8GSuyZEIwZkiq4PzdiyVTnKKji6eytV0CrnHrTs~eXZkSnOdD2Fu0ECaKvFHlsF4IDLI8efRvituSk0X3ygdec4HQojl5vmBXJzi1TuhKWOX8UxeQle8pdjjqUPSJ9thTHpucdPy6UbhZOH0C9rbtLrCfvK5rzrT4D~gKy9woICzG34tKRxNxHYVVUPqx2BiInA__&Key-Pair-Id=K1F55BTI9AHGIK')]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example with OCR images"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to extract the information from the ID and then convert it to text and then to speech\n",
|
||||
"Action: edenai_identity_parsing\n",
|
||||
"Action Input: \"https://www.citizencard.com/images/citizencard-uk-id-card-2023.jpg\"\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mlast_name : \n",
|
||||
" value : ANGELA\n",
|
||||
"given_names : \n",
|
||||
" value : GREENE\n",
|
||||
"birth_place : \n",
|
||||
"birth_date : \n",
|
||||
" value : 2000-11-09\n",
|
||||
"issuance_date : \n",
|
||||
"expire_date : \n",
|
||||
"document_id : \n",
|
||||
"issuing_state : \n",
|
||||
"address : \n",
|
||||
"age : \n",
|
||||
"country : \n",
|
||||
"document_type : \n",
|
||||
" value : DRIVER LICENSE FRONT\n",
|
||||
"gender : \n",
|
||||
"image_id : \n",
|
||||
"image_signature : \n",
|
||||
"mrz : \n",
|
||||
"nationality : \u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now need to convert the information to text and then to speech\n",
|
||||
"Action: edenai_text_to_speech\n",
|
||||
"Action Input: \"Welcome Angela Greene!\"\u001b[0m\n",
|
||||
"Observation: \u001b[38;5;200m\u001b[1;3mhttps://d14uq1pz7dzsdq.cloudfront.net/0c494819-0bbc-4433-bfa4-6e99bd9747ea_.mp3?Expires=1693316851&Signature=YcMoVQgPuIMEOuSpFuvhkFM8JoBMSoGMcZb7MVWdqw7JEf5~67q9dEI90o5todE5mYXB5zSYoib6rGrmfBl4Rn5~yqDwZ~Tmc24K75zpQZIEyt5~ZSnHuXy4IFWGmlIVuGYVGMGKxTGNeCRNUXDhT6TXGZlr4mwa79Ei1YT7KcNyc1dsTrYB96LphnsqOERx4X9J9XriSwxn70X8oUPFfQmLcitr-syDhiwd9Wdpg6J5yHAJjf657u7Z1lFTBMoXGBuw1VYmyno-3TAiPeUcVlQXPueJ-ymZXmwaITmGOfH7HipZngZBziofRAFdhMYbIjYhegu5jS7TxHwRuox32A__&Key-Pair-Id=K1F55BTI9AHGIK\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n",
|
||||
"Final Answer: https://d14uq1pz7dzsdq.cloudfront.net/0c494819-0bbc-4433-bfa4-6e99bd9747ea_.mp3?Expires=1693316851&Signature=YcMoVQgPuIMEOuSpFuvhkFM8JoBMSoGMcZb7MVWdqw7JEf5~67q9dEI90o5todE5mYXB5zSYoib6rGrmfBl4Rn5~yqDwZ~Tmc24K75zpQZIEyt5~ZSnHuXy4IFWGmlIVuGYVGMGKxTGNeCRNUXDhT6TXGZlr4mwa79Ei1YT7KcNyc1dsTrYB96LphnsqOERx4X9J9XriSwxn70X8oUPFfQmLcitr-syDhiwd9Wdpg6J5y\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"input_ = \"\"\"i have this url of an id: \"https://www.citizencard.com/images/citizencard-uk-id-card-2023.jpg\"\n",
|
||||
"i want to extract the information in it.\n",
|
||||
"create a text welcoming the person by his name and make it into speech .\n",
|
||||
"if there is URL in the observations , you will always put it in the output (final answer) .\n",
|
||||
"\"\"\"\n",
|
||||
"result = agent_chain(input_)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'https://d14uq1pz7dzsdq.cloudfront.net/0c494819-0bbc-4433-bfa4-6e99bd9747ea_.mp3?Expires=1693316851&Signature=YcMoVQgPuIMEOuSpFuvhkFM8JoBMSoGMcZb7MVWdqw7JEf5~67q9dEI90o5todE5mYXB5zSYoib6rGrmfBl4Rn5~yqDwZ~Tmc24K75zpQZIEyt5~ZSnHuXy4IFWGmlIVuGYVGMGKxTGNeCRNUXDhT6TXGZlr4mwa79Ei1YT7KcNyc1dsTrYB96LphnsqOERx4X9J9XriSwxn70X8oUPFfQmLcitr-syDhiwd9Wdpg6J5y'"
|
||||
]
|
||||
},
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result['output']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m I need to extract information from the invoice document\n",
|
||||
"Action: edenai_invoice_parsing\n",
|
||||
"Action Input: \"https://app.edenai.run/assets/img/data_1.72e3bdcc.png\"\u001b[0m\n",
|
||||
"Observation: \u001b[36;1m\u001b[1;3mcustomer_information : \n",
|
||||
" customer_name : Damita J Goldsmith\n",
|
||||
" customer_address : 201 Stan Fey Dr,Upper Marlboro, MD 20774\n",
|
||||
" customer_shipping_address : 201 Stan Fey Drive,Upper Marlboro\n",
|
||||
"merchant_information : \n",
|
||||
" merchant_name : SNG Engineering Inc\n",
|
||||
" merchant_address : 344 Main St #200 Gaithersburg, MD 20878 USA\n",
|
||||
" merchant_phone : +1 301 548 0055\n",
|
||||
"invoice_number : 014-03\n",
|
||||
"taxes : \n",
|
||||
"payment_term : on receipt of service\n",
|
||||
"date : 2003-01-20\n",
|
||||
"po_number : \n",
|
||||
"locale : \n",
|
||||
"bank_informations : \n",
|
||||
"item_lines : \n",
|
||||
" description : Field inspection of construction on 1/19/2003 deficiencies in house,construction, Garage drive way & legal support to Attorney to\u001b[0m\n",
|
||||
"Thought:\u001b[32;1m\u001b[1;3m I now know the answer to the questions\n",
|
||||
"Final Answer: The customer is Damita J Goldsmith and the company name is SNG Engineering Inc.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"input_ = \"\"\"i have this url of an invoice document: \"https://app.edenai.run/assets/img/data_1.72e3bdcc.png\"\n",
|
||||
"i want to extract the information in it.\n",
|
||||
"and answer these questions :\n",
|
||||
"who is the customer ?\n",
|
||||
"what is the company name ? \n",
|
||||
"\"\"\"\n",
|
||||
"result=agent_chain()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The customer is Damita J Goldsmith and the company name is SNG Engineering Inc.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result['output']"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -4,11 +4,11 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# File System Tools\n",
|
||||
"# File System\n",
|
||||
"\n",
|
||||
"LangChain provides tools for interacting with a local file system out of the box. This notebook walks through some of them.\n",
|
||||
"\n",
|
||||
"Note: these tools are not recommended for use outside a sandboxed environment! "
|
||||
"**Note:** these tools are not recommended for use outside a sandboxed environment! "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -187,7 +187,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
215
docs/extras/integrations/tools/google_drive.ipynb
Normal file
@@ -0,0 +1,215 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Drive\n",
|
||||
"\n",
|
||||
"This notebook walks through connecting a LangChain to the `Google Drive API`.\n",
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"1. Create a Google Cloud project or use an existing project\n",
|
||||
"1. Enable the [Google Drive API](https://console.cloud.google.com/flows/enableapi?apiid=drive.googleapis.com)\n",
|
||||
"1. [Authorize credentials for desktop app](https://developers.google.com/drive/api/quickstart/python#authorize_credentials_for_a_desktop_application)\n",
|
||||
"1. `pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib`\n",
|
||||
"\n",
|
||||
"## Instructions for retrieving your Google Docs data\n",
|
||||
"By default, the `GoogleDriveTools` and `GoogleDriveWrapper` expects the `credentials.json` file to be `~/.credentials/credentials.json`, but this is configurable using the `GOOGLE_ACCOUNT_FILE` environment variable. \n",
|
||||
"The location of `token.json` use the same directory (or use the parameter `token_path`). Note that `token.json` will be created automatically the first time you use the tool.\n",
|
||||
"\n",
|
||||
"`GoogleDriveSearchTool` can retrieve a selection of files with some requests. \n",
|
||||
"\n",
|
||||
"By default, If you use a `folder_id`, all the files inside this folder can be retrieved to `Document`, if the name match the query.\n"
|
||||
]
|
||||
},
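As a small illustrative sketch of the configuration described above (the path below is an assumption; point it at wherever you keep your credentials):

```python
import os

# Override the default ~/.credentials/credentials.json location.
# The path is illustrative; use your actual credentials.json location.
os.environ["GOOGLE_ACCOUNT_FILE"] = "/path/to/credentials.json"
```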
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can obtain your folder and document id from the URL:\n",
|
||||
"* Folder: https://drive.google.com/drive/u/0/folders/1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5 -> folder id is `\"1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5\"`\n",
|
||||
"* Document: https://docs.google.com/document/d/1bfaMQ18_i56204VaQDVeAFpqEijJTgvurupdEDiaUQw/edit -> document id is `\"1bfaMQ18_i56204VaQDVeAFpqEijJTgvurupdEDiaUQw\"`\n",
|
||||
"\n",
|
||||
"The special value `root` is for your personal home."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"folder_id=\"root\"\n",
|
||||
"#folder_id='1yucgL9WGgWZdM1TOuKkeghlPizuzMYb5'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"By default, all files with these mime-type can be converted to `Document`.\n",
|
||||
"- text/text\n",
|
||||
"- text/plain\n",
|
||||
"- text/html\n",
|
||||
"- text/csv\n",
|
||||
"- text/markdown\n",
|
||||
"- image/png\n",
|
||||
"- image/jpeg\n",
|
||||
"- application/epub+zip\n",
|
||||
"- application/pdf\n",
|
||||
"- application/rtf\n",
|
||||
"- application/vnd.google-apps.document (GDoc)\n",
|
||||
"- application/vnd.google-apps.presentation (GSlide)\n",
|
||||
"- application/vnd.google-apps.spreadsheet (GSheet)\n",
|
||||
"- application/vnd.google.colaboratory (Notebook colab)\n",
|
||||
"- application/vnd.openxmlformats-officedocument.presentationml.presentation (PPTX)\n",
|
||||
"- application/vnd.openxmlformats-officedocument.wordprocessingml.document (DOCX)\n",
|
||||
"\n",
|
||||
"It's possible to update or customize this. See the documentation of `GoogleDriveAPIWrapper`.\n",
|
||||
"\n",
|
||||
"But, the corresponding packages must installed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install unstructured"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.utilities.google_drive import GoogleDriveAPIWrapper\n",
|
||||
"from langchain.tools.google_drive.tool import GoogleDriveSearchTool\n",
|
||||
"\n",
|
||||
"# By default, search only in the filename.\n",
|
||||
"tool = GoogleDriveSearchTool(\n",
|
||||
" api_wrapper=GoogleDriveAPIWrapper(\n",
|
||||
" folder_id=folder_id,\n",
|
||||
" num_results=2,\n",
|
||||
" template=\"gdrive-query-in-folder\", # Search in the body of documents\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"logging.basicConfig(level=logging.INFO)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tool.run(\"machine learning\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tool.description"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents import load_tools\n",
|
||||
"tools = load_tools([\"google-drive-search\"],\n",
|
||||
" folder_id=folder_id,\n",
|
||||
" template=\"gdrive-query-in-folder\",\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use within an Agent"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import OpenAI\n",
|
||||
"from langchain.agents import initialize_agent, AgentType\n",
|
||||
"llm = OpenAI(temperature=0)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools=tools,\n",
|
||||
" llm=llm,\n",
|
||||
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent.run(\n",
|
||||
" \"Search in google drive, who is 'Yann LeCun' ?\"\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -5,32 +5,35 @@
|
||||
"id": "dc23c48e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Google Serper API\n",
|
||||
"# Google Serper\n",
|
||||
"\n",
|
||||
"This notebook goes over how to use the Google Serper component to search the web. First you need to sign up for a free account at [serper.dev](https://serper.dev) and get your api key."
|
||||
"This notebook goes over how to use the `Google Serper` component to search the web. First you need to sign up for a free account at [serper.dev](https://serper.dev) and get your api key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "a8acfb24",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:56:29.336521Z",
|
||||
"start_time": "2023-05-04T00:56:29.334173Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
},
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import pprint\n",
|
||||
"\n",
|
||||
"os.environ[\"SERPER_API_KEY\"] = \"\""
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
},
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:56:29.336521Z",
|
||||
"start_time": "2023-05-04T00:56:29.334173Z"
|
||||
}
|
||||
},
|
||||
"id": "a8acfb24"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -75,7 +78,9 @@
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "'Barack Hussein Obama II'"
|
||||
"text/plain": [
|
||||
"'Barack Hussein Obama II'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
@@ -88,33 +93,41 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f1c6c22",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## As part of a Self Ask With Search Chain"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "1f1c6c22"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\""
|
||||
],
|
||||
"id": "c1b5edd7",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:14.311773Z",
|
||||
"start_time": "2023-05-04T00:54:14.304389Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"id": "c1b5edd7"
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ[\"OPENAI_API_KEY\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "a8ccea61",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -135,7 +148,9 @@
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "'El Palmar, Spain'"
|
||||
"text/plain": [
|
||||
"'El Palmar, Spain'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
@@ -164,26 +179,34 @@
|
||||
"self_ask_with_search.run(\n",
|
||||
" \"What is the hometown of the reigning men's U.S. Open champion?\"\n",
|
||||
")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "a8ccea61"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3aee3682",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Obtaining results with metadata\n",
|
||||
"If you would also like to obtain the results in a structured way including metadata. For this we will be using the `results` method of the wrapper."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "3aee3682"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "073c3fc5",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:22.863413Z",
|
||||
"start_time": "2023-05-04T00:54:20.827395Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
},
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -344,33 +367,31 @@
|
||||
"search = GoogleSerperAPIWrapper()\n",
|
||||
"results = search.results(\"Apple Inc.\")\n",
|
||||
"pprint.pp(results)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"pycharm": {
|
||||
"is_executing": true
|
||||
},
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:22.863413Z",
|
||||
"start_time": "2023-05-04T00:54:20.827395Z"
|
||||
}
|
||||
},
|
||||
"id": "073c3fc5"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b402c308",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Searching for Google Images\n",
|
||||
"We can also query Google Images using this wrapper. For example:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "b402c308"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "7fb2b7e2",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:27.879867Z",
|
||||
"start_time": "2023-05-04T00:54:26.380022Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -501,30 +522,31 @@
|
||||
"search = GoogleSerperAPIWrapper(type=\"images\")\n",
|
||||
"results = search.results(\"Lion\")\n",
|
||||
"pprint.pp(results)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:27.879867Z",
|
||||
"start_time": "2023-05-04T00:54:26.380022Z"
|
||||
}
|
||||
},
|
||||
"id": "7fb2b7e2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "85a3bed3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Searching for Google News\n",
|
||||
"We can also query Google News using this wrapper. For example:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "85a3bed3"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "afc48b39",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:34.984087Z",
|
||||
"start_time": "2023-05-04T00:54:33.369231Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -630,29 +652,30 @@
|
||||
"search = GoogleSerperAPIWrapper(type=\"news\")\n",
|
||||
"results = search.results(\"Tesla Inc.\")\n",
|
||||
"pprint.pp(results)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:34.984087Z",
|
||||
"start_time": "2023-05-04T00:54:33.369231Z"
|
||||
}
|
||||
},
|
||||
"id": "afc48b39"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d42ee7b5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to only receive news articles published in the last hour, you can do the following:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "d42ee7b5"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "8e3824cb",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:41.786864Z",
|
||||
"start_time": "2023-05-04T00:54:40.691905Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -701,18 +724,12 @@
|
||||
"search = GoogleSerperAPIWrapper(type=\"news\", tbs=\"qdr:h\")\n",
|
||||
"results = search.results(\"Tesla Inc.\")\n",
|
||||
"pprint.pp(results)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:54:41.786864Z",
|
||||
"start_time": "2023-05-04T00:54:40.691905Z"
|
||||
}
|
||||
},
|
||||
"id": "8e3824cb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3f13e9f9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Some examples of the `tbs` parameter:\n",
|
||||
"\n",
|
||||
@@ -730,26 +747,31 @@
|
||||
"`qdr:m2` (past 2 years)\n",
|
||||
"\n",
|
||||
"For all supported filters simply go to [Google Search](https://google.com), search for something, click on \"Tools\", add your date filter and check the URL for \"tbs=\".\n"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "3f13e9f9"
|
||||
]
|
||||
},
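As a short sketch tying these filters back to the wrapper (mirroring the news example shown earlier; the query string is arbitrary):

```python
from langchain.utilities import GoogleSerperAPIWrapper

# Restrict news results to the past week with a tbs filter from the list above.
search = GoogleSerperAPIWrapper(type="news", tbs="qdr:w")
results = search.results("Tesla Inc.")
```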
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "38d4402c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Searching for Google Places\n",
|
||||
"We can also query Google Places using this wrapper. For example:"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "38d4402c"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "e7881203",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:56:07.271164Z",
|
||||
"start_time": "2023-05-04T00:56:05.645847Z"
|
||||
},
|
||||
"collapsed": false,
|
||||
"jupyter": {
|
||||
"outputs_hidden": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -858,15 +880,7 @@
|
||||
"search = GoogleSerperAPIWrapper(type=\"places\")\n",
|
||||
"results = search.results(\"Italian restaurants in Upper East Side\")\n",
|
||||
"pprint.pp(results)"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-05-04T00:56:07.271164Z",
|
||||
"start_time": "2023-05-04T00:56:05.645847Z"
|
||||
}
|
||||
},
|
||||
"id": "e7881203"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -885,9 +899,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -4,17 +4,17 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# GraphQL\n",
|
||||
"\n",
|
||||
"# GraphQL tool\n",
|
||||
"This Jupyter Notebook demonstrates how to use the BaseGraphQLTool component with an Agent.\n",
|
||||
">[GraphQL](https://graphql.org/) is a query language for APIs and a runtime for executing those queries against your data. `GraphQL` provides a complete and understandable description of the data in your API, gives clients the power to ask for exactly what they need and nothing more, makes it easier to evolve APIs over time, and enables powerful developer tools.\n",
|
||||
"\n",
|
||||
"GraphQL is a query language for APIs and a runtime for executing those queries against your data. GraphQL provides a complete and understandable description of the data in your API, gives clients the power to ask for exactly what they need and nothing more, makes it easier to evolve APIs over time, and enables powerful developer tools.\n",
|
||||
"By including a `BaseGraphQLTool` in the list of tools provided to an Agent, you can grant your Agent the ability to query data from GraphQL APIs for any purposes you need.\n",
|
||||
"\n",
|
||||
"By including a BaseGraphQLTool in the list of tools provided to an Agent, you can grant your Agent the ability to query data from GraphQL APIs for any purposes you need.\n",
|
||||
"This Jupyter Notebook demonstrates how to use the `GraphQLAPIWrapper` component with an Agent.\n",
|
||||
"\n",
|
||||
"In this example, we'll be using the public Star Wars GraphQL API available at the following endpoint: https://swapi-graphql.netlify.app/.netlify/functions/index.\n",
|
||||
"In this example, we'll be using the public `Star Wars GraphQL API` available at the following endpoint: https://swapi-graphql.netlify.app/.netlify/functions/index.\n",
|
||||
"\n",
|
||||
"First, you need to install httpx and gql Python packages."
|
||||
"First, you need to install `httpx` and `gql` Python packages."
|
||||
]
|
||||
},
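For orientation, here is a minimal sketch of exposing this endpoint to an Agent (the "graphql" tool name and `graphql_endpoint` parameter follow LangChain's `load_tools` conventions; treat them as assumptions here):

```python
from langchain.agents import load_tools

# Expose the public Star Wars GraphQL endpoint as an agent tool.
# Tool name and parameter are assumed from load_tools conventions.
tools = load_tools(
    ["graphql"],
    graphql_endpoint="https://swapi-graphql.netlify.app/.netlify/functions/index",
)
```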
|
||||
{
|
||||
@@ -131,7 +131,7 @@
|
||||
"hash": "f85209c3c4c190dca7367d6a1e623da50a9a4392fd53313a7cf9d4bda9c4b85b"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.9.16 ('langchain')",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -145,10 +145,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
"id": "40a27d3c-4e5c-4b96-b290-4c49d4fd7219",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## HuggingFace Tools\n",
|
||||
"# HuggingFace Hub Tools\n",
|
||||
"\n",
|
||||
"[Huggingface Tools](https://huggingface.co/docs/transformers/v4.29.0/en/custom_tools) supporting text I/O can be\n",
|
||||
">[Huggingface Tools](https://huggingface.co/docs/transformers/v4.29.0/en/custom_tools) that supporting text I/O can be\n",
|
||||
"loaded directly using the `load_huggingface_tool` function."
|
||||
]
|
||||
},
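A minimal sketch of that entry point (the repo id below is illustrative, not prescribed by this notebook):

```python
from langchain.agents import load_huggingface_tool

# Load a text-I/O tool from the Hugging Face Hub.
# The repo id is illustrative; any Hub tool supporting text I/O should work.
tool = load_huggingface_tool("lysandre/hf-model-downloads")
print(tool.name, tool.description)
```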
|
||||
@@ -94,7 +94,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||