mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-04 16:20:16 +00:00
Compare commits
298 Commits
langchain=
...
wfh/nowarn
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a028e2d092 | ||
|
|
67b6e6c2e3 | ||
|
|
6247259438 | ||
|
|
0091947efd | ||
|
|
e958f76160 | ||
|
|
3981d736df | ||
|
|
fb1d67edf6 | ||
|
|
4f347cbcb9 | ||
|
|
4591bc0b01 | ||
|
|
f535e8a99e | ||
|
|
766b650fdc | ||
|
|
9daff60698 | ||
|
|
c8be0a9f70 | ||
|
|
f4b3c90886 | ||
|
|
b71ae52e65 | ||
|
|
39c44817ae | ||
|
|
4feda41ab6 | ||
|
|
71c2ec6782 | ||
|
|
628574b9c2 | ||
|
|
0bc3845e1e | ||
|
|
a78843bb77 | ||
|
|
10a2ce2a26 | ||
|
|
d457d7d121 | ||
|
|
b002702af6 | ||
|
|
34d0417eb5 | ||
|
|
e7d6b25653 | ||
|
|
55fd2e2158 | ||
|
|
be27e1787f | ||
|
|
f878df404f | ||
|
|
60cf49a618 | ||
|
|
e37caa9b9a | ||
|
|
3e296e39c8 | ||
|
|
d40bdd6257 | ||
|
|
8a71f1b41b | ||
|
|
8e3e532e7d | ||
|
|
12e490ea56 | ||
|
|
498a482e76 | ||
|
|
d324fd1821 | ||
|
|
4bd005adb6 | ||
|
|
e01c6789c4 | ||
|
|
dd2d094adc | ||
|
|
6b98207eda | ||
|
|
c5bf114c0f | ||
|
|
015ab91b83 | ||
|
|
5a3aaae6dc | ||
|
|
75c3c81b8c | ||
|
|
0f7b8adddf | ||
|
|
09c0823c3a | ||
|
|
32f5147523 | ||
|
|
4255a30f20 | ||
|
|
49dea06af1 | ||
|
|
937b3904eb | ||
|
|
bda3becbe7 | ||
|
|
f6e6a17878 | ||
|
|
c1bd4e05bc | ||
|
|
a2e90a5a43 | ||
|
|
a06818a654 | ||
|
|
df98552b6f | ||
|
|
b83f1eb0d5 | ||
|
|
9f0c76bf89 | ||
|
|
01ecd0acba | ||
|
|
1fd1c1dca5 | ||
|
|
253ceca76a | ||
|
|
e18511bb22 | ||
|
|
34da8be60b | ||
|
|
b297af5482 | ||
|
|
4cdaca67dc | ||
|
|
d72a08a60d | ||
|
|
8eb63a609e | ||
|
|
5150ec3a04 | ||
|
|
2b4fbcb4b4 | ||
|
|
eb3870e9d8 | ||
|
|
75ae585deb | ||
|
|
b7c070d437 | ||
|
|
60b65528c5 | ||
|
|
2ef9d12372 | ||
|
|
6910b0b3aa | ||
|
|
831708beb7 | ||
|
|
a114255b82 | ||
|
|
6f68c8d6ab | ||
|
|
8afbab4cf6 | ||
|
|
66e30efa61 | ||
|
|
ba167dc158 | ||
|
|
44f69063b1 | ||
|
|
f18b77fd59 | ||
|
|
966b408634 | ||
|
|
bd261456f6 | ||
|
|
ec8ffc8f40 | ||
|
|
2494cecabf | ||
|
|
df632b8cde | ||
|
|
1050e890c6 | ||
|
|
c4779f5b9c | ||
|
|
0a99935794 | ||
|
|
63aba3fe5b | ||
|
|
dc80be5efe | ||
|
|
ab29ee79a3 | ||
|
|
1d3f7231b8 | ||
|
|
a58d4ba340 | ||
|
|
d178fb9dc3 | ||
|
|
414154fa59 | ||
|
|
94c9cb7321 | ||
|
|
012929551c | ||
|
|
63c483ea01 | ||
|
|
eec7bb4f51 | ||
|
|
f0f125dac7 | ||
|
|
f4196f1fb8 | ||
|
|
d0ad713937 | ||
|
|
ddd7919f6a | ||
|
|
493e474063 | ||
|
|
4a812e3193 | ||
|
|
5f5e8c9a60 | ||
|
|
d00176e523 | ||
|
|
dc51cc5690 | ||
|
|
27690506d0 | ||
|
|
4029f5650c | ||
|
|
10e6725a7e | ||
|
|
967b6f21f6 | ||
|
|
4a78be7861 | ||
|
|
d6c180996f | ||
|
|
93dcc47463 | ||
|
|
27def6bddb | ||
|
|
b4e3bdb714 | ||
|
|
f82c3f622a | ||
|
|
d55d99222b | ||
|
|
0f6217f507 | ||
|
|
8645a49f31 | ||
|
|
a4ef830480 | ||
|
|
b1aed44540 | ||
|
|
f4ffd692a3 | ||
|
|
e0bbb81d04 | ||
|
|
d5b548b4ce | ||
|
|
0478f7f5e4 | ||
|
|
9d08369442 | ||
|
|
6bc451b942 | ||
|
|
2b15518c5f | ||
|
|
b6df3405fb | ||
|
|
089f5e6cad | ||
|
|
35e2230f56 | ||
|
|
24155aa1ac | ||
|
|
ebbe609193 | ||
|
|
f679ed72ca | ||
|
|
2907ab2297 | ||
|
|
06f8bd9946 | ||
|
|
252f0877d1 | ||
|
|
217a915b29 | ||
|
|
056c7c2983 | ||
|
|
1adc161642 | ||
|
|
deb27d8970 | ||
|
|
5efd0fe9ae | ||
|
|
1c9917dfa2 | ||
|
|
ccff1ba8b8 | ||
|
|
53ee5770d3 | ||
|
|
8626abf8b5 | ||
|
|
1af8456a2c | ||
|
|
0a3500808d | ||
|
|
ee8a585791 | ||
|
|
e77eeee6ee | ||
|
|
9927a4866d | ||
|
|
420534c8ca | ||
|
|
794f28d4e2 | ||
|
|
f28ae20b81 | ||
|
|
9f0eda6a18 | ||
|
|
472527166f | ||
|
|
074fa0db73 | ||
|
|
4fd1efc48f | ||
|
|
aa2722cbe2 | ||
|
|
a82c0533f2 | ||
|
|
bc60cddc1b | ||
|
|
43deed2a95 | ||
|
|
9cd608efb3 | ||
|
|
fd546196ef | ||
|
|
6dd9f053e3 | ||
|
|
ca9dcee940 | ||
|
|
dadb6f1445 | ||
|
|
b6f0174bb9 | ||
|
|
c3ced4c6ce | ||
|
|
bd6c31617e | ||
|
|
6e57aa7c36 | ||
|
|
a2b4c33bd6 | ||
|
|
4825dc0d76 | ||
|
|
02300471be | ||
|
|
66b7206ab6 | ||
|
|
c81c77b465 | ||
|
|
3b7437d184 | ||
|
|
91ea4b7449 | ||
|
|
652b3fa4a4 | ||
|
|
7040013140 | ||
|
|
dc7423e88f | ||
|
|
25f2e25be1 | ||
|
|
786ef021a3 | ||
|
|
429a0ee7fd | ||
|
|
da9281feb2 | ||
|
|
c6ece6a96d | ||
|
|
86355640c3 | ||
|
|
b9f65e5038 | ||
|
|
30fb345342 | ||
|
|
98779797fe | ||
|
|
663638d6a8 | ||
|
|
2f209d84fa | ||
|
|
c72e522e96 | ||
|
|
bf5193bb99 | ||
|
|
11adc09e02 | ||
|
|
6b32810b68 | ||
|
|
3da2713172 | ||
|
|
425f6ffa5b | ||
|
|
15a36dd0a2 | ||
|
|
249945a572 | ||
|
|
59b8850909 | ||
|
|
4828c441a7 | ||
|
|
725e4912ae | ||
|
|
d895db11d6 | ||
|
|
d77c7c4236 | ||
|
|
7b1a132aff | ||
|
|
df99b832a7 | ||
|
|
803eba3163 | ||
|
|
6e9a8b188f | ||
|
|
f337408b0f | ||
|
|
0b4608f71e | ||
|
|
a4086119f8 | ||
|
|
b4c12346cc | ||
|
|
dff83cce66 | ||
|
|
130e80b60f | ||
|
|
09fbce13c5 | ||
|
|
0ba125c3cd | ||
|
|
28e0958ff4 | ||
|
|
a2e9910268 | ||
|
|
7e7fcf5b1f | ||
|
|
04dd8d3b0a | ||
|
|
63d84e93b9 | ||
|
|
4d28c70000 | ||
|
|
46a47710b0 | ||
|
|
35ebd2620c | ||
|
|
23c9aba575 | ||
|
|
1331e8589c | ||
|
|
7882d5c978 | ||
|
|
70677202c7 | ||
|
|
78403a3746 | ||
|
|
5d10139fc7 | ||
|
|
d283f452cc | ||
|
|
264ab96980 | ||
|
|
267855b3c1 | ||
|
|
71c0698ee4 | ||
|
|
a72fddbf8d | ||
|
|
2c798622cd | ||
|
|
3abf1b6905 | ||
|
|
1028af17e7 | ||
|
|
061ed250f6 | ||
|
|
293a4a78de | ||
|
|
ffa0c838d8 | ||
|
|
111c7df117 | ||
|
|
6eb42c657e | ||
|
|
88a9a6a758 | ||
|
|
8f33fce871 | ||
|
|
423d286546 | ||
|
|
e572521f2a | ||
|
|
63ddf0afb4 | ||
|
|
4bcd2aad6c | ||
|
|
427a04151c | ||
|
|
d166967003 | ||
|
|
a74e466507 | ||
|
|
a02a09c973 | ||
|
|
41dfad5104 | ||
|
|
e7b95e0802 | ||
|
|
16bd0697dc | ||
|
|
6890daa90c | ||
|
|
335894893b | ||
|
|
c5cb52a3c6 | ||
|
|
cda79dbb6c | ||
|
|
cea3f72485 | ||
|
|
02c35da445 | ||
|
|
208042e0f2 | ||
|
|
f5da0d6d87 | ||
|
|
2c3e3dc6b1 | ||
|
|
7de62abc91 | ||
|
|
f9a11a9197 | ||
|
|
1dcee68cb8 | ||
|
|
e81ddb32a6 | ||
|
|
57747892ce | ||
|
|
679843abb0 | ||
|
|
73570873ab | ||
|
|
2ae76cecde | ||
|
|
4305f78e40 | ||
|
|
64ccddf3cb | ||
|
|
dd8e4cd020 | ||
|
|
0de0cd2d31 | ||
|
|
8e2316b8c2 | ||
|
|
c2538e7834 | ||
|
|
acba38a18e | ||
|
|
22c1a4041b | ||
|
|
4797b806c2 | ||
|
|
7061869aec | ||
|
|
98c22e9082 | ||
|
|
c04d95b962 | ||
|
|
54e9ea433a | ||
|
|
fe1820cdaf | ||
|
|
71c0564c9f | ||
|
|
c65e48996c | ||
|
|
d7688a4328 |
7
.github/scripts/check_diff.py
vendored
7
.github/scripts/check_diff.py
vendored
@@ -68,6 +68,13 @@ def dependents_graph() -> dict:
|
||||
|
||||
if "langchain" in dep:
|
||||
dependents[dep].add(pkg_dir)
|
||||
|
||||
# remove huggingface from dependents because of CI instability
|
||||
# specifically in huggingface jobs
|
||||
# https://github.com/langchain-ai/langchain/issues/25558
|
||||
for k in dependents:
|
||||
if "libs/partners/huggingface" in dependents[k]:
|
||||
dependents[k].remove("libs/partners/huggingface")
|
||||
return dependents
|
||||
|
||||
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -172,6 +172,8 @@ docs/api_reference/*/
|
||||
!docs/api_reference/_static/
|
||||
!docs/api_reference/templates/
|
||||
!docs/api_reference/themes/
|
||||
!docs/api_reference/_extensions/
|
||||
!docs/api_reference/scripts/
|
||||
docs/docs/build
|
||||
docs/docs/node_modules
|
||||
docs/docs/yarn.lock
|
||||
|
||||
@@ -52,7 +52,7 @@ Now:
|
||||
|
||||
`from langchain_experimental.sql import SQLDatabaseChain`
|
||||
|
||||
Alternatively, if you are just interested in using the query generation part of the SQL chain, you can check out [`create_sql_query_chain`](https://github.com/langchain-ai/langchain/blob/master/docs/extras/use_cases/tabular/sql_query.ipynb)
|
||||
Alternatively, if you are just interested in using the query generation part of the SQL chain, you can check out this [`SQL question-answering tutorial`](https://python.langchain.com/v0.2/docs/tutorials/sql_qa/#convert-question-to-sql-query)
|
||||
|
||||
`from langchain.chains import create_sql_query_chain`
|
||||
|
||||
|
||||
5
Makefile
5
Makefile
@@ -31,6 +31,7 @@ docs_linkcheck:
|
||||
api_docs_build:
|
||||
poetry run python docs/api_reference/create_api_rst.py
|
||||
cd docs/api_reference && poetry run make html
|
||||
poetry run python docs/api_reference/scripts/custom_formatter.py docs/api_reference/_build/html/
|
||||
|
||||
API_PKG ?= text-splitters
|
||||
|
||||
@@ -38,12 +39,14 @@ api_docs_quick_preview:
|
||||
poetry run pip install "pydantic<2"
|
||||
poetry run python docs/api_reference/create_api_rst.py $(API_PKG)
|
||||
cd docs/api_reference && poetry run make html
|
||||
open docs/api_reference/_build/html/$(shell echo $(API_PKG) | sed 's/-/_/g')_api_reference.html
|
||||
poetry run python docs/api_reference/scripts/custom_formatter.py docs/api_reference/_build/html/
|
||||
open docs/api_reference/_build/html/reference.html
|
||||
|
||||
## api_docs_clean: Clean the API Reference documentation build artifacts.
|
||||
api_docs_clean:
|
||||
find ./docs/api_reference -name '*_api_reference.rst' -delete
|
||||
git clean -fdX ./docs/api_reference
|
||||
rm docs/api_reference/index.md
|
||||
|
||||
|
||||
## api_docs_linkcheck: Run linkchecker on the API Reference documentation.
|
||||
|
||||
20
README.md
20
README.md
@@ -14,18 +14,20 @@
|
||||
|
||||
Looking for the JS/TS library? Check out [LangChain.js](https://github.com/langchain-ai/langchainjs).
|
||||
|
||||
To help you ship LangChain apps to production faster, check out [LangSmith](https://smith.langchain.com).
|
||||
[LangSmith](https://smith.langchain.com) is a unified developer platform for building, testing, and monitoring LLM applications.
|
||||
To help you ship LangChain apps to production faster, check out [LangSmith](https://smith.langchain.com).
|
||||
[LangSmith](https://smith.langchain.com) is a unified developer platform for building, testing, and monitoring LLM applications.
|
||||
Fill out [this form](https://www.langchain.com/contact-sales) to speak with our sales team.
|
||||
|
||||
## Quick Install
|
||||
|
||||
With pip:
|
||||
|
||||
```bash
|
||||
pip install langchain
|
||||
```
|
||||
|
||||
With conda:
|
||||
|
||||
```bash
|
||||
conda install langchain -c conda-forge
|
||||
```
|
||||
@@ -36,12 +38,13 @@ conda install langchain -c conda-forge
|
||||
|
||||
For these applications, LangChain simplifies the entire application lifecycle:
|
||||
|
||||
- **Open-source libraries**: Build your applications using LangChain's open-source [building blocks](https://python.langchain.com/v0.2/docs/concepts#langchain-expression-language-lcel), [components](https://python.langchain.com/v0.2/docs/concepts), and [third-party integrations](https://python.langchain.com/v0.2/docs/integrations/platforms/).
|
||||
Use [LangGraph](/docs/concepts/#langgraph) to build stateful agents with first-class streaming and human-in-the-loop support.
|
||||
- **Open-source libraries**: Build your applications using LangChain's open-source [building blocks](https://python.langchain.com/v0.2/docs/concepts#langchain-expression-language-lcel), [components](https://python.langchain.com/v0.2/docs/concepts), and [third-party integrations](https://python.langchain.com/v0.2/docs/integrations/platforms/).
|
||||
Use [LangGraph](/docs/concepts/#langgraph) to build stateful agents with first-class streaming and human-in-the-loop support.
|
||||
- **Productionization**: Inspect, monitor, and evaluate your apps with [LangSmith](https://docs.smith.langchain.com/) so that you can constantly optimize and deploy with confidence.
|
||||
- **Deployment**: Turn your LangGraph applications into production-ready APIs and Assistants with [LangGraph Cloud](https://langchain-ai.github.io/langgraph/cloud/).
|
||||
|
||||
### Open-source libraries
|
||||
|
||||
- **`langchain-core`**: Base abstractions and LangChain Expression Language.
|
||||
- **`langchain-community`**: Third party integrations.
|
||||
- Some integrations have been further split into **partner packages** that only rely on **`langchain-core`**. Examples include **`langchain_openai`** and **`langchain_anthropic`**.
|
||||
@@ -49,9 +52,11 @@ Use [LangGraph](/docs/concepts/#langgraph) to build stateful agents with first-c
|
||||
- **[`LangGraph`](https://langchain-ai.github.io/langgraph/)**: A library for building robust and stateful multi-actor applications with LLMs by modeling steps as edges and nodes in a graph. Integrates smoothly with LangChain, but can be used without it.
|
||||
|
||||
### Productionization:
|
||||
|
||||
- **[LangSmith](https://docs.smith.langchain.com/)**: A developer platform that lets you debug, test, evaluate, and monitor chains built on any LLM framework and seamlessly integrates with LangChain.
|
||||
|
||||
### Deployment:
|
||||
|
||||
- **[LangGraph Cloud](https://langchain-ai.github.io/langgraph/cloud/)**: Turn your LangGraph applications into production-ready APIs and Assistants.
|
||||
|
||||

|
||||
@@ -76,15 +81,17 @@ Use [LangGraph](/docs/concepts/#langgraph) to build stateful agents with first-c
|
||||
And much more! Head to the [Tutorials](https://python.langchain.com/v0.2/docs/tutorials/) section of the docs for more.
|
||||
|
||||
## 🚀 How does LangChain help?
|
||||
|
||||
The main value props of the LangChain libraries are:
|
||||
|
||||
1. **Components**: composable building blocks, tools and integrations for working with language models. Components are modular and easy-to-use, whether you are using the rest of the LangChain framework or not
|
||||
2. **Off-the-shelf chains**: built-in assemblages of components for accomplishing higher-level tasks
|
||||
|
||||
Off-the-shelf chains make it easy to get started. Components make it easy to customize existing chains and build new ones.
|
||||
Off-the-shelf chains make it easy to get started. Components make it easy to customize existing chains and build new ones.
|
||||
|
||||
## LangChain Expression Language (LCEL)
|
||||
|
||||
LCEL is the foundation of many of LangChain's components, and is a declarative way to compose chains. LCEL was designed from day 1 to support putting prototypes in production, with no code changes, from the simplest “prompt + LLM” chain to the most complex chains.
|
||||
LCEL is a key part of LangChain, allowing you to build and organize chains of processes in a straightforward, declarative manner. It was designed to support taking prototypes directly into production without needing to alter any code. This means you can use LCEL to set up everything from basic "prompt + LLM" setups to intricate, multi-step workflows.
|
||||
|
||||
- **[Overview](https://python.langchain.com/v0.2/docs/concepts/#langchain-expression-language-lcel)**: LCEL and its benefits
|
||||
- **[Interface](https://python.langchain.com/v0.2/docs/concepts/#runnable-interface)**: The standard Runnable interface for LCEL objects
|
||||
@@ -123,7 +130,6 @@ Please see [here](https://python.langchain.com) for full documentation, which in
|
||||
- [🦜🕸️ LangGraph](https://langchain-ai.github.io/langgraph/): Create stateful, multi-actor applications with LLMs. Integrates smoothly with LangChain, but can be used without it.
|
||||
- [🦜🏓 LangServe](https://python.langchain.com/docs/langserve): Deploy LangChain runnables and chains as REST APIs.
|
||||
|
||||
|
||||
## 💁 Contributing
|
||||
|
||||
As an open-source project in a rapidly developing field, we are extremely open to contributions, whether it be in the form of a new feature, improved infrastructure, or better documentation.
|
||||
|
||||
@@ -39,7 +39,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install langchain langchain-chroma unstructured[all-docs] pydantic lxml langchainhub"
|
||||
"! pip install langchain langchain-chroma \"unstructured[all-docs]\" pydantic lxml langchainhub"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -59,7 +59,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install langchain langchain-chroma unstructured[all-docs] pydantic lxml"
|
||||
"! pip install langchain langchain-chroma \"unstructured[all-docs]\" pydantic lxml"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -59,7 +59,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"! pip install langchain langchain-chroma unstructured[all-docs] pydantic lxml"
|
||||
"! pip install langchain langchain-chroma \"unstructured[all-docs]\" pydantic lxml"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -166,7 +166,7 @@
|
||||
"source": [
|
||||
"### SQL Database Agent example\n",
|
||||
"\n",
|
||||
"This example demonstrates the use of the [SQL Database Agent](/docs/integrations/toolkits/sql_database.html) for answering questions over a Databricks database."
|
||||
"This example demonstrates the use of the [SQL Database Agent](/docs/integrations/tools/sql_database) for answering questions over a Databricks database."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -13,7 +13,12 @@ OUTPUT_NEW_DOCS_DIR = $(OUTPUT_NEW_DIR)/docs
|
||||
|
||||
PYTHON = .venv/bin/python
|
||||
|
||||
PARTNER_DEPS_LIST := $(shell find ../libs/partners -mindepth 1 -maxdepth 1 -type d -exec test -e "{}/pyproject.toml" \; -print | grep -vE "airbyte|ibm|couchbase" | tr '\n' ' ')
|
||||
PARTNER_DEPS_LIST := $(shell find ../libs/partners -mindepth 1 -maxdepth 1 -type d -exec sh -c ' \
|
||||
for dir; do \
|
||||
if find "$$dir" -maxdepth 1 -type f \( -name "pyproject.toml" -o -name "setup.py" \) | grep -q .; then \
|
||||
echo "$$dir"; \
|
||||
fi \
|
||||
done' sh {} + | grep -vE "airbyte|ibm|couchbase|databricks" | tr '\n' ' ')
|
||||
|
||||
PORT ?= 3001
|
||||
|
||||
@@ -36,12 +41,8 @@ generate-files:
|
||||
cp -r $(SOURCE_DIR)/* $(INTERMEDIATE_DIR)
|
||||
mkdir -p $(INTERMEDIATE_DIR)/templates
|
||||
|
||||
$(PYTHON) scripts/model_feat_table.py $(INTERMEDIATE_DIR)
|
||||
|
||||
$(PYTHON) scripts/tool_feat_table.py $(INTERMEDIATE_DIR)
|
||||
|
||||
$(PYTHON) scripts/document_loader_feat_table.py $(INTERMEDIATE_DIR)
|
||||
|
||||
$(PYTHON) scripts/kv_store_feat_table.py $(INTERMEDIATE_DIR)
|
||||
|
||||
$(PYTHON) scripts/partner_pkg_table.py $(INTERMEDIATE_DIR)
|
||||
@@ -81,7 +82,11 @@ vercel-build: install-vercel-deps build generate-references
|
||||
rm -rf docs
|
||||
mv $(OUTPUT_NEW_DOCS_DIR) docs
|
||||
rm -rf build
|
||||
yarn run docusaurus build
|
||||
mkdir static/api_reference
|
||||
git clone --depth=1 https://github.com/baskaryan/langchain-api-docs-build.git
|
||||
mv langchain-api-docs-build/api_reference_build/html/* static/api_reference/
|
||||
rm -rf langchain-api-docs-build
|
||||
NODE_OPTIONS="--max-old-space-size=5000" yarn run docusaurus build
|
||||
mv build v0.2
|
||||
mkdir build
|
||||
mv v0.2 build
|
||||
|
||||
144
docs/api_reference/_extensions/gallery_directive.py
Normal file
144
docs/api_reference/_extensions/gallery_directive.py
Normal file
@@ -0,0 +1,144 @@
|
||||
"""A directive to generate a gallery of images from structured data.
|
||||
|
||||
Generating a gallery of images that are all the same size is a common
|
||||
pattern in documentation, and this can be cumbersome if the gallery is
|
||||
generated programmatically. This directive wraps this particular use-case
|
||||
in a helper-directive to generate it with a single YAML configuration file.
|
||||
|
||||
It currently exists for maintainers of the pydata-sphinx-theme,
|
||||
but might be abstracted into a standalone package if it proves useful.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, ClassVar, Dict, List
|
||||
|
||||
from docutils import nodes
|
||||
from docutils.parsers.rst import directives
|
||||
from sphinx.application import Sphinx
|
||||
from sphinx.util import logging
|
||||
from sphinx.util.docutils import SphinxDirective
|
||||
from yaml import safe_load
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
TEMPLATE_GRID = """
|
||||
`````{{grid}} {columns}
|
||||
{options}
|
||||
|
||||
{content}
|
||||
|
||||
`````
|
||||
"""
|
||||
|
||||
GRID_CARD = """
|
||||
````{{grid-item-card}} {title}
|
||||
{options}
|
||||
|
||||
{content}
|
||||
````
|
||||
"""
|
||||
|
||||
|
||||
class GalleryGridDirective(SphinxDirective):
|
||||
"""A directive to show a gallery of images and links in a Bootstrap grid.
|
||||
|
||||
The grid can be generated from a YAML file that contains a list of items, or
|
||||
from the content of the directive (also formatted in YAML). Use the parameter
|
||||
"class-card" to add an additional CSS class to all cards. When specifying the grid
|
||||
items, you can use all parameters from "grid-item-card" directive to customize
|
||||
individual cards + ["image", "header", "content", "title"].
|
||||
|
||||
Danger:
|
||||
This directive can only be used in the context of a Myst documentation page as
|
||||
the templates use Markdown flavored formatting.
|
||||
"""
|
||||
|
||||
name = "gallery-grid"
|
||||
has_content = True
|
||||
required_arguments = 0
|
||||
optional_arguments = 1
|
||||
final_argument_whitespace = True
|
||||
option_spec: ClassVar[dict[str, Any]] = {
|
||||
# A class to be added to the resulting container
|
||||
"grid-columns": directives.unchanged,
|
||||
"class-container": directives.unchanged,
|
||||
"class-card": directives.unchanged,
|
||||
}
|
||||
|
||||
def run(self) -> List[nodes.Node]:
|
||||
"""Create the gallery grid."""
|
||||
if self.arguments:
|
||||
# If an argument is given, assume it's a path to a YAML file
|
||||
# Parse it and load it into the directive content
|
||||
path_data_rel = Path(self.arguments[0])
|
||||
path_doc, _ = self.get_source_info()
|
||||
path_doc = Path(path_doc).parent
|
||||
path_data = (path_doc / path_data_rel).resolve()
|
||||
if not path_data.exists():
|
||||
logger.info(f"Could not find grid data at {path_data}.")
|
||||
nodes.text("No grid data found at {path_data}.")
|
||||
return
|
||||
yaml_string = path_data.read_text()
|
||||
else:
|
||||
yaml_string = "\n".join(self.content)
|
||||
|
||||
# Use all the element with an img-bottom key as sites to show
|
||||
# and generate a card item for each of them
|
||||
grid_items = []
|
||||
for item in safe_load(yaml_string):
|
||||
# remove parameters that are not needed for the card options
|
||||
title = item.pop("title", "")
|
||||
|
||||
# build the content of the card using some extra parameters
|
||||
header = f"{item.pop('header')} \n^^^ \n" if "header" in item else ""
|
||||
image = f"}) \n" if "image" in item else ""
|
||||
content = f"{item.pop('content')} \n" if "content" in item else ""
|
||||
|
||||
# optional parameter that influence all cards
|
||||
if "class-card" in self.options:
|
||||
item["class-card"] = self.options["class-card"]
|
||||
|
||||
loc_options_str = "\n".join(f":{k}: {v}" for k, v in item.items()) + " \n"
|
||||
|
||||
card = GRID_CARD.format(
|
||||
options=loc_options_str, content=header + image + content, title=title
|
||||
)
|
||||
grid_items.append(card)
|
||||
|
||||
# Parse the template with Sphinx Design to create an output container
|
||||
# Prep the options for the template grid
|
||||
class_ = "gallery-directive" + f' {self.options.get("class-container", "")}'
|
||||
options = {"gutter": 2, "class-container": class_}
|
||||
options_str = "\n".join(f":{k}: {v}" for k, v in options.items())
|
||||
|
||||
# Create the directive string for the grid
|
||||
grid_directive = TEMPLATE_GRID.format(
|
||||
columns=self.options.get("grid-columns", "1 2 3 4"),
|
||||
options=options_str,
|
||||
content="\n".join(grid_items),
|
||||
)
|
||||
|
||||
# Parse content as a directive so Sphinx Design processes it
|
||||
container = nodes.container()
|
||||
self.state.nested_parse([grid_directive], 0, container)
|
||||
|
||||
# Sphinx Design outputs a container too, so just use that
|
||||
return [container.children[0]]
|
||||
|
||||
|
||||
def setup(app: Sphinx) -> Dict[str, Any]:
|
||||
"""Add custom configuration to sphinx app.
|
||||
|
||||
Args:
|
||||
app: the Sphinx application
|
||||
|
||||
Returns:
|
||||
the 2 parallel parameters set to ``True``.
|
||||
"""
|
||||
app.add_directive("gallery-grid", GalleryGridDirective)
|
||||
|
||||
return {
|
||||
"parallel_read_safe": True,
|
||||
"parallel_write_safe": True,
|
||||
}
|
||||
@@ -1,26 +1,411 @@
|
||||
pre {
|
||||
white-space: break-spaces;
|
||||
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;700&display=swap');
|
||||
|
||||
/*******************************************************************************
|
||||
* master color map. Only the colors that actually differ between light and dark
|
||||
* themes are specified separately.
|
||||
*
|
||||
* To see the full list of colors see https://www.figma.com/file/rUrrHGhUBBIAAjQ82x6pz9/PyData-Design-system---proposal-for-implementation-(2)?node-id=1234%3A765&t=ifcFT1JtnrSshGfi-1
|
||||
*/
|
||||
/**
|
||||
* Function to get items from nested maps
|
||||
*/
|
||||
/* Assign base colors for the PyData theme */
|
||||
:root {
|
||||
--pst-teal-50: #f4fbfc;
|
||||
--pst-teal-100: #e9f6f8;
|
||||
--pst-teal-200: #d0ecf1;
|
||||
--pst-teal-300: #abdde6;
|
||||
--pst-teal-400: #3fb1c5;
|
||||
--pst-teal-500: #0a7d91;
|
||||
--pst-teal-600: #085d6c;
|
||||
--pst-teal-700: #064752;
|
||||
--pst-teal-800: #042c33;
|
||||
--pst-teal-900: #021b1f;
|
||||
--pst-violet-50: #f4eefb;
|
||||
--pst-violet-100: #e0c7ff;
|
||||
--pst-violet-200: #d5b4fd;
|
||||
--pst-violet-300: #b780ff;
|
||||
--pst-violet-400: #9c5ffd;
|
||||
--pst-violet-500: #8045e5;
|
||||
--pst-violet-600: #6432bd;
|
||||
--pst-violet-700: #4b258f;
|
||||
--pst-violet-800: #341a61;
|
||||
--pst-violet-900: #1e0e39;
|
||||
--pst-gray-50: #f9f9fa;
|
||||
--pst-gray-100: #f3f4f5;
|
||||
--pst-gray-200: #e5e7ea;
|
||||
--pst-gray-300: #d1d5da;
|
||||
--pst-gray-400: #9ca4af;
|
||||
--pst-gray-500: #677384;
|
||||
--pst-gray-600: #48566b;
|
||||
--pst-gray-700: #29313d;
|
||||
--pst-gray-800: #222832;
|
||||
--pst-gray-900: #14181e;
|
||||
--pst-pink-50: #fcf8fd;
|
||||
--pst-pink-100: #fcf0fa;
|
||||
--pst-pink-200: #f8dff5;
|
||||
--pst-pink-300: #f3c7ee;
|
||||
--pst-pink-400: #e47fd7;
|
||||
--pst-pink-500: #c132af;
|
||||
--pst-pink-600: #912583;
|
||||
--pst-pink-700: #6e1c64;
|
||||
--pst-pink-800: #46123f;
|
||||
--pst-pink-900: #2b0b27;
|
||||
--pst-foundation-white: #ffffff;
|
||||
--pst-foundation-black: #14181e;
|
||||
--pst-green-10: #f1fdfd;
|
||||
--pst-green-50: #E0F7F6;
|
||||
--pst-green-100: #B3E8E6;
|
||||
--pst-green-200: #80D6D3;
|
||||
--pst-green-300: #4DC4C0;
|
||||
--pst-green-400: #4FB2AD;
|
||||
--pst-green-500: #287977;
|
||||
--pst-green-600: #246161;
|
||||
--pst-green-700: #204F4F;
|
||||
--pst-green-800: #1C3C3C;
|
||||
--pst-green-900: #0D2427;
|
||||
--pst-lilac-50: #f4eefb;
|
||||
--pst-lilac-100: #DAD6FE;
|
||||
--pst-lilac-200: #BCB2FD;
|
||||
--pst-lilac-300: #9F8BFA;
|
||||
--pst-lilac-400: #7F5CF6;
|
||||
--pst-lilac-500: #6F3AED;
|
||||
--pst-lilac-600: #6028D9;
|
||||
--pst-lilac-700: #5021B6;
|
||||
--pst-lilac-800: #431D95;
|
||||
--pst-lilac-900: #1e0e39;
|
||||
--pst-header-height: 2.5rem;
|
||||
}
|
||||
|
||||
@media (min-width: 1200px) {
|
||||
.container,
|
||||
.container-lg,
|
||||
.container-md,
|
||||
.container-sm,
|
||||
.container-xl {
|
||||
max-width: 2560px !important;
|
||||
}
|
||||
html {
|
||||
--pst-font-family-base: 'Inter';
|
||||
--pst-font-family-heading: 'Inter Tight', sans-serif;
|
||||
}
|
||||
|
||||
#my-component-root *,
|
||||
#headlessui-portal-root * {
|
||||
z-index: 10000;
|
||||
/*******************************************************************************
|
||||
* write the color rules for each theme (light/dark)
|
||||
*/
|
||||
/* NOTE:
|
||||
* Mixins enable us to reuse the same definitions for the different modes
|
||||
* https://sass-lang.com/documentation/at-rules/mixin
|
||||
* something inserts a variable into a CSS selector or property name
|
||||
* https://sass-lang.com/documentation/interpolation
|
||||
*/
|
||||
/* Defaults to light mode if data-theme is not set */
|
||||
html:not([data-theme]) {
|
||||
--pst-color-primary: #287977;
|
||||
--pst-color-primary-bg: #80D6D3;
|
||||
--pst-color-secondary: #6F3AED;
|
||||
--pst-color-secondary-bg: #DAD6FE;
|
||||
--pst-color-accent: #c132af;
|
||||
--pst-color-accent-bg: #f8dff5;
|
||||
--pst-color-info: #276be9;
|
||||
--pst-color-info-bg: #dce7fc;
|
||||
--pst-color-warning: #f66a0a;
|
||||
--pst-color-warning-bg: #f8e3d0;
|
||||
--pst-color-success: #00843f;
|
||||
--pst-color-success-bg: #d6ece1;
|
||||
--pst-color-attention: var(--pst-color-warning);
|
||||
--pst-color-attention-bg: var(--pst-color-warning-bg);
|
||||
--pst-color-danger: #d72d47;
|
||||
--pst-color-danger-bg: #f9e1e4;
|
||||
--pst-color-text-base: #222832;
|
||||
--pst-color-text-muted: #48566b;
|
||||
--pst-color-heading-color: #ffffff;
|
||||
--pst-color-shadow: rgba(0, 0, 0, 0.1);
|
||||
--pst-color-border: #d1d5da;
|
||||
--pst-color-border-muted: rgba(23, 23, 26, 0.2);
|
||||
--pst-color-inline-code: #912583;
|
||||
--pst-color-inline-code-links: #246161;
|
||||
--pst-color-target: #f3cf95;
|
||||
--pst-color-background: #ffffff;
|
||||
--pst-color-on-background: #F4F9F8;
|
||||
--pst-color-surface: #F4F9F8;
|
||||
--pst-color-on-surface: #222832;
|
||||
}
|
||||
html:not([data-theme]) {
|
||||
--pst-color-link: var(--pst-color-primary);
|
||||
--pst-color-link-hover: var(--pst-color-secondary);
|
||||
}
|
||||
html:not([data-theme]) .only-dark,
|
||||
html:not([data-theme]) .only-dark ~ figcaption {
|
||||
display: none !important;
|
||||
}
|
||||
|
||||
table.longtable code {
|
||||
white-space: normal;
|
||||
/* NOTE: @each {...} is like a for-loop
|
||||
* https://sass-lang.com/documentation/at-rules/control/each
|
||||
*/
|
||||
html[data-theme=light] {
|
||||
--pst-color-primary: #287977;
|
||||
--pst-color-primary-bg: #80D6D3;
|
||||
--pst-color-secondary: #6F3AED;
|
||||
--pst-color-secondary-bg: #DAD6FE;
|
||||
--pst-color-accent: #c132af;
|
||||
--pst-color-accent-bg: #f8dff5;
|
||||
--pst-color-info: #276be9;
|
||||
--pst-color-info-bg: #dce7fc;
|
||||
--pst-color-warning: #f66a0a;
|
||||
--pst-color-warning-bg: #f8e3d0;
|
||||
--pst-color-success: #00843f;
|
||||
--pst-color-success-bg: #d6ece1;
|
||||
--pst-color-attention: var(--pst-color-warning);
|
||||
--pst-color-attention-bg: var(--pst-color-warning-bg);
|
||||
--pst-color-danger: #d72d47;
|
||||
--pst-color-danger-bg: #f9e1e4;
|
||||
--pst-color-text-base: #222832;
|
||||
--pst-color-text-muted: #48566b;
|
||||
--pst-color-heading-color: #ffffff;
|
||||
--pst-color-shadow: rgba(0, 0, 0, 0.1);
|
||||
--pst-color-border: #d1d5da;
|
||||
--pst-color-border-muted: rgba(23, 23, 26, 0.2);
|
||||
--pst-color-inline-code: #912583;
|
||||
--pst-color-inline-code-links: #246161;
|
||||
--pst-color-target: #f3cf95;
|
||||
--pst-color-background: #ffffff;
|
||||
--pst-color-on-background: #F4F9F8;
|
||||
--pst-color-surface: #F4F9F8;
|
||||
--pst-color-on-surface: #222832;
|
||||
color-scheme: light;
|
||||
}
|
||||
html[data-theme=light] {
|
||||
--pst-color-link: var(--pst-color-primary);
|
||||
--pst-color-link-hover: var(--pst-color-secondary);
|
||||
}
|
||||
html[data-theme=light] .only-dark,
|
||||
html[data-theme=light] .only-dark ~ figcaption {
|
||||
display: none !important;
|
||||
}
|
||||
|
||||
table.longtable td {
|
||||
max-width: 600px;
|
||||
html[data-theme=dark] {
|
||||
--pst-color-primary: #4FB2AD;
|
||||
--pst-color-primary-bg: #1C3C3C;
|
||||
--pst-color-secondary: #7F5CF6;
|
||||
--pst-color-secondary-bg: #431D95;
|
||||
--pst-color-accent: #e47fd7;
|
||||
--pst-color-accent-bg: #46123f;
|
||||
--pst-color-info: #79a3f2;
|
||||
--pst-color-info-bg: #06245d;
|
||||
--pst-color-warning: #ff9245;
|
||||
--pst-color-warning-bg: #652a02;
|
||||
--pst-color-success: #5fb488;
|
||||
--pst-color-success-bg: #002f17;
|
||||
--pst-color-attention: var(--pst-color-warning);
|
||||
--pst-color-attention-bg: var(--pst-color-warning-bg);
|
||||
--pst-color-danger: #e78894;
|
||||
--pst-color-danger-bg: #4e111b;
|
||||
--pst-color-text-base: #ced6dd;
|
||||
--pst-color-text-muted: #9ca4af;
|
||||
--pst-color-heading-color: #14181e;
|
||||
--pst-color-shadow: rgba(0, 0, 0, 0.2);
|
||||
--pst-color-border: #48566b;
|
||||
--pst-color-border-muted: #29313d;
|
||||
--pst-color-inline-code: #f3c7ee;
|
||||
--pst-color-inline-code-links: #4FB2AD;
|
||||
--pst-color-target: #675c04;
|
||||
--pst-color-background: #14181e;
|
||||
--pst-color-on-background: #222832;
|
||||
--pst-color-surface: #29313d;
|
||||
--pst-color-on-surface: #f3f4f5;
|
||||
/* Adjust images in dark mode (unless they have class .only-dark or
|
||||
* .dark-light, in which case assume they're already optimized for dark
|
||||
* mode).
|
||||
*/
|
||||
/* Give images a light background in dark mode in case they have
|
||||
* transparency and black text (unless they have class .only-dark or .dark-light, in
|
||||
* which case assume they're already optimized for dark mode).
|
||||
*/
|
||||
color-scheme: dark;
|
||||
}
|
||||
html[data-theme=dark] {
|
||||
--pst-color-link: var(--pst-color-primary);
|
||||
--pst-color-link-hover: var(--pst-color-secondary);
|
||||
}
|
||||
html[data-theme=dark] .only-light,
|
||||
html[data-theme=dark] .only-light ~ figcaption {
|
||||
display: none !important;
|
||||
}
|
||||
html[data-theme=dark] img:not(.only-dark):not(.dark-light) {
|
||||
filter: brightness(0.8) contrast(1.2);
|
||||
}
|
||||
html[data-theme=dark] .bd-content img:not(.only-dark):not(.dark-light) {
|
||||
background: rgb(255, 255, 255);
|
||||
border-radius: 0.25rem;
|
||||
}
|
||||
html[data-theme=dark] .MathJax_SVG * {
|
||||
fill: var(--pst-color-text-base);
|
||||
}
|
||||
|
||||
.pst-color-primary {
|
||||
color: var(--pst-color-primary);
|
||||
}
|
||||
|
||||
.pst-color-secondary {
|
||||
color: var(--pst-color-secondary);
|
||||
}
|
||||
|
||||
.pst-color-accent {
|
||||
color: var(--pst-color-accent);
|
||||
}
|
||||
|
||||
.pst-color-info {
|
||||
color: var(--pst-color-info);
|
||||
}
|
||||
|
||||
.pst-color-warning {
|
||||
color: var(--pst-color-warning);
|
||||
}
|
||||
|
||||
.pst-color-success {
|
||||
color: var(--pst-color-success);
|
||||
}
|
||||
|
||||
.pst-color-attention {
|
||||
color: var(--pst-color-attention);
|
||||
}
|
||||
|
||||
.pst-color-danger {
|
||||
color: var(--pst-color-danger);
|
||||
}
|
||||
|
||||
.pst-color-text-base {
|
||||
color: var(--pst-color-text-base);
|
||||
}
|
||||
|
||||
.pst-color-text-muted {
|
||||
color: var(--pst-color-text-muted);
|
||||
}
|
||||
|
||||
.pst-color-heading-color {
|
||||
color: var(--pst-color-heading-color);
|
||||
}
|
||||
|
||||
.pst-color-shadow {
|
||||
color: var(--pst-color-shadow);
|
||||
}
|
||||
|
||||
.pst-color-border {
|
||||
color: var(--pst-color-border);
|
||||
}
|
||||
|
||||
.pst-color-border-muted {
|
||||
color: var(--pst-color-border-muted);
|
||||
}
|
||||
|
||||
.pst-color-inline-code {
|
||||
color: var(--pst-color-inline-code);
|
||||
}
|
||||
|
||||
.pst-color-inline-code-links {
|
||||
color: var(--pst-color-inline-code-links);
|
||||
}
|
||||
|
||||
.pst-color-target {
|
||||
color: var(--pst-color-target);
|
||||
}
|
||||
|
||||
.pst-color-background {
|
||||
color: var(--pst-color-background);
|
||||
}
|
||||
|
||||
.pst-color-on-background {
|
||||
color: var(--pst-color-on-background);
|
||||
}
|
||||
|
||||
.pst-color-surface {
|
||||
color: var(--pst-color-surface);
|
||||
}
|
||||
|
||||
.pst-color-on-surface {
|
||||
color: var(--pst-color-on-surface);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Adjust the height of the navbar */
|
||||
.bd-header .bd-header__inner{
|
||||
height: 52px; /* Adjust this value as needed */
|
||||
}
|
||||
|
||||
.navbar-nav > li > a {
|
||||
line-height: 52px; /* Vertically center the navbar links */
|
||||
}
|
||||
|
||||
/* Make sure the navbar items align properly */
|
||||
.navbar-nav {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
|
||||
.bd-header .navbar-header-items__start{
|
||||
margin-left: 0rem
|
||||
}
|
||||
|
||||
.bd-header button.primary-toggle {
|
||||
margin-right: 0rem;
|
||||
}
|
||||
|
||||
.bd-header ul.navbar-nav .dropdown .dropdown-menu {
|
||||
overflow-y: auto; /* Enable vertical scrolling */
|
||||
max-height: 80vh
|
||||
}
|
||||
|
||||
.bd-sidebar-primary {
|
||||
width: 22%; /* Adjust this value to your preference */
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
.bd-sidebar-secondary {
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
.toc-entry a.nav-link, .toc-entry a>code {
|
||||
background-color: transparent;
|
||||
border-color: transparent;
|
||||
}
|
||||
|
||||
.bd-sidebar-primary code{
|
||||
background-color: transparent;
|
||||
border-color: transparent;
|
||||
}
|
||||
|
||||
|
||||
.toctree-wrapper li[class^=toctree-l1]>a {
|
||||
font-size: 1.3em
|
||||
}
|
||||
|
||||
.toctree-wrapper li[class^=toctree-l1] {
|
||||
margin-bottom: 2em;
|
||||
}
|
||||
|
||||
.toctree-wrapper li[class^=toctree-l]>ul {
|
||||
margin-top: 0.5em;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
*, :after, :before {
|
||||
font-style: normal;
|
||||
}
|
||||
|
||||
div.deprecated {
|
||||
margin-top: 0.5em;
|
||||
margin-bottom: 2em;
|
||||
}
|
||||
|
||||
.admonition-beta.admonition, div.admonition-beta.admonition {
|
||||
border-color: var(--pst-color-warning);
|
||||
margin-top:0.5em;
|
||||
margin-bottom: 2em;
|
||||
}
|
||||
|
||||
.admonition-beta>.admonition-title, div.admonition-beta>.admonition-title {
|
||||
background-color: var(--pst-color-warning-bg);
|
||||
}
|
||||
|
||||
dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) dd {
|
||||
margin-left: 1rem;
|
||||
}
|
||||
|
||||
p {
|
||||
font-size: 0.9rem;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
BIN
docs/api_reference/_static/img/brand/favicon.png
Normal file
BIN
docs/api_reference/_static/img/brand/favicon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 777 B |
11
docs/api_reference/_static/wordmark-api-dark.svg
Normal file
11
docs/api_reference/_static/wordmark-api-dark.svg
Normal file
@@ -0,0 +1,11 @@
|
||||
<svg width="72" height="19" viewBox="0 0 72 19" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<g clip-path="url(#clip0_4019_2020)">
|
||||
<path d="M29.4038 5.84477C30.1256 6.56657 30.1256 7.74117 29.4038 8.46296L27.7869 10.0538L27.7704 9.96259C27.6524 9.30879 27.3415 8.71552 26.8723 8.24627C26.5189 7.8936 26.1012 7.63282 25.6305 7.47143C25.3383 7.76508 25.1777 8.14989 25.1777 8.55487C25.1777 8.63706 25.1851 8.72224 25.2001 8.80742C25.4593 8.90082 25.6887 9.04503 25.8815 9.23781C26.6033 9.9596 26.6033 11.1342 25.8815 11.856L24.4738 13.2637C24.1129 13.6246 23.6392 13.8047 23.1647 13.8047C22.6902 13.8047 22.2165 13.6246 21.8556 13.2637C21.1338 12.5419 21.1338 11.3673 21.8556 10.6455L23.4725 9.05549L23.489 9.14665C23.6063 9.79896 23.9171 10.3922 24.3879 10.8622C24.742 11.2164 25.1343 11.4518 25.6043 11.6124L25.691 11.5257C25.954 11.2627 26.0982 10.913 26.0982 10.5402C26.0982 10.4572 26.0907 10.3743 26.0765 10.2929C25.8053 10.2032 25.5819 10.0754 25.3786 9.87218C25.0857 9.57928 24.9034 9.20493 24.8526 8.79024C24.8489 8.76035 24.8466 8.73121 24.8437 8.70132C24.8033 8.16109 24.9983 7.63357 25.3786 7.25399L26.7864 5.84627C27.1353 5.49733 27.6001 5.30455 28.0955 5.30455C28.5909 5.30455 29.0556 5.49658 29.4046 5.84627L29.4038 5.84477ZM36.7548 9.56583C36.7548 14.7163 32.5645 18.9058 27.4148 18.9058H9.34C4.1903 18.9058 0 14.7163 0 9.56583C0 4.41538 4.1903 0.22583 9.34 0.22583H27.4148C32.5652 0.22583 36.7548 4.41613 36.7548 9.56583ZM18 14.25C18.1472 14.0714 17.4673 13.5686 17.3283 13.384C17.0459 13.0777 17.0444 12.6368 16.8538 12.2789C16.3876 11.1985 15.8518 10.1262 15.1024 9.21166C14.3104 8.21116 13.333 7.38326 12.4745 6.44403C11.8371 5.78873 11.6668 4.85548 11.1041 4.15087C10.3285 3.00541 7.87624 2.69308 7.51683 4.31077C7.51833 4.36158 7.50264 4.39371 7.45855 4.42584C7.2598 4.57005 7.08271 4.73518 6.93402 4.93468C6.57013 5.44129 6.51409 6.30057 6.96839 6.75561C6.98333 6.51576 6.99155 6.28936 7.18134 6.1175C7.53252 6.41862 8.06304 6.52547 8.47026 6.30057C9.36989 7.585 9.14573 9.36184 9.86005 10.7457C10.0573 11.0729 10.2561 11.4069 10.5094 11.6939C10.7148 12.0137 11.4247 12.391 11.4665 12.6869C11.474 13.195 11.4142 13.7502 11.7475 14.1753C11.9044 14.4936 11.5188 14.8134 11.208 14.7738C10.8045 14.8291 10.3121 14.5026 9.95868 14.7036C9.8339 14.8388 9.58957 14.6894 9.48197 14.8769C9.44461 14.9741 9.24286 15.1108 9.36316 15.2042C9.49691 15.1026 9.62095 14.9965 9.80102 15.057C9.77412 15.2035 9.88994 15.2244 9.98184 15.267C9.97886 15.3663 9.92057 15.468 9.99679 15.5524C10.0857 15.4627 10.1388 15.3357 10.28 15.2983C10.7492 15.9238 11.2267 14.6655 12.2421 15.2318C12.0359 15.2214 11.8528 15.2475 11.7139 15.4172C11.6795 15.4553 11.6503 15.5001 11.7109 15.5494C12.2586 15.196 12.2556 15.6705 12.6112 15.5248C12.8847 15.382 13.1567 15.2035 13.4817 15.2543C13.1657 15.3454 13.153 15.5995 12.9677 15.8139C12.9363 15.8468 12.9213 15.8842 12.9579 15.9387C13.614 15.8834 13.6678 15.6652 14.1975 15.3977C14.5928 15.1564 14.9866 15.7414 15.3288 15.4082C15.4043 15.3357 15.5074 15.3604 15.6008 15.3507C15.4812 14.7133 14.1669 15.4672 14.1878 14.6124C14.6107 14.3247 14.5136 13.7741 14.542 13.3295C15.0284 13.5992 15.5694 13.7561 16.0461 14.0139C16.2867 14.4025 16.6641 14.9158 17.1669 14.8822C17.1804 14.8433 17.1923 14.8089 17.2065 14.7693C17.359 14.7955 17.5547 14.8964 17.6384 14.7036C17.8663 14.9419 18.201 14.93 18.4992 14.8687C18.7196 14.6894 18.0845 14.4338 17.9993 14.2493L18 14.25ZM31.3458 7.15387C31.3458 6.28413 31.0081 5.46744 30.3946 4.85399C29.7812 4.24054 28.9645 3.9028 28.094 3.9028C27.2235 3.9028 26.4068 4.24054 25.7933 4.85399L24.3856 6.26171C24.0569 6.59048 23.8073 6.97678 23.6436 7.40941L23.6339 7.43407L23.6085 7.44154C23.0974 7.5992 22.6469 7.86969 22.2696 8.24702L20.8618 9.65475C19.5938 10.9235 19.5938 12.9873 20.8618 14.2553C21.4753 14.8687 22.292 15.2064 23.1617 15.2064C24.0314 15.2064 24.8489 14.8687 25.4623 14.2553L26.8701 12.8475C27.1973 12.5203 27.4454 12.1355 27.609 11.7036L27.6188 11.6789L27.6442 11.6707C28.1463 11.5168 28.6095 11.2373 28.9854 10.8622L30.3931 9.4545C31.0066 8.84105 31.3443 8.02436 31.3443 7.15387H31.3458ZM12.8802 13.1972C12.7592 13.6695 12.7196 14.4742 12.1054 14.4974C12.0546 14.7701 12.2944 14.8724 12.5119 14.785C12.7278 14.6856 12.8302 14.8635 12.9026 15.0406C13.2359 15.0891 13.7291 14.9292 13.7477 14.5347C13.2501 14.2478 13.0962 13.7023 12.8795 13.1972H12.8802Z" fill="#F4F3FF"/>
|
||||
<path d="M43.5142 15.2258L47.1462 3.70583H49.9702L53.6022 15.2258H51.6182L48.3222 4.88983H48.7542L45.4982 15.2258H43.5142ZM45.5382 12.7298V10.9298H51.5862V12.7298H45.5382ZM55.0486 15.2258V3.70583H59.8086C59.9206 3.70583 60.0646 3.71116 60.2406 3.72183C60.4166 3.72716 60.5792 3.74316 60.7286 3.76983C61.3952 3.87116 61.9446 4.0925 62.3766 4.43383C62.8139 4.77516 63.1366 5.20716 63.3446 5.72983C63.5579 6.24716 63.6646 6.82316 63.6646 7.45783C63.6646 8.08716 63.5579 8.66316 63.3446 9.18583C63.1312 9.70316 62.8059 10.1325 62.3686 10.4738C61.9366 10.8152 61.3899 11.0365 60.7286 11.1378C60.5792 11.1592 60.4139 11.1752 60.2326 11.1858C60.0566 11.1965 59.9152 11.2018 59.8086 11.2018H56.9766V15.2258H55.0486ZM56.9766 9.40183H59.7286C59.8352 9.40183 59.9552 9.3965 60.0886 9.38583C60.2219 9.37516 60.3446 9.35383 60.4566 9.32183C60.7766 9.24183 61.0272 9.1005 61.2086 8.89783C61.3952 8.69516 61.5259 8.46583 61.6006 8.20983C61.6806 7.95383 61.7206 7.70316 61.7206 7.45783C61.7206 7.2125 61.6806 6.96183 61.6006 6.70583C61.5259 6.4445 61.3952 6.2125 61.2086 6.00983C61.0272 5.80716 60.7766 5.66583 60.4566 5.58583C60.3446 5.55383 60.2219 5.53516 60.0886 5.52983C59.9552 5.51916 59.8352 5.51383 59.7286 5.51383H56.9766V9.40183ZM65.4273 15.2258V3.70583H67.3553V15.2258H65.4273Z" fill="#F4F3FF"/>
|
||||
</g>
|
||||
<defs>
|
||||
<clipPath id="clip0_4019_2020">
|
||||
<rect width="71.0711" height="18.68" fill="white" transform="translate(0 0.22583)"/>
|
||||
</clipPath>
|
||||
</defs>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 5.7 KiB |
11
docs/api_reference/_static/wordmark-api.svg
Normal file
11
docs/api_reference/_static/wordmark-api.svg
Normal file
@@ -0,0 +1,11 @@
|
||||
<svg width="72" height="20" viewBox="0 0 72 20" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<g clip-path="url(#clip0_4019_689)">
|
||||
<path d="M29.4038 5.97905C30.1256 6.70085 30.1256 7.87545 29.4038 8.59724L27.7869 10.188L27.7704 10.0969C27.6524 9.44307 27.3415 8.84979 26.8723 8.38055C26.5189 8.02787 26.1012 7.7671 25.6305 7.60571C25.3383 7.89936 25.1777 8.28416 25.1777 8.68915C25.1777 8.77134 25.1851 8.85652 25.2001 8.9417C25.4593 9.0351 25.6887 9.17931 25.8815 9.37209C26.6033 10.0939 26.6033 11.2685 25.8815 11.9903L24.4738 13.398C24.1129 13.7589 23.6392 13.939 23.1647 13.939C22.6902 13.939 22.2165 13.7589 21.8556 13.398C21.1338 12.6762 21.1338 11.5016 21.8556 10.7798L23.4725 9.18977L23.489 9.28093C23.6063 9.93323 23.9171 10.5265 24.3879 10.9965C24.742 11.3507 25.1343 11.586 25.6043 11.7467L25.691 11.66C25.954 11.397 26.0982 11.0473 26.0982 10.6745C26.0982 10.5915 26.0907 10.5086 26.0765 10.4271C25.8053 10.3375 25.5819 10.2097 25.3786 10.0065C25.0857 9.71356 24.9034 9.33921 24.8526 8.92451C24.8489 8.89463 24.8466 8.86549 24.8437 8.8356C24.8033 8.29537 24.9983 7.76785 25.3786 7.38827L26.7864 5.98055C27.1353 5.6316 27.6001 5.43883 28.0955 5.43883C28.5909 5.43883 29.0556 5.63086 29.4046 5.98055L29.4038 5.97905ZM36.7548 9.70011C36.7548 14.8506 32.5645 19.0401 27.4148 19.0401H9.34C4.1903 19.0401 0 14.8506 0 9.70011C0 4.54966 4.1903 0.360107 9.34 0.360107H27.4148C32.5652 0.360107 36.7548 4.55041 36.7548 9.70011ZM18 14.3843C18.1472 14.2057 17.4673 13.7029 17.3283 13.5183C17.0459 13.2119 17.0444 12.7711 16.8538 12.4132C16.3876 11.3327 15.8518 10.2605 15.1024 9.34594C14.3104 8.34543 13.333 7.51754 12.4745 6.57831C11.8371 5.92301 11.6668 4.98976 11.1041 4.28515C10.3285 3.13969 7.87624 2.82736 7.51683 4.44505C7.51833 4.49586 7.50264 4.52799 7.45855 4.56012C7.2598 4.70433 7.08271 4.86946 6.93402 5.06896C6.57013 5.57556 6.51409 6.43484 6.96839 6.88989C6.98333 6.65004 6.99155 6.42364 7.18134 6.25178C7.53252 6.5529 8.06304 6.65975 8.47026 6.43484C9.36989 7.71928 9.14573 9.49612 9.86005 10.8799C10.0573 11.2072 10.2561 11.5412 10.5094 11.8281C10.7148 12.1479 11.4247 12.5253 11.4665 12.8212C11.474 13.3293 11.4142 13.8844 11.7475 14.3096C11.9044 14.6279 11.5188 14.9477 11.208 14.9081C10.8045 14.9634 10.3121 14.6369 9.95868 14.8379C9.8339 14.9731 9.58957 14.8237 9.48197 15.0112C9.44461 15.1083 9.24286 15.2451 9.36316 15.3385C9.49691 15.2369 9.62095 15.1308 9.80102 15.1913C9.77412 15.3377 9.88994 15.3587 9.98184 15.4012C9.97886 15.5006 9.92057 15.6022 9.99679 15.6867C10.0857 15.597 10.1388 15.47 10.28 15.4326C10.7492 16.058 11.2267 14.7997 12.2421 15.3661C12.0359 15.3557 11.8528 15.3818 11.7139 15.5514C11.6795 15.5895 11.6503 15.6344 11.7109 15.6837C12.2586 15.3303 12.2556 15.8047 12.6112 15.659C12.8847 15.5163 13.1567 15.3377 13.4817 15.3885C13.1657 15.4797 13.153 15.7337 12.9677 15.9482C12.9363 15.9811 12.9213 16.0184 12.9579 16.073C13.614 16.0177 13.6678 15.7995 14.1975 15.532C14.5928 15.2907 14.9866 15.8757 15.3288 15.5425C15.4043 15.47 15.5074 15.4946 15.6008 15.4849C15.4812 14.8476 14.1669 15.6015 14.1878 14.7467C14.6107 14.459 14.5136 13.9083 14.542 13.4638C15.0284 13.7335 15.5694 13.8904 16.0461 14.1482C16.2867 14.5367 16.6641 15.0501 17.1669 15.0164C17.1804 14.9776 17.1923 14.9432 17.2065 14.9036C17.359 14.9298 17.5547 15.0306 17.6384 14.8379C17.8663 15.0762 18.201 15.0643 18.4992 15.003C18.7196 14.8237 18.0845 14.5681 17.9993 14.3836L18 14.3843ZM31.3458 7.28815C31.3458 6.41841 31.0081 5.60172 30.3946 4.98826C29.7812 4.37481 28.9645 4.03708 28.094 4.03708C27.2235 4.03708 26.4068 4.37481 25.7933 4.98826L24.3856 6.39599C24.0569 6.72476 23.8073 7.11106 23.6436 7.54369L23.6339 7.56835L23.6085 7.57582C23.0974 7.73348 22.6469 8.00396 22.2696 8.3813L20.8618 9.78902C19.5938 11.0578 19.5938 13.1215 20.8618 14.3895C21.4753 15.003 22.292 15.3407 23.1617 15.3407C24.0314 15.3407 24.8489 15.003 25.4623 14.3895L26.8701 12.9818C27.1973 12.6545 27.4454 12.2697 27.609 11.8378L27.6188 11.8132L27.6442 11.805C28.1463 11.651 28.6095 11.3716 28.9854 10.9965L30.3931 9.58878C31.0066 8.97532 31.3443 8.15863 31.3443 7.28815H31.3458ZM12.8802 13.3315C12.7592 13.8037 12.7196 14.6085 12.1054 14.6316C12.0546 14.9044 12.2944 15.0067 12.5119 14.9193C12.7278 14.8199 12.8302 14.9978 12.9026 15.1748C13.2359 15.2234 13.7291 15.0635 13.7477 14.669C13.2501 14.3821 13.0962 13.8366 12.8795 13.3315H12.8802Z" fill="#246161"/>
|
||||
<path d="M43.5142 15.3601L47.1462 3.84011H49.9702L53.6022 15.3601H51.6182L48.3222 5.02411H48.7542L45.4982 15.3601H43.5142ZM45.5382 12.8641V11.0641H51.5862V12.8641H45.5382ZM55.0486 15.3601V3.84011H59.8086C59.9206 3.84011 60.0646 3.84544 60.2406 3.85611C60.4166 3.86144 60.5792 3.87744 60.7286 3.90411C61.3952 4.00544 61.9446 4.22677 62.3766 4.56811C62.8139 4.90944 63.1366 5.34144 63.3446 5.86411C63.5579 6.38144 63.6646 6.95744 63.6646 7.59211C63.6646 8.22144 63.5579 8.79744 63.3446 9.32011C63.1312 9.83744 62.8059 10.2668 62.3686 10.6081C61.9366 10.9494 61.3899 11.1708 60.7286 11.2721C60.5792 11.2934 60.4139 11.3094 60.2326 11.3201C60.0566 11.3308 59.9152 11.3361 59.8086 11.3361H56.9766V15.3601H55.0486ZM56.9766 9.53611H59.7286C59.8352 9.53611 59.9552 9.53077 60.0886 9.52011C60.2219 9.50944 60.3446 9.48811 60.4566 9.45611C60.7766 9.37611 61.0272 9.23477 61.2086 9.03211C61.3952 8.82944 61.5259 8.60011 61.6006 8.34411C61.6806 8.08811 61.7206 7.83744 61.7206 7.59211C61.7206 7.34677 61.6806 7.09611 61.6006 6.84011C61.5259 6.57877 61.3952 6.34677 61.2086 6.14411C61.0272 5.94144 60.7766 5.80011 60.4566 5.72011C60.3446 5.68811 60.2219 5.66944 60.0886 5.66411C59.9552 5.65344 59.8352 5.64811 59.7286 5.64811H56.9766V9.53611ZM65.4273 15.3601V3.84011H67.3553V15.3601H65.4273Z" fill="#246161"/>
|
||||
</g>
|
||||
<defs>
|
||||
<clipPath id="clip0_4019_689">
|
||||
<rect width="71.0711" height="18.68" fill="white" transform="translate(0 0.360107)"/>
|
||||
</clipPath>
|
||||
</defs>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 5.7 KiB |
@@ -15,6 +15,8 @@ from pathlib import Path
|
||||
|
||||
import toml
|
||||
from docutils import nodes
|
||||
from docutils.parsers.rst.directives.admonitions import BaseAdmonition
|
||||
from docutils.statemachine import StringList
|
||||
from sphinx.util.docutils import SphinxDirective
|
||||
|
||||
# If extensions (or modules to document with autodoc) are in another directory,
|
||||
@@ -60,26 +62,41 @@ class ExampleLinksDirective(SphinxDirective):
|
||||
item_node.append(para_node)
|
||||
list_node.append(item_node)
|
||||
if list_node.children:
|
||||
title_node = nodes.title()
|
||||
title_node = nodes.rubric()
|
||||
title_node.append(nodes.Text(f"Examples using {class_or_func_name}"))
|
||||
return [title_node, list_node]
|
||||
return [list_node]
|
||||
|
||||
|
||||
class Beta(BaseAdmonition):
|
||||
required_arguments = 0
|
||||
node_class = nodes.admonition
|
||||
|
||||
def run(self):
|
||||
self.content = self.content or StringList(
|
||||
[
|
||||
(
|
||||
"This feature is in beta. It is actively being worked on, so the "
|
||||
"API may change."
|
||||
)
|
||||
]
|
||||
)
|
||||
self.arguments = self.arguments or ["Beta"]
|
||||
return super().run()
|
||||
|
||||
|
||||
def setup(app):
|
||||
app.add_directive("example_links", ExampleLinksDirective)
|
||||
app.add_directive("beta", Beta)
|
||||
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
project = "🦜🔗 LangChain"
|
||||
copyright = "2023, LangChain, Inc."
|
||||
author = "LangChain, Inc."
|
||||
copyright = "2023, LangChain Inc"
|
||||
author = "LangChain, Inc"
|
||||
|
||||
version = data["tool"]["poetry"]["version"]
|
||||
release = version
|
||||
|
||||
html_title = project + " " + version
|
||||
html_favicon = "_static/img/brand/favicon.png"
|
||||
html_last_updated_fmt = "%b %d, %Y"
|
||||
|
||||
|
||||
@@ -95,11 +112,13 @@ extensions = [
|
||||
"sphinx.ext.napoleon",
|
||||
"sphinx.ext.viewcode",
|
||||
"sphinxcontrib.autodoc_pydantic",
|
||||
"sphinx_copybutton",
|
||||
"sphinx_panels",
|
||||
"IPython.sphinxext.ipython_console_highlighting",
|
||||
"myst_parser",
|
||||
"_extensions.gallery_directive",
|
||||
"sphinx_design",
|
||||
"sphinx_copybutton",
|
||||
]
|
||||
source_suffix = [".rst"]
|
||||
source_suffix = [".rst", ".md"]
|
||||
|
||||
# some autodoc pydantic options are repeated in the actual template.
|
||||
# potentially user error, but there may be bugs in the sphinx extension
|
||||
@@ -131,23 +150,84 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
|
||||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#
|
||||
html_theme = "scikit-learn-modern"
|
||||
html_theme_path = ["themes"]
|
||||
# The theme to use for HTML and HTML Help pages.
|
||||
html_theme = "pydata_sphinx_theme"
|
||||
|
||||
# redirects dictionary maps from old links to new links
|
||||
html_additional_pages = {}
|
||||
redirects = {
|
||||
"index": "langchain_api_reference",
|
||||
# Theme options are theme-specific and customize the look and feel of a theme
|
||||
# further. For a list of options available for each theme, see the
|
||||
# documentation.
|
||||
html_theme_options = {
|
||||
# # -- General configuration ------------------------------------------------
|
||||
"sidebar_includehidden": True,
|
||||
"use_edit_page_button": False,
|
||||
# # "analytics": {
|
||||
# # "plausible_analytics_domain": "scikit-learn.org",
|
||||
# # "plausible_analytics_url": "https://views.scientific-python.org/js/script.js",
|
||||
# # },
|
||||
# # If "prev-next" is included in article_footer_items, then setting show_prev_next
|
||||
# # to True would repeat prev and next links. See
|
||||
# # https://github.com/pydata/pydata-sphinx-theme/blob/b731dc230bc26a3d1d1bb039c56c977a9b3d25d8/src/pydata_sphinx_theme/theme/pydata_sphinx_theme/layout.html#L118-L129
|
||||
"show_prev_next": False,
|
||||
"search_bar_text": "Search",
|
||||
"navigation_with_keys": True,
|
||||
"collapse_navigation": True,
|
||||
"navigation_depth": 3,
|
||||
"show_nav_level": 1,
|
||||
"show_toc_level": 3,
|
||||
"navbar_align": "left",
|
||||
"header_links_before_dropdown": 5,
|
||||
"header_dropdown_text": "Integrations",
|
||||
"logo": {
|
||||
"image_light": "_static/wordmark-api.svg",
|
||||
"image_dark": "_static/wordmark-api-dark.svg",
|
||||
},
|
||||
"surface_warnings": True,
|
||||
# # -- Template placement in theme layouts ----------------------------------
|
||||
"navbar_start": ["navbar-logo"],
|
||||
# # Note that the alignment of navbar_center is controlled by navbar_align
|
||||
"navbar_center": ["navbar-nav"],
|
||||
"navbar_end": ["langchain_docs", "theme-switcher", "navbar-icon-links"],
|
||||
# # navbar_persistent is persistent right (even when on mobiles)
|
||||
"navbar_persistent": ["search-field"],
|
||||
"article_header_start": ["breadcrumbs"],
|
||||
"article_header_end": [],
|
||||
"article_footer_items": [],
|
||||
"content_footer_items": [],
|
||||
# # Use html_sidebars that map page patterns to list of sidebar templates
|
||||
# "primary_sidebar_end": [],
|
||||
"footer_start": ["copyright"],
|
||||
"footer_center": [],
|
||||
"footer_end": [],
|
||||
# # When specified as a dictionary, the keys should follow glob-style patterns, as in
|
||||
# # https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-exclude_patterns
|
||||
# # In particular, "**" specifies the default for all pages
|
||||
# # Use :html_theme.sidebar_secondary.remove: for file-wide removal
|
||||
# "secondary_sidebar_items": {"**": ["page-toc", "sourcelink"]},
|
||||
# "show_version_warning_banner": True,
|
||||
# "announcement": None,
|
||||
"icon_links": [
|
||||
{
|
||||
# Label for this link
|
||||
"name": "GitHub",
|
||||
# URL where the link will redirect
|
||||
"url": "https://github.com/langchain-ai/langchain", # required
|
||||
# Icon class (if "type": "fontawesome"), or path to local image (if "type": "local")
|
||||
"icon": "fa-brands fa-square-github",
|
||||
# The type of image to be used (see below for details)
|
||||
"type": "fontawesome",
|
||||
},
|
||||
{
|
||||
"name": "X / Twitter",
|
||||
"url": "https://twitter.com/langchainai",
|
||||
"icon": "fab fa-twitter-square",
|
||||
},
|
||||
],
|
||||
"icon_links_label": "Quick Links",
|
||||
"external_links": [
|
||||
{"name": "Legacy reference", "url": "https://api.python.langchain.com/"},
|
||||
],
|
||||
}
|
||||
for old_link in redirects:
|
||||
html_additional_pages[old_link] = "redirects.html"
|
||||
|
||||
partners_dir = Path(__file__).parent.parent.parent / "libs/partners"
|
||||
partners = [
|
||||
(p.name, p.name.replace("-", "_") + "_api_reference")
|
||||
for p in partners_dir.iterdir()
|
||||
]
|
||||
partners = sorted(partners)
|
||||
|
||||
html_context = {
|
||||
"display_github": True, # Integrate GitHub
|
||||
@@ -155,8 +235,6 @@ html_context = {
|
||||
"github_repo": "langchain", # Repo name
|
||||
"github_version": "master", # Version
|
||||
"conf_py_path": "/docs/api_reference", # Path in the checkout to the docs root
|
||||
"redirects": redirects,
|
||||
"partners": partners,
|
||||
}
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
@@ -166,9 +244,7 @@ html_static_path = ["_static"]
|
||||
|
||||
# These paths are either relative to html_static_path
|
||||
# or fully qualified paths (e.g. https://...)
|
||||
html_css_files = [
|
||||
"css/custom.css",
|
||||
]
|
||||
html_css_files = ["css/custom.css"]
|
||||
html_use_index = False
|
||||
|
||||
myst_enable_extensions = ["colon_fence"]
|
||||
@@ -185,3 +261,5 @@ html_baseurl = os.environ.get("READTHEDOCS_CANONICAL_URL", "")
|
||||
# Tell Jinja2 templates the build is running on Read the Docs
|
||||
if os.environ.get("READTHEDOCS", "") == "True":
|
||||
html_context["READTHEDOCS"] = True
|
||||
|
||||
master_doc = "index"
|
||||
|
||||
@@ -38,6 +38,8 @@ class ClassInfo(TypedDict):
|
||||
"""The kind of the class."""
|
||||
is_public: bool
|
||||
"""Whether the class is public or not."""
|
||||
is_deprecated: bool
|
||||
"""Whether the class is deprecated."""
|
||||
|
||||
|
||||
class FunctionInfo(TypedDict):
|
||||
@@ -49,6 +51,8 @@ class FunctionInfo(TypedDict):
|
||||
"""The fully qualified name of the function."""
|
||||
is_public: bool
|
||||
"""Whether the function is public or not."""
|
||||
is_deprecated: bool
|
||||
"""Whether the function is deprecated."""
|
||||
|
||||
|
||||
class ModuleMembers(TypedDict):
|
||||
@@ -121,6 +125,7 @@ def _load_module_members(module_path: str, namespace: str) -> ModuleMembers:
|
||||
qualified_name=f"{namespace}.{name}",
|
||||
kind=kind,
|
||||
is_public=not name.startswith("_"),
|
||||
is_deprecated=".. deprecated::" in (type_.__doc__ or ""),
|
||||
)
|
||||
)
|
||||
elif inspect.isfunction(type_):
|
||||
@@ -129,6 +134,7 @@ def _load_module_members(module_path: str, namespace: str) -> ModuleMembers:
|
||||
name=name,
|
||||
qualified_name=f"{namespace}.{name}",
|
||||
is_public=not name.startswith("_"),
|
||||
is_deprecated=".. deprecated::" in (type_.__doc__ or ""),
|
||||
)
|
||||
)
|
||||
else:
|
||||
@@ -233,7 +239,7 @@ def _construct_doc(
|
||||
package_namespace: str,
|
||||
members_by_namespace: Dict[str, ModuleMembers],
|
||||
package_version: str,
|
||||
) -> str:
|
||||
) -> List[typing.Tuple[str, str]]:
|
||||
"""Construct the contents of the reference.rst file for the given package.
|
||||
|
||||
Args:
|
||||
@@ -245,23 +251,62 @@ def _construct_doc(
|
||||
Returns:
|
||||
The contents of the reference.rst file.
|
||||
"""
|
||||
full_doc = f"""\
|
||||
=======================
|
||||
``{package_namespace}`` {package_version}
|
||||
=======================
|
||||
docs = []
|
||||
index_doc = f"""\
|
||||
:html_theme.sidebar_secondary.remove:
|
||||
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. _{package_namespace}:
|
||||
|
||||
======================================
|
||||
{package_namespace.replace('_', '-')}: {package_version}
|
||||
======================================
|
||||
|
||||
.. automodule:: {package_namespace}
|
||||
:no-members:
|
||||
:no-inherited-members:
|
||||
|
||||
.. toctree::
|
||||
:hidden:
|
||||
:maxdepth: 2
|
||||
|
||||
"""
|
||||
index_autosummary = """
|
||||
"""
|
||||
namespaces = sorted(members_by_namespace)
|
||||
|
||||
for module in namespaces:
|
||||
index_doc += f" {module}\n"
|
||||
module_doc = f"""\
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. _{package_namespace}_{module}:
|
||||
"""
|
||||
_members = members_by_namespace[module]
|
||||
classes = [el for el in _members["classes_"] if el["is_public"]]
|
||||
functions = [el for el in _members["functions"] if el["is_public"]]
|
||||
classes = [
|
||||
el
|
||||
for el in _members["classes_"]
|
||||
if el["is_public"] and not el["is_deprecated"]
|
||||
]
|
||||
functions = [
|
||||
el
|
||||
for el in _members["functions"]
|
||||
if el["is_public"] and not el["is_deprecated"]
|
||||
]
|
||||
deprecated_classes = [
|
||||
el for el in _members["classes_"] if el["is_public"] and el["is_deprecated"]
|
||||
]
|
||||
deprecated_functions = [
|
||||
el
|
||||
for el in _members["functions"]
|
||||
if el["is_public"] and el["is_deprecated"]
|
||||
]
|
||||
if not (classes or functions):
|
||||
continue
|
||||
section = f":mod:`{package_namespace}.{module}`"
|
||||
section = f":mod:`{module}`"
|
||||
underline = "=" * (len(section) + 1)
|
||||
full_doc += f"""\
|
||||
module_doc += f"""
|
||||
{section}
|
||||
{underline}
|
||||
|
||||
@@ -269,16 +314,26 @@ def _construct_doc(
|
||||
:no-members:
|
||||
:no-inherited-members:
|
||||
|
||||
"""
|
||||
|
||||
index_autosummary += f"""
|
||||
:ref:`{package_namespace}_{module}`
|
||||
{'^' * (len(package_namespace) + len(module) + 8)}
|
||||
"""
|
||||
|
||||
if classes:
|
||||
full_doc += f"""\
|
||||
Classes
|
||||
--------------
|
||||
module_doc += f"""\
|
||||
**Classes**
|
||||
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. autosummary::
|
||||
:toctree: {module}
|
||||
"""
|
||||
index_autosummary += """
|
||||
**Classes**
|
||||
|
||||
.. autosummary::
|
||||
"""
|
||||
|
||||
for class_ in sorted(classes, key=lambda c: c["qualified_name"]):
|
||||
@@ -295,19 +350,22 @@ Classes
|
||||
else:
|
||||
template = "class.rst"
|
||||
|
||||
full_doc += f"""\
|
||||
module_doc += f"""\
|
||||
:template: {template}
|
||||
|
||||
{class_["qualified_name"]}
|
||||
|
||||
"""
|
||||
index_autosummary += f"""
|
||||
{class_['qualified_name']}
|
||||
"""
|
||||
|
||||
if functions:
|
||||
_functions = [f["qualified_name"] for f in functions]
|
||||
fstring = "\n ".join(sorted(_functions))
|
||||
full_doc += f"""\
|
||||
Functions
|
||||
--------------
|
||||
module_doc += f"""\
|
||||
**Functions**
|
||||
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. autosummary::
|
||||
@@ -317,7 +375,80 @@ Functions
|
||||
{fstring}
|
||||
|
||||
"""
|
||||
return full_doc
|
||||
|
||||
index_autosummary += f"""
|
||||
**Functions**
|
||||
|
||||
.. autosummary::
|
||||
|
||||
{fstring}
|
||||
"""
|
||||
if deprecated_classes:
|
||||
module_doc += f"""\
|
||||
**Deprecated classes**
|
||||
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. autosummary::
|
||||
:toctree: {module}
|
||||
"""
|
||||
|
||||
index_autosummary += """
|
||||
**Deprecated classes**
|
||||
|
||||
.. autosummary::
|
||||
"""
|
||||
|
||||
for class_ in sorted(deprecated_classes, key=lambda c: c["qualified_name"]):
|
||||
if class_["kind"] == "TypedDict":
|
||||
template = "typeddict.rst"
|
||||
elif class_["kind"] == "enum":
|
||||
template = "enum.rst"
|
||||
elif class_["kind"] == "Pydantic":
|
||||
template = "pydantic.rst"
|
||||
elif class_["kind"] == "RunnablePydantic":
|
||||
template = "runnable_pydantic.rst"
|
||||
elif class_["kind"] == "RunnableNonPydantic":
|
||||
template = "runnable_non_pydantic.rst"
|
||||
else:
|
||||
template = "class.rst"
|
||||
|
||||
module_doc += f"""\
|
||||
:template: {template}
|
||||
|
||||
{class_["qualified_name"]}
|
||||
|
||||
"""
|
||||
index_autosummary += f"""
|
||||
{class_['qualified_name']}
|
||||
"""
|
||||
|
||||
if deprecated_functions:
|
||||
_functions = [f["qualified_name"] for f in deprecated_functions]
|
||||
fstring = "\n ".join(sorted(_functions))
|
||||
module_doc += f"""\
|
||||
**Deprecated functions**
|
||||
|
||||
.. currentmodule:: {package_namespace}
|
||||
|
||||
.. autosummary::
|
||||
:toctree: {module}
|
||||
:template: function.rst
|
||||
|
||||
{fstring}
|
||||
|
||||
"""
|
||||
index_autosummary += f"""
|
||||
**Deprecated functions**
|
||||
|
||||
.. autosummary::
|
||||
|
||||
{fstring}
|
||||
|
||||
"""
|
||||
docs.append((f"{module}.rst", module_doc))
|
||||
docs.append(("index.rst", index_doc + index_autosummary))
|
||||
return docs
|
||||
|
||||
|
||||
def _build_rst_file(package_name: str = "langchain") -> None:
|
||||
@@ -329,13 +460,14 @@ def _build_rst_file(package_name: str = "langchain") -> None:
|
||||
package_dir = _package_dir(package_name)
|
||||
package_members = _load_package_modules(package_dir)
|
||||
package_version = _get_package_version(package_dir)
|
||||
with open(_out_file_path(package_name), "w") as f:
|
||||
f.write(
|
||||
_doc_first_line(package_name)
|
||||
+ _construct_doc(
|
||||
_package_namespace(package_name), package_members, package_version
|
||||
)
|
||||
)
|
||||
output_dir = _out_file_path(package_name)
|
||||
os.mkdir(output_dir)
|
||||
rsts = _construct_doc(
|
||||
_package_namespace(package_name), package_members, package_version
|
||||
)
|
||||
for name, rst in rsts:
|
||||
with open(output_dir / name, "w") as f:
|
||||
f.write(rst)
|
||||
|
||||
|
||||
def _package_namespace(package_name: str) -> str:
|
||||
@@ -385,12 +517,119 @@ def _get_package_version(package_dir: Path) -> str:
|
||||
|
||||
def _out_file_path(package_name: str) -> Path:
|
||||
"""Return the path to the file containing the documentation."""
|
||||
return HERE / f"{package_name.replace('-', '_')}_api_reference.rst"
|
||||
return HERE / f"{package_name.replace('-', '_')}"
|
||||
|
||||
|
||||
def _doc_first_line(package_name: str) -> str:
|
||||
"""Return the path to the file containing the documentation."""
|
||||
return f".. {package_name.replace('-', '_')}_api_reference:\n\n"
|
||||
def _build_index(dirs: List[str]) -> None:
|
||||
custom_names = {
|
||||
"airbyte": "Airbyte",
|
||||
"aws": "AWS",
|
||||
"ai21": "AI21",
|
||||
}
|
||||
ordered = ["core", "langchain", "text-splitters", "community", "experimental"]
|
||||
main_ = [dir_ for dir_ in ordered if dir_ in dirs]
|
||||
integrations = sorted(dir_ for dir_ in dirs if dir_ not in main_)
|
||||
doc = """# LangChain Python API Reference
|
||||
|
||||
Welcome to the LangChain Python API reference. This is a reference for all
|
||||
`langchain-x` packages.
|
||||
|
||||
For user guides see [https://python.langchain.com](https://python.langchain.com).
|
||||
|
||||
For the legacy API reference hosted on ReadTheDocs see [https://api.python.langchain.com/](https://api.python.langchain.com/).
|
||||
"""
|
||||
|
||||
if main_:
|
||||
main_headers = [
|
||||
" ".join(custom_names.get(x, x.title()) for x in dir_.split("-"))
|
||||
for dir_ in main_
|
||||
]
|
||||
main_tree = "\n".join(
|
||||
f"{header_name}<{dir_.replace('-', '_')}/index>"
|
||||
for header_name, dir_ in zip(main_headers, main_)
|
||||
)
|
||||
main_grid = "\n".join(
|
||||
f'- header: "**{header_name}**"\n content: "{_package_namespace(dir_).replace("_", "-")}: {_get_package_version(_package_dir(dir_))}"\n link: {dir_.replace("-", "_")}/index.html'
|
||||
for header_name, dir_ in zip(main_headers, main_)
|
||||
)
|
||||
doc += f"""## Base packages
|
||||
|
||||
```{{gallery-grid}}
|
||||
:grid-columns: "1 2 2 3"
|
||||
|
||||
{main_grid}
|
||||
```
|
||||
|
||||
```{{toctree}}
|
||||
:maxdepth: 2
|
||||
:hidden:
|
||||
:caption: Base packages
|
||||
|
||||
{main_tree}
|
||||
```
|
||||
"""
|
||||
if integrations:
|
||||
integration_headers = [
|
||||
" ".join(
|
||||
custom_names.get(x, x.title().replace("ai", "AI").replace("db", "DB"))
|
||||
for x in dir_.split("-")
|
||||
)
|
||||
for dir_ in integrations
|
||||
]
|
||||
integration_tree = "\n".join(
|
||||
f"{header_name}<{dir_.replace('-', '_')}/index>"
|
||||
for header_name, dir_ in zip(integration_headers, integrations)
|
||||
)
|
||||
|
||||
integration_grid = ""
|
||||
integrations_to_show = [
|
||||
"openai",
|
||||
"anthropic",
|
||||
"google-vertexai",
|
||||
"aws",
|
||||
"huggingface",
|
||||
"mistralai",
|
||||
]
|
||||
for header_name, dir_ in sorted(
|
||||
zip(integration_headers, integrations),
|
||||
key=lambda h_d: integrations_to_show.index(h_d[1])
|
||||
if h_d[1] in integrations_to_show
|
||||
else len(integrations_to_show),
|
||||
)[: len(integrations_to_show)]:
|
||||
integration_grid += f'\n- header: "**{header_name}**"\n content: {_package_namespace(dir_).replace("_", "-")} {_get_package_version(_package_dir(dir_))}\n link: {dir_.replace("-", "_")}/index.html'
|
||||
doc += f"""## Integrations
|
||||
|
||||
```{{gallery-grid}}
|
||||
:grid-columns: "1 2 2 3"
|
||||
|
||||
{integration_grid}
|
||||
```
|
||||
|
||||
See the full list of integrations in the Section Navigation.
|
||||
|
||||
```{{toctree}}
|
||||
:maxdepth: 2
|
||||
:hidden:
|
||||
:caption: Integrations
|
||||
|
||||
{integration_tree}
|
||||
```
|
||||
"""
|
||||
with open(HERE / "reference.md", "w") as f:
|
||||
f.write(doc)
|
||||
|
||||
dummy_index = """\
|
||||
# API reference
|
||||
|
||||
```{toctree}
|
||||
:maxdepth: 3
|
||||
:hidden:
|
||||
|
||||
Reference<reference>
|
||||
```
|
||||
"""
|
||||
with open(HERE / "index.md", "w") as f:
|
||||
f.write(dummy_index)
|
||||
|
||||
|
||||
def main(dirs: Optional[list] = None) -> None:
|
||||
@@ -418,6 +657,8 @@ def main(dirs: Optional[list] = None) -> None:
|
||||
else:
|
||||
print("Building package:", dir_)
|
||||
_build_rst_file(package_name=dir_)
|
||||
|
||||
_build_index(dirs)
|
||||
print("API reference files built.")
|
||||
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,8 +0,0 @@
|
||||
=============
|
||||
LangChain API
|
||||
=============
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
api_reference.rst
|
||||
@@ -1,17 +1,11 @@
|
||||
-e libs/experimental
|
||||
-e libs/langchain
|
||||
-e libs/core
|
||||
-e libs/community
|
||||
pydantic<2
|
||||
autodoc_pydantic==1.8.0
|
||||
myst_parser
|
||||
nbsphinx==0.8.9
|
||||
sphinx>=5
|
||||
sphinx-autobuild==2021.3.14
|
||||
sphinx_rtd_theme==1.0.0
|
||||
sphinx-typlog-theme==0.8.0
|
||||
sphinx-panels
|
||||
toml
|
||||
myst_nb
|
||||
sphinx_copybutton
|
||||
pydata-sphinx-theme==0.13.1
|
||||
autodoc_pydantic>=1,<2
|
||||
sphinx<=7
|
||||
myst-parser>=3
|
||||
sphinx-autobuild>=2024
|
||||
pydata-sphinx-theme>=0.15
|
||||
toml>=0.10.2
|
||||
myst-nb>=1.1.1
|
||||
pyyaml
|
||||
sphinx-design
|
||||
sphinx-copybutton
|
||||
beautifulsoup4
|
||||
41
docs/api_reference/scripts/custom_formatter.py
Normal file
41
docs/api_reference/scripts/custom_formatter.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import sys
|
||||
from glob import glob
|
||||
from pathlib import Path
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
CUR_DIR = Path(__file__).parents[1]
|
||||
|
||||
|
||||
def process_toc_h3_elements(html_content: str) -> str:
|
||||
"""Update Class.method() TOC headers to just method()."""
|
||||
# Create a BeautifulSoup object
|
||||
soup = BeautifulSoup(html_content, "html.parser")
|
||||
|
||||
# Find all <li> elements with class "toc-h3"
|
||||
toc_h3_elements = soup.find_all("li", class_="toc-h3")
|
||||
|
||||
# Process each element
|
||||
for element in toc_h3_elements:
|
||||
element = element.a.code.span
|
||||
# Get the text content of the element
|
||||
content = element.get_text()
|
||||
|
||||
# Apply the regex substitution
|
||||
modified_content = content.split(".")[-1]
|
||||
|
||||
# Update the element's content
|
||||
element.string = modified_content
|
||||
|
||||
# Return the modified HTML
|
||||
return str(soup)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
dir = sys.argv[1]
|
||||
for fn in glob(str(f"{dir.rstrip('/')}/**/*.html"), recursive=True):
|
||||
with open(fn, "r") as f:
|
||||
html = f.read()
|
||||
processed_html = process_toc_h3_elements(html)
|
||||
with open(fn, "w") as f:
|
||||
f.write(processed_html)
|
||||
@@ -1,4 +1,4 @@
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ objname }}
|
||||
{{ underline }}==============
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
.. autosummary::
|
||||
{% for item in attributes %}
|
||||
~{{ name }}.{{ item }}
|
||||
~{{ item }}
|
||||
{%- endfor %}
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
@@ -22,11 +22,11 @@
|
||||
|
||||
.. autosummary::
|
||||
{% for item in methods %}
|
||||
~{{ name }}.{{ item }}
|
||||
~{{ item }}
|
||||
{%- endfor %}
|
||||
|
||||
{% for item in methods %}
|
||||
.. automethod:: {{ name }}.{{ item }}
|
||||
.. automethod:: {{ item }}
|
||||
{%- endfor %}
|
||||
|
||||
{% endif %}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ objname }}
|
||||
{{ underline }}==============
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ objname }}
|
||||
{{ underline }}==============
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
12
docs/api_reference/templates/langchain_docs.html
Normal file
12
docs/api_reference/templates/langchain_docs.html
Normal file
@@ -0,0 +1,12 @@
|
||||
<!-- This will display a link to LangChain docs -->
|
||||
<head>
|
||||
<style>
|
||||
.text-link {
|
||||
text-decoration: none; /* Remove underline */
|
||||
color: inherit; /* Inherit color from parent element */
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<a href="https://python.langchain.com/" class='text-link'>Docs</a>
|
||||
</body>
|
||||
@@ -1,4 +1,4 @@
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ objname }}
|
||||
{{ underline }}==============
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
@@ -1,21 +1,21 @@
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ objname }}
|
||||
{{ underline }}==============
|
||||
|
||||
.. NOTE:: {{objname}} implements the standard :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>`. 🏃
|
||||
|
||||
The :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>` has additional methods that are available on runnables, such as :py:meth:`with_types <langchain_core.runnables.base.Runnable.with_types>`, :py:meth:`with_retry <langchain_core.runnables.base.Runnable.with_retry>`, :py:meth:`assign <langchain_core.runnables.base.Runnable.assign>`, :py:meth:`bind <langchain_core.runnables.base.Runnable.bind>`, :py:meth:`get_graph <langchain_core.runnables.base.Runnable.get_graph>`, and more.
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
.. autoclass:: {{ objname }}
|
||||
|
||||
.. NOTE:: {{objname}} implements the standard :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>`. 🏃
|
||||
|
||||
The :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>` has additional methods that are available on runnables, such as :py:meth:`with_types <langchain_core.runnables.base.Runnable.with_types>`, :py:meth:`with_retry <langchain_core.runnables.base.Runnable.with_retry>`, :py:meth:`assign <langchain_core.runnables.base.Runnable.assign>`, :py:meth:`bind <langchain_core.runnables.base.Runnable.bind>`, :py:meth:`get_graph <langchain_core.runnables.base.Runnable.get_graph>`, and more.
|
||||
|
||||
{% block attributes %}
|
||||
{% if attributes %}
|
||||
.. rubric:: {{ _('Attributes') }}
|
||||
|
||||
.. autosummary::
|
||||
{% for item in attributes %}
|
||||
~{{ name }}.{{ item }}
|
||||
~{{ item }}
|
||||
{%- endfor %}
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
@@ -26,11 +26,11 @@
|
||||
|
||||
.. autosummary::
|
||||
{% for item in methods %}
|
||||
~{{ name }}.{{ item }}
|
||||
~{{ item }}
|
||||
{%- endfor %}
|
||||
|
||||
{% for item in methods %}
|
||||
.. automethod:: {{ name }}.{{ item }}
|
||||
.. automethod:: {{ item }}
|
||||
{%- endfor %}
|
||||
|
||||
{% endif %}
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ objname }}
|
||||
{{ underline }}==============
|
||||
|
||||
.. NOTE:: {{objname}} implements the standard :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>`. 🏃
|
||||
|
||||
The :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>` has additional methods that are available on runnables, such as :py:meth:`with_types <langchain_core.runnables.base.Runnable.with_types>`, :py:meth:`with_retry <langchain_core.runnables.base.Runnable.with_retry>`, :py:meth:`assign <langchain_core.runnables.base.Runnable.assign>`, :py:meth:`bind <langchain_core.runnables.base.Runnable.bind>`, :py:meth:`get_graph <langchain_core.runnables.base.Runnable.get_graph>`, and more.
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
.. autopydantic_model:: {{ objname }}
|
||||
@@ -19,6 +15,10 @@
|
||||
:member-order: groupwise
|
||||
:show-inheritance: True
|
||||
:special-members: __call__
|
||||
:exclude-members: construct, copy, dict, from_orm, parse_file, parse_obj, parse_raw, schema, schema_json, update_forward_refs, validate, json, is_lc_serializable, to_json_not_implemented, lc_secrets, lc_attributes, lc_id, get_lc_namespace, astream_log, transform, atransform, get_output_schema, get_prompts, config_schema, map, pick, pipe, with_listeners, with_alisteners, with_config, with_fallbacks, with_types, with_retry, InputType, OutputType, config_specs, output_schema, get_input_schema, get_graph, get_name, input_schema, name, bind, assign
|
||||
:exclude-members: construct, copy, dict, from_orm, parse_file, parse_obj, parse_raw, schema, schema_json, update_forward_refs, validate, json, is_lc_serializable, to_json_not_implemented, lc_secrets, lc_attributes, lc_id, get_lc_namespace, astream_log, transform, atransform, get_output_schema, get_prompts, config_schema, map, pick, pipe, with_listeners, with_alisteners, with_config, with_fallbacks, with_types, with_retry, InputType, OutputType, config_specs, output_schema, get_input_schema, get_graph, get_name, input_schema, name, bind, assign, as_tool
|
||||
|
||||
.. NOTE:: {{objname}} implements the standard :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>`. 🏃
|
||||
|
||||
The :py:class:`Runnable Interface <langchain_core.runnables.base.Runnable>` has additional methods that are available on runnables, such as :py:meth:`with_types <langchain_core.runnables.base.Runnable.with_types>`, :py:meth:`with_retry <langchain_core.runnables.base.Runnable.with_retry>`, :py:meth:`assign <langchain_core.runnables.base.Runnable.assign>`, :py:meth:`bind <langchain_core.runnables.base.Runnable.bind>`, :py:meth:`get_graph <langchain_core.runnables.base.Runnable.get_graph>`, and more.
|
||||
|
||||
.. example_links:: {{ objname }}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
:mod:`{{module}}`.{{objname}}
|
||||
{{ objname }}
|
||||
{{ underline }}==============
|
||||
|
||||
.. currentmodule:: {{ module }}
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
Copyright (c) 2007-2023 The scikit-learn developers.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
@@ -1,67 +0,0 @@
|
||||
<script>
|
||||
$(document).ready(function() {
|
||||
/* Add a [>>>] button on the top-right corner of code samples to hide
|
||||
* the >>> and ... prompts and the output and thus make the code
|
||||
* copyable. */
|
||||
var div = $('.highlight-python .highlight,' +
|
||||
'.highlight-python3 .highlight,' +
|
||||
'.highlight-pycon .highlight,' +
|
||||
'.highlight-default .highlight')
|
||||
var pre = div.find('pre');
|
||||
|
||||
// get the styles from the current theme
|
||||
pre.parent().parent().css('position', 'relative');
|
||||
var hide_text = 'Hide prompts and outputs';
|
||||
var show_text = 'Show prompts and outputs';
|
||||
|
||||
// create and add the button to all the code blocks that contain >>>
|
||||
div.each(function(index) {
|
||||
var jthis = $(this);
|
||||
if (jthis.find('.gp').length > 0) {
|
||||
var button = $('<span class="copybutton">>>></span>');
|
||||
button.attr('title', hide_text);
|
||||
button.data('hidden', 'false');
|
||||
jthis.prepend(button);
|
||||
}
|
||||
// tracebacks (.gt) contain bare text elements that need to be
|
||||
// wrapped in a span to work with .nextUntil() (see later)
|
||||
jthis.find('pre:has(.gt)').contents().filter(function() {
|
||||
return ((this.nodeType == 3) && (this.data.trim().length > 0));
|
||||
}).wrap('<span>');
|
||||
});
|
||||
|
||||
// define the behavior of the button when it's clicked
|
||||
$('.copybutton').click(function(e){
|
||||
e.preventDefault();
|
||||
var button = $(this);
|
||||
if (button.data('hidden') === 'false') {
|
||||
// hide the code output
|
||||
button.parent().find('.go, .gp, .gt').hide();
|
||||
button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'hidden');
|
||||
button.css('text-decoration', 'line-through');
|
||||
button.attr('title', show_text);
|
||||
button.data('hidden', 'true');
|
||||
} else {
|
||||
// show the code output
|
||||
button.parent().find('.go, .gp, .gt').show();
|
||||
button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'visible');
|
||||
button.css('text-decoration', 'none');
|
||||
button.attr('title', hide_text);
|
||||
button.data('hidden', 'false');
|
||||
}
|
||||
});
|
||||
|
||||
/*** Add permalink buttons next to glossary terms ***/
|
||||
$('dl.glossary > dt[id]').append(function() {
|
||||
return ('<a class="headerlink" href="#' +
|
||||
this.getAttribute('id') +
|
||||
'" title="Permalink to this term">¶</a>');
|
||||
});
|
||||
});
|
||||
|
||||
</script>
|
||||
{%- if pagename != 'index' and pagename != 'documentation' %}
|
||||
{% if theme_mathjax_path %}
|
||||
<script id="MathJax-script" async src="{{ theme_mathjax_path }}"></script>
|
||||
{% endif %}
|
||||
{%- endif %}
|
||||
@@ -1,132 +0,0 @@
|
||||
{# TEMPLATE VAR SETTINGS #}
|
||||
{%- set url_root = pathto('', 1) %}
|
||||
{%- if url_root == '#' %}{% set url_root = '' %}{% endif %}
|
||||
{%- if not embedded and docstitle %}
|
||||
{%- set titlesuffix = " — "|safe + docstitle|e %}
|
||||
{%- else %}
|
||||
{%- set titlesuffix = "" %}
|
||||
{%- endif %}
|
||||
{%- set lang_attr = 'en' %}
|
||||
|
||||
<!DOCTYPE html>
|
||||
<!--[if IE 8]><html class="no-js lt-ie9" lang="{{ lang_attr }}" > <![endif]-->
|
||||
<!--[if gt IE 8]><!-->
|
||||
<html class="no-js" lang="{{ lang_attr }}"> <!--<![endif]-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
{{ metatags }}
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
|
||||
{% block htmltitle %}
|
||||
<title>{{ title|striptags|e }}{{ titlesuffix }}</title>
|
||||
{% endblock %}
|
||||
<link rel="canonical"
|
||||
href="https://api.python.langchain.com/en/latest/{{ pagename }}.html"/>
|
||||
|
||||
{% if favicon_url %}
|
||||
<link rel="shortcut icon" href="{{ favicon_url|e }}"/>
|
||||
{% endif %}
|
||||
|
||||
<link rel="stylesheet"
|
||||
href="{{ pathto('_static/css/vendor/bootstrap.min.css', 1) }}"
|
||||
type="text/css"/>
|
||||
{%- for css in css_files %}
|
||||
{%- if css|attr("rel") %}
|
||||
<link rel="{{ css.rel }}" href="{{ pathto(css.filename, 1) }}"
|
||||
type="text/css"{% if css.title is not none %}
|
||||
title="{{ css.title }}"{% endif %} />
|
||||
{%- else %}
|
||||
<link rel="stylesheet" href="{{ pathto(css, 1) }}" type="text/css"/>
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
<link rel="stylesheet" href="{{ pathto('_static/' + style, 1) }}" type="text/css"/>
|
||||
<script id="documentation_options" data-url_root="{{ pathto('', 1) }}"
|
||||
src="{{ pathto('_static/documentation_options.js', 1) }}"></script>
|
||||
<script src="{{ pathto('_static/jquery.js', 1) }}"></script>
|
||||
{%- block extrahead %} {% endblock %}
|
||||
</head>
|
||||
<body>
|
||||
{% include "nav.html" %}
|
||||
{%- block content %}
|
||||
<div class="d-flex" id="sk-doc-wrapper">
|
||||
<input type="checkbox" name="sk-toggle-checkbox" id="sk-toggle-checkbox">
|
||||
<label id="sk-sidemenu-toggle" class="sk-btn-toggle-toc btn sk-btn-primary"
|
||||
for="sk-toggle-checkbox">Toggle Menu</label>
|
||||
<div id="sk-sidebar-wrapper" class="border-right">
|
||||
<div class="sk-sidebar-toc-wrapper">
|
||||
{%- if meta and meta['parenttoc']|tobool %}
|
||||
<div class="sk-sidebar-toc">
|
||||
{% set nav = get_nav_object(maxdepth=3, collapse=True, numbered=True) %}
|
||||
<ul>
|
||||
{% for main_nav_item in nav %}
|
||||
{% if main_nav_item.active %}
|
||||
<li>
|
||||
<a href="{{ main_nav_item.url }}"
|
||||
class="sk-toc-active">{{ main_nav_item.title }}</a>
|
||||
</li>
|
||||
<ul>
|
||||
{% for nav_item in main_nav_item.children %}
|
||||
<li>
|
||||
<a href="{{ nav_item.url }}"
|
||||
class="{% if nav_item.active %}sk-toc-active{% endif %}">{{ nav_item.title }}</a>
|
||||
{% if nav_item.children %}
|
||||
<ul>
|
||||
{% for inner_child in nav_item.children %}
|
||||
<li class="sk-toctree-l3">
|
||||
<a href="{{ inner_child.url }}">{{ inner_child.title }}</a>
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
{%- elif meta and meta['globalsidebartoc']|tobool %}
|
||||
<div class="sk-sidebar-toc sk-sidebar-global-toc">
|
||||
{{ toctree(maxdepth=2, titles_only=True) }}
|
||||
</div>
|
||||
{%- else %}
|
||||
<div class="sk-sidebar-toc">
|
||||
{{ toc }}
|
||||
</div>
|
||||
{%- endif %}
|
||||
</div>
|
||||
</div>
|
||||
<div id="sk-page-content-wrapper">
|
||||
<div class="sk-page-content container-fluid body px-md-3" role="main">
|
||||
{% block body %}{% endblock %}
|
||||
</div>
|
||||
<div class="container">
|
||||
<footer class="sk-content-footer">
|
||||
{%- if pagename != 'index' %}
|
||||
{%- if show_copyright %}
|
||||
{%- if hasdoc('copyright') %}
|
||||
{% trans path=pathto('copyright'), copyright=copyright|e %}
|
||||
© {{ copyright }}.{% endtrans %}
|
||||
{%- else %}
|
||||
{% trans copyright=copyright|e %}© {{ copyright }}
|
||||
.{% endtrans %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if last_updated %}
|
||||
{% trans last_updated=last_updated|e %}Last updated
|
||||
on {{ last_updated }}.{% endtrans %}
|
||||
{%- endif %}
|
||||
{%- if show_source and has_source and sourcename %}
|
||||
<a href="{{ pathto('_sources/' + sourcename, true)|e }}"
|
||||
rel="nofollow">{{ _('Show this page source') }}</a>
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{%- endblock %}
|
||||
<script src="{{ pathto('_static/js/vendor/bootstrap.min.js', 1) }}"></script>
|
||||
{% include "javascript.html" %}
|
||||
</body>
|
||||
</html>
|
||||
@@ -1,78 +0,0 @@
|
||||
{%- if pagename != 'index' and pagename != 'documentation' %}
|
||||
{%- set nav_bar_class = "sk-docs-navbar" %}
|
||||
{%- set top_container_cls = "sk-docs-container" %}
|
||||
{%- else %}
|
||||
{%- set nav_bar_class = "sk-landing-navbar" %}
|
||||
{%- set top_container_cls = "sk-landing-container" %}
|
||||
{%- endif %}
|
||||
|
||||
<nav id="navbar" class="{{ nav_bar_class }} navbar navbar-expand-md navbar-light bg-light py-0">
|
||||
<div class="container-fluid {{ top_container_cls }} px-0">
|
||||
{%- if logo_url %}
|
||||
<a class="navbar-brand py-0" href="{{ pathto('index') }}">
|
||||
<img
|
||||
class="sk-brand-img"
|
||||
src="{{ logo_url|e }}"
|
||||
alt="logo"/>
|
||||
</a>
|
||||
{%- endif %}
|
||||
<button
|
||||
id="sk-navbar-toggler"
|
||||
class="navbar-toggler"
|
||||
type="button"
|
||||
data-toggle="collapse"
|
||||
data-target="#navbarSupportedContent"
|
||||
aria-controls="navbarSupportedContent"
|
||||
aria-expanded="false"
|
||||
aria-label="Toggle navigation"
|
||||
>
|
||||
<span class="navbar-toggler-icon"></span>
|
||||
</button>
|
||||
|
||||
<div class="sk-navbar-collapse collapse navbar-collapse" id="navbarSupportedContent">
|
||||
<ul class="navbar-nav mr-auto">
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" href="{{ pathto('langchain_api_reference') }}">LangChain</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" href="{{ pathto('core_api_reference') }}">Core</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" href="{{ pathto('community_api_reference') }}">Community</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" href="{{ pathto('experimental_api_reference') }}">Experimental</a>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" href="{{ pathto('text_splitters_api_reference') }}">Text splitters</a>
|
||||
</li>
|
||||
{%- for title, pathname in partners %}
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link nav-more-item-mobile-items" href="{{ pathto(pathname) }}">{{ title }}</a>
|
||||
</li>
|
||||
{%- endfor %}
|
||||
<li class="nav-item dropdown nav-more-item-dropdown">
|
||||
<a class="sk-nav-link nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Partner libs</a>
|
||||
<div class="dropdown-menu" aria-labelledby="navbarDropdown">
|
||||
{%- for title, pathname in partners %}
|
||||
<a class="sk-nav-dropdown-item dropdown-item" href="{{ pathto(pathname) }}">{{ title }}</a>
|
||||
{%- endfor %}
|
||||
</div>
|
||||
</li>
|
||||
<li class="nav-item">
|
||||
<a class="sk-nav-link nav-link" target="_blank" rel="noopener noreferrer" href="https://python.langchain.com/">Docs</a>
|
||||
</li>
|
||||
</ul>
|
||||
{%- if pagename != "search"%}
|
||||
<div id="searchbox" role="search">
|
||||
<div class="searchformwrapper">
|
||||
<form class="search" action="{{ pathto('search') }}" method="get">
|
||||
<input class="sk-search-text-input" type="text" name="q" aria-labelledby="searchlabel" />
|
||||
<input class="sk-search-text-btn" type="submit" value="{{ _('Go') }}" />
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
{%- endif %}
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
@@ -1,16 +0,0 @@
|
||||
{%- extends "basic/search.html" %}
|
||||
{% block extrahead %}
|
||||
<script type="text/javascript" src="{{ pathto('_static/underscore.js', 1) }}"></script>
|
||||
<script type="text/javascript" src="{{ pathto('searchindex.js', 1) }}" defer></script>
|
||||
<script type="text/javascript" src="{{ pathto('_static/doctools.js', 1) }}"></script>
|
||||
<script type="text/javascript" src="{{ pathto('_static/language_data.js', 1) }}"></script>
|
||||
<script type="text/javascript" src="{{ pathto('_static/searchtools.js', 1) }}"></script>
|
||||
<script type="text/javascript" src="{{ pathto('_static/sphinx_highlight.js', 1) }}"></script>
|
||||
<script type="text/javascript">
|
||||
$(document).ready(function() {
|
||||
if (!Search.out) {
|
||||
Search.init();
|
||||
}
|
||||
});
|
||||
</script>
|
||||
{% endblock %}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,8 +0,0 @@
|
||||
[theme]
|
||||
inherit = basic
|
||||
pygments_style = default
|
||||
stylesheet = css/theme.css
|
||||
|
||||
[options]
|
||||
link_to_live_contributing_page = false
|
||||
mathjax_path =
|
||||
@@ -4,8 +4,11 @@ LangChain implements the latest research in the field of Natural Language Proces
|
||||
This page contains `arXiv` papers referenced in the LangChain Documentation, API Reference,
|
||||
Templates, and Cookbooks.
|
||||
|
||||
From the opposite direction, scientists use LangChain in research and reference LangChain in the research papers.
|
||||
Here you find [such papers](https://arxiv.org/search/?query=langchain&searchtype=all&source=header).
|
||||
From the opposite direction, scientists use `LangChain` in research and reference it in the research papers.
|
||||
Here you find papers that reference:
|
||||
- [LangChain](https://arxiv.org/search/?query=langchain&searchtype=all&source=header)
|
||||
- [LangGraph](https://arxiv.org/search/?query=langgraph&searchtype=all&source=header)
|
||||
- [LangSmith](https://arxiv.org/search/?query=langsmith&searchtype=all&source=header)
|
||||
|
||||
## Summary
|
||||
|
||||
@@ -23,32 +26,30 @@ Here you find [such papers](https://arxiv.org/search/?query=langchain&searchtype
|
||||
| `2305.14283v3` [Query Rewriting for Retrieval-Augmented Large Language Models](http://arxiv.org/abs/2305.14283v3) | Xinbei Ma, Yeyun Gong, Pengcheng He, et al. | 2023-05-23 | `Template:` [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read), `Cookbook:` [rewrite](https://github.com/langchain-ai/langchain/blob/master/cookbook/rewrite.ipynb)
|
||||
| `2305.08291v1` [Large Language Model Guided Tree-of-Thought](http://arxiv.org/abs/2305.08291v1) | Jieyi Long | 2023-05-15 | `API:` [langchain_experimental.tot](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.tot), `Cookbook:` [tree_of_thought](https://github.com/langchain-ai/langchain/blob/master/cookbook/tree_of_thought.ipynb)
|
||||
| `2305.04091v3` [Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models](http://arxiv.org/abs/2305.04091v3) | Lei Wang, Wanyu Xu, Yihuai Lan, et al. | 2023-05-06 | `Cookbook:` [plan_and_execute_agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)
|
||||
| `2305.02156v1` [Zero-Shot Listwise Document Reranking with a Large Language Model](http://arxiv.org/abs/2305.02156v1) | Xueguang Ma, Xinyu Zhang, Ronak Pradeep, et al. | 2023-05-03 | `API:` [langchain...LLMListwiseRerank](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
|
||||
| `2304.08485v2` [Visual Instruction Tuning](http://arxiv.org/abs/2304.08485v2) | Haotian Liu, Chunyuan Li, Qingyang Wu, et al. | 2023-04-17 | `Cookbook:` [Semi_structured_and_multi_modal_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb), [Semi_structured_multi_modal_RAG_LLaMA2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb)
|
||||
| `2304.03442v2` [Generative Agents: Interactive Simulacra of Human Behavior](http://arxiv.org/abs/2304.03442v2) | Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al. | 2023-04-07 | `Cookbook:` [multiagent_bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb), [generative_agents_interactive_simulacra_of_human_behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb)
|
||||
| `2303.17760v2` [CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society](http://arxiv.org/abs/2303.17760v2) | Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al. | 2023-03-31 | `Cookbook:` [camel_role_playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)
|
||||
| `2303.17580v4` [HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face](http://arxiv.org/abs/2303.17580v4) | Yongliang Shen, Kaitao Song, Xu Tan, et al. | 2023-03-30 | `API:` [langchain_experimental.autonomous_agents](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.autonomous_agents), `Cookbook:` [hugginggpt](https://github.com/langchain-ai/langchain/blob/master/cookbook/hugginggpt.ipynb)
|
||||
| `2303.08774v6` [GPT-4 Technical Report](http://arxiv.org/abs/2303.08774v6) | OpenAI, Josh Achiam, Steven Adler, et al. | 2023-03-15 | `Docs:` [docs/integrations/vectorstores/mongodb_atlas](https://python.langchain.com/docs/integrations/vectorstores/mongodb_atlas)
|
||||
| `2301.10226v4` [A Watermark for Large Language Models](http://arxiv.org/abs/2301.10226v4) | John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al. | 2023-01-24 | `API:` [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
| `2301.10226v4` [A Watermark for Large Language Models](http://arxiv.org/abs/2301.10226v4) | John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al. | 2023-01-24 | `API:` [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
| `2212.10496v1` [Precise Zero-Shot Dense Retrieval without Relevance Labels](http://arxiv.org/abs/2212.10496v1) | Luyu Gao, Xueguang Ma, Jimmy Lin, et al. | 2022-12-20 | `API:` [langchain...HypotheticalDocumentEmbedder](https://api.python.langchain.com/en/latest/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder), `Template:` [hyde](https://python.langchain.com/docs/templates/hyde), `Cookbook:` [hypothetical_document_embeddings](https://github.com/langchain-ai/langchain/blob/master/cookbook/hypothetical_document_embeddings.ipynb)
|
||||
| `2212.07425v3` [Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments](http://arxiv.org/abs/2212.07425v3) | Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande, et al. | 2022-12-12 | `API:` [langchain_experimental.fallacy_removal](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)
|
||||
| `2211.13892v2` [Complementary Explanations for Effective In-Context Learning](http://arxiv.org/abs/2211.13892v2) | Xi Ye, Srinivasan Iyer, Asli Celikyilmaz, et al. | 2022-11-25 | `API:` [langchain_core...MaxMarginalRelevanceExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
|
||||
| `2211.10435v2` [PAL: Program-aided Language Models](http://arxiv.org/abs/2211.10435v2) | Luyu Gao, Aman Madaan, Shuyan Zhou, et al. | 2022-11-18 | `API:` [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain), [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), `Cookbook:` [program_aided_language_model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
|
||||
| `2210.03629v3` [ReAct: Synergizing Reasoning and Acting in Language Models](http://arxiv.org/abs/2210.03629v3) | Shunyu Yao, Jeffrey Zhao, Dian Yu, et al. | 2022-10-06 | `Docs:` [docs/integrations/providers/cohere](https://python.langchain.com/docs/integrations/providers/cohere), [docs/integrations/chat/huggingface](https://python.langchain.com/docs/integrations/chat/huggingface), [docs/integrations/tools/ionic_shopping](https://python.langchain.com/docs/integrations/tools/ionic_shopping), `API:` [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent), [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain)
|
||||
| `2211.10435v2` [PAL: Program-aided Language Models](http://arxiv.org/abs/2211.10435v2) | Luyu Gao, Aman Madaan, Shuyan Zhou, et al. | 2022-11-18 | `API:` [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain), `Cookbook:` [program_aided_language_model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
|
||||
| `2210.03629v3` [ReAct: Synergizing Reasoning and Acting in Language Models](http://arxiv.org/abs/2210.03629v3) | Shunyu Yao, Jeffrey Zhao, Dian Yu, et al. | 2022-10-06 | `Docs:` [docs/integrations/providers/cohere](https://python.langchain.com/docs/integrations/providers/cohere), [docs/integrations/tools/ionic_shopping](https://python.langchain.com/docs/integrations/tools/ionic_shopping), `API:` [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain), [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent)
|
||||
| `2209.10785v2` [Deep Lake: a Lakehouse for Deep Learning](http://arxiv.org/abs/2209.10785v2) | Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan, et al. | 2022-09-22 | `Docs:` [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/docs/integrations/providers/activeloop_deeplake)
|
||||
| `2205.13147v4` [Matryoshka Representation Learning](http://arxiv.org/abs/2205.13147v4) | Aditya Kusupati, Gantavya Bhatt, Aniket Rege, et al. | 2022-05-26 | `Docs:` [docs/integrations/providers/snowflake](https://python.langchain.com/docs/integrations/providers/snowflake)
|
||||
| `2205.12654v1` [Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages](http://arxiv.org/abs/2205.12654v1) | Kevin Heffernan, Onur Çelebi, Holger Schwenk | 2022-05-25 | `API:` [langchain_community...LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
|
||||
| `2204.00498v1` [Evaluating the Text-to-SQL Capabilities of Large Language Models](http://arxiv.org/abs/2204.00498v1) | Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau | 2022-03-15 | `API:` [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL), [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase)
|
||||
| `2202.00666v5` [Locally Typical Sampling](http://arxiv.org/abs/2202.00666v5) | Clara Meister, Tiago Pimentel, Gian Wiher, et al. | 2022-02-01 | `API:` [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
| `2204.00498v1` [Evaluating the Text-to-SQL Capabilities of Large Language Models](http://arxiv.org/abs/2204.00498v1) | Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau | 2022-03-15 | `API:` [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
|
||||
| `2202.00666v5` [Locally Typical Sampling](http://arxiv.org/abs/2202.00666v5) | Clara Meister, Tiago Pimentel, Gian Wiher, et al. | 2022-02-01 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
| `2103.00020v1` [Learning Transferable Visual Models From Natural Language Supervision](http://arxiv.org/abs/2103.00020v1) | Alec Radford, Jong Wook Kim, Chris Hallacy, et al. | 2021-02-26 | `API:` [langchain_experimental.open_clip](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.open_clip)
|
||||
| `1909.05858v2` [CTRL: A Conditional Transformer Language Model for Controllable Generation](http://arxiv.org/abs/1909.05858v2) | Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al. | 2019-09-11 | `API:` [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
| `1908.10084v1` [Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks](http://arxiv.org/abs/1908.10084v1) | Nils Reimers, Iryna Gurevych | 2019-08-27 | `Docs:` [docs/integrations/text_embedding/sentence_transformers](https://python.langchain.com/docs/integrations/text_embedding/sentence_transformers)
|
||||
| `1909.05858v2` [CTRL: A Conditional Transformer Language Model for Controllable Generation](http://arxiv.org/abs/1909.05858v2) | Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al. | 2019-09-11 | `API:` [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
|
||||
## Self-Discover: Large Language Models Self-Compose Reasoning Structures
|
||||
|
||||
- **arXiv id:** 2402.03620v1
|
||||
- **arXiv id:** [2402.03620v1](http://arxiv.org/abs/2402.03620v1) **Published Date:** 2024-02-06
|
||||
- **Title:** Self-Discover: Large Language Models Self-Compose Reasoning Structures
|
||||
- **Authors:** Pei Zhou, Jay Pujara, Xiang Ren, et al.
|
||||
- **Published Date:** 2024-02-06
|
||||
- **URL:** http://arxiv.org/abs/2402.03620v1
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [self-discover](https://github.com/langchain-ai/langchain/blob/master/cookbook/self-discover.ipynb)
|
||||
@@ -70,11 +71,9 @@ commonalities with human reasoning patterns.
|
||||
|
||||
## RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval
|
||||
|
||||
- **arXiv id:** 2401.18059v1
|
||||
- **arXiv id:** [2401.18059v1](http://arxiv.org/abs/2401.18059v1) **Published Date:** 2024-01-31
|
||||
- **Title:** RAPTOR: Recursive Abstractive Processing for Tree-Organized Retrieval
|
||||
- **Authors:** Parth Sarthi, Salman Abdullah, Aditi Tuli, et al.
|
||||
- **Published Date:** 2024-01-31
|
||||
- **URL:** http://arxiv.org/abs/2401.18059v1
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [RAPTOR](https://github.com/langchain-ai/langchain/blob/master/cookbook/RAPTOR.ipynb)
|
||||
@@ -96,11 +95,9 @@ benchmark by 20% in absolute accuracy.
|
||||
|
||||
## Corrective Retrieval Augmented Generation
|
||||
|
||||
- **arXiv id:** 2401.15884v2
|
||||
- **arXiv id:** [2401.15884v2](http://arxiv.org/abs/2401.15884v2) **Published Date:** 2024-01-29
|
||||
- **Title:** Corrective Retrieval Augmented Generation
|
||||
- **Authors:** Shi-Qi Yan, Jia-Chen Gu, Yun Zhu, et al.
|
||||
- **Published Date:** 2024-01-29
|
||||
- **URL:** http://arxiv.org/abs/2401.15884v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [langgraph_crag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_crag.ipynb)
|
||||
@@ -126,11 +123,9 @@ performance of RAG-based approaches.
|
||||
|
||||
## Mixtral of Experts
|
||||
|
||||
- **arXiv id:** 2401.04088v1
|
||||
- **arXiv id:** [2401.04088v1](http://arxiv.org/abs/2401.04088v1) **Published Date:** 2024-01-08
|
||||
- **Title:** Mixtral of Experts
|
||||
- **Authors:** Albert Q. Jiang, Alexandre Sablayrolles, Antoine Roux, et al.
|
||||
- **Published Date:** 2024-01-08
|
||||
- **URL:** http://arxiv.org/abs/2401.04088v1
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [together_ai](https://github.com/langchain-ai/langchain/blob/master/cookbook/together_ai.ipynb)
|
||||
@@ -152,11 +147,9 @@ the base and instruct models are released under the Apache 2.0 license.
|
||||
|
||||
## Dense X Retrieval: What Retrieval Granularity Should We Use?
|
||||
|
||||
- **arXiv id:** 2312.06648v2
|
||||
- **arXiv id:** [2312.06648v2](http://arxiv.org/abs/2312.06648v2) **Published Date:** 2023-12-11
|
||||
- **Title:** Dense X Retrieval: What Retrieval Granularity Should We Use?
|
||||
- **Authors:** Tong Chen, Hongwei Wang, Sihao Chen, et al.
|
||||
- **Published Date:** 2023-12-11
|
||||
- **URL:** http://arxiv.org/abs/2312.06648v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Template:** [propositional-retrieval](https://python.langchain.com/docs/templates/propositional-retrieval)
|
||||
@@ -181,11 +174,9 @@ information.
|
||||
|
||||
## Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models
|
||||
|
||||
- **arXiv id:** 2311.09210v1
|
||||
- **arXiv id:** [2311.09210v1](http://arxiv.org/abs/2311.09210v1) **Published Date:** 2023-11-15
|
||||
- **Title:** Chain-of-Note: Enhancing Robustness in Retrieval-Augmented Language Models
|
||||
- **Authors:** Wenhao Yu, Hongming Zhang, Xiaoman Pan, et al.
|
||||
- **Published Date:** 2023-11-15
|
||||
- **URL:** http://arxiv.org/abs/2311.09210v1
|
||||
- **LangChain:**
|
||||
|
||||
- **Template:** [chain-of-note-wiki](https://python.langchain.com/docs/templates/chain-of-note-wiki)
|
||||
@@ -215,11 +206,9 @@ outside the pre-training knowledge scope.
|
||||
|
||||
## Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection
|
||||
|
||||
- **arXiv id:** 2310.11511v1
|
||||
- **arXiv id:** [2310.11511v1](http://arxiv.org/abs/2310.11511v1) **Published Date:** 2023-10-17
|
||||
- **Title:** Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection
|
||||
- **Authors:** Akari Asai, Zeqiu Wu, Yizhong Wang, et al.
|
||||
- **Published Date:** 2023-10-17
|
||||
- **URL:** http://arxiv.org/abs/2310.11511v1
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [langgraph_self_rag](https://github.com/langchain-ai/langchain/blob/master/cookbook/langgraph_self_rag.ipynb)
|
||||
@@ -248,11 +237,9 @@ to these models.
|
||||
|
||||
## Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models
|
||||
|
||||
- **arXiv id:** 2310.06117v2
|
||||
- **arXiv id:** [2310.06117v2](http://arxiv.org/abs/2310.06117v2) **Published Date:** 2023-10-09
|
||||
- **Title:** Take a Step Back: Evoking Reasoning via Abstraction in Large Language Models
|
||||
- **Authors:** Huaixiu Steven Zheng, Swaroop Mishra, Xinyun Chen, et al.
|
||||
- **Published Date:** 2023-10-09
|
||||
- **URL:** http://arxiv.org/abs/2310.06117v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Template:** [stepback-qa-prompting](https://python.langchain.com/docs/templates/stepback-qa-prompting)
|
||||
@@ -271,11 +258,9 @@ and 11% respectively, TimeQA by 27%, and MuSiQue by 7%.
|
||||
|
||||
## Llama 2: Open Foundation and Fine-Tuned Chat Models
|
||||
|
||||
- **arXiv id:** 2307.09288v2
|
||||
- **arXiv id:** [2307.09288v2](http://arxiv.org/abs/2307.09288v2) **Published Date:** 2023-07-18
|
||||
- **Title:** Llama 2: Open Foundation and Fine-Tuned Chat Models
|
||||
- **Authors:** Hugo Touvron, Louis Martin, Kevin Stone, et al.
|
||||
- **Published Date:** 2023-07-18
|
||||
- **URL:** http://arxiv.org/abs/2307.09288v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [Semi_Structured_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_Structured_RAG.ipynb)
|
||||
@@ -292,11 +277,9 @@ contribute to the responsible development of LLMs.
|
||||
|
||||
## Query Rewriting for Retrieval-Augmented Large Language Models
|
||||
|
||||
- **arXiv id:** 2305.14283v3
|
||||
- **arXiv id:** [2305.14283v3](http://arxiv.org/abs/2305.14283v3) **Published Date:** 2023-05-23
|
||||
- **Title:** Query Rewriting for Retrieval-Augmented Large Language Models
|
||||
- **Authors:** Xinbei Ma, Yeyun Gong, Pengcheng He, et al.
|
||||
- **Published Date:** 2023-05-23
|
||||
- **URL:** http://arxiv.org/abs/2305.14283v3
|
||||
- **LangChain:**
|
||||
|
||||
- **Template:** [rewrite-retrieve-read](https://python.langchain.com/docs/templates/rewrite-retrieve-read)
|
||||
@@ -322,11 +305,9 @@ for retrieval-augmented LLM.
|
||||
|
||||
## Large Language Model Guided Tree-of-Thought
|
||||
|
||||
- **arXiv id:** 2305.08291v1
|
||||
- **arXiv id:** [2305.08291v1](http://arxiv.org/abs/2305.08291v1) **Published Date:** 2023-05-15
|
||||
- **Title:** Large Language Model Guided Tree-of-Thought
|
||||
- **Authors:** Jieyi Long
|
||||
- **Published Date:** 2023-05-15
|
||||
- **URL:** http://arxiv.org/abs/2305.08291v1
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_experimental.tot](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.tot)
|
||||
@@ -352,11 +333,9 @@ implementation of the ToT-based Sudoku solver is available on GitHub:
|
||||
|
||||
## Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models
|
||||
|
||||
- **arXiv id:** 2305.04091v3
|
||||
- **arXiv id:** [2305.04091v3](http://arxiv.org/abs/2305.04091v3) **Published Date:** 2023-05-06
|
||||
- **Title:** Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models
|
||||
- **Authors:** Lei Wang, Wanyu Xu, Yihuai Lan, et al.
|
||||
- **Published Date:** 2023-05-06
|
||||
- **URL:** http://arxiv.org/abs/2305.04091v3
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [plan_and_execute_agent](https://github.com/langchain-ai/langchain/blob/master/cookbook/plan_and_execute_agent.ipynb)
|
||||
@@ -383,13 +362,35 @@ Prompting, and has comparable performance with 8-shot CoT prompting on the math
|
||||
reasoning problem. The code can be found at
|
||||
https://github.com/AGI-Edgerunners/Plan-and-Solve-Prompting.
|
||||
|
||||
## Zero-Shot Listwise Document Reranking with a Large Language Model
|
||||
|
||||
- **arXiv id:** [2305.02156v1](http://arxiv.org/abs/2305.02156v1) **Published Date:** 2023-05-03
|
||||
- **Title:** Zero-Shot Listwise Document Reranking with a Large Language Model
|
||||
- **Authors:** Xueguang Ma, Xinyu Zhang, Ronak Pradeep, et al.
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain...LLMListwiseRerank](https://api.python.langchain.com/en/latest/retrievers/langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank.html#langchain.retrievers.document_compressors.listwise_rerank.LLMListwiseRerank)
|
||||
|
||||
**Abstract:** Supervised ranking methods based on bi-encoder or cross-encoder architectures
|
||||
have shown success in multi-stage text ranking tasks, but they require large
|
||||
amounts of relevance judgments as training data. In this work, we propose
|
||||
Listwise Reranker with a Large Language Model (LRL), which achieves strong
|
||||
reranking effectiveness without using any task-specific training data.
|
||||
Different from the existing pointwise ranking methods, where documents are
|
||||
scored independently and ranked according to the scores, LRL directly generates
|
||||
a reordered list of document identifiers given the candidate documents.
|
||||
Experiments on three TREC web search datasets demonstrate that LRL not only
|
||||
outperforms zero-shot pointwise methods when reranking first-stage retrieval
|
||||
results, but can also act as a final-stage reranker to improve the top-ranked
|
||||
results of a pointwise method for improved efficiency. Additionally, we apply
|
||||
our approach to subsets of MIRACL, a recent multilingual retrieval dataset,
|
||||
with results showing its potential to generalize across different languages.
|
||||
|
||||
## Visual Instruction Tuning
|
||||
|
||||
- **arXiv id:** 2304.08485v2
|
||||
- **arXiv id:** [2304.08485v2](http://arxiv.org/abs/2304.08485v2) **Published Date:** 2023-04-17
|
||||
- **Title:** Visual Instruction Tuning
|
||||
- **Authors:** Haotian Liu, Chunyuan Li, Qingyang Wu, et al.
|
||||
- **Published Date:** 2023-04-17
|
||||
- **URL:** http://arxiv.org/abs/2304.08485v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [Semi_structured_and_multi_modal_RAG](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_and_multi_modal_RAG.ipynb), [Semi_structured_multi_modal_RAG_LLaMA2](https://github.com/langchain-ai/langchain/blob/master/cookbook/Semi_structured_multi_modal_RAG_LLaMA2.ipynb)
|
||||
@@ -412,11 +413,9 @@ publicly available.
|
||||
|
||||
## Generative Agents: Interactive Simulacra of Human Behavior
|
||||
|
||||
- **arXiv id:** 2304.03442v2
|
||||
- **arXiv id:** [2304.03442v2](http://arxiv.org/abs/2304.03442v2) **Published Date:** 2023-04-07
|
||||
- **Title:** Generative Agents: Interactive Simulacra of Human Behavior
|
||||
- **Authors:** Joon Sung Park, Joseph C. O'Brien, Carrie J. Cai, et al.
|
||||
- **Published Date:** 2023-04-07
|
||||
- **URL:** http://arxiv.org/abs/2304.03442v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [multiagent_bidding](https://github.com/langchain-ai/langchain/blob/master/cookbook/multiagent_bidding.ipynb), [generative_agents_interactive_simulacra_of_human_behavior](https://github.com/langchain-ai/langchain/blob/master/cookbook/generative_agents_interactive_simulacra_of_human_behavior.ipynb)
|
||||
@@ -448,11 +447,9 @@ interaction patterns for enabling believable simulations of human behavior.
|
||||
|
||||
## CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society
|
||||
|
||||
- **arXiv id:** 2303.17760v2
|
||||
- **arXiv id:** [2303.17760v2](http://arxiv.org/abs/2303.17760v2) **Published Date:** 2023-03-31
|
||||
- **Title:** CAMEL: Communicative Agents for "Mind" Exploration of Large Language Model Society
|
||||
- **Authors:** Guohao Li, Hasan Abed Al Kader Hammoud, Hani Itani, et al.
|
||||
- **Published Date:** 2023-03-31
|
||||
- **URL:** http://arxiv.org/abs/2303.17760v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Cookbook:** [camel_role_playing](https://github.com/langchain-ai/langchain/blob/master/cookbook/camel_role_playing.ipynb)
|
||||
@@ -478,11 +475,9 @@ agents and beyond: https://github.com/camel-ai/camel.
|
||||
|
||||
## HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face
|
||||
|
||||
- **arXiv id:** 2303.17580v4
|
||||
- **arXiv id:** [2303.17580v4](http://arxiv.org/abs/2303.17580v4) **Published Date:** 2023-03-30
|
||||
- **Title:** HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face
|
||||
- **Authors:** Yongliang Shen, Kaitao Song, Xu Tan, et al.
|
||||
- **Published Date:** 2023-03-30
|
||||
- **URL:** http://arxiv.org/abs/2303.17580v4
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_experimental.autonomous_agents](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.autonomous_agents)
|
||||
@@ -508,40 +503,14 @@ modalities and domains and achieve impressive results in language, vision,
|
||||
speech, and other challenging tasks, which paves a new way towards the
|
||||
realization of artificial general intelligence.
|
||||
|
||||
## GPT-4 Technical Report
|
||||
|
||||
- **arXiv id:** 2303.08774v6
|
||||
- **Title:** GPT-4 Technical Report
|
||||
- **Authors:** OpenAI, Josh Achiam, Steven Adler, et al.
|
||||
- **Published Date:** 2023-03-15
|
||||
- **URL:** http://arxiv.org/abs/2303.08774v6
|
||||
- **LangChain:**
|
||||
|
||||
- **Documentation:** [docs/integrations/vectorstores/mongodb_atlas](https://python.langchain.com/docs/integrations/vectorstores/mongodb_atlas)
|
||||
|
||||
**Abstract:** We report the development of GPT-4, a large-scale, multimodal model which can
|
||||
accept image and text inputs and produce text outputs. While less capable than
|
||||
humans in many real-world scenarios, GPT-4 exhibits human-level performance on
|
||||
various professional and academic benchmarks, including passing a simulated bar
|
||||
exam with a score around the top 10% of test takers. GPT-4 is a
|
||||
Transformer-based model pre-trained to predict the next token in a document.
|
||||
The post-training alignment process results in improved performance on measures
|
||||
of factuality and adherence to desired behavior. A core component of this
|
||||
project was developing infrastructure and optimization methods that behave
|
||||
predictably across a wide range of scales. This allowed us to accurately
|
||||
predict some aspects of GPT-4's performance based on models trained with no
|
||||
more than 1/1,000th the compute of GPT-4.
|
||||
|
||||
## A Watermark for Large Language Models
|
||||
|
||||
- **arXiv id:** 2301.10226v4
|
||||
- **arXiv id:** [2301.10226v4](http://arxiv.org/abs/2301.10226v4) **Published Date:** 2023-01-24
|
||||
- **Title:** A Watermark for Large Language Models
|
||||
- **Authors:** John Kirchenbauer, Jonas Geiping, Yuxin Wen, et al.
|
||||
- **Published Date:** 2023-01-24
|
||||
- **URL:** http://arxiv.org/abs/2301.10226v4
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
- **API Reference:** [langchain_community...OCIModelDeploymentTGI](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI.html#langchain_community.llms.oci_data_science_model_deployment_endpoint.OCIModelDeploymentTGI), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
|
||||
**Abstract:** Potential harms of large language models can be mitigated by watermarking
|
||||
model output, i.e., embedding signals into generated text that are invisible to
|
||||
@@ -559,11 +528,9 @@ family, and discuss robustness and security.
|
||||
|
||||
## Precise Zero-Shot Dense Retrieval without Relevance Labels
|
||||
|
||||
- **arXiv id:** 2212.10496v1
|
||||
- **arXiv id:** [2212.10496v1](http://arxiv.org/abs/2212.10496v1) **Published Date:** 2022-12-20
|
||||
- **Title:** Precise Zero-Shot Dense Retrieval without Relevance Labels
|
||||
- **Authors:** Luyu Gao, Xueguang Ma, Jimmy Lin, et al.
|
||||
- **Published Date:** 2022-12-20
|
||||
- **URL:** http://arxiv.org/abs/2212.10496v1
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain...HypotheticalDocumentEmbedder](https://api.python.langchain.com/en/latest/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html#langchain.chains.hyde.base.HypotheticalDocumentEmbedder)
|
||||
@@ -590,11 +557,9 @@ search, QA, fact verification) and languages~(e.g. sw, ko, ja).
|
||||
|
||||
## Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments
|
||||
|
||||
- **arXiv id:** 2212.07425v3
|
||||
- **arXiv id:** [2212.07425v3](http://arxiv.org/abs/2212.07425v3) **Published Date:** 2022-12-12
|
||||
- **Title:** Robust and Explainable Identification of Logical Fallacies in Natural Language Arguments
|
||||
- **Authors:** Zhivar Sourati, Vishnu Priya Prasanna Venkatesh, Darshan Deshpande, et al.
|
||||
- **Published Date:** 2022-12-12
|
||||
- **URL:** http://arxiv.org/abs/2212.07425v3
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_experimental.fallacy_removal](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.fallacy_removal)
|
||||
@@ -623,11 +588,9 @@ further work on logical fallacy identification.
|
||||
|
||||
## Complementary Explanations for Effective In-Context Learning
|
||||
|
||||
- **arXiv id:** 2211.13892v2
|
||||
- **arXiv id:** [2211.13892v2](http://arxiv.org/abs/2211.13892v2) **Published Date:** 2022-11-25
|
||||
- **Title:** Complementary Explanations for Effective In-Context Learning
|
||||
- **Authors:** Xi Ye, Srinivasan Iyer, Asli Celikyilmaz, et al.
|
||||
- **Published Date:** 2022-11-25
|
||||
- **URL:** http://arxiv.org/abs/2211.13892v2
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_core...MaxMarginalRelevanceExampleSelector](https://api.python.langchain.com/en/latest/example_selectors/langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector.html#langchain_core.example_selectors.semantic_similarity.MaxMarginalRelevanceExampleSelector)
|
||||
@@ -651,14 +614,12 @@ performance across three real-world tasks on multiple LLMs.
|
||||
|
||||
## PAL: Program-aided Language Models
|
||||
|
||||
- **arXiv id:** 2211.10435v2
|
||||
- **arXiv id:** [2211.10435v2](http://arxiv.org/abs/2211.10435v2) **Published Date:** 2022-11-18
|
||||
- **Title:** PAL: Program-aided Language Models
|
||||
- **Authors:** Luyu Gao, Aman Madaan, Shuyan Zhou, et al.
|
||||
- **Published Date:** 2022-11-18
|
||||
- **URL:** http://arxiv.org/abs/2211.10435v2
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain), [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain)
|
||||
- **API Reference:** [langchain_experimental.pal_chain](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.pal_chain), [langchain_experimental...PALChain](https://api.python.langchain.com/en/latest/pal_chain/langchain_experimental.pal_chain.base.PALChain.html#langchain_experimental.pal_chain.base.PALChain)
|
||||
- **Cookbook:** [program_aided_language_model](https://github.com/langchain-ai/langchain/blob/master/cookbook/program_aided_language_model.ipynb)
|
||||
|
||||
**Abstract:** Large language models (LLMs) have recently demonstrated an impressive ability
|
||||
@@ -686,15 +647,13 @@ publicly available at http://reasonwithpal.com/ .
|
||||
|
||||
## ReAct: Synergizing Reasoning and Acting in Language Models
|
||||
|
||||
- **arXiv id:** 2210.03629v3
|
||||
- **arXiv id:** [2210.03629v3](http://arxiv.org/abs/2210.03629v3) **Published Date:** 2022-10-06
|
||||
- **Title:** ReAct: Synergizing Reasoning and Acting in Language Models
|
||||
- **Authors:** Shunyu Yao, Jeffrey Zhao, Dian Yu, et al.
|
||||
- **Published Date:** 2022-10-06
|
||||
- **URL:** http://arxiv.org/abs/2210.03629v3
|
||||
- **LangChain:**
|
||||
|
||||
- **Documentation:** [docs/integrations/providers/cohere](https://python.langchain.com/docs/integrations/providers/cohere), [docs/integrations/chat/huggingface](https://python.langchain.com/docs/integrations/chat/huggingface), [docs/integrations/tools/ionic_shopping](https://python.langchain.com/docs/integrations/tools/ionic_shopping)
|
||||
- **API Reference:** [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent), [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain)
|
||||
- **Documentation:** [docs/integrations/providers/cohere](https://python.langchain.com/docs/integrations/providers/cohere), [docs/integrations/tools/ionic_shopping](https://python.langchain.com/docs/integrations/tools/ionic_shopping)
|
||||
- **API Reference:** [langchain...TrajectoryEvalChain](https://api.python.langchain.com/en/latest/evaluation/langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain.html#langchain.evaluation.agents.trajectory_eval_chain.TrajectoryEvalChain), [langchain...create_react_agent](https://api.python.langchain.com/en/latest/agents/langchain.agents.react.agent.create_react_agent.html#langchain.agents.react.agent.create_react_agent)
|
||||
|
||||
**Abstract:** While large language models (LLMs) have demonstrated impressive capabilities
|
||||
across tasks in language understanding and interactive decision making, their
|
||||
@@ -721,11 +680,9 @@ Project site with code: https://react-lm.github.io
|
||||
|
||||
## Deep Lake: a Lakehouse for Deep Learning
|
||||
|
||||
- **arXiv id:** 2209.10785v2
|
||||
- **arXiv id:** [2209.10785v2](http://arxiv.org/abs/2209.10785v2) **Published Date:** 2022-09-22
|
||||
- **Title:** Deep Lake: a Lakehouse for Deep Learning
|
||||
- **Authors:** Sasun Hambardzumyan, Abhinav Tuli, Levon Ghukasyan, et al.
|
||||
- **Published Date:** 2022-09-22
|
||||
- **URL:** http://arxiv.org/abs/2209.10785v2
|
||||
- **LangChain:**
|
||||
|
||||
- **Documentation:** [docs/integrations/providers/activeloop_deeplake](https://python.langchain.com/docs/integrations/providers/activeloop_deeplake)
|
||||
@@ -747,13 +704,43 @@ visualization engine, or (c) deep learning frameworks without sacrificing GPU
|
||||
utilization. Datasets stored in Deep Lake can be accessed from PyTorch,
|
||||
TensorFlow, JAX, and integrate with numerous MLOps tools.
|
||||
|
||||
## Matryoshka Representation Learning
|
||||
|
||||
- **arXiv id:** [2205.13147v4](http://arxiv.org/abs/2205.13147v4) **Published Date:** 2022-05-26
|
||||
- **Title:** Matryoshka Representation Learning
|
||||
- **Authors:** Aditya Kusupati, Gantavya Bhatt, Aniket Rege, et al.
|
||||
- **LangChain:**
|
||||
|
||||
- **Documentation:** [docs/integrations/providers/snowflake](https://python.langchain.com/docs/integrations/providers/snowflake)
|
||||
|
||||
**Abstract:** Learned representations are a central component in modern ML systems, serving
|
||||
a multitude of downstream tasks. When training such representations, it is
|
||||
often the case that computational and statistical constraints for each
|
||||
downstream task are unknown. In this context rigid, fixed capacity
|
||||
representations can be either over or under-accommodating to the task at hand.
|
||||
This leads us to ask: can we design a flexible representation that can adapt to
|
||||
multiple downstream tasks with varying computational resources? Our main
|
||||
contribution is Matryoshka Representation Learning (MRL) which encodes
|
||||
information at different granularities and allows a single embedding to adapt
|
||||
to the computational constraints of downstream tasks. MRL minimally modifies
|
||||
existing representation learning pipelines and imposes no additional cost
|
||||
during inference and deployment. MRL learns coarse-to-fine representations that
|
||||
are at least as accurate and rich as independently trained low-dimensional
|
||||
representations. The flexibility within the learned Matryoshka Representations
|
||||
offer: (a) up to 14x smaller embedding size for ImageNet-1K classification at
|
||||
the same level of accuracy; (b) up to 14x real-world speed-ups for large-scale
|
||||
retrieval on ImageNet-1K and 4K; and (c) up to 2% accuracy improvements for
|
||||
long-tail few-shot classification, all while being as robust as the original
|
||||
representations. Finally, we show that MRL extends seamlessly to web-scale
|
||||
datasets (ImageNet, JFT) across various modalities -- vision (ViT, ResNet),
|
||||
vision + language (ALIGN) and language (BERT). MRL code and pretrained models
|
||||
are open-sourced at https://github.com/RAIVNLab/MRL.
|
||||
|
||||
## Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages
|
||||
|
||||
- **arXiv id:** 2205.12654v1
|
||||
- **arXiv id:** [2205.12654v1](http://arxiv.org/abs/2205.12654v1) **Published Date:** 2022-05-25
|
||||
- **Title:** Bitext Mining Using Distilled Sentence Representations for Low-Resource Languages
|
||||
- **Authors:** Kevin Heffernan, Onur Çelebi, Holger Schwenk
|
||||
- **Published Date:** 2022-05-25
|
||||
- **URL:** http://arxiv.org/abs/2205.12654v1
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_community...LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html#langchain_community.embeddings.laser.LaserEmbeddings)
|
||||
@@ -778,14 +765,12 @@ encoders, mine bitexts, and validate the bitexts by training NMT systems.
|
||||
|
||||
## Evaluating the Text-to-SQL Capabilities of Large Language Models
|
||||
|
||||
- **arXiv id:** 2204.00498v1
|
||||
- **arXiv id:** [2204.00498v1](http://arxiv.org/abs/2204.00498v1) **Published Date:** 2022-03-15
|
||||
- **Title:** Evaluating the Text-to-SQL Capabilities of Large Language Models
|
||||
- **Authors:** Nitarshan Rajkumar, Raymond Li, Dzmitry Bahdanau
|
||||
- **Published Date:** 2022-03-15
|
||||
- **URL:** http://arxiv.org/abs/2204.00498v1
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL), [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase)
|
||||
- **API Reference:** [langchain_community...SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), [langchain_community...SparkSQL](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.spark_sql.SparkSQL.html#langchain_community.utilities.spark_sql.SparkSQL)
|
||||
|
||||
**Abstract:** We perform an empirical evaluation of Text-to-SQL capabilities of the Codex
|
||||
language model. We find that, without any finetuning, Codex is a strong
|
||||
@@ -797,14 +782,12 @@ few-shot examples.
|
||||
|
||||
## Locally Typical Sampling
|
||||
|
||||
- **arXiv id:** 2202.00666v5
|
||||
- **arXiv id:** [2202.00666v5](http://arxiv.org/abs/2202.00666v5) **Published Date:** 2022-02-01
|
||||
- **Title:** Locally Typical Sampling
|
||||
- **Authors:** Clara Meister, Tiago Pimentel, Gian Wiher, et al.
|
||||
- **Published Date:** 2022-02-01
|
||||
- **URL:** http://arxiv.org/abs/2202.00666v5
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
|
||||
**Abstract:** Today's probabilistic language generators fall short when it comes to
|
||||
producing coherent and fluent text despite the fact that the underlying models
|
||||
@@ -829,11 +812,9 @@ reducing degenerate repetitions.
|
||||
|
||||
## Learning Transferable Visual Models From Natural Language Supervision
|
||||
|
||||
- **arXiv id:** 2103.00020v1
|
||||
- **arXiv id:** [2103.00020v1](http://arxiv.org/abs/2103.00020v1) **Published Date:** 2021-02-26
|
||||
- **Title:** Learning Transferable Visual Models From Natural Language Supervision
|
||||
- **Authors:** Alec Radford, Jong Wook Kim, Chris Hallacy, et al.
|
||||
- **Published Date:** 2021-02-26
|
||||
- **URL:** http://arxiv.org/abs/2103.00020v1
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_experimental.open_clip](https://api.python.langchain.com/en/latest/experimental_api_reference.html#module-langchain_experimental.open_clip)
|
||||
@@ -861,14 +842,12 @@ https://github.com/OpenAI/CLIP.
|
||||
|
||||
## CTRL: A Conditional Transformer Language Model for Controllable Generation
|
||||
|
||||
- **arXiv id:** 1909.05858v2
|
||||
- **arXiv id:** [1909.05858v2](http://arxiv.org/abs/1909.05858v2) **Published Date:** 2019-09-11
|
||||
- **Title:** CTRL: A Conditional Transformer Language Model for Controllable Generation
|
||||
- **Authors:** Nitish Shirish Keskar, Bryan McCann, Lav R. Varshney, et al.
|
||||
- **Published Date:** 2019-09-11
|
||||
- **URL:** http://arxiv.org/abs/1909.05858v2
|
||||
- **LangChain:**
|
||||
|
||||
- **API Reference:** [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
- **API Reference:** [langchain_huggingface...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceEndpoint](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_community.llms.huggingface_endpoint.HuggingFaceEndpoint), [langchain_community...HuggingFaceTextGenInference](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference.html#langchain_community.llms.huggingface_text_gen_inference.HuggingFaceTextGenInference)
|
||||
|
||||
**Abstract:** Large-scale language models show promising text generation capabilities, but
|
||||
users cannot easily control particular aspects of the generated text. We
|
||||
@@ -881,32 +860,4 @@ codes also allow CTRL to predict which parts of the training data are most
|
||||
likely given a sequence. This provides a potential method for analyzing large
|
||||
amounts of data via model-based source attribution. We have released multiple
|
||||
full-sized, pretrained versions of CTRL at https://github.com/salesforce/ctrl.
|
||||
|
||||
## Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks
|
||||
|
||||
- **arXiv id:** 1908.10084v1
|
||||
- **Title:** Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks
|
||||
- **Authors:** Nils Reimers, Iryna Gurevych
|
||||
- **Published Date:** 2019-08-27
|
||||
- **URL:** http://arxiv.org/abs/1908.10084v1
|
||||
- **LangChain:**
|
||||
|
||||
- **Documentation:** [docs/integrations/text_embedding/sentence_transformers](https://python.langchain.com/docs/integrations/text_embedding/sentence_transformers)
|
||||
|
||||
**Abstract:** BERT (Devlin et al., 2018) and RoBERTa (Liu et al., 2019) has set a new
|
||||
state-of-the-art performance on sentence-pair regression tasks like semantic
|
||||
textual similarity (STS). However, it requires that both sentences are fed into
|
||||
the network, which causes a massive computational overhead: Finding the most
|
||||
similar pair in a collection of 10,000 sentences requires about 50 million
|
||||
inference computations (~65 hours) with BERT. The construction of BERT makes it
|
||||
unsuitable for semantic similarity search as well as for unsupervised tasks
|
||||
like clustering.
|
||||
In this publication, we present Sentence-BERT (SBERT), a modification of the
|
||||
pretrained BERT network that use siamese and triplet network structures to
|
||||
derive semantically meaningful sentence embeddings that can be compared using
|
||||
cosine-similarity. This reduces the effort for finding the most similar pair
|
||||
from 65 hours with BERT / RoBERTa to about 5 seconds with SBERT, while
|
||||
maintaining the accuracy from BERT.
|
||||
We evaluate SBERT and SRoBERTa on common STS tasks and transfer learning
|
||||
tasks, where it outperforms other state-of-the-art sentence embeddings methods.
|
||||
|
||||
@@ -209,7 +209,7 @@ Some language models take a list of messages as input and return a message.
|
||||
There are a few different types of messages.
|
||||
All messages have a `role`, `content`, and `response_metadata` property.
|
||||
|
||||
The `role` describes WHO is saying the message.
|
||||
The `role` describes WHO is saying the message. The standard roles are "user", "assistant", "system", and "tool".
|
||||
LangChain has different message classes for different roles.
|
||||
|
||||
The `content` property describes the content of the message.
|
||||
@@ -218,13 +218,16 @@ This can be a few different things:
|
||||
- A string (most models deal this type of content)
|
||||
- A List of dictionaries (this is used for multimodal input, where the dictionary contains information about that input type and that input location)
|
||||
|
||||
Optionally, messages can have a `name` property which allows for differentiating between multiple speakers with the same role.
|
||||
For example, if there are two users in the chat history it can be useful to differentiate between them. Not all models support this.
|
||||
|
||||
#### HumanMessage
|
||||
|
||||
This represents a message from the user.
|
||||
This represents a message with role "user".
|
||||
|
||||
#### AIMessage
|
||||
|
||||
This represents a message from the model. In addition to the `content` property, these messages also have:
|
||||
This represents a message with role "assistant". In addition to the `content` property, these messages also have:
|
||||
|
||||
**`response_metadata`**
|
||||
|
||||
@@ -244,11 +247,11 @@ This property returns a list of `ToolCall`s. A `ToolCall` is a dictionary with t
|
||||
|
||||
#### SystemMessage
|
||||
|
||||
This represents a system message, which tells the model how to behave. Not every model provider supports this.
|
||||
This represents a message with role "system", which tells the model how to behave. Not every model provider supports this.
|
||||
|
||||
#### ToolMessage
|
||||
|
||||
This represents the result of a tool call. In addition to `role` and `content`, this message has:
|
||||
This represents a message with role "tool", which contains the result of calling a tool. In addition to `role` and `content`, this message has:
|
||||
|
||||
- a `tool_call_id` field which conveys the id of the call to the tool that was called to produce this result.
|
||||
- an `artifact` field which can be used to pass along arbitrary artifacts of the tool execution which are useful to track but which should not be sent to the model.
|
||||
@@ -343,6 +346,7 @@ For specifics on how to use prompt templates, see the [relevant how-to guides he
|
||||
|
||||
### Example selectors
|
||||
One common prompting technique for achieving better performance is to include examples as part of the prompt.
|
||||
This is known as [few-shot prompting](/docs/concepts/#few-shot-prompting).
|
||||
This gives the language model concrete examples of how it should behave.
|
||||
Sometimes these examples are hardcoded into the prompt, but for more advanced situations it may be nice to dynamically select them.
|
||||
Example Selectors are classes responsible for selecting and then formatting examples into prompts.
|
||||
@@ -542,7 +546,8 @@ Typical usage may look like the following:
|
||||
```python
|
||||
tools = [...] # Define a list of tools
|
||||
llm_with_tools = llm.bind_tools(tools)
|
||||
ai_msg = llm_with_tools.invoke("do xyz...") # AIMessage(tool_calls=[ToolCall(...), ...], ...)
|
||||
ai_msg = llm_with_tools.invoke("do xyz...")
|
||||
# -> AIMessage(tool_calls=[ToolCall(...), ...], ...)
|
||||
```
|
||||
|
||||
The `AIMessage` returned from the model MAY have `tool_calls` associated with it.
|
||||
@@ -559,9 +564,14 @@ This generally looks like:
|
||||
|
||||
```python
|
||||
# You will want to previously check that the LLM returned tool calls
|
||||
tool_call = ai_msg.tool_calls[0] # ToolCall(args={...}, id=..., ...)
|
||||
tool_call = ai_msg.tool_calls[0]
|
||||
# ToolCall(args={...}, id=..., ...)
|
||||
tool_output = tool.invoke(tool_call["args"])
|
||||
tool_message = ToolMessage(content=tool_output, tool_call_id=tool_call["id"], name=tool_call["name"])
|
||||
tool_message = ToolMessage(
|
||||
content=tool_output,
|
||||
tool_call_id=tool_call["id"],
|
||||
name=tool_call["name"]
|
||||
)
|
||||
```
|
||||
|
||||
Note that the `content` field will generally be passed back to the model.
|
||||
@@ -571,7 +581,12 @@ you can transform the tool output but also pass it as an artifact (read more abo
|
||||
```python
|
||||
... # Same code as above
|
||||
response_for_llm = transform(response)
|
||||
tool_message = ToolMessage(content=response_for_llm, tool_call_id=tool_call["id"], name=tool_call["name"], artifact=tool_output)
|
||||
tool_message = ToolMessage(
|
||||
content=response_for_llm,
|
||||
tool_call_id=tool_call["id"],
|
||||
name=tool_call["name"],
|
||||
artifact=tool_output
|
||||
)
|
||||
```
|
||||
|
||||
#### Invoke with `ToolCall`
|
||||
@@ -582,9 +597,14 @@ The benefits of this are that you don't have to write the logic yourself to tran
|
||||
This generally looks like:
|
||||
|
||||
```python
|
||||
tool_call = ai_msg.tool_calls[0] # ToolCall(args={...}, id=..., ...)
|
||||
tool_call = ai_msg.tool_calls[0]
|
||||
# -> ToolCall(args={...}, id=..., ...)
|
||||
tool_message = tool.invoke(tool_call)
|
||||
# -> ToolMessage(content="tool result foobar...", tool_call_id=..., name="tool_name")
|
||||
# -> ToolMessage(
|
||||
content="tool result foobar...",
|
||||
tool_call_id=...,
|
||||
name="tool_name"
|
||||
)
|
||||
```
|
||||
|
||||
If you are invoking the tool this way and want to include an [artifact](/docs/concepts/#toolmessage) for the ToolMessage, you will need to have the tool return two things.
|
||||
@@ -1085,6 +1105,81 @@ The following how-to guides are good practical resources for using function/tool
|
||||
|
||||
For a full list of model providers that support tool calling, [see this table](/docs/integrations/chat/#advanced-features).
|
||||
|
||||
### Few-shot prompting
|
||||
|
||||
One of the most effective ways to improve model performance is to give a model examples of what you want it to do. The technique of adding example inputs and expected outputs to a model prompt is known as "few-shot prompting". There are a few things to think about when doing few-shot prompting:
|
||||
|
||||
1. How are examples generated?
|
||||
2. How many examples are in each prompt?
|
||||
3. How are examples selected at runtime?
|
||||
4. How are examples formatted in the prompt?
|
||||
|
||||
Here are the considerations for each.
|
||||
|
||||
#### 1. Generating examples
|
||||
|
||||
The first and most important step of few-shot prompting is coming up with a good dataset of examples. Good examples should be relevant at runtime, clear, informative, and provide information that was not already known to the model.
|
||||
|
||||
At a high-level, the basic ways to generate examples are:
|
||||
- Manual: a person/people generates examples they think are useful.
|
||||
- Better model: a better (presumably more expensive/slower) model's responses are used as examples for a worse (presumably cheaper/faster) model.
|
||||
- User feedback: users (or labelers) leave feedback on interactions with the application and examples are generated based on that feedback (for example, all interactions with positive feedback could be turned into examples).
|
||||
- LLM feedback: same as user feedback but the process is automated by having models evaluate themselves.
|
||||
|
||||
Which approach is best depends on your task. For tasks where a small number core principles need to be understood really well, it can be valuable hand-craft a few really good examples.
|
||||
For tasks where the space of correct behaviors is broader and more nuanced, it can be useful to generate many examples in a more automated fashion so that there's a higher likelihood of there being some highly relevant examples for any runtime input.
|
||||
|
||||
**Single-turn v.s. multi-turn examples**
|
||||
|
||||
Another dimension to think about when generating examples is what the example is actually showing.
|
||||
|
||||
The simplest types of examples just have a user input and an expected model output. These are single-turn examples.
|
||||
|
||||
One more complex type if example is where the example is an entire conversation, usually in which a model initially responds incorrectly and a user then tells the model how to correct its answer.
|
||||
This is called a multi-turn example. Multi-turn examples can be useful for more nuanced tasks where its useful to show common errors and spell out exactly why they're wrong and what should be done instead.
|
||||
|
||||
#### 2. Number of examples
|
||||
|
||||
Once we have a dataset of examples, we need to think about how many examples should be in each prompt.
|
||||
The key tradeoff is that more examples generally improve performance, but larger prompts increase costs and latency.
|
||||
And beyond some threshold having too many examples can start to confuse the model.
|
||||
Finding the right number of examples is highly dependent on the model, the task, the quality of the examples, and your cost and latency constraints.
|
||||
Anecdotally, the better the model is the fewer examples it needs to perform well and the more quickly you hit steeply diminishing returns on adding more examples.
|
||||
But, the best/only way to reliably answer this question is to run some experiments with different numbers of examples.
|
||||
|
||||
#### 3. Selecting examples
|
||||
|
||||
Assuming we are not adding our entire example dataset into each prompt, we need to have a way of selecting examples from our dataset based on a given input. We can do this:
|
||||
- Randomly
|
||||
- By (semantic or keyword-based) similarity of the inputs
|
||||
- Based on some other constraints, like token size
|
||||
|
||||
LangChain has a number of [`ExampleSelectors`](/docs/concepts/#example-selectors) which make it easy to use any of these techniques.
|
||||
|
||||
Generally, selecting by semantic similarity leads to the best model performance. But how important this is is again model and task specific, and is something worth experimenting with.
|
||||
|
||||
#### 4. Formatting examples
|
||||
|
||||
Most state-of-the-art models these days are chat models, so we'll focus on formatting examples for those. Our basic options are to insert the examples:
|
||||
- In the system prompt as a string
|
||||
- As their own messages
|
||||
|
||||
If we insert our examples into the system prompt as a string, we'll need to make sure it's clear to the model where each example begins and which parts are the input versus output. Different models respond better to different syntaxes, like [ChatML](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/chat-markup-language), XML, TypeScript, etc.
|
||||
|
||||
If we insert our examples as messages, where each example is represented as a sequence of Human, AI messages, we might want to also assign [names](/docs/concepts/#messages) to our messages like `"example_user"` and `"example_assistant"` to make it clear that these messages correspond to different actors than the latest input message.
|
||||
|
||||
**Formatting tool call examples**
|
||||
|
||||
One area where formatting examples as messages can be tricky is when our example outputs have tool calls. This is because different models have different constraints on what types of message sequences are allowed when any tool calls are generated.
|
||||
- Some models require that any AIMessage with tool calls be immediately followed by ToolMessages for every tool call,
|
||||
- Some models additionally require that any ToolMessages be immediately followed by an AIMessage before the next HumanMessage,
|
||||
- Some models require that tools are passed in to the model if there are any tool calls / ToolMessages in the chat history.
|
||||
|
||||
These requirements are model-specific and should be checked for the model you are using. If your model requires ToolMessages after tool calls and/or AIMessages after ToolMessages and your examples only include expected tool calls and not the actual tool outputs, you can try adding dummy ToolMessages / AIMessages to the end of each example with generic contents to satisfy the API constraints.
|
||||
In these cases it's especially worth experimenting with inserting your examples as strings versus messages, as having dummy messages can adversely affect certain models.
|
||||
|
||||
You can see a case study of how Anthropic and OpenAI respond to different few-shot prompting techniques on two different tool calling benchmarks [here](https://blog.langchain.dev/few-shot-prompting-to-improve-tool-calling-performance/).
|
||||
|
||||
### Retrieval
|
||||
|
||||
LLMs are trained on a large but fixed dataset, limiting their ability to reason over private or recent information. Fine-tuning an LLM with specific facts is one way to mitigate this, but is often [poorly suited for factual recall](https://www.anyscale.com/blog/fine-tuning-is-for-form-not-facts) and [can be costly](https://www.glean.com/blog/how-to-build-an-ai-assistant-for-the-enterprise).
|
||||
|
||||
@@ -63,7 +63,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# <!-- ruff: noqa: F821 -->\n",
|
||||
"from langchain.globals import set_llm_cache"
|
||||
"from langchain_core.globals import set_llm_cache"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -103,7 +103,7 @@
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"from langchain.cache import InMemoryCache\n",
|
||||
"from langchain_core.caches import InMemoryCache\n",
|
||||
"\n",
|
||||
"set_llm_cache(InMemoryCache())\n",
|
||||
"\n",
|
||||
|
||||
@@ -409,7 +409,7 @@
|
||||
" # When configuring the end runnable, we can then use this id to configure this field\n",
|
||||
" ConfigurableField(id=\"prompt\"),\n",
|
||||
" # This sets a default_key.\n",
|
||||
" # If we specify this key, the default LLM (ChatAnthropic initialized above) will be used\n",
|
||||
" # If we specify this key, the default prompt (asking for a joke, as initialized above) will be used\n",
|
||||
" default_key=\"joke\",\n",
|
||||
" # This adds a new option, with name `poem`\n",
|
||||
" poem=PromptTemplate.from_template(\"Write a short poem about {topic}\"),\n",
|
||||
@@ -494,7 +494,7 @@
|
||||
" # When configuring the end runnable, we can then use this id to configure this field\n",
|
||||
" ConfigurableField(id=\"prompt\"),\n",
|
||||
" # This sets a default_key.\n",
|
||||
" # If we specify this key, the default LLM (ChatAnthropic initialized above) will be used\n",
|
||||
" # If we specify this key, the default prompt (asking for a joke, as initialized above) will be used\n",
|
||||
" default_key=\"joke\",\n",
|
||||
" # This adds a new option, with name `poem`\n",
|
||||
" poem=PromptTemplate.from_template(\"Write a short poem about {topic}\"),\n",
|
||||
|
||||
@@ -63,7 +63,7 @@
|
||||
"* The `load` methods is a convenience method meant solely for prototyping work -- it just invokes `list(self.lazy_load())`.\n",
|
||||
"* The `alazy_load` has a default implementation that will delegate to `lazy_load`. If you're using async, we recommend overriding the default implementation and providing a native async implementation.\n",
|
||||
"\n",
|
||||
"::: {.callout-important}\n",
|
||||
":::{.callout-important}\n",
|
||||
"When implementing a document loader do **NOT** provide parameters via the `lazy_load` or `alazy_load` methods.\n",
|
||||
"\n",
|
||||
"All configuration is expected to be passed through the initializer (__init__). This was a design choice made by LangChain to make sure that once a document loader has been instantiated it has all the information needed to load documents.\n",
|
||||
@@ -235,7 +235,7 @@
|
||||
"id": "56cb443e-f987-4386-b4ec-975ee129adb2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"::: {.callout-tip}\n",
|
||||
":::{.callout-tip}\n",
|
||||
"\n",
|
||||
"`load()` can be helpful in an interactive environment such as a jupyter notebook.\n",
|
||||
"\n",
|
||||
@@ -276,7 +276,7 @@
|
||||
"source": [
|
||||
"## Working with Files\n",
|
||||
"\n",
|
||||
"Many document loaders invovle parsing files. The difference between such loaders usually stems from how the file is parsed rather than how the file is loaded. For example, you can use `open` to read the binary content of either a PDF or a markdown file, but you need different parsing logic to convert that binary data into text.\n",
|
||||
"Many document loaders involve parsing files. The difference between such loaders usually stems from how the file is parsed, rather than how the file is loaded. For example, you can use `open` to read the binary content of either a PDF or a markdown file, but you need different parsing logic to convert that binary data into text.\n",
|
||||
"\n",
|
||||
"As a result, it can be helpful to decouple the parsing logic from the loading logic, which makes it easier to re-use a given parser regardless of how the data was loaded.\n",
|
||||
"\n",
|
||||
@@ -355,7 +355,7 @@
|
||||
"id": "433bfb7c-7767-43bc-b71e-42413d7494a8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Using the **blob** API also allows one to load content direclty from memory without having to read it from a file!"
|
||||
"Using the **blob** API also allows one to load content directly from memory without having to read it from a file!"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -182,7 +182,7 @@ pprint(data)
|
||||
</CodeOutputBlock>
|
||||
|
||||
|
||||
Another option is set `jq_schema='.'` and provide `content_key`:
|
||||
Another option is to set `jq_schema='.'` and provide `content_key`:
|
||||
|
||||
```python
|
||||
loader = JSONLoader(
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install \"unstructured[md]\""
|
||||
"%pip install \"unstructured[md]\" nltk"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
File diff suppressed because one or more lines are too long
353
docs/docs/how_to/example_selectors_langsmith.ipynb
Normal file
353
docs/docs/how_to/example_selectors_langsmith.ipynb
Normal file
@@ -0,0 +1,353 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4f7e423b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# How to select examples from a LangSmith dataset\n",
|
||||
"\n",
|
||||
"import Prerequisites from \"@theme/Prerequisites\";\n",
|
||||
"import Compatibility from \"@theme/Compatibility\";\n",
|
||||
"\n",
|
||||
"<Prerequisites titlesAndLinks={[\n",
|
||||
" [\"Chat models\", \"/docs/concepts/#chat-models\"],\n",
|
||||
" [\"Few-shot-prompting\", \"/docs/concepts/#few-shot-prompting\"],\n",
|
||||
" [\"LangSmith\", \"/docs/concepts/#langsmith\"],\n",
|
||||
"]} />\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"<Compatibility packagesAndVersions={[\n",
|
||||
" [\"langsmith\", \"0.1.101\"],\n",
|
||||
" [\"langchain-core\", \"0.2.34\"],\n",
|
||||
"]} />\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"LangSmith datasets have built-in support for similarity search, making them a great tool for building and querying few-shot examples.\n",
|
||||
"\n",
|
||||
"In this guide we'll see how to use an indexed LangSmith dataset as a few-shot example selector.\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"Before getting started make sure you've [created a LangSmith account](https://smith.langchain.com/) and set your credentials:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "85445e0e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Set LangSmith API key:\n",
|
||||
"\n",
|
||||
"········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.environ.get(\"LANGSMITH_API_KEY\"):\n",
|
||||
" os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Set LangSmith API key:\\n\\n\")\n",
|
||||
"\n",
|
||||
"os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ca899e29",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We'll need to install the `langsmith` SDK. In this example we'll also make use of `langchain`, `langchain-openai`, and `langchain-benchmarks`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b4fa7810",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU \"langsmith>=0.1.101\" \"langchain-core>=0.2.34\" langchain langchain-openai langchain-benchmarks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fc716e12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now we'll clone a public dataset and turn on indexing for the dataset. We can also turn on indexing via the [LangSmith UI](https://docs.smith.langchain.com/how_to_guides/datasets/index_datasets_for_dynamic_few_shot_example_selection).\n",
|
||||
"\n",
|
||||
"We'll clone the [Multiverse math few shot example dataset](https://blog.langchain.dev/few-shot-prompting-to-improve-tool-calling-performance/).\n",
|
||||
"\n",
|
||||
"This enables searching over the dataset and will make sure that anytime we update/add examples they are also indexed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cf53d280",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langsmith import Client as LangSmith\n",
|
||||
"\n",
|
||||
"ls_client = LangSmith()\n",
|
||||
"\n",
|
||||
"dataset_name = \"multiverse-math-few-shot-examples-v2\"\n",
|
||||
"dataset_public_url = (\n",
|
||||
" \"https://smith.langchain.com/public/620596ee-570b-4d2b-8c8f-f828adbe5242/d\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"ls_client.clone_public_dataset(dataset_public_url)\n",
|
||||
"\n",
|
||||
"dataset_id = ls_client.read_dataset(dataset_name=dataset_name).id\n",
|
||||
"\n",
|
||||
"ls_client.index_dataset(dataset_id=dataset_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5767d171",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Querying dataset\n",
|
||||
"\n",
|
||||
"Indexing can take a few seconds. Once the dataset is indexed, we can search for similar examples. Note that the input to the `similar_examples` method must have the same schema as the examples inputs. In this case our example inputs are a dictionary with a \"question\" key:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "5013a56f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"3"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"examples = ls_client.similar_examples(\n",
|
||||
" {\"question\": \"whats the negation of the negation of the negation of 3\"},\n",
|
||||
" limit=3,\n",
|
||||
" dataset_id=dataset_id,\n",
|
||||
")\n",
|
||||
"len(examples)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "a142db06",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'evaluate the negation of -100'"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"examples[0].inputs[\"question\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d2627125",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For this dataset, the outputs are the conversation that followed the question in OpenAI message format:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "af5b9191",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'role': 'assistant',\n",
|
||||
" 'content': None,\n",
|
||||
" 'tool_calls': [{'id': 'toolu_01HTpq4cYNUac6F7omUc2Wz3',\n",
|
||||
" 'type': 'function',\n",
|
||||
" 'function': {'name': 'negate', 'arguments': '{\"a\": -100}'}}]},\n",
|
||||
" {'role': 'tool',\n",
|
||||
" 'content': '-100.0',\n",
|
||||
" 'tool_call_id': 'toolu_01HTpq4cYNUac6F7omUc2Wz3'},\n",
|
||||
" {'role': 'assistant', 'content': 'So the answer is 100.'},\n",
|
||||
" {'role': 'user',\n",
|
||||
" 'content': '100 is incorrect. Please refer to the output of your tool call.'},\n",
|
||||
" {'role': 'assistant',\n",
|
||||
" 'content': [{'text': \"You're right, my previous answer was incorrect. Let me re-evaluate using the tool output:\",\n",
|
||||
" 'type': 'text'}],\n",
|
||||
" 'tool_calls': [{'id': 'toolu_01XsJQboYghGDygQpPjJkeRq',\n",
|
||||
" 'type': 'function',\n",
|
||||
" 'function': {'name': 'negate', 'arguments': '{\"a\": -100}'}}]},\n",
|
||||
" {'role': 'tool',\n",
|
||||
" 'content': '-100.0',\n",
|
||||
" 'tool_call_id': 'toolu_01XsJQboYghGDygQpPjJkeRq'},\n",
|
||||
" {'role': 'assistant', 'content': 'The answer is -100.0'},\n",
|
||||
" {'role': 'user',\n",
|
||||
" 'content': 'You have the correct numerical answer but are returning additional text. Please only respond with the numerical answer.'},\n",
|
||||
" {'role': 'assistant', 'content': '-100.0'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"examples[0].outputs[\"conversation\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e852c8ef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating dynamic few-shot prompts\n",
|
||||
"\n",
|
||||
"The search returns the examples whose inputs are most similar to the query input. We can use this for few-shot prompting a model like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "12cba1e1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import init_chat_model\n",
|
||||
"from langchain_benchmarks.tool_usage.tasks.multiverse_math import (\n",
|
||||
" add,\n",
|
||||
" cos,\n",
|
||||
" divide,\n",
|
||||
" log,\n",
|
||||
" multiply,\n",
|
||||
" negate,\n",
|
||||
" pi,\n",
|
||||
" power,\n",
|
||||
" sin,\n",
|
||||
" subtract,\n",
|
||||
")\n",
|
||||
"from langchain_core.runnables import RunnableLambda\n",
|
||||
"from langsmith import AsyncClient as AsyncLangSmith\n",
|
||||
"\n",
|
||||
"async_ls_client = AsyncLangSmith()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def similar_examples(input_: dict) -> dict:\n",
|
||||
" examples = ls_client.similar_examples(input_, limit=5, dataset_id=dataset_id)\n",
|
||||
" return {**input_, \"examples\": examples}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def asimilar_examples(input_: dict) -> dict:\n",
|
||||
" examples = await async_ls_client.similar_examples(\n",
|
||||
" input_, limit=5, dataset_id=dataset_id\n",
|
||||
" )\n",
|
||||
" return {**input_, \"examples\": examples}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def construct_prompt(input_: dict) -> list:\n",
|
||||
" instructions = \"\"\"You are great at using mathematical tools.\"\"\"\n",
|
||||
" examples = []\n",
|
||||
" for ex in input_[\"examples\"]:\n",
|
||||
" examples.append({\"role\": \"user\", \"content\": ex.inputs[\"question\"]})\n",
|
||||
" for msg in ex.outputs[\"conversation\"]:\n",
|
||||
" if msg[\"role\"] == \"assistant\":\n",
|
||||
" msg[\"name\"] = \"example_assistant\"\n",
|
||||
" if msg[\"role\"] == \"user\":\n",
|
||||
" msg[\"name\"] = \"example_user\"\n",
|
||||
" examples.append(msg)\n",
|
||||
" return [\n",
|
||||
" {\"role\": \"system\", \"content\": instructions},\n",
|
||||
" *examples,\n",
|
||||
" {\"role\": \"user\", \"content\": input_[\"question\"]},\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"tools = [add, cos, divide, log, multiply, negate, pi, power, sin, subtract]\n",
|
||||
"llm = init_chat_model(\"gpt-4o-2024-08-06\")\n",
|
||||
"llm_with_tools = llm.bind_tools(tools)\n",
|
||||
"\n",
|
||||
"example_selector = RunnableLambda(func=similar_examples, afunc=asimilar_examples)\n",
|
||||
"\n",
|
||||
"chain = example_selector | construct_prompt | llm_with_tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "c423b367",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'name': 'negate',\n",
|
||||
" 'args': {'a': 3},\n",
|
||||
" 'id': 'call_uMSdoTl6ehfHh5a6JQUb2NoZ',\n",
|
||||
" 'type': 'tool_call'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ai_msg = await chain.ainvoke({\"question\": \"whats the negation of the negation of 3\"})\n",
|
||||
"ai_msg.tool_calls"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "94489b4a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Looking at the LangSmith trace, we can see that relevant examples were pulled in in the `similar_examples` step and passed as messages to ChatOpenAI: https://smith.langchain.com/public/9585e30f-765a-4ed9-b964-2211420cd2f8/r/fdea98d6-e90f-49d4-ac22-dfd012e9e0d9."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "poetry-venv-311",
|
||||
"language": "python",
|
||||
"name": "poetry-venv-311"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -28,7 +28,7 @@
|
||||
"\n",
|
||||
"You can use arbitrary functions as [Runnables](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable). This is useful for formatting or when you need functionality not provided by other LangChain components, and custom functions used as Runnables are called [`RunnableLambdas`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableLambda.html).\n",
|
||||
"\n",
|
||||
"Note that all inputs to these functions need to be a SINGLE argument. If you have a function that accepts multiple arguments, you should write a wrapper that accepts a single dict input and unpacks it into multiple argument.\n",
|
||||
"Note that all inputs to these functions need to be a SINGLE argument. If you have a function that accepts multiple arguments, you should write a wrapper that accepts a single dict input and unpacks it into multiple arguments.\n",
|
||||
"\n",
|
||||
"This guide will cover:\n",
|
||||
"\n",
|
||||
|
||||
@@ -364,7 +364,10 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.graph_qa.cypher_utils import CypherQueryCorrector, Schema\n",
|
||||
"from langchain_community.chains.graph_qa.cypher_utils import (\n",
|
||||
" CypherQueryCorrector,\n",
|
||||
" Schema,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Cypher validation tool for relationship directions\n",
|
||||
"corrector_schema = [\n",
|
||||
|
||||
@@ -69,6 +69,7 @@ These are the core building blocks you can use when building applications.
|
||||
- [How to: select examples by semantic similarity](/docs/how_to/example_selectors_similarity)
|
||||
- [How to: select examples by semantic ngram overlap](/docs/how_to/example_selectors_ngram)
|
||||
- [How to: select examples by maximal marginal relevance](/docs/how_to/example_selectors_mmr)
|
||||
- [How to: select examples from LangSmith few-shot datasets](/docs/how_to/example_selectors_langsmith/)
|
||||
|
||||
### Chat models
|
||||
|
||||
@@ -315,6 +316,15 @@ For a high-level tutorial, check out [this guide](/docs/tutorials/graph/).
|
||||
- [How to: improve results with prompting](/docs/how_to/graph_prompting)
|
||||
- [How to: construct knowledge graphs](/docs/how_to/graph_constructing)
|
||||
|
||||
### Summarization
|
||||
|
||||
LLMs can summarize and otherwise distill desired information from text, including
|
||||
large volumes of text. For a high-level tutorial, check out [this guide](/docs/tutorials/summarization).
|
||||
|
||||
- [How to: summarize text in a single LLM call](/docs/how_to/summarize_stuff)
|
||||
- [How to: summarize text through parallelization](/docs/how_to/summarize_map_reduce)
|
||||
- [How to: summarize text through iterative refinement](/docs/how_to/summarize_refine)
|
||||
|
||||
## [LangGraph](https://langchain-ai.github.io/langgraph)
|
||||
|
||||
LangGraph is an extension of LangChain aimed at
|
||||
|
||||
@@ -36,7 +36,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.globals import set_llm_cache\n",
|
||||
"from langchain_core.globals import set_llm_cache\n",
|
||||
"from langchain_openai import OpenAI\n",
|
||||
"\n",
|
||||
"# To make the caching really obvious, lets use a slower and older model.\n",
|
||||
@@ -71,7 +71,7 @@
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"from langchain.cache import InMemoryCache\n",
|
||||
"from langchain_core.caches import InMemoryCache\n",
|
||||
"\n",
|
||||
"set_llm_cache(InMemoryCache())\n",
|
||||
"\n",
|
||||
|
||||
@@ -13,9 +13,14 @@ the v1 namespace of Pydantic 2.
|
||||
Because Pydantic does not support mixing .v1 and .v2 objects, users should be aware of a number of issues
|
||||
when using LangChain with Pydantic.
|
||||
|
||||
:::caution
|
||||
While LangChain supports Pydantic V2 objects in some APIs (listed below), it's suggested that users keep using Pydantic V1 objects until LangChain 0.3 is released.
|
||||
:::
|
||||
|
||||
|
||||
## 1. Passing Pydantic objects to LangChain APIs
|
||||
|
||||
Most LangChain APIs that accept Pydantic objects have been updated to accept both Pydantic v1 and v2 objects.
|
||||
Most LangChain APIs for *tool usage* (see list below) have been updated to accept either Pydantic v1 or v2 objects.
|
||||
|
||||
* Pydantic v1 objects correspond to subclasses of `pydantic.BaseModel` if `pydantic 1` is installed or subclasses of `pydantic.v1.BaseModel` if `pydantic 2` is installed.
|
||||
* Pydantic v2 objects correspond to subclasses of `pydantic.BaseModel` if `pydantic 2` is installed.
|
||||
@@ -38,6 +43,7 @@ Partner packages that accept pydantic v2 objects via `bind_tools` or `with_struc
|
||||
| langchain-robocorp | Yes | >=0.0.10 |
|
||||
| langchain-openai | Yes | >=0.1.19 |
|
||||
| langchain-fireworks | Yes | >=0.1.5 |
|
||||
| langchain-aws | Yes | >=0.1.15 |
|
||||
|
||||
Additional partner packages will be updated to accept Pydantic v2 objects in the future.
|
||||
|
||||
@@ -169,4 +175,4 @@ If you need OpenAPI docs, your options are to either install Pydantic 1:
|
||||
or else to use the `APIHandler` object in LangChain to manually create the
|
||||
routes for your API.
|
||||
|
||||
See: https://python.langchain.com/v0.2/docs/langserve/#pydantic
|
||||
See: https://python.langchain.com/v0.2/docs/langserve/#pydantic
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%capture --no-stderr\n",
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchain-chroma bs4"
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchain-chroma beautifulsoup4"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -721,9 +721,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
|
||||
"from langgraph.checkpoint.memory import MemorySaver\n",
|
||||
"\n",
|
||||
"memory = SqliteSaver.from_conn_string(\":memory:\")\n",
|
||||
"memory = MemorySaver()\n",
|
||||
"\n",
|
||||
"agent_executor = create_react_agent(llm, tools, checkpointer=memory)"
|
||||
]
|
||||
@@ -890,9 +890,9 @@
|
||||
"from langchain_community.document_loaders import WebBaseLoader\n",
|
||||
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
|
||||
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
|
||||
"from langgraph.checkpoint.memory import MemorySaver\n",
|
||||
"\n",
|
||||
"memory = SqliteSaver.from_conn_string(\":memory:\")\n",
|
||||
"memory = MemorySaver()\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
|
||||
@@ -14,7 +14,9 @@
|
||||
"We will cover two approaches:\n",
|
||||
"\n",
|
||||
"1. Using the built-in [create_retrieval_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval.create_retrieval_chain.html), which returns sources by default;\n",
|
||||
"2. Using a simple [LCEL](/docs/concepts#langchain-expression-language-lcel) implementation, to show the operating principle."
|
||||
"2. Using a simple [LCEL](/docs/concepts#langchain-expression-language-lcel) implementation, to show the operating principle.\n",
|
||||
"\n",
|
||||
"We will also show how to structure sources into the model response, such that a model can report what specific sources it used in generating its answer."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -38,7 +40,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma bs4"
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma beautifulsoup4"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -130,8 +132,8 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "820244ae-74b4-4593-b392-822979dd91b8",
|
||||
"execution_count": null,
|
||||
"id": "24a69b8c-024e-4e34-b827-9c9de46512a3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -211,11 +213,11 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'What is Task Decomposition?',\n",
|
||||
" 'context': [Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content='Resources:\\n1. Internet access for searches and information gathering.\\n2. Long Term memory management.\\n3. GPT-3.5 powered Agents for delegation of simple tasks.\\n4. File output.\\n\\nPerformance Evaluation:\\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\\n2. Constructively self-criticize your big-picture behavior constantly.\\n3. Reflect on past decisions and strategies to refine your approach.\\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content=\"(3) Task execution: Expert models execute on the specific tasks and log results.\\nInstruction:\\n\\nWith the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path.\", metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})],\n",
|
||||
" 'answer': 'Task decomposition involves breaking down a complex task into smaller and simpler steps. This process helps agents or models handle challenging tasks by dividing them into more manageable subtasks. Techniques like Chain of Thought and Tree of Thoughts are used to decompose tasks into multiple steps for better problem-solving.'}"
|
||||
" 'context': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Resources:\\n1. Internet access for searches and information gathering.\\n2. Long Term memory management.\\n3. GPT-3.5 powered Agents for delegation of simple tasks.\\n4. File output.\\n\\nPerformance Evaluation:\\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\\n2. Constructively self-criticize your big-picture behavior constantly.\\n3. Reflect on past decisions and strategies to refine your approach.\\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content=\"(3) Task execution: Expert models execute on the specific tasks and log results.\\nInstruction:\\n\\nWith the input and the inference results, the AI assistant needs to describe the process and results. The previous stages can be formed as - User Input: {{ User Input }}, Task Planning: {{ Tasks }}, Model Selection: {{ Model Assignment }}, Task Execution: {{ Predictions }}. You must first answer the user's request in a straightforward manner. Then describe the task process and show your analysis and model inference results to the user in the first person. If inference results contain a file path, must tell the user the complete file path.\")],\n",
|
||||
" 'answer': 'Task decomposition involves breaking down a complex task into smaller and more manageable steps. This process helps agents or models tackle difficult tasks by dividing them into simpler subtasks or components. Task decomposition can be achieved through techniques like Chain of Thought or Tree of Thoughts, which guide the agent in breaking down tasks into sequential or branching steps.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
@@ -251,18 +253,18 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "22ea137c-1a7a-44dd-ac73-281213979957",
|
||||
"id": "1950953a-e6f1-439d-b7b9-c3bd456e388d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': 'What is Task Decomposition',\n",
|
||||
" 'context': [Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content='The AI assistant can parse user input to several tasks: [{\"task\": task, \"id\", task_id, \"dep\": dependency_task_ids, \"args\": {\"text\": text, \"image\": URL, \"audio\": URL, \"video\": URL}}]. The \"dep\" field denotes the id of the previous task which generates a new resource that the current task relies on. A special tag \"-task_id\" refers to the generated text image, audio and video in the dependency task with id as task_id. The task MUST be selected from the following options: {{ Available Task List }}. There is a logical relationship between tasks, please note their order. If the user input can\\'t be parsed, you need to reply empty JSON. Here are several cases for your reference: {{ Demonstrations }}. The chat history is recorded as {{ Chat History }}. From this chat history, you can find the path of the user-mentioned resources for your task planning.', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}),\n",
|
||||
" Document(page_content='Fig. 11. Illustration of how HuggingGPT works. (Image source: Shen et al. 2023)\\nThe system comprises of 4 stages:\\n(1) Task planning: LLM works as the brain and parses the user requests into multiple tasks. There are four attributes associated with each task: task type, ID, dependencies, and arguments. They use few-shot examples to guide LLM to do task parsing and planning.\\nInstruction:', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'})],\n",
|
||||
" 'answer': 'Task decomposition involves breaking down complex tasks into smaller and simpler steps to make them more manageable for autonomous agents or models. This process can be achieved by techniques like Chain of Thought (CoT) or Tree of Thoughts, which guide the model to think step by step or explore multiple reasoning possibilities at each step. Task decomposition can be done through simple prompting with language models, task-specific instructions, or human inputs.'}"
|
||||
" 'context': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\\nComponent One: Planning#\\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\\nTask Decomposition#\\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\\nTask decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\", \"What are the subgoals for achieving XYZ?\", (2) by using task-specific instructions; e.g. \"Write a story outline.\" for writing a novel, or (3) with human inputs.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='The AI assistant can parse user input to several tasks: [{\"task\": task, \"id\", task_id, \"dep\": dependency_task_ids, \"args\": {\"text\": text, \"image\": URL, \"audio\": URL, \"video\": URL}}]. The \"dep\" field denotes the id of the previous task which generates a new resource that the current task relies on. A special tag \"-task_id\" refers to the generated text image, audio and video in the dependency task with id as task_id. The task MUST be selected from the following options: {{ Available Task List }}. There is a logical relationship between tasks, please note their order. If the user input can\\'t be parsed, you need to reply empty JSON. Here are several cases for your reference: {{ Demonstrations }}. The chat history is recorded as {{ Chat History }}. From this chat history, you can find the path of the user-mentioned resources for your task planning.'),\n",
|
||||
" Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 11. Illustration of how HuggingGPT works. (Image source: Shen et al. 2023)\\nThe system comprises of 4 stages:\\n(1) Task planning: LLM works as the brain and parses the user requests into multiple tasks. There are four attributes associated with each task: task type, ID, dependencies, and arguments. They use few-shot examples to guide LLM to do task parsing and planning.\\nInstruction:')],\n",
|
||||
" 'answer': 'Task decomposition is a technique used in artificial intelligence to break down complex tasks into smaller and more manageable subtasks. This approach helps agents or models to tackle difficult problems by dividing them into simpler steps, improving performance and interpretability. Different methods like Chain of Thought and Tree of Thoughts have been developed to enhance task decomposition in AI systems.'}"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
@@ -279,15 +281,25 @@
|
||||
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# This Runnable takes a dict with keys 'input' and 'context',\n",
|
||||
"# formats them into a prompt, and generates a response.\n",
|
||||
"rag_chain_from_docs = (\n",
|
||||
" RunnablePassthrough.assign(context=(lambda x: format_docs(x[\"context\"])))\n",
|
||||
" | prompt\n",
|
||||
" | llm\n",
|
||||
" | StrOutputParser()\n",
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"], # input query\n",
|
||||
" \"context\": lambda x: format_docs(x[\"context\"]), # context\n",
|
||||
" }\n",
|
||||
" | prompt # format query and context into prompt\n",
|
||||
" | llm # generate response\n",
|
||||
" | StrOutputParser() # coerce to string\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Pass input query to retriever\n",
|
||||
"retrieve_docs = (lambda x: x[\"input\"]) | retriever\n",
|
||||
"\n",
|
||||
"# Below, we chain `.assign` calls. This takes a dict and successively\n",
|
||||
"# adds keys-- \"context\" and \"answer\"-- where the value for each key\n",
|
||||
"# is determined by a Runnable. The Runnable operates on all existing\n",
|
||||
"# keys in the dict.\n",
|
||||
"chain = RunnablePassthrough.assign(context=retrieve_docs).assign(\n",
|
||||
" answer=rag_chain_from_docs\n",
|
||||
")\n",
|
||||
@@ -302,7 +314,105 @@
|
||||
"source": [
|
||||
":::{.callout-tip}\n",
|
||||
"\n",
|
||||
"Check out the [LangSmith trace](https://smith.langchain.com/public/0cb42685-e29e-4280-a503-bef2014d7ba2/r)\n",
|
||||
"Check out the [LangSmith trace](https://smith.langchain.com/public/1c055a3b-0236-4670-a3fb-023d418ba796/r)\n",
|
||||
"\n",
|
||||
":::"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c1c17797-d965-4fd2-b8d4-d386f25dd352",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Structure sources in model response\n",
|
||||
"\n",
|
||||
"Up to this point, we've simply propagated the documents returned from the retrieval step through to the final response. But this may not illustrate what subset of information the model relied on when generating its answer. Below, we show how to structure sources into the model response, allowing the model to report what specific context it relied on for its answer.\n",
|
||||
"\n",
|
||||
"Because the above LCEL implementation is composed of [Runnable](/docs/concepts/#runnable-interface) primitives, it is straightforward to extend. Below, we make a simple change:\n",
|
||||
"\n",
|
||||
"- We use the model's tool-calling features to generate [structured output](/docs/how_to/structured_output/), consisting of an answer and list of sources. The schema for the response is represented in the `AnswerWithSources` TypedDict, below.\n",
|
||||
"- We remove the `StrOutputParser()`, as we expect `dict` output in this scenario."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "8f916b14-1b0a-4975-a62f-52f1353bde15",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import List\n",
|
||||
"\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"from typing_extensions import Annotated, TypedDict\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Desired schema for response\n",
|
||||
"class AnswerWithSources(TypedDict):\n",
|
||||
" \"\"\"An answer to the question, with sources.\"\"\"\n",
|
||||
"\n",
|
||||
" answer: str\n",
|
||||
" sources: Annotated[\n",
|
||||
" List[str],\n",
|
||||
" ...,\n",
|
||||
" \"List of sources (author + year) used to answer the question\",\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Our rag_chain_from_docs has the following changes:\n",
|
||||
"# - add `.with_structured_output` to the LLM;\n",
|
||||
"# - remove the output parser\n",
|
||||
"rag_chain_from_docs = (\n",
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"],\n",
|
||||
" \"context\": lambda x: format_docs(x[\"context\"]),\n",
|
||||
" }\n",
|
||||
" | prompt\n",
|
||||
" | llm.with_structured_output(AnswerWithSources)\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"retrieve_docs = (lambda x: x[\"input\"]) | retriever\n",
|
||||
"\n",
|
||||
"chain = RunnablePassthrough.assign(context=retrieve_docs).assign(\n",
|
||||
" answer=rag_chain_from_docs\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"response = chain.invoke({\"input\": \"What is Chain of Thought?\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "7a8fc0c5-afb3-4012-a467-3951996a6850",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\n",
|
||||
" \"answer\": \"Chain of Thought (CoT) is a prompting technique that enhances model performance on complex tasks by instructing the model to \\\"think step by step\\\" to decompose hard tasks into smaller and simpler steps. It transforms big tasks into multiple manageable tasks and sheds light on the interpretation of the model's thinking process.\",\n",
|
||||
" \"sources\": [\n",
|
||||
" \"Wei et al. 2022\"\n",
|
||||
" ]\n",
|
||||
"}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"print(json.dumps(response[\"answer\"], indent=2))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7440f785-29c5-4c6b-9656-0d9d5efbac05",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
":::{.callout-tip}\n",
|
||||
"\n",
|
||||
"View [LangSmith trace](https://smith.langchain.com/public/0eeddf06-3a7b-4f27-974c-310ca8160f60/r)\n",
|
||||
"\n",
|
||||
":::"
|
||||
]
|
||||
|
||||
@@ -33,7 +33,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma bs4"
|
||||
"%pip install --upgrade --quiet langchain langchain-community langchainhub langchain-openai langchain-chroma beautifulsoup4"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -38,8 +38,8 @@
|
||||
" Operator,\n",
|
||||
" StructuredQuery,\n",
|
||||
")\n",
|
||||
"from langchain.retrievers.self_query.chroma import ChromaTranslator\n",
|
||||
"from langchain.retrievers.self_query.elasticsearch import ElasticsearchTranslator\n",
|
||||
"from langchain_community.query_constructors.chroma import ChromaTranslator\n",
|
||||
"from langchain_community.query_constructors.elasticsearch import ElasticsearchTranslator\n",
|
||||
"from langchain_core.pydantic_v1 import BaseModel"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -512,7 +512,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.retrievers.self_query.chroma import ChromaTranslator\n",
|
||||
"from langchain_community.query_constructors.chroma import ChromaTranslator\n",
|
||||
"\n",
|
||||
"retriever = SelfQueryRetriever(\n",
|
||||
" query_constructor=query_constructor,\n",
|
||||
|
||||
@@ -299,16 +299,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "423c6e099e94ca69",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": [
|
||||
"### Gradient\n",
|
||||
"\n",
|
||||
"In this method, the gradient of distance is used to split chunks along with the percentile method.\n",
|
||||
"This method is useful when chunks are highly correlated with each other or specific to a domain e.g. legal or medical. The idea is to apply anomaly detection on gradient array so that the distribution become wider and easy to identify boundaries in highly semantic data."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "423c6e099e94ca69"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -325,6 +325,8 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "e9f393d316ce1f6c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -337,13 +339,13 @@
|
||||
"source": [
|
||||
"docs = text_splitter.create_documents([state_of_the_union])\n",
|
||||
"print(docs[0].page_content)"
|
||||
],
|
||||
"metadata": {},
|
||||
"id": "e9f393d316ce1f6c"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "a407cd57f02a0db4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@@ -355,9 +357,7 @@
|
||||
],
|
||||
"source": [
|
||||
"print(len(docs))"
|
||||
],
|
||||
"metadata": {},
|
||||
"id": "a407cd57f02a0db4"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -761,7 +761,7 @@
|
||||
"* [SQL tutorial](/docs/tutorials/sql_qa): Many of the challenges of working with SQL db's and CSV's are generic to any structured data type, so it's useful to read the SQL techniques even if you're using Pandas for CSV data analysis.\n",
|
||||
"* [Tool use](/docs/how_to/tool_calling): Guides on general best practices when working with chains and agents that invoke tools\n",
|
||||
"* [Agents](/docs/tutorials/agents): Understand the fundamentals of building LLM agents.\n",
|
||||
"* Integrations: Sandboxed envs like [E2B](/docs/integrations/tools/e2b_data_analysis) and [Bearly](/docs/integrations/tools/bearly), utilities like [SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), related agents like [Spark DataFrame agent](/docs/integrations/toolkits/spark)."
|
||||
"* Integrations: Sandboxed envs like [E2B](/docs/integrations/tools/e2b_data_analysis) and [Bearly](/docs/integrations/tools/bearly), utilities like [SQLDatabase](https://api.python.langchain.com/en/latest/utilities/langchain_community.utilities.sql_database.SQLDatabase.html#langchain_community.utilities.sql_database.SQLDatabase), related agents like [Spark DataFrame agent](/docs/integrations/tools/spark_sql)."
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
449
docs/docs/how_to/summarize_map_reduce.ipynb
Normal file
449
docs/docs/how_to/summarize_map_reduce.ipynb
Normal file
File diff suppressed because one or more lines are too long
333
docs/docs/how_to/summarize_refine.ipynb
Normal file
333
docs/docs/how_to/summarize_refine.ipynb
Normal file
File diff suppressed because one or more lines are too long
209
docs/docs/how_to/summarize_stuff.ipynb
Normal file
209
docs/docs/how_to/summarize_stuff.ipynb
Normal file
@@ -0,0 +1,209 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c47f5b2f-e14c-43e7-a0ab-d71562636624",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_position: 3\n",
|
||||
"keywords: [summarize, summarization, stuff, create_stuff_documents_chain]\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "682a4f53-27db-43ef-a909-dd9ded76051b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# How to summarize text in a single LLM call\n",
|
||||
"\n",
|
||||
"LLMs can summarize and otherwise distill desired information from text, including large volumes of text. In many cases, especially for models with larger context windows, this can be adequately achieved via a single LLM call.\n",
|
||||
"\n",
|
||||
"LangChain implements a simple [pre-built chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) that \"stuffs\" a prompt with the desired context for summarization and other purposes. In this guide we demonstrate how to use the chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4aa52e84-d1b5-4b33-b4c4-541156686ef3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load chat model\n",
|
||||
"\n",
|
||||
"Let's first load a chat model:\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
"\n",
|
||||
"<ChatModelTabs\n",
|
||||
" customVarName=\"llm\"\n",
|
||||
"/>\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e5f426fc-cea6-4351-8931-1e422d3c8b69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-4o-mini\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b137fe82-0a53-4910-b53e-b87a297f329d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load documents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a81dc91d-ae72-4996-b809-d4a9050e815e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we need some documents to summarize. Below, we generate some toy documents for illustrative purposes. See the document loader [how-to guides](/docs/how_to/#document-loaders) and [integration pages](/docs/integrations/document_loaders/) for additional sources of data. The [summarization tutorial](/docs/tutorials/summarization) also includes an example summarizing a blog post."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "27c8fed0-b2d7-4549-a086-f5ee657efc41",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"documents = [\n",
|
||||
" Document(page_content=\"Apples are red\", metadata={\"title\": \"apple_book\"}),\n",
|
||||
" Document(page_content=\"Blueberries are blue\", metadata={\"title\": \"blueberry_book\"}),\n",
|
||||
" Document(page_content=\"Bananas are yelow\", metadata={\"title\": \"banana_book\"}),\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "84216044-6f1e-4b90-b4fa-29ec305abf51",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load chain\n",
|
||||
"\n",
|
||||
"Below, we define a simple prompt and instantiate the chain with our chat model and documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "669afa40-2708-4fa1-841e-c74a67bd9175",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.combine_documents import create_stuff_documents_chain\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(\"Summarize this content: {context}\")\n",
|
||||
"chain = create_stuff_documents_chain(llm, prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "74f3e276-f003-4112-ba14-c6952076c4f8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invoke chain\n",
|
||||
"\n",
|
||||
"Because the chain is a [Runnable](/docs/concepts/#runnable-interface), it implements the usual methods for invocation:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "0701bb7d-fbc6-497e-a577-25d56e6e43c6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'The content describes the colors of three fruits: apples are red, blueberries are blue, and bananas are yellow.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = chain.invoke({\"context\": documents})\n",
|
||||
"result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "14fb5647-1458-43af-afb7-5aae7b8cab1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Streaming\n",
|
||||
"\n",
|
||||
"Note that the chain also supports streaming of individual output tokens:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "0d7a5f67-2ec8-4f90-b085-2969fcb14dce",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"|The| content| describes| the| colors| of| three| fruits|:| apples| are| red|,| blueberries| are| blue|,| and| bananas| are| yellow|.||"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in chain.stream({\"context\": documents}):\n",
|
||||
" print(chunk, end=\"|\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f15c225a-db1d-48cf-b135-f588e7d615e6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
"See the summarization [how-to guides](/docs/how_to/#summarization) for additional summarization strategies, including those designed for larger volumes of text.\n",
|
||||
"\n",
|
||||
"See also [this tutorial](/docs/tutorials/summarization) for more detail on summarization."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -5,7 +5,6 @@ sidebar_position: 3
|
||||
|
||||
|
||||
Toolkits are collections of tools that are designed to be used together for specific tasks. They have convenient loading methods.
|
||||
For a complete list of available ready-made toolkits, visit [Integrations](/docs/integrations/toolkits/).
|
||||
|
||||
All Toolkits expose a `get_tools` method which returns a list of tools.
|
||||
You can therefore do:
|
||||
|
||||
@@ -196,8 +196,6 @@
|
||||
"\n",
|
||||
"Toolkits are collections of tools that are designed to be used together for specific tasks. They have convenient loading methods.\n",
|
||||
"\n",
|
||||
"For a complete list of available ready-made toolkits, visit [Integrations](/docs/integrations/toolkits/).\n",
|
||||
"\n",
|
||||
"All Toolkits expose a `get_tools` method which returns a list of tools.\n",
|
||||
"\n",
|
||||
"You're usually meant to use them this way:\n",
|
||||
|
||||
@@ -37,7 +37,8 @@
|
||||
"%pip install --upgrade --quiet infinopy\n",
|
||||
"%pip install --upgrade --quiet matplotlib\n",
|
||||
"%pip install --upgrade --quiet tiktoken\n",
|
||||
"%pip install --upgrade --quiet langchain langchain-openai langchain-community"
|
||||
"%pip install --upgrade --quiet langchain langchain-openai langchain-community\n",
|
||||
"%pip install --upgrade --quiet beautifulsoup4"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -95,7 +95,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 1,
|
||||
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -223,34 +223,28 @@
|
||||
"id": "d1ee55bc-ffc8-4cfa-801c-993953a08cfd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## ***Beta***: Bedrock Converse API\n",
|
||||
"## Bedrock Converse API\n",
|
||||
"\n",
|
||||
"AWS has recently recently the Bedrock Converse API which provides a unified conversational interface for Bedrock models. This API does not yet support custom models. You can see a list of all [models that are supported here](https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html). To improve reliability the ChatBedrock integration will switch to using the Bedrock Converse API as soon as it has feature parity with the existing Bedrock API. Until then a separate [ChatBedrockConverse](https://api.python.langchain.com/en/latest/chat_models/langchain_aws.chat_models.bedrock_converse.ChatBedrockConverse.html#langchain_aws.chat_models.bedrock_converse.ChatBedrockConverse) integration has been released in beta for users who do not need to use custom models.\n",
|
||||
"AWS has recently released the Bedrock Converse API which provides a unified conversational interface for Bedrock models. This API does not yet support custom models. You can see a list of all [models that are supported here](https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html). To improve reliability the ChatBedrock integration will switch to using the Bedrock Converse API as soon as it has feature parity with the existing Bedrock API. Until then a separate [ChatBedrockConverse](https://python.langchain.com/v0.2/api_reference/aws/chat_models/langchain_aws.chat_models.bedrock_converse.ChatBedrockConverse.html) integration has been released.\n",
|
||||
"\n",
|
||||
"We recommend using `ChatBedrockConverse` for users who do not need to use custom models.\n",
|
||||
"\n",
|
||||
"You can use it like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 3,
|
||||
"id": "ae728e59-94d4-40cf-9d24-25ad8723fc59",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/bagatur/langchain/libs/core/langchain_core/_api/beta_decorator.py:87: LangChainBetaWarning: The class `ChatBedrockConverse` is in beta. It is actively being worked on, so the API may change.\n",
|
||||
" warn_beta(\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"Voici la traduction en français :\\n\\nJ'aime la programmation.\", response_metadata={'ResponseMetadata': {'RequestId': '122fb1c8-c3c5-4b06-941e-c95d210bfbc7', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Mon, 01 Jul 2024 21:48:25 GMT', 'content-type': 'application/json', 'content-length': '243', 'connection': 'keep-alive', 'x-amzn-requestid': '122fb1c8-c3c5-4b06-941e-c95d210bfbc7'}, 'RetryAttempts': 0}, 'stopReason': 'end_turn', 'metrics': {'latencyMs': 830}}, id='run-0e3df22f-fcd8-4fbb-a4fb-565227e7e430-0', usage_metadata={'input_tokens': 29, 'output_tokens': 21, 'total_tokens': 50})"
|
||||
"AIMessage(content=\"Voici la traduction en français :\\n\\nJ'aime la programmation.\", response_metadata={'ResponseMetadata': {'RequestId': '4fcbfbe9-f916-4df2-b0bd-ea1147b550aa', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Wed, 21 Aug 2024 17:23:49 GMT', 'content-type': 'application/json', 'content-length': '243', 'connection': 'keep-alive', 'x-amzn-requestid': '4fcbfbe9-f916-4df2-b0bd-ea1147b550aa'}, 'RetryAttempts': 0}, 'stopReason': 'end_turn', 'metrics': {'latencyMs': 672}}, id='run-77ee9810-e32b-45dc-9ccb-6692253b1f45-0', usage_metadata={'input_tokens': 29, 'output_tokens': 21, 'total_tokens': 50})"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -268,6 +262,87 @@
|
||||
"llm.invoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4da16f3e-e80b-48c0-8036-c1cc5f7c8c05",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Streaming\n",
|
||||
"\n",
|
||||
"Note that `ChatBedrockConverse` emits content blocks while streaming:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "7794b32e-d8de-4973-bf0f-39807dc745f0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"content=[] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': 'Vo', 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': 'ici', 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': ' la', 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': ' tra', 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': 'duction', 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': ' en', 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': ' français', 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': ' :', 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': '\\n\\nJ', 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': \"'\", 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': 'a', 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': 'ime', 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': ' la', 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': ' programm', 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': 'ation', 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'type': 'text', 'text': '.', 'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[{'index': 0}] id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[] response_metadata={'stopReason': 'end_turn'} id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8'\n",
|
||||
"content=[] response_metadata={'metrics': {'latencyMs': 713}} id='run-2c92c5af-d771-4cc2-98d9-c11bbd30a1d8' usage_metadata={'input_tokens': 29, 'output_tokens': 21, 'total_tokens': 50}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in llm.stream(messages):\n",
|
||||
" print(chunk)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0ef05abb-9c04-4dc3-995e-f857779644d5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"An output parser can be used to filter to text, if desired:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "2a4e743f-ea7d-4e5a-9b12-f9992362de8b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"|Vo|ici| la| tra|duction| en| français| :|\n",
|
||||
"\n",
|
||||
"J|'|a|ime| la| programm|ation|.||||"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"\n",
|
||||
"chain = llm | StrOutputParser()\n",
|
||||
"\n",
|
||||
"for chunk in chain.stream(messages):\n",
|
||||
" print(chunk, end=\"|\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3",
|
||||
@@ -297,7 +372,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: |\n",
|
||||
"| [ChatDatabricks](https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.databricks.ChatDatabricks.html) | [langchain-community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ❌ | beta |  |  |\n",
|
||||
"| [ChatDatabricks](https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.databricks.ChatDatabricks.html) | [langchain-databricks](https://api.python.langchain.com/en/latest/databricks_api_reference.html) | ❌ | beta |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling/) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
@@ -99,7 +99,7 @@
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Databricks integration lives in the `langchain-community` package. Also, `mlflow >= 2.9 ` is required to run the code in this notebook."
|
||||
"The LangChain Databricks integration lives in the `langchain-databricks` package."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -108,7 +108,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-community mlflow>=2.9.0"
|
||||
"%pip install -qU langchain-databricks"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -133,7 +133,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_models import ChatDatabricks\n",
|
||||
"from langchain_databricks import ChatDatabricks\n",
|
||||
"\n",
|
||||
"chat_model = ChatDatabricks(\n",
|
||||
" endpoint=\"databricks-dbrx-instruct\",\n",
|
||||
@@ -245,9 +245,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation (streaming)\n",
|
||||
"\n",
|
||||
"`ChatDatabricks` supports streaming response by `stream` method since `langchain-community>=0.2.1`."
|
||||
"## Invocation (streaming)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -299,7 +297,7 @@
|
||||
"* An LLM was registered and deployed to [a Databricks serving endpoint](https://docs.databricks.com/machine-learning/model-serving/index.html) via MLflow. The endpoint must have OpenAI-compatible chat input/output format ([reference](https://mlflow.org/docs/latest/llms/deployments/index.html#chat))\n",
|
||||
"* You have [\"Can Query\" permission](https://docs.databricks.com/security/auth-authz/access-control/serving-endpoint-acl.html) to the endpoint.\n",
|
||||
"\n",
|
||||
"Once the endpoint is ready, the usage pattern is completely same as Foundation Models."
|
||||
"Once the endpoint is ready, the usage pattern is identical to that of Foundation Models."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -332,7 +330,7 @@
|
||||
"\n",
|
||||
"First, create a new Databricks serving endpoint that proxies requests to the target external model. The endpoint creation should be fairy quick for proxying external models.\n",
|
||||
"\n",
|
||||
"This requires registering OpenAI API Key in Databricks secret manager with the following comment:\n",
|
||||
"This requires registering your OpenAI API Key within the Databricks secret manager as follows:\n",
|
||||
"```sh\n",
|
||||
"# Replace `<scope>` with your scope\n",
|
||||
"databricks secrets create-scope <scope>\n",
|
||||
@@ -417,8 +415,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_models.databricks import ChatDatabricks\n",
|
||||
"\n",
|
||||
"llm = ChatDatabricks(endpoint=\"databricks-meta-llama-3-70b-instruct\")\n",
|
||||
"tools = [\n",
|
||||
" {\n",
|
||||
@@ -461,7 +457,7 @@
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all ChatDatabricks features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.ChatDatabricks.html"
|
||||
"For detailed documentation of all ChatDatabricks features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_databricks.chat_models.ChatDatabricks.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -1,29 +1,15 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Hugging Face\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ChatHuggingFace\n",
|
||||
"\n",
|
||||
"This will help you getting started with `langchain_huggingface` [chat models](/docs/concepts/#chat-models). For detailed documentation of all `ChatHuggingFace` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_huggingface.chat_models.huggingface.ChatHuggingFace.html). For a list of models supported by Hugging Face check out [this page](https://huggingface.co/models).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"This notebook shows how to get started using Hugging Face LLMs as chat models.\n",
|
||||
"\n",
|
||||
"In particular, we will:\n",
|
||||
"1. Utilize the [HuggingFaceEndpoint](https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/llms/huggingface_endpoint.py) integrations to instantiate an LLM.\n",
|
||||
"2. Utilize the `ChatHuggingFace` class to enable any of these LLMs to interface with LangChain's [Chat Messages](/docs/concepts/#message-types) abstraction.\n",
|
||||
"3. Explore tool calling with the `ChatHuggingFace`.\n",
|
||||
"4. Demonstrate how to use an open-source LLM to power an `ChatAgent` pipeline\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
@@ -64,7 +50,22 @@
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Below we install additional packages as well for demonstration purposes:"
|
||||
"| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [ChatHuggingFace](https://api.python.langchain.com/en/latest/chat_models/langchain_huggingface.chat_models.huggingface.ChatHuggingFace.html) | [langchain_huggingface](https://api.python.langchain.com/en/latest/huggingface_api_reference.html) | ✅ | ❌ | ❌ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access `langchain_huggingface` models you'll need to create a/an `Hugging Face` account, get an API key, and install the `langchain_huggingface` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"You'll need to have a [Hugging Face Access Token](https://huggingface.co/docs/hub/security-tokens) saved as an environment variable: `HUGGINGFACEHUB_API_TOKEN`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -73,14 +74,41 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-huggingface text-generation transformers google-search-results numexpr langchainhub sentencepiece jinja2"
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = getpass.getpass(\n",
|
||||
" \"Enter your Hugging Face API key: \"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.1.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain-huggingface text-generation transformers google-search-results numexpr langchainhub sentencepiece jinja2 bitsandbytes accelerate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation"
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"You can instantiate a `ChatHuggingFace` model in two different ways, either from a `HuggingFaceEndpoint` or from a `HuggingFacePipeline`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -92,19 +120,32 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
|
||||
"Token is valid (permission: fineGrained).\n",
|
||||
"Your token has been saved to /Users/isaachershenson/.cache/huggingface/token\n",
|
||||
"Login successful\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_huggingface import HuggingFaceEndpoint\n",
|
||||
"from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint\n",
|
||||
"\n",
|
||||
"llm = HuggingFaceEndpoint(\n",
|
||||
" repo_id=\"meta-llama/Meta-Llama-3-70B-Instruct\",\n",
|
||||
" repo_id=\"HuggingFaceH4/zephyr-7b-beta\",\n",
|
||||
" task=\"text-generation\",\n",
|
||||
" max_new_tokens=512,\n",
|
||||
" do_sample=False,\n",
|
||||
" repetition_penalty=1.03,\n",
|
||||
")"
|
||||
")\n",
|
||||
"\n",
|
||||
"chat_model = ChatHuggingFace(llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -116,11 +157,194 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "da32ae8ec8864ccfb480044fe2eec065",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"config.json: 0%| | 0.00/638 [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "ee1891b7e5f64fba88ba35f444e598fb",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model.safetensors.index.json: 0%| | 0.00/23.9k [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "9ff1ec7f575b42adb608c15955de7888",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Downloading shards: 0%| | 0/8 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "5214696698814b919f561647a684d1e4",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00001-of-00008.safetensors: 0%| | 0.00/1.89G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "9ac334c69a2048a0a77340cca44d8c80",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00002-of-00008.safetensors: 0%| | 0.00/1.95G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "465ad1a51d414e0daf1cd9308455be94",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00003-of-00008.safetensors: 0%| | 0.00/1.98G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "a329c43c3d574df0afd38c7457cc639c",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00004-of-00008.safetensors: 0%| | 0.00/1.95G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "a736a6c4023542af8c6ecc232b823d18",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00005-of-00008.safetensors: 0%| | 0.00/1.98G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "8bdee70b843d433e8236fff83ecda022",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00006-of-00008.safetensors: 0%| | 0.00/1.95G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "5ecb6103e0304ae188a14d598119a361",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00007-of-00008.safetensors: 0%| | 0.00/1.98G [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "174e3cb487bd453c9c70d7614254a35e",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"model-00008-of-00008.safetensors: 0%| | 0.00/816M [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "28f8c233b04b45d7800e12c785a8c4bc",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"Loading checkpoint shards: 0%| | 0/8 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "449dfa023dc8430fbcde94544ba01c4f",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
"generation_config.json: 0%| | 0.00/111 [00:00<?, ?B/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_huggingface import HuggingFacePipeline\n",
|
||||
"from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline\n",
|
||||
"\n",
|
||||
"llm = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"HuggingFaceH4/zephyr-7b-beta\",\n",
|
||||
@@ -129,8 +353,34 @@
|
||||
" max_new_tokens=512,\n",
|
||||
" do_sample=False,\n",
|
||||
" repetition_penalty=1.03,\n",
|
||||
" return_full_text=False,\n",
|
||||
" ),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chat_model = ChatHuggingFace(llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Instatiating with Quantization\n",
|
||||
"\n",
|
||||
"To run a quantized version of your model, you can specify a `bitsandbytes` quantization config as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from transformers import BitsAndBytesConfig\n",
|
||||
"\n",
|
||||
"quantization_config = BitsAndBytesConfig(\n",
|
||||
" load_in_4bit=True,\n",
|
||||
" bnb_4bit_quant_type=\"nf4\",\n",
|
||||
" bnb_4bit_compute_dtype=\"float16\",\n",
|
||||
" bnb_4bit_use_double_quant=True,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -138,30 +388,27 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To run a quantized version, you might specify a `bitsandbytes` quantization config as follows:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"from transformers import BitsAndBytesConfig\n",
|
||||
"\n",
|
||||
"quantization_config = BitsAndBytesConfig(\n",
|
||||
" load_in_4bit=True,\n",
|
||||
" bnb_4bit_quant_type=\"nf4\",\n",
|
||||
" bnb_4bit_compute_dtype=\"float16\",\n",
|
||||
" bnb_4bit_use_double_quant=True\n",
|
||||
")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"and pass it to the `HuggingFacePipeline` as a part of its `model_kwargs`:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"pipeline = HuggingFacePipeline(\n",
|
||||
" ...\n",
|
||||
"\n",
|
||||
"and pass it to the `HuggingFacePipeline` as a part of its `model_kwargs`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = HuggingFacePipeline.from_model_id(\n",
|
||||
" model_id=\"HuggingFaceH4/zephyr-7b-beta\",\n",
|
||||
" task=\"text-generation\",\n",
|
||||
" pipeline_kwargs=dict(\n",
|
||||
" max_new_tokens=512,\n",
|
||||
" do_sample=False,\n",
|
||||
" repetition_penalty=1.03,\n",
|
||||
" ),\n",
|
||||
" model_kwargs={\"quantization_config\": quantization_config},\n",
|
||||
" \n",
|
||||
" ...\n",
|
||||
")\n",
|
||||
"```"
|
||||
"\n",
|
||||
"chat_model = ChatHuggingFace(llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -171,34 +418,16 @@
|
||||
"## Invocation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Instantiate the chat model and some messages to pass. \n",
|
||||
"\n",
|
||||
"**Note**: you need to pass the `model_id` explicitly if you are using self-hosted `text-generation-inference`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.messages import (\n",
|
||||
" HumanMessage,\n",
|
||||
" SystemMessage,\n",
|
||||
")\n",
|
||||
"from langchain_huggingface import ChatHuggingFace\n",
|
||||
"\n",
|
||||
"messages = [\n",
|
||||
" SystemMessage(content=\"You're a helpful assistant\"),\n",
|
||||
@@ -207,343 +436,35 @@
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"chat_model = ChatHuggingFace(llm=llm)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Check the `model_id`"
|
||||
"ai_msg = chat_model.invoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'meta-llama/Meta-Llama-3-70B-Instruct'"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat_model.model_id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Inspect how the chat messages are formatted for the LLM call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\\n\\nYou're a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\nWhat happens when an unstoppable force meets an immovable object?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n\""
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat_model._to_chat_prompt(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Call the model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"One of the classic thought experiments in physics!\n",
|
||||
"According to the popular phrase and hypothetical scenario, when an unstoppable force meets an immovable object, a paradoxical situation arises as both forces are seemingly contradictory. On one hand, an unstoppable force is an entity that cannot be stopped or prevented from moving forward, while on the other hand, an immovable object is something that cannot be moved or displaced from its position. \n",
|
||||
"\n",
|
||||
"The concept of an unstoppable force meeting an immovable object is a paradox that has puzzled philosophers and physicists for centuries. It's a mind-bending scenario that challenges our understanding of the fundamental laws of physics.\n",
|
||||
"\n",
|
||||
"In essence, an unstoppable force is something that cannot be halted or slowed down, while an immovable object is something that cannot be moved or displaced. If we assume that both entities exist in the same universe, we run into a logical contradiction.\n",
|
||||
"\n",
|
||||
"Here\n"
|
||||
"In this scenario, it is un\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"res = chat_model.invoke(messages)\n",
|
||||
"print(res.content)"
|
||||
"print(ai_msg.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | llm\n",
|
||||
"chain.invoke(\n",
|
||||
" {\n",
|
||||
" \"input_language\": \"English\",\n",
|
||||
" \"output_language\": \"German\",\n",
|
||||
" \"input\": \"I love programming.\",\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tool calling with `ChatHuggingFace`\n",
|
||||
"\n",
|
||||
"`text-generation-inference` supports tool with open source LLMs starting from v2.0.1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create a basic tool (`Calculator`):"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Calculator(BaseModel):\n",
|
||||
" \"\"\"Multiply two integers together.\"\"\"\n",
|
||||
"\n",
|
||||
" a: int = Field(..., description=\"First integer\")\n",
|
||||
" b: int = Field(..., description=\"Second integer\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Bind the tool to the `chat_model` and give it a try:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Calculator(a=3, b=12)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.output_parsers.openai_tools import PydanticToolsParser\n",
|
||||
"\n",
|
||||
"llm_with_multiply = chat_model.bind_tools([Calculator], tool_choice=\"auto\")\n",
|
||||
"parser = PydanticToolsParser(tools=[Calculator])\n",
|
||||
"tool_chain = llm_with_multiply | parser\n",
|
||||
"tool_chain.invoke(\"How much is 3 multiplied by 12?\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use with agents\n",
|
||||
"\n",
|
||||
"Here we'll test out `Zephyr-7B-beta` as a zero-shot `ReAct` Agent. \n",
|
||||
"\n",
|
||||
"The agent is based on the paper [ReAct: Synergizing Reasoning and Acting in Language Models](https://arxiv.org/abs/2210.03629)\n",
|
||||
"\n",
|
||||
"The example below is taken from [here](https://python.langchain.com/v0.1/docs/modules/agents/agent_types/react/#using-chat-models).\n",
|
||||
"\n",
|
||||
"> Note: To run this section, you'll need to have a [SerpAPI Token](https://serpapi.com/) saved as an environment variable: `SERPAPI_API_KEY`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor, load_tools\n",
|
||||
"from langchain.agents.format_scratchpad import format_log_to_str\n",
|
||||
"from langchain.agents.output_parsers import (\n",
|
||||
" ReActJsonSingleInputOutputParser,\n",
|
||||
")\n",
|
||||
"from langchain.tools.render import render_text_description\n",
|
||||
"from langchain_community.utilities import SerpAPIWrapper"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Configure the agent with a `react-json` style prompt and access to a search engine and calculator."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# setup tools\n",
|
||||
"tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\n",
|
||||
"\n",
|
||||
"# setup ReAct style prompt\n",
|
||||
"prompt = hub.pull(\"hwchase17/react-json\")\n",
|
||||
"prompt = prompt.partial(\n",
|
||||
" tools=render_text_description(tools),\n",
|
||||
" tool_names=\", \".join([t.name for t in tools]),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# define the agent\n",
|
||||
"chat_model_with_stop = chat_model.bind(stop=[\"\\nObservation\"])\n",
|
||||
"agent = (\n",
|
||||
" {\n",
|
||||
" \"input\": lambda x: x[\"input\"],\n",
|
||||
" \"agent_scratchpad\": lambda x: format_log_to_str(x[\"intermediate_steps\"]),\n",
|
||||
" }\n",
|
||||
" | prompt\n",
|
||||
" | chat_model_with_stop\n",
|
||||
" | ReActJsonSingleInputOutputParser()\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# instantiate AgentExecutor\n",
|
||||
"agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3mQuestion: Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\n",
|
||||
"\n",
|
||||
"Thought: I need to use the Search tool to find out who Leo DiCaprio's current girlfriend is. Then, I can use the Calculator tool to raise her current age to the power of 0.43.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Search\",\n",
|
||||
" \"action_input\": \"leo dicaprio girlfriend\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3mLeonardo DiCaprio may have found The One in Vittoria Ceretti. “They are in love,” a source exclusively reveals in the latest issue of Us Weekly. “Leo was clearly very proud to be showing Vittoria off and letting everyone see how happy they are together.”\u001b[0m\u001b[32;1m\u001b[1;3mNow that we know Leo DiCaprio's current girlfriend is Vittoria Ceretti, let's find out her current age.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Search\",\n",
|
||||
" \"action_input\": \"vittoria ceretti age\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m25 years\u001b[0m\u001b[32;1m\u001b[1;3mNow that we know Vittoria Ceretti's current age is 25, let's use the Calculator tool to raise it to the power of 0.43.\n",
|
||||
"\n",
|
||||
"Action:\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"action\": \"Calculator\",\n",
|
||||
" \"action_input\": \"25^0.43\"\n",
|
||||
"}\n",
|
||||
"```\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3mAnswer: 3.991298452658078\u001b[0m\u001b[32;1m\u001b[1;3mFinal Answer: Vittoria Ceretti, Leo DiCaprio's current girlfriend, when raised to the power of 0.43 is approximately 4.0 rounded to two decimal places. Her current age is 25 years old.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'input': \"Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\",\n",
|
||||
" 'output': \"Vittoria Ceretti, Leo DiCaprio's current girlfriend, when raised to the power of 0.43 is approximately 4.0 rounded to two decimal places. Her current age is 25 years old.\"}"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\n",
|
||||
" \"input\": \"Who is Leo DiCaprio's girlfriend? What is her current age raised to the 0.43 power?\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Wahoo! Our open-source 7b parameter Zephyr model was able to:\n",
|
||||
"\n",
|
||||
"1. Plan out a series of actions: `I need to use the Search tool to find out who Leo DiCaprio's current girlfriend is. Then, I can use the Calculator tool to raise her current age to the power of 0.43.`\n",
|
||||
"2. Then execute a search using the SerpAPI tool to find who Leo DiCaprio's current girlfriend is\n",
|
||||
"3. Execute another search to find her age\n",
|
||||
"4. And finally use a calculator tool to calculate her age raised to the power of 0.43\n",
|
||||
"\n",
|
||||
"It's exciting to see how far open-source LLM's can go as general purpose reasoning agents. Give it a try yourself!"
|
||||
"For detailed documentation of all `ChatHuggingFace` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_huggingface.chat_models.huggingface.ChatHuggingFace.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -572,7 +493,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
32
docs/docs/integrations/chat/index.mdx
Normal file
32
docs/docs/integrations/chat/index.mdx
Normal file
@@ -0,0 +1,32 @@
|
||||
---
|
||||
sidebar_position: 0
|
||||
sidebar_class_name: hidden
|
||||
keywords: [compatibility]
|
||||
---
|
||||
|
||||
# Chat models
|
||||
|
||||
[Chat models](/docs/concepts/#chat-models) are language models that use a sequence of [messages](/docs/concepts/#messages) as inputs and return messages as outputs (as opposed to using plain text). These are generally newer models.
|
||||
|
||||
:::info
|
||||
|
||||
If you'd like to write your own chat model, see [this how-to](/docs/how_to/custom_chat_model/).
|
||||
If you'd like to contribute an integration, see [Contributing integrations](/docs/contributing/integrations/).
|
||||
|
||||
:::
|
||||
|
||||
## Featured Providers
|
||||
|
||||
:::info
|
||||
While all these LangChain classes support the indicated advanced feature, you may have
|
||||
to open the provider-specific documentation to learn which hosted models or backends support
|
||||
the feature.
|
||||
:::
|
||||
|
||||
import { CategoryTable, IndexTable } from "@theme/FeatureTables";
|
||||
|
||||
<CategoryTable category="chat" />
|
||||
|
||||
## All chat models
|
||||
|
||||
<IndexTable />
|
||||
@@ -13,7 +13,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Kinetica SqlAssist LLM Demo\n",
|
||||
"# Kinetica Language To SQL Chat Model\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how to use Kinetica to transform natural language into SQL\n",
|
||||
"and simplify the process of data retrieval. This demo is intended to show the mechanics\n",
|
||||
|
||||
@@ -4,9 +4,23 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ChatLlamaCpp\n",
|
||||
"# Llama.cpp\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with chat model intergrated with [llama cpp python](https://github.com/abetlen/llama-cpp-python)."
|
||||
">[llama.cpp python](https://github.com/abetlen/llama-cpp-python) library is a simple Python bindings for `@ggerganov`\n",
|
||||
">[llama.cpp](https://github.com/ggerganov/llama.cpp).\n",
|
||||
">\n",
|
||||
">This package provides:\n",
|
||||
">\n",
|
||||
"> - Low-level access to C API via ctypes interface.\n",
|
||||
"> - High-level Python API for text completion\n",
|
||||
"> - `OpenAI`-like API\n",
|
||||
"> - `LangChain` compatibility\n",
|
||||
"> - `LlamaIndex` compatibility\n",
|
||||
"> - OpenAI compatible web server\n",
|
||||
"> - Local Copilot replacement\n",
|
||||
"> - Function Calling support\n",
|
||||
"> - Vision API support\n",
|
||||
"> - Multiple Models\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -212,8 +226,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.tools import tool\n",
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"from langchain_core.tools import tool\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class WeatherInput(BaseModel):\n",
|
||||
@@ -410,7 +424,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -12,12 +12,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a14c83bf-af26-4f22-8c1a-d632c5795ecf",
|
||||
"id": "d295c2a2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MistralAI\n",
|
||||
"# ChatMistralAI\n",
|
||||
"\n",
|
||||
"This will help you getting started with Mistral [chat models](/docs/concepts/#chat-models), accessed via their [API](https://docs.mistral.ai/api/). For detailed documentation of all ChatMistralAI features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_mistralai.chat_models.ChatMistralAI.html).\n",
|
||||
"This will help you getting started with Mistral [chat models](/docs/concepts/#chat-models). For detailed documentation of all `ChatMistralAI` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_mistralai.chat_models.ChatMistralAI.html). The `ChatMistralAI` class is built on top of the [Mistral API](https://docs.mistral.ai/api/). For a list of all the models supported by Mistral, check out [this page](https://docs.mistral.ai/getting-started/models/).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
@@ -29,36 +29,35 @@
|
||||
"### Model features\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
|
||||
"| ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Mistral models you'll need to create a Mistral account, get an API key, and install the `langchain-mistralai` integration package.\n",
|
||||
"\n",
|
||||
"To access `ChatMistralAI` models you'll need to create a Mistral account, get an API key, and install the `langchain_mistralai` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"A valid [API key](https://console.mistral.ai/users/api-keys/) is needed to communicate with the API. Once you've obtained an API key, store it in the `MISTRAL_API_KEY` environment variable:"
|
||||
"\n",
|
||||
"A valid [API key](https://console.mistral.ai/users/api-keys/) is needed to communicate with the API. Once you've done this set the MISTRAL_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9acd8340-09d4-4ece-871a-a35b0732c7d8",
|
||||
"id": "2461605e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"__MODULE_NAME___API_KEY\"):\n",
|
||||
" os.environ[\"__MODULE_NAME___API_KEY\"] = getpass.getpass(\n",
|
||||
" \"Enter your __ModuleName__ API key: \"\n",
|
||||
" )"
|
||||
"os.environ[\"MISTRAL_API_KEY\"] = getpass.getpass(\"Enter your Mistral API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "42c979b1-df49-4f6c-9fe6-d9dbf3ea8c2a",
|
||||
"id": "788f37ac",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
@@ -67,37 +66,37 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cc4f11ec-5cb3-4caf-b3cd-7a20c41b0cfe",
|
||||
"id": "007209d5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0fc42221-97b2-466b-95db-10368e17ca56",
|
||||
"id": "0f5c74f9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain MistralAI integration lives in the `langchain-mistralai` package:"
|
||||
"The LangChain Mistral integration lives in the `langchain_mistralai` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "85cb1ab8-9f2c-4b93-8415-ad65819dcb38",
|
||||
"id": "1ab11a65",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-mistralai"
|
||||
"%pip install -qU langchain_mistralai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "502127fd",
|
||||
"id": "fb1a335e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
@@ -107,19 +106,24 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "2dfa801a-d040-4c09-9634-58604e8eaf16",
|
||||
"execution_count": 5,
|
||||
"id": "e6c38580",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_mistralai.chat_models import ChatMistralAI\n",
|
||||
"from langchain_mistralai import ChatMistralAI\n",
|
||||
"\n",
|
||||
"llm = ChatMistralAI(model=\"mistral-large-latest\")"
|
||||
"llm = ChatMistralAI(\n",
|
||||
" model=\"mistral-large-latest\",\n",
|
||||
" temperature=0,\n",
|
||||
" max_retries=2,\n",
|
||||
" # other params...\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f668acff-eb14-4b3a-959a-df5bfc02968b",
|
||||
"id": "aec79099",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
@@ -127,17 +131,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "86e3f9e6-67ec-4fbf-8ff1-85331200f412",
|
||||
"execution_count": 6,
|
||||
"id": "8838c3cc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore la programmation.\", response_metadata={'token_usage': {'prompt_tokens': 27, 'total_tokens': 36, 'completion_tokens': 9}, 'model': 'mistral-large-latest', 'finish_reason': 'stop'}, id='run-d6196c33-9410-413b-b454-4ed0bec1f0c7-0', usage_metadata={'input_tokens': 27, 'output_tokens': 9, 'total_tokens': 36})"
|
||||
"AIMessage(content='Sure, I\\'d be happy to help you translate that sentence into French! The English sentence \"I love programming\" translates to \"J\\'aime programmer\" in French. Let me know if you have any other questions or need further assistance!', response_metadata={'token_usage': {'prompt_tokens': 32, 'total_tokens': 84, 'completion_tokens': 52}, 'model': 'mistral-small', 'finish_reason': 'stop'}, id='run-64bac156-7160-4b68-b67e-4161f63e021f-0', usage_metadata={'input_tokens': 32, 'output_tokens': 52, 'total_tokens': 84})"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -156,15 +160,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "8f8a24bc-b7f0-4d3a-b310-8a4e0ba125dd",
|
||||
"execution_count": 7,
|
||||
"id": "bbf6a048",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"J'adore la programmation.\n"
|
||||
"Sure, I'd be happy to help you translate that sentence into French! The English sentence \"I love programming\" translates to \"J'aime programmer\" in French. Let me know if you have any other questions or need further assistance!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -174,116 +178,27 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c361ab1e-8c0c-4206-9e3c-9d1424a12b9c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Async"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "c5fac0e9-05a4-4fc1-a3b3-e5bbb24b971b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'aime programmer.\", response_metadata={'token_usage': {'prompt_tokens': 27, 'total_tokens': 34, 'completion_tokens': 7}, 'model': 'mistral-large-latest', 'finish_reason': 'stop'}, id='run-1873888a-186f-49a8-ab81-24335bd3099b-0', usage_metadata={'input_tokens': 27, 'output_tokens': 7, 'total_tokens': 34})"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"await llm.ainvoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "86ccef97",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Streaming\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "025be980-e50d-4a68-93dc-c9c7b500ce34",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"J'adore programmer."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in llm.stream(messages):\n",
|
||||
" print(chunk.content, end=\"\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f6189577",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Batch"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "e63aebcb",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[AIMessage(content=\"J'adore la programmation.\", response_metadata={'token_usage': {'prompt_tokens': 27, 'total_tokens': 36, 'completion_tokens': 9}, 'model': 'mistral-large-latest', 'finish_reason': 'stop'}, id='run-2aa2a189-c405-4cf5-bd31-e9025e4c8536-0', usage_metadata={'input_tokens': 27, 'output_tokens': 9, 'total_tokens': 36})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm.batch([messages])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "38e39e71",
|
||||
"id": "32b87f87",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"You can also easily combine with a prompt template for easy structuring of user input. We can do this using [LCEL](/docs/concepts#langchain-expression-language-lcel)"
|
||||
"We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "ee43a1ae",
|
||||
"execution_count": 8,
|
||||
"id": "24e2c51c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Ich liebe Programmieren.', response_metadata={'token_usage': {'prompt_tokens': 21, 'total_tokens': 28, 'completion_tokens': 7}, 'model': 'mistral-large-latest', 'finish_reason': 'stop'}, id='run-409ebc9a-b4a0-4734-ab6f-e11f6b4f808f-0', usage_metadata={'input_tokens': 21, 'output_tokens': 7, 'total_tokens': 28})"
|
||||
"AIMessage(content='Ich liebe Programmierung. (German translation)', response_metadata={'token_usage': {'prompt_tokens': 26, 'total_tokens': 38, 'completion_tokens': 12}, 'model': 'mistral-small', 'finish_reason': 'stop'}, id='run-dfd4094f-e347-47b0-9056-8ebd7ea35fe7-0', usage_metadata={'input_tokens': 26, 'output_tokens': 12, 'total_tokens': 38})"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -291,7 +206,7 @@
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate(\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
@@ -313,12 +228,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eb7e01fb-a433-48b1-a4c2-e6009523a896",
|
||||
"id": "cb9b5834",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all ChatMistralAI features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_mistralai.chat_models.ChatMistralAI.html"
|
||||
"Head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_mistralai.chat_models.ChatMistralAI.html) for detailed documentation of all attributes and methods."
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -338,7 +253,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -99,7 +99,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
"version": "3.10.12"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
@@ -110,7 +110,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 9,
|
||||
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -118,7 +118,7 @@
|
||||
"from langchain_ollama import ChatOllama\n",
|
||||
"\n",
|
||||
"llm = ChatOllama(\n",
|
||||
" model=\"llama3\",\n",
|
||||
" model=\"llama3.1\",\n",
|
||||
" temperature=0,\n",
|
||||
" # other params...\n",
|
||||
")"
|
||||
@@ -134,7 +134,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 10,
|
||||
"id": "62e0dbc3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -143,10 +143,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Je adore le programmation.\\n\\n(Note: \"programmation\" is not commonly used in French, but I translated it as \"le programmation\" to maintain the same grammatical structure and meaning as the original English sentence.)', response_metadata={'model': 'llama3', 'created_at': '2024-07-22T17:43:54.731273Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 11094839375, 'load_duration': 10121854667, 'prompt_eval_count': 36, 'prompt_eval_duration': 146569000, 'eval_count': 46, 'eval_duration': 816593000}, id='run-befccbdc-e1f9-42a9-85cf-e69b926d6b8b-0', usage_metadata={'input_tokens': 36, 'output_tokens': 46, 'total_tokens': 82})"
|
||||
"AIMessage(content='The translation of \"I love programming\" from English to French is:\\n\\n\"J\\'adore programmer.\"', response_metadata={'model': 'llama3.1', 'created_at': '2024-08-19T16:05:32.81965Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 2167842917, 'load_duration': 54222584, 'prompt_eval_count': 35, 'prompt_eval_duration': 893007000, 'eval_count': 22, 'eval_duration': 1218962000}, id='run-0863daa2-43bf-4a43-86cc-611b23eae466-0', usage_metadata={'input_tokens': 35, 'output_tokens': 22, 'total_tokens': 57})"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -167,7 +167,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 11,
|
||||
"id": "d86145b3-bfef-46e8-b227-4dda5c9c2705",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -175,9 +175,9 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Je adore le programmation.\n",
|
||||
"The translation of \"I love programming\" from English to French is:\n",
|
||||
"\n",
|
||||
"(Note: \"programmation\" is not commonly used in French, but I translated it as \"le programmation\" to maintain the same grammatical structure and meaning as the original English sentence.)\n"
|
||||
"\"J'adore programmer.\"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -197,17 +197,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 12,
|
||||
"id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Ich liebe Programmieren!\\n\\n(Note: \"Ich liebe\" means \"I love\", \"Programmieren\" is the verb for \"programming\")', response_metadata={'model': 'llama3', 'created_at': '2024-07-04T04:22:33.864132Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1310800083, 'load_duration': 1782000, 'prompt_eval_count': 16, 'prompt_eval_duration': 250199000, 'eval_count': 29, 'eval_duration': 1057192000}, id='run-cbadbe59-2de2-4ec0-a18a-b3220226c3d2-0')"
|
||||
"AIMessage(content='Das Programmieren ist mir ein Leidenschaft! (That\\'s \"Programming is my passion!\" in German.) Would you like me to translate anything else?', response_metadata={'model': 'llama3.1', 'created_at': '2024-08-19T16:05:34.893548Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 2045997333, 'load_duration': 22584792, 'prompt_eval_count': 30, 'prompt_eval_duration': 213210000, 'eval_count': 32, 'eval_duration': 1808541000}, id='run-d18e1c6b-50e0-4b1d-b23a-973fa058edad-0', usage_metadata={'input_tokens': 30, 'output_tokens': 32, 'total_tokens': 62})"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -242,33 +242,32 @@
|
||||
"source": [
|
||||
"## Tool calling\n",
|
||||
"\n",
|
||||
"We can use [tool calling](https://blog.langchain.dev/improving-core-tool-interfaces-and-docs-in-langchain/) with an LLM [that has been fine-tuned for tool use](https://ollama.com/library/llama3-groq-tool-use): \n",
|
||||
"We can use [tool calling](https://blog.langchain.dev/improving-core-tool-interfaces-and-docs-in-langchain/) with an LLM [that has been fine-tuned for tool use](https://ollama.com/library/llama3.1): \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"ollama pull llama3-groq-tool-use\n",
|
||||
"ollama pull llama3.1\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"We can just pass normal Python functions directly as tools."
|
||||
"Details on creating custom tools are available in [this guide](/docs/how_to/custom_tools/). Below, we demonstrate how to create a tool using the `@tool` decorator on a normal python function."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "5250bceb-1029-41ff-b447-983518704d88",
|
||||
"execution_count": 13,
|
||||
"id": "f767015f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'name': 'validate_user',\n",
|
||||
" 'args': {'addresses': ['123 Fake St, Boston MA',\n",
|
||||
" '234 Pretend Boulevard, Houston TX'],\n",
|
||||
" 'user_id': 123},\n",
|
||||
" 'id': 'fe2148d3-95fb-48e9-845a-4bfecc1f1f96',\n",
|
||||
" 'args': {'addresses': '[\"123 Fake St, Boston, MA\", \"234 Pretend Boulevard, Houston, TX\"]',\n",
|
||||
" 'user_id': '123'},\n",
|
||||
" 'id': '40fe3de0-500c-4b91-9616-5932a929e640',\n",
|
||||
" 'type': 'tool_call'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -276,22 +275,23 @@
|
||||
"source": [
|
||||
"from typing import List\n",
|
||||
"\n",
|
||||
"from langchain_core.tools import tool\n",
|
||||
"from langchain_ollama import ChatOllama\n",
|
||||
"from typing_extensions import TypedDict\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def validate_user(user_id: int, addresses: List) -> bool:\n",
|
||||
"@tool\n",
|
||||
"def validate_user(user_id: int, addresses: List[str]) -> bool:\n",
|
||||
" \"\"\"Validate user using historical addresses.\n",
|
||||
"\n",
|
||||
" Args:\n",
|
||||
" user_id: (int) the user ID.\n",
|
||||
" addresses: Previous addresses.\n",
|
||||
" user_id (int): the user ID.\n",
|
||||
" addresses (List[str]): Previous addresses as a list of strings.\n",
|
||||
" \"\"\"\n",
|
||||
" return True\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"llm = ChatOllama(\n",
|
||||
" model=\"llama3-groq-tool-use\",\n",
|
||||
" model=\"llama3.1\",\n",
|
||||
" temperature=0,\n",
|
||||
").bind_tools([validate_user])\n",
|
||||
"\n",
|
||||
@@ -303,18 +303,6 @@
|
||||
"result.tool_calls"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2bb034ff-218f-4865-afea-3f5e57d3bdee",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We look at the LangSmith trace to see that the tool call was performed: \n",
|
||||
"\n",
|
||||
"https://smith.langchain.com/public/4169348a-d6be-45df-a7cf-032f6baa4697/r\n",
|
||||
"\n",
|
||||
"In particular, the trace shows how the tool schema was populated."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4c5e0197",
|
||||
@@ -331,7 +319,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 15,
|
||||
"id": "36c9b1c2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -391,7 +379,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 16,
|
||||
"id": "32b3ba7b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -467,7 +455,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
"version": "3.12.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,322 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Ollama Functions\n",
|
||||
"sidebar_class_name: hidden\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# OllamaFunctions\n",
|
||||
"\n",
|
||||
":::warning\n",
|
||||
"\n",
|
||||
"This was an experimental wrapper that attempts to bolt-on tool calling support to models that do not natively support it. The [primary Ollama integration](/docs/integrations/chat/ollama/) now supports tool calling, and should be used instead.\n",
|
||||
"\n",
|
||||
":::\n",
|
||||
"This notebook shows how to use an experimental wrapper around Ollama that gives it [tool calling capabilities](https://python.langchain.com/v0.2/docs/concepts/#functiontool-calling).\n",
|
||||
"\n",
|
||||
"Note that more powerful and capable models will perform better with complex schema and/or multiple functions. The examples below use llama3 and phi3 models.\n",
|
||||
"For a complete list of supported models and model variants, see the [Ollama model library](https://ollama.ai/library).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n",
|
||||
"|:-----------------------------------------------------------------------------------------------------------------------------------:|:-------:|:-----:|:------------:|:----------:|:-----------------:|:--------------:|\n",
|
||||
"| [OllamaFunctions](https://api.python.langchain.com/en/latest/llms/langchain_experimental.llms.ollama_function.OllamaFunctions.html) | [langchain-experimental](https://api.python.langchain.com/en/latest/openai_api_reference.html) | ✅ | ❌ | ❌ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling/) | [Structured output](/docs/how_to/structured_output/) | JSON mode | Image input | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access `OllamaFunctions` you will need to install `langchain-experimental` integration package.\n",
|
||||
"Follow [these instructions](https://github.com/jmorganca/ollama) to set up and run a local Ollama instance as well as download and serve [supported models](https://ollama.com/library).\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Credentials support is not present at this time.\n",
|
||||
"\n",
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The `OllamaFunctions` class lives in the `langchain-experimental` package:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-experimental"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"`OllamaFunctions` takes the same init parameters as `ChatOllama`. \n",
|
||||
"\n",
|
||||
"In order to use tool calling, you must also specify `format=\"json\"`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-23T15:20:21.818089Z",
|
||||
"start_time": "2024-06-23T15:20:21.815759Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_experimental.llms.ollama_functions import OllamaFunctions\n",
|
||||
"\n",
|
||||
"llm = OllamaFunctions(model=\"phi3\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-06-23T15:20:46.794689Z",
|
||||
"start_time": "2024-06-23T15:20:44.982632Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore programmer.\", id='run-94815fcf-ae11-438a-ba3f-00819328b5cd-0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"messages = [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"I love programming.\"),\n",
|
||||
"]\n",
|
||||
"ai_msg = llm.invoke(messages)\n",
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"J'adore programmer.\""
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ai_msg.content"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"We can [chain](https://python.langchain.com/v0.2/docs/how_to/sequence/) our model with a prompt template like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Programmieren ist sehr verrückt! Es freut mich, dass Sie auf Programmierung so positiv eingestellt sind.', id='run-ee99be5e-4d48-4ab6-b602-35415f0bdbde-0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | llm\n",
|
||||
"chain.invoke(\n",
|
||||
" {\n",
|
||||
" \"input_language\": \"English\",\n",
|
||||
" \"output_language\": \"German\",\n",
|
||||
" \"input\": \"I love programming.\",\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tool Calling\n",
|
||||
"\n",
|
||||
"### OllamaFunctions.bind_tools()\n",
|
||||
"\n",
|
||||
"With `OllamaFunctions.bind_tools`, we can easily pass in Pydantic classes, dict schemas, LangChain tools, or even functions as tools to the model. Under the hood these are converted to a tool definition schemas, which looks like:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class GetWeather(BaseModel):\n",
|
||||
" \"\"\"Get the current weather in a given location\"\"\"\n",
|
||||
"\n",
|
||||
" location: str = Field(..., description=\"The city and state, e.g. San Francisco, CA\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"llm_with_tools = llm.bind_tools([GetWeather])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='', id='run-b9769435-ec6a-4cb8-8545-5a5035fc19bd-0', tool_calls=[{'name': 'GetWeather', 'args': {'location': 'San Francisco, CA'}, 'id': 'call_064c4e1cb27e4adb9e4e7ed60362ecc9'}])"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ai_msg = llm_with_tools.invoke(\n",
|
||||
" \"what is the weather like in San Francisco\",\n",
|
||||
")\n",
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### AIMessage.tool_calls\n",
|
||||
"\n",
|
||||
"Notice that the AIMessage has a `tool_calls` attribute. This contains in a standardized `ToolCall` format that is model-provider agnostic."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'name': 'GetWeather',\n",
|
||||
" 'args': {'location': 'San Francisco, CA'},\n",
|
||||
" 'id': 'call_064c4e1cb27e4adb9e4e7ed60362ecc9'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ai_msg.tool_calls"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For more on binding tools and tool call outputs, head to the [tool calling](../../how_to/function_calling.ipynb) docs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all ToolCallingLLM features and configurations head to the API reference: https://api.python.langchain.com/en/latest/llms/langchain_experimental.llms.ollama_functions.OllamaFunctions.html\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -56,23 +56,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 1,
|
||||
"id": "e817fe2e-4f1d-4533-b19e-2400b1cf6ce8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Enter your OpenAI API key: ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")"
|
||||
"if not os.environ.get(\"OPENAI_API_KEY\"):\n",
|
||||
" os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -126,7 +119,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 2,
|
||||
"id": "522686de",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -158,7 +151,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 3,
|
||||
"id": "ce16ad78-8e6f-48cd-954e-98be75eb5836",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -167,10 +160,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore la programmation.\", response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 31, 'total_tokens': 36}, 'model_name': 'gpt-4o', 'system_fingerprint': 'fp_43dfabdef1', 'finish_reason': 'stop', 'logprobs': None}, id='run-012cffe2-5d3d-424d-83b5-51c6d4a593d1-0', usage_metadata={'input_tokens': 31, 'output_tokens': 5, 'total_tokens': 36})"
|
||||
"AIMessage(content=\"J'adore la programmation.\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 31, 'total_tokens': 36}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'stop', 'logprobs': None}, id='run-63219b22-03e3-4561-8cc4-78b7c7c3a3ca-0', usage_metadata={'input_tokens': 31, 'output_tokens': 5, 'total_tokens': 36})"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -189,7 +182,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 4,
|
||||
"id": "2cd224b8-4499-41fb-a604-d53a7ff17b2e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -217,7 +210,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 5,
|
||||
"id": "fbb043e6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -226,10 +219,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Ich liebe Programmieren.', response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 26, 'total_tokens': 31}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': 'fp_b28b39ffa8', 'finish_reason': 'stop', 'logprobs': None}, id='run-94fa6741-c99b-4513-afce-c3f562631c79-0')"
|
||||
"AIMessage(content='Ich liebe das Programmieren.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 26, 'total_tokens': 32}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'stop', 'logprobs': None}, id='run-350585e1-16ca-4dad-9460-3d9e7e49aaf1-0', usage_metadata={'input_tokens': 26, 'output_tokens': 6, 'total_tokens': 32})"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -281,12 +274,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 6,
|
||||
"id": "b7ea7690-ec7a-4337-b392-e87d1f39a6ec",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class GetWeather(BaseModel):\n",
|
||||
@@ -300,17 +293,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 7,
|
||||
"id": "1d1ab955-6a68-42f8-bb5d-86eb1111478a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_H7fABDuzEau48T10Qn0Lsh0D', 'function': {'arguments': '{\"location\":\"San Francisco\"}', 'name': 'GetWeather'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 70, 'total_tokens': 85}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': 'fp_b28b39ffa8', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-b469135e-2718-446a-8164-eef37e672ba2-0', tool_calls=[{'name': 'GetWeather', 'args': {'location': 'San Francisco'}, 'id': 'call_H7fABDuzEau48T10Qn0Lsh0D'}])"
|
||||
"AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_o9udf3EVOWiV4Iupktpbpofk', 'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'GetWeather'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 68, 'total_tokens': 85}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-1617c9b2-dda5-4120-996b-0333ed5992e2-0', tool_calls=[{'name': 'GetWeather', 'args': {'location': 'San Francisco, CA'}, 'id': 'call_o9udf3EVOWiV4Iupktpbpofk', 'type': 'tool_call'}], usage_metadata={'input_tokens': 68, 'output_tokens': 17, 'total_tokens': 85})"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -322,6 +315,47 @@
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "67b0f63d-15e6-45e0-9e86-2852ddcff54f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ``strict=True``\n",
|
||||
"\n",
|
||||
":::info Requires ``langchain-openai>=0.1.21rc1``\n",
|
||||
"\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"As of Aug 6, 2024, OpenAI supports a `strict` argument when calling tools that will enforce that the tool argument schema is respected by the model. See more here: https://platform.openai.com/docs/guides/function-calling\n",
|
||||
"\n",
|
||||
"**Note**: If ``strict=True`` the tool definition will also be validated, and a subset of JSON schema are accepted. Crucially, schema cannot have optional args (those with default values). Read the full docs on what types of schema are supported here: https://platform.openai.com/docs/guides/structured-outputs/supported-schemas. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "dc8ac4f1-4039-4392-90c1-2d8331cd6910",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_jUqhd8wzAIzInTJl72Rla8ht', 'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'GetWeather'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 68, 'total_tokens': 85}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_3aa7262c27', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-5e3356a9-132d-4623-8e73-dd5a898cf4a6-0', tool_calls=[{'name': 'GetWeather', 'args': {'location': 'San Francisco, CA'}, 'id': 'call_jUqhd8wzAIzInTJl72Rla8ht', 'type': 'tool_call'}], usage_metadata={'input_tokens': 68, 'output_tokens': 17, 'total_tokens': 85})"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"llm_with_tools = llm.bind_tools([GetWeather], strict=True)\n",
|
||||
"ai_msg = llm_with_tools.invoke(\n",
|
||||
" \"what is the weather like in San Francisco\",\n",
|
||||
")\n",
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "768d1ae4-4b1a-48eb-a329-c8d5051067a3",
|
||||
@@ -333,7 +367,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 9,
|
||||
"id": "166cb7ce-831d-4a7c-9721-abc107f11084",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -341,11 +375,12 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'name': 'GetWeather',\n",
|
||||
" 'args': {'location': 'San Francisco'},\n",
|
||||
" 'id': 'call_H7fABDuzEau48T10Qn0Lsh0D'}]"
|
||||
" 'args': {'location': 'San Francisco, CA'},\n",
|
||||
" 'id': 'call_jUqhd8wzAIzInTJl72Rla8ht',\n",
|
||||
" 'type': 'tool_call'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -376,17 +411,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 11,
|
||||
"id": "33c4a8b0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore la programmation.\", additional_kwargs={}, example=False)"
|
||||
"AIMessage(content=\"J'adore la programmation.\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 31, 'total_tokens': 39}, 'model_name': 'ft:gpt-3.5-turbo-0613:langchain::7qTVM5AR', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-0f39b30e-c56e-4f3b-af99-5c948c984146-0', usage_metadata={'input_tokens': 31, 'output_tokens': 8, 'total_tokens': 39})"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -396,7 +431,7 @@
|
||||
" temperature=0, model_name=\"ft:gpt-3.5-turbo-0613:langchain::7qTVM5AR\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"fine_tuned_model(messages)"
|
||||
"fine_tuned_model.invoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -412,9 +447,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "poetry-venv-2",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "poetry-venv-2"
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -426,7 +461,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
"source": [
|
||||
"# ChatPerplexity\n",
|
||||
"\n",
|
||||
"This notebook covers how to get started with Perplexity chat models."
|
||||
"This notebook covers how to get started with `Perplexity` chat models."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -37,17 +37,31 @@
|
||||
"from langchain_core.prompts import ChatPromptTemplate"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b26e2035-2f81-4451-ba44-fa2e2d5aeb62",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The code provided assumes that your PPLX_API_KEY is set in your environment variables. If you would like to manually specify your API key and also choose a different model, you can use the following code:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d986aac6-1bae-4608-8514-d3ba5b35b10e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chat = ChatPerplexity(\n",
|
||||
" temperature=0, pplx_api_key=\"YOUR_API_KEY\", model=\"llama-3-sonar-small-32k-online\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "97a8ce3a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The code provided assumes that your PPLX_API_KEY is set in your environment variables. If you would like to manually specify your API key and also choose a different model, you can use the following code:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"chat = ChatPerplexity(temperature=0, pplx_api_key=\"YOUR_API_KEY\", model=\"llama-3-sonar-small-32k-online\")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You can check a list of available models [here](https://docs.perplexity.ai/docs/model-cards). For reproducibility, we can set the API key dynamically by taking it as an input in this notebook."
|
||||
]
|
||||
},
|
||||
@@ -221,7 +235,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.18"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -53,7 +53,8 @@
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"TOGETHER_API_KEY\"] = getpass.getpass(\"Enter your Together API key: \")"
|
||||
"if \"TOGETHER_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"TOGETHER_API_KEY\"] = getpass.getpass(\"Enter your Together API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -87,21 +88,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"id": "652d6238-1f87-422a-b135-f5abbb8652fc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.1.2\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-together"
|
||||
]
|
||||
@@ -113,14 +103,12 @@
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:\n",
|
||||
"\n",
|
||||
"- TODO: Update model instantiation with relevant params."
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 3,
|
||||
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -147,7 +135,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 4,
|
||||
"id": "62e0dbc3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -156,10 +144,10 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore la programmation.\", response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 35, 'total_tokens': 44}, 'model_name': 'meta-llama/Llama-3-70b-chat-hf', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-79efa49b-dbaf-4ef8-9dce-958533823ef6-0', usage_metadata={'input_tokens': 35, 'output_tokens': 9, 'total_tokens': 44})"
|
||||
"AIMessage(content=\"J'adore la programmation.\", response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 35, 'total_tokens': 44}, 'model_name': 'meta-llama/Llama-3-70b-chat-hf', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-eabcbe33-cdd8-45b8-ab0b-f90b6e7dfad8-0', usage_metadata={'input_tokens': 35, 'output_tokens': 9, 'total_tokens': 44})"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -178,7 +166,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 5,
|
||||
"id": "d86145b3-bfef-46e8-b227-4dda5c9c2705",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@@ -206,17 +194,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Ich liebe das Programmieren.', response_metadata={'token_usage': {'completion_tokens': 7, 'prompt_tokens': 30, 'total_tokens': 37}, 'model_name': 'meta-llama/Llama-3-70b-chat-hf', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-80bba5fa-1723-4242-8d5a-c09b76b8350b-0', usage_metadata={'input_tokens': 30, 'output_tokens': 7, 'total_tokens': 37})"
|
||||
"AIMessage(content='Ich liebe das Programmieren.', response_metadata={'token_usage': {'completion_tokens': 7, 'prompt_tokens': 30, 'total_tokens': 37}, 'model_name': 'meta-llama/Llama-3-70b-chat-hf', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-a249aa24-ee31-46ba-9bf9-f4eb135b0a95-0', usage_metadata={'input_tokens': 30, 'output_tokens': 7, 'total_tokens': 37})"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -271,7 +259,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"You need to have an existing dataset on the Apify platform. If you don't have one, please first check out [this notebook](/docs/integrations/tools/apify) on how to use Apify to extract content from documentation, knowledge bases, help centers, or blogs. This example shows how to load a dataset produced by the [Website Content Crawler](https://apify.com/apify/website-content-crawler)."
|
||||
"You need to have an existing dataset on the Apify platform. This example shows how to load a dataset produced by the [Website Content Crawler](https://apify.com/apify/website-content-crawler)."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
282
docs/docs/integrations/document_loaders/box.ipynb
Normal file
282
docs/docs/integrations/document_loaders/box.ipynb
Normal file
@@ -0,0 +1,282 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Box\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# BoxLoader\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with Box [document loader](/docs/integrations/document_loaders/). For detailed documentation of all BoxLoader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.langchain_box_loader.BoxLoader.html).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"\n",
|
||||
"The `BoxLoader` class helps you get your unstructured content from Box in Langchain's `Document` format. You can do this with either a `List[str]` containing Box file IDs, or with a `str` containing a Box folder ID. \n",
|
||||
"\n",
|
||||
"You must provide either a `List[str]` containing Box file Ids, or a `str` containing a folder ID. If getting files from a folder with folder ID, you can also set a `Bool` to tell the loader to get all sub-folders in that folder, as well. \n",
|
||||
"\n",
|
||||
":::info\n",
|
||||
"A Box instance can contain Petabytes of files, and folders can contain millions of files. Be intentional when choosing what folders you choose to index. And we recommend never getting all files from folder 0 recursively. Folder ID 0 is your root folder.\n",
|
||||
":::\n",
|
||||
"\n",
|
||||
"Files without a text representation will be skipped.\n",
|
||||
"\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [BoxLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_box.document_loaders.langchain_boxloader.BoxLoader.html) | [langchain_box](https://api.python.langchain.com/en/latest/box_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| BoxLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"In order to use the Box package, you will need a few things:\n",
|
||||
"\n",
|
||||
"* A Box account — If you are not a current Box customer or want to test outside of your production Box instance, you can use a [free developer account](https://account.box.com/signup/n/developer#ty9l3).\n",
|
||||
"* [A Box app](https://developer.box.com/guides/getting-started/first-application/) — This is configured in the [developer console](https://account.box.com/developers/console), and for Box AI, must have the `Manage AI` scope enabled. Here you will also select your authentication method\n",
|
||||
"* The app must be [enabled by the administrator](https://developer.box.com/guides/authorization/custom-app-approval/#manual-approval). For free developer accounts, this is whomever signed up for the account.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"For these examples, we will use [token authentication](https://developer.box.com/guides/authentication/tokens/developer-tokens). This can be used with any [authentication method](https://developer.box.com/guides/authentication/). Just get the token with whatever methodology. If you want to learn more about how to use other authentication types with `langchain-box`, visit the [Box provider](/docs/integrations/providers/box) document.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Enter your Box Developer Token: ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"box_developer_token = getpass.getpass(\"Enter your Box Developer Token: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_box**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_box"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"### Load files\n",
|
||||
"\n",
|
||||
"If you wish to load files, you must provide the `List` of file ids at instantiation time. \n",
|
||||
"\n",
|
||||
"This requires 1 piece of information:\n",
|
||||
"\n",
|
||||
"* **box_file_ids** (`List[str]`)- A list of Box file IDs. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_box.document_loaders import BoxLoader\n",
|
||||
"\n",
|
||||
"box_file_ids = [\"1514555423624\", \"1514553902288\"]\n",
|
||||
"\n",
|
||||
"loader = BoxLoader(\n",
|
||||
" box_developer_token=box_developer_token,\n",
|
||||
" box_file_ids=box_file_ids,\n",
|
||||
" character_limit=10000, # Optional. Defaults to no limit\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load from folder\n",
|
||||
"\n",
|
||||
"If you wish to load files from a folder, you must provide a `str` with the Box folder ID at instantiation time. \n",
|
||||
"\n",
|
||||
"This requires 1 piece of information:\n",
|
||||
"\n",
|
||||
"* **box_folder_id** (`str`)- A string containing a Box folder ID. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_box.document_loaders import BoxLoader\n",
|
||||
"\n",
|
||||
"box_folder_id = \"260932470532\"\n",
|
||||
"\n",
|
||||
"loader = BoxLoader(\n",
|
||||
" box_folder_id=box_folder_id,\n",
|
||||
" recursive=False, # Optional. return entire tree, defaults to False\n",
|
||||
" character_limit=10000, # Optional. Defaults to no limit\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': 'https://dl.boxcloud.com/api/2.0/internal_files/1514555423624/versions/1663171610024/representations/extracted_text/content/', 'title': 'Invoice-A5555_txt'}, page_content='Vendor: AstroTech Solutions\\nInvoice Number: A5555\\n\\nLine Items:\\n - Gravitational Wave Detector Kit: $800\\n - Exoplanet Terrarium: $120\\nTotal: $920')"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': 'https://dl.boxcloud.com/api/2.0/internal_files/1514555423624/versions/1663171610024/representations/extracted_text/content/', 'title': 'Invoice-A5555_txt'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all BoxLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_box.document_loaders.langchain_box_loader.BoxLoader.html\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Help\n",
|
||||
"\n",
|
||||
"If you have questions, you can check out our [developer documentation](https://developer.box.com) or reach out to use in our [developer community](https://community.box.com)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
243
docs/docs/integrations/document_loaders/bshtml.ipynb
Normal file
243
docs/docs/integrations/document_loaders/bshtml.ipynb
Normal file
@@ -0,0 +1,243 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# BSHTMLLoader\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with BeautifulSoup4 [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all __ModuleName__Loader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.html_bs.BSHTMLLoader.html).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [BSHTMLLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.html_bs.BSHTMLLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| BSHTMLLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access BSHTMLLoader document loader you'll need to install the `langchain-community` integration package and the `bs4` python package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are needed to use the `BSHTMLLoader` class."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community** and **bs4**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community bs4"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents:\n",
|
||||
"\n",
|
||||
"- TODO: Update model instantiation with relevant params."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import BSHTMLLoader\n",
|
||||
"\n",
|
||||
"loader = BSHTMLLoader(\n",
|
||||
" file_path=\"./example_data/fake-content.html\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/fake-content.html', 'title': 'Test Title'}, page_content='\\nTest Title\\n\\n\\nMy First Heading\\nMy first paragraph.\\n\\n\\n')"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': './example_data/fake-content.html', 'title': 'Test Title'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/fake-content.html', 'title': 'Test Title'}, page_content='\\nTest Title\\n\\n\\nMy First Heading\\nMy first paragraph.\\n\\n\\n')"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []\n",
|
||||
"page[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Adding separator to BS4\n",
|
||||
"\n",
|
||||
"We can also pass a separator to use when calling get_text on the soup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='\n",
|
||||
", Test Title, \n",
|
||||
", \n",
|
||||
", \n",
|
||||
", My First Heading, \n",
|
||||
", My first paragraph., \n",
|
||||
", \n",
|
||||
", \n",
|
||||
"' metadata={'source': './example_data/fake-content.html', 'title': 'Test Title'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = BSHTMLLoader(\n",
|
||||
" file_path=\"./example_data/fake-content.html\", get_text_separator=\", \"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(docs[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all BSHTMLLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.html_bs.BSHTMLLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,55 @@
|
||||
# Sample Markdown Document
|
||||
|
||||
## Introduction
|
||||
|
||||
Welcome to this sample Markdown document. Markdown is a lightweight markup language used for formatting text. It's widely used for documentation, readme files, and more.
|
||||
|
||||
## Features
|
||||
|
||||
### Headers
|
||||
|
||||
Markdown supports multiple levels of headers:
|
||||
|
||||
- **Header 1**: `# Header 1`
|
||||
- **Header 2**: `## Header 2`
|
||||
- **Header 3**: `### Header 3`
|
||||
|
||||
### Lists
|
||||
|
||||
#### Unordered List
|
||||
|
||||
- Item 1
|
||||
- Item 2
|
||||
- Subitem 2.1
|
||||
- Subitem 2.2
|
||||
|
||||
#### Ordered List
|
||||
|
||||
1. First item
|
||||
2. Second item
|
||||
3. Third item
|
||||
|
||||
### Links
|
||||
|
||||
[OpenAI](https://www.openai.com) is an AI research organization.
|
||||
|
||||
### Images
|
||||
|
||||
Here's an example image:
|
||||
|
||||

|
||||
|
||||
### Code
|
||||
|
||||
#### Inline Code
|
||||
|
||||
Use `code` for inline code snippets.
|
||||
|
||||
#### Code Block
|
||||
|
||||
```python
|
||||
def greet(name):
|
||||
return f"Hello, {name}!"
|
||||
|
||||
print(greet("World"))
|
||||
```
|
||||
@@ -30,6 +30,7 @@
|
||||
{
|
||||
"sender_name": "User 2",
|
||||
"timestamp_ms": 1675595060730,
|
||||
"content": "",
|
||||
"photos": [
|
||||
{"uri": "url_of_some_picture.jpg", "creation_timestamp": 1675595059}
|
||||
]
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -164,7 +164,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import GithubFileLoader"
|
||||
"from langchain_community.document_loaders import GithubFileLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
69
docs/docs/integrations/document_loaders/index.mdx
Normal file
69
docs/docs/integrations/document_loaders/index.mdx
Normal file
@@ -0,0 +1,69 @@
|
||||
---
|
||||
sidebar_position: 0
|
||||
sidebar_class_name: hidden
|
||||
---
|
||||
|
||||
# Document loaders
|
||||
|
||||
import { CategoryTable, IndexTable } from "@theme/FeatureTables";
|
||||
|
||||
DocumentLoaders load data into the standard LangChain Document format.
|
||||
|
||||
Each DocumentLoader has its own specific parameters, but they can all be invoked in the same way with the .load method.
|
||||
An example use case is as follows:
|
||||
|
||||
```python
|
||||
from langchain_community.document_loaders.csv_loader import CSVLoader
|
||||
|
||||
loader = CSVLoader(
|
||||
... # <-- Integration specific parameters here
|
||||
)
|
||||
data = loader.load()
|
||||
```
|
||||
|
||||
## Webpages
|
||||
|
||||
The below document loaders allow you to load webpages.
|
||||
|
||||
<CategoryTable category="webpage_loaders" />
|
||||
|
||||
## PDFs
|
||||
|
||||
The below document loaders allow you to load PDF documents.
|
||||
|
||||
<CategoryTable category="pdf_loaders" />
|
||||
|
||||
## Cloud Providers
|
||||
|
||||
The below document loaders allow you to load documents from your favorite cloud providers.
|
||||
|
||||
<CategoryTable category="cloud_provider_loaders"/>
|
||||
|
||||
## Social Platforms
|
||||
|
||||
The below document loaders allow you to load documents from differnt social media platforms.
|
||||
|
||||
<CategoryTable category="social_loaders"/>
|
||||
|
||||
## Messaging Services
|
||||
|
||||
The below document loaders allow you to load data from different messaging platforms.
|
||||
|
||||
<CategoryTable category="messaging_loaders"/>
|
||||
|
||||
## Productivity tools
|
||||
|
||||
The below document loaders allow you to load data from commonly used productivity tools.
|
||||
|
||||
<CategoryTable category="productivity_loaders"/>
|
||||
|
||||
## Common File Types
|
||||
|
||||
The below document loaders allow you to load data from common data formats.
|
||||
|
||||
<CategoryTable category="common_loaders" />
|
||||
|
||||
|
||||
## All document loaders
|
||||
|
||||
<IndexTable />
|
||||
348
docs/docs/integrations/document_loaders/json.ipynb
Normal file
348
docs/docs/integrations/document_loaders/json.ipynb
Normal file
@@ -0,0 +1,348 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# JSONLoader\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with JSON [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all JSONLoader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.json_loader.JSONLoader.html).\n",
|
||||
"\n",
|
||||
"- TODO: Add any other relevant links, like information about underlying API, etc.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/file_loaders/json/)|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [JSONLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.json_loader.JSONLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ✅ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| JSONLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access JSON document loader you'll need to install the `langchain-community` integration package as well as the ``jq`` python package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are required to use the `JSONLoader` class."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community** and **jq**:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community jq "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents:\n",
|
||||
"\n",
|
||||
"- TODO: Update model instantiation with relevant params."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import JSONLoader\n",
|
||||
"\n",
|
||||
"loader = JSONLoader(\n",
|
||||
" file_path=\"./example_data/facebook_chat.json\",\n",
|
||||
" jq_schema=\".messages[].content\",\n",
|
||||
" text_content=False,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat.json', 'seq_num': 1}, page_content='Bye!')"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat.json', 'seq_num': 1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pages = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" pages.append(doc)\n",
|
||||
" if len(pages) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(pages)\n",
|
||||
"\n",
|
||||
" pages = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Read from JSON Lines file\n",
|
||||
"\n",
|
||||
"If you want to load documents from a JSON Lines file, you pass `json_lines=True`\n",
|
||||
"and specify `jq_schema` to extract `page_content` from a single JSON object."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='Bye!' metadata={'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat_messages.jsonl', 'seq_num': 1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = JSONLoader(\n",
|
||||
" file_path=\"./example_data/facebook_chat_messages.jsonl\",\n",
|
||||
" jq_schema=\".content\",\n",
|
||||
" text_content=False,\n",
|
||||
" json_lines=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(docs[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Read specific content keys\n",
|
||||
"\n",
|
||||
"Another option is to set `jq_schema='.'` and provide a `content_key` in order to only load specific content:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='User 2' metadata={'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat_messages.jsonl', 'seq_num': 1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = JSONLoader(\n",
|
||||
" file_path=\"./example_data/facebook_chat_messages.jsonl\",\n",
|
||||
" jq_schema=\".\",\n",
|
||||
" content_key=\"sender_name\",\n",
|
||||
" json_lines=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(docs[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## JSON file with jq schema `content_key`\n",
|
||||
"\n",
|
||||
"To load documents from a JSON file using the `content_key` within the jq schema, set `is_content_key_jq_parsable=True`. Ensure that `content_key` is compatible and can be parsed using the jq schema."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='Bye!' metadata={'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat.json', 'seq_num': 1}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader = JSONLoader(\n",
|
||||
" file_path=\"./example_data/facebook_chat.json\",\n",
|
||||
" jq_schema=\".messages[]\",\n",
|
||||
" content_key=\".content\",\n",
|
||||
" is_content_key_jq_parsable=True,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(docs[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Extracting metadata\n",
|
||||
"\n",
|
||||
"Generally, we want to include metadata available in the JSON file into the documents that we create from the content.\n",
|
||||
"\n",
|
||||
"The following demonstrates how metadata can be extracted using the `JSONLoader`.\n",
|
||||
"\n",
|
||||
"There are some key changes to be noted. In the previous example where we didn't collect the metadata, we managed to directly specify in the schema where the value for the `page_content` can be extracted from.\n",
|
||||
"\n",
|
||||
"In this example, we have to tell the loader to iterate over the records in the `messages` field. The jq_schema then has to be `.messages[]`\n",
|
||||
"\n",
|
||||
"This allows us to pass the records (dict) into the `metadata_func` that has to be implemented. The `metadata_func` is responsible for identifying which pieces of information in the record should be included in the metadata stored in the final `Document` object.\n",
|
||||
"\n",
|
||||
"Additionally, we now have to explicitly specify in the loader, via the `content_key` argument, the key from the record where the value for the `page_content` needs to be extracted from."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': '/Users/isaachershenson/Documents/langchain/docs/docs/integrations/document_loaders/example_data/facebook_chat.json', 'seq_num': 1, 'sender_name': 'User 2', 'timestamp_ms': 1675597571851}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Define the metadata extraction function.\n",
|
||||
"def metadata_func(record: dict, metadata: dict) -> dict:\n",
|
||||
" metadata[\"sender_name\"] = record.get(\"sender_name\")\n",
|
||||
" metadata[\"timestamp_ms\"] = record.get(\"timestamp_ms\")\n",
|
||||
"\n",
|
||||
" return metadata\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"loader = JSONLoader(\n",
|
||||
" file_path=\"./example_data/facebook_chat.json\",\n",
|
||||
" jq_schema=\".messages[]\",\n",
|
||||
" content_key=\"content\",\n",
|
||||
" metadata_func=metadata_func,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"docs = loader.load()\n",
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all JSONLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.json_loader.JSONLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
294
docs/docs/integrations/document_loaders/langsmith.ipynb
Normal file
294
docs/docs/integrations/document_loaders/langsmith.ipynb
Normal file
@@ -0,0 +1,294 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: LangSmith\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# LangSmithLoader\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with the LangSmith [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all LangSmithLoader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_core.document_loaders.langsmith.LangSmithLoader.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [LangSmithLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_core.document_loaders.langsmith.LangSmithLoader.html) | [langchain-core](https://api.python.langchain.com/en/latest/core_api_reference.html) | ❌ | ❌ | ❌ | \n",
|
||||
"\n",
|
||||
"### Loader features\n",
|
||||
"| Source | Lazy loading | Native async\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| LangSmithLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access the LangSmith document loader you'll need to install `langchain-core`, create a [LangSmith](https://langsmith.com) account and get an API key.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Sign up at https://langsmith.com and generate an API key. Once you've done this set the LANGSMITH_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.environ.get(\"LANGSMITH_API_KEY\"):\n",
|
||||
" os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best-in-class tracing, you can also turn on LangSmith tracing:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install `langchain-core`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-core"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Clone example dataset\n",
|
||||
"\n",
|
||||
"For this example, we'll clone and load a public LangSmith dataset. Cloning creates a copy of this dataset on our personal LangSmith account. You can only load datasets that you have a personal copy of."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langsmith import Client as LangSmithClient\n",
|
||||
"\n",
|
||||
"ls_client = LangSmithClient()\n",
|
||||
"\n",
|
||||
"dataset_name = \"LangSmith Few Shot Datasets Notebook\"\n",
|
||||
"dataset_public_url = (\n",
|
||||
" \"https://smith.langchain.com/public/55658626-124a-4223-af45-07fb774a6212/d\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"ls_client.clone_public_dataset(dataset_public_url)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our document loader and load documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.document_loaders import LangSmithLoader\n",
|
||||
"\n",
|
||||
"loader = LangSmithLoader(\n",
|
||||
" dataset_name=dataset_name,\n",
|
||||
" content_key=\"question\",\n",
|
||||
" limit=50,\n",
|
||||
" # format_content=...,\n",
|
||||
" # ...\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Show me an example using Weaviate, but customizing the vectorStoreRetriever to return the top 10 k nearest neighbors. \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"print(docs[0].page_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'question': 'Show me an example using Weaviate, but customizing the vectorStoreRetriever to return the top 10 k nearest neighbors. '}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata[\"inputs\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'answer': 'To customize the Weaviate client and return the top 10 k nearest neighbors, you can utilize the `as_retriever` method with the appropriate parameters. Here\\'s how you can achieve this:\\n\\n```python\\n# Assuming you have imported the necessary modules and classes\\n\\n# Create the Weaviate client\\nclient = weaviate.Client(url=os.environ[\"WEAVIATE_URL\"], ...)\\n\\n# Initialize the Weaviate wrapper\\nweaviate = Weaviate(client, index_name, text_key)\\n\\n# Customize the client to return top 10 k nearest neighbors using as_retriever\\ncustom_retriever = weaviate.as_retriever(\\n search_type=\"similarity\",\\n search_kwargs={\\n \\'k\\': 10 # Customize the value of k as needed\\n }\\n)\\n\\n# Now you can use the custom_retriever to perform searches\\nresults = custom_retriever.search(query, ...)\\n```'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata[\"outputs\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['dataset_id',\n",
|
||||
" 'inputs',\n",
|
||||
" 'outputs',\n",
|
||||
" 'metadata',\n",
|
||||
" 'id',\n",
|
||||
" 'created_at',\n",
|
||||
" 'modified_at',\n",
|
||||
" 'runs',\n",
|
||||
" 'source_run_id']"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"list(docs[0].metadata.keys())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"10"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
" # page = []\n",
|
||||
" break\n",
|
||||
"len(page)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all LangSmithLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_core.document_loaders.langsmith.LangSmithLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "poetry-venv-311",
|
||||
"language": "python",
|
||||
"name": "poetry-venv-311"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
178
docs/docs/integrations/document_loaders/mathpix.ipynb
Normal file
178
docs/docs/integrations/document_loaders/mathpix.ipynb
Normal file
@@ -0,0 +1,178 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MathPixPDFLoader\n",
|
||||
"\n",
|
||||
"Inspired by Daniel Gross's snippet here: [https://gist.github.com/danielgross/3ab4104e14faccc12b49200843adab21](https://gist.github.com/danielgross/3ab4104e14faccc12b49200843adab21)\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [MathPixPDFLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.MathpixPDFLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| MathPixPDFLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Sign up for Mathpix and [create an API key](https://mathpix.com/docs/ocr/creating-an-api-key) to set the `MATHPIX_API_KEY` variables in your environment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if \"MATHPIX_API_KEY\" not in os.environ:\n",
|
||||
" os.environ[\"MATHPIX_API_KEY\"] = getpass.getpass(\"Enter your Mathpix API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we are ready to initialize our loader:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import MathpixPDFLoader\n",
|
||||
"\n",
|
||||
"file_path = \"./example_data/layout-parser-paper.pdf\"\n",
|
||||
"loader = MathpixPDFLoader(file_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all MathpixPDFLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.MathpixPDFLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,19 +1,170 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1dc7df1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Notion DB 1/2\n",
|
||||
"# Notion DB 2/2\n",
|
||||
"\n",
|
||||
">[Notion](https://www.notion.so/) is a collaboration platform with modified Markdown support that integrates kanban boards, tasks, wikis and databases. It is an all-in-one workspace for notetaking, knowledge and data management, and project and task management.\n",
|
||||
"\n",
|
||||
"This notebook covers how to load documents from a Notion database dump.\n",
|
||||
"`NotionDBLoader` is a Python class for loading content from a `Notion` database. It retrieves pages from the database, reads their content, and returns a list of Document objects. `NotionDirectoryLoader` is used for loading data from a Notion database dump.\n",
|
||||
"\n",
|
||||
"In order to get this notion dump, follow these instructions:\n",
|
||||
"## Requirements\n",
|
||||
"\n",
|
||||
"## 🧑 Instructions for ingesting your own dataset\n",
|
||||
"- A `Notion` Database\n",
|
||||
"- Notion Integration Token\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### 1. Create a Notion Table Database\n",
|
||||
"Create a new table database in Notion. You can add any column to the database and they will be treated as metadata. For example you can add the following columns:\n",
|
||||
"\n",
|
||||
"- Title: set Title as the default property.\n",
|
||||
"- Categories: A Multi-select property to store categories associated with the page.\n",
|
||||
"- Keywords: A Multi-select property to store keywords associated with the page.\n",
|
||||
"\n",
|
||||
"Add your content to the body of each page in the database. The NotionDBLoader will extract the content and metadata from these pages.\n",
|
||||
"\n",
|
||||
"## 2. Create a Notion Integration\n",
|
||||
"To create a Notion Integration, follow these steps:\n",
|
||||
"\n",
|
||||
"1. Visit the [Notion Developers](https://www.notion.com/my-integrations) page and log in with your Notion account.\n",
|
||||
"2. Click on the \"+ New integration\" button.\n",
|
||||
"3. Give your integration a name and choose the workspace where your database is located.\n",
|
||||
"4. Select the require capabilities, this extension only need the Read content capability\n",
|
||||
"5. Click the \"Submit\" button to create the integration.\n",
|
||||
"Once the integration is created, you'll be provided with an `Integration Token (API key)`. Copy this token and keep it safe, as you'll need it to use the NotionDBLoader.\n",
|
||||
"\n",
|
||||
"### 3. Connect the Integration to the Database\n",
|
||||
"To connect your integration to the database, follow these steps:\n",
|
||||
"\n",
|
||||
"1. Open your database in Notion.\n",
|
||||
"2. Click on the three-dot menu icon in the top right corner of the database view.\n",
|
||||
"3. Click on the \"+ New integration\" button.\n",
|
||||
"4. Find your integration, you may need to start typing its name in the search box.\n",
|
||||
"5. Click on the \"Connect\" button to connect the integration to the database.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### 4. Get the Database ID\n",
|
||||
"To get the database ID, follow these steps:\n",
|
||||
"\n",
|
||||
"1. Open your database in Notion.\n",
|
||||
"2. Click on the three-dot menu icon in the top right corner of the database view.\n",
|
||||
"3. Select \"Copy link\" from the menu to copy the database URL to your clipboard.\n",
|
||||
"4. The database ID is the long string of alphanumeric characters found in the URL. It typically looks like this: https://www.notion.so/username/8935f9d140a04f95a872520c4f123456?v=.... In this example, the database ID is 8935f9d140a04f95a872520c4f123456.\n",
|
||||
"\n",
|
||||
"With the database properly set up and the integration token and database ID in hand, you can now use the NotionDBLoader code to load content and metadata from your Notion database.\n",
|
||||
"\n",
|
||||
"### 5. Installation\n",
|
||||
"\n",
|
||||
"Instaall the `langchain-community` integration package.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "412b38dc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cced2931",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"## Notion Database Loader\n",
|
||||
"NotionDBLoader is part of the langchain package's document loaders. You can use it as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "6c3a314c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"········\n",
|
||||
"········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"NOTION_TOKEN = getpass()\n",
|
||||
"DATABASE_ID = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "007c5cbf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import NotionDBLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "a1caec59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = NotionDBLoader(\n",
|
||||
" integration_token=NOTION_TOKEN,\n",
|
||||
" database_id=DATABASE_ID,\n",
|
||||
" request_timeout_sec=30, # optional, defaults to 10\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "b1c30ff7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "4f5789a2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2b87ab5c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Notion Directory Loader\n",
|
||||
"\n",
|
||||
"### Setup\n",
|
||||
"\n",
|
||||
"Export your dataset from Notion. You can do this by clicking on the three dots in the upper right hand corner and then clicking `Export`.\n",
|
||||
"\n",
|
||||
@@ -27,33 +178,27 @@
|
||||
"unzip Export-d3adfe0f-3131-4bf3-8987-a52017fc1bae.zip -d Notion_DB\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Run the following command to ingest the data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "007c5cbf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import NotionDirectoryLoader"
|
||||
"### Usage\n",
|
||||
"\n",
|
||||
"Run the following command to ingest the data you just downloaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a1caec59",
|
||||
"id": "9debffdd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import NotionDirectoryLoader\n",
|
||||
"\n",
|
||||
"loader = NotionDirectoryLoader(\"Notion_DB\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b1c30ff7",
|
||||
"id": "81008087",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
||||
@@ -1,161 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1dc7df1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Notion DB 2/2\n",
|
||||
"\n",
|
||||
">[Notion](https://www.notion.so/) is a collaboration platform with modified Markdown support that integrates kanban boards, tasks, wikis and databases. It is an all-in-one workspace for notetaking, knowledge and data management, and project and task management.\n",
|
||||
"\n",
|
||||
"`NotionDBLoader` is a Python class for loading content from a `Notion` database. It retrieves pages from the database, reads their content, and returns a list of Document objects.\n",
|
||||
"\n",
|
||||
"## Requirements\n",
|
||||
"\n",
|
||||
"- A `Notion` Database\n",
|
||||
"- Notion Integration Token\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### 1. Create a Notion Table Database\n",
|
||||
"Create a new table database in Notion. You can add any column to the database and they will be treated as metadata. For example you can add the following columns:\n",
|
||||
"\n",
|
||||
"- Title: set Title as the default property.\n",
|
||||
"- Categories: A Multi-select property to store categories associated with the page.\n",
|
||||
"- Keywords: A Multi-select property to store keywords associated with the page.\n",
|
||||
"\n",
|
||||
"Add your content to the body of each page in the database. The NotionDBLoader will extract the content and metadata from these pages.\n",
|
||||
"\n",
|
||||
"## 2. Create a Notion Integration\n",
|
||||
"To create a Notion Integration, follow these steps:\n",
|
||||
"\n",
|
||||
"1. Visit the [Notion Developers](https://www.notion.com/my-integrations) page and log in with your Notion account.\n",
|
||||
"2. Click on the \"+ New integration\" button.\n",
|
||||
"3. Give your integration a name and choose the workspace where your database is located.\n",
|
||||
"4. Select the require capabilities, this extension only need the Read content capability\n",
|
||||
"5. Click the \"Submit\" button to create the integration.\n",
|
||||
"Once the integration is created, you'll be provided with an `Integration Token (API key)`. Copy this token and keep it safe, as you'll need it to use the NotionDBLoader.\n",
|
||||
"\n",
|
||||
"### 3. Connect the Integration to the Database\n",
|
||||
"To connect your integration to the database, follow these steps:\n",
|
||||
"\n",
|
||||
"1. Open your database in Notion.\n",
|
||||
"2. Click on the three-dot menu icon in the top right corner of the database view.\n",
|
||||
"3. Click on the \"+ New integration\" button.\n",
|
||||
"4. Find your integration, you may need to start typing its name in the search box.\n",
|
||||
"5. Click on the \"Connect\" button to connect the integration to the database.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### 4. Get the Database ID\n",
|
||||
"To get the database ID, follow these steps:\n",
|
||||
"\n",
|
||||
"1. Open your database in Notion.\n",
|
||||
"2. Click on the three-dot menu icon in the top right corner of the database view.\n",
|
||||
"3. Select \"Copy link\" from the menu to copy the database URL to your clipboard.\n",
|
||||
"4. The database ID is the long string of alphanumeric characters found in the URL. It typically looks like this: https://www.notion.so/username/8935f9d140a04f95a872520c4f123456?v=.... In this example, the database ID is 8935f9d140a04f95a872520c4f123456.\n",
|
||||
"\n",
|
||||
"With the database properly set up and the integration token and database ID in hand, you can now use the NotionDBLoader code to load content and metadata from your Notion database.\n",
|
||||
"\n",
|
||||
"## Usage\n",
|
||||
"NotionDBLoader is part of the langchain package's document loaders. You can use it as follows:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "6c3a314c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"········\n",
|
||||
"········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"NOTION_TOKEN = getpass()\n",
|
||||
"DATABASE_ID = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "007c5cbf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import NotionDBLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "a1caec59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = NotionDBLoader(\n",
|
||||
" integration_token=NOTION_TOKEN,\n",
|
||||
" database_id=DATABASE_ID,\n",
|
||||
" request_timeout_sec=30, # optional, defaults to 10\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "b1c30ff7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"docs = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "4f5789a2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
317
docs/docs/integrations/document_loaders/pdfminer.ipynb
Normal file
317
docs/docs/integrations/document_loaders/pdfminer.ipynb
Normal file
File diff suppressed because one or more lines are too long
183
docs/docs/integrations/document_loaders/pdfplumber.ipynb
Normal file
183
docs/docs/integrations/document_loaders/pdfplumber.ipynb
Normal file
@@ -0,0 +1,183 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PDFPlumber\n",
|
||||
"\n",
|
||||
"Like PyMuPDF, the output Documents contain detailed metadata about the PDF and its pages, and returns one document per page.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [PDFPlumberLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PDFPlumberLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| PDFPlumberLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are needed to use this loader."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import PDFPlumberLoader\n",
|
||||
"\n",
|
||||
"loader = PDFPlumberLoader(\"./example_data/layout-parser-paper.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'file_path': './example_data/layout-parser-paper.pdf', 'page': 0, 'total_pages': 16, 'Author': '', 'CreationDate': 'D:20210622012710Z', 'Creator': 'LaTeX with hyperref', 'Keywords': '', 'ModDate': 'D:20210622012710Z', 'PTEX.Fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'Producer': 'pdfTeX-1.40.21', 'Subject': '', 'Title': '', 'Trapped': 'False'}, page_content='LayoutParser: A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1 ((cid:0)), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1 Allen Institute for AI\\nshannons@allenai.org\\n2 Brown University\\nruochen zhang@brown.edu\\n3 Harvard University\\n{melissadell,jacob carlson}@fas.harvard.edu\\n4 University of Washington\\nbcgl@cs.washington.edu\\n5 University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recentadvancesindocumentimageanalysis(DIA)havebeen\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomescouldbeeasilydeployedinproductionandextendedforfurther\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportantinnovationsbyawideaudience.Thoughtherehavebeenon-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopmentindisciplineslikenaturallanguageprocessingandcomputer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademicresearchacross awiderangeof disciplinesinthesocialsciences\\nand humanities. This paper introduces LayoutParser, an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitiveinterfacesforapplyingandcustomizingDLmodelsforlayoutde-\\ntection,characterrecognition,andmanyotherdocumentprocessingtasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io.\\nKeywords: DocumentImageAnalysis·DeepLearning·LayoutAnalysis\\n· Character Recognition · Open Source library · Toolkit.\\n1 Introduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocumentimageanalysis(DIA)tasksincludingdocumentimageclassification[11,\\n1202\\nnuJ\\n12\\n]VC.sc[\\n2v84351.3012:viXra\\n')"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': './example_data/layout-parser-paper.pdf', 'file_path': './example_data/layout-parser-paper.pdf', 'page': 0, 'total_pages': 16, 'Author': '', 'CreationDate': 'D:20210622012710Z', 'Creator': 'LaTeX with hyperref', 'Keywords': '', 'ModDate': 'D:20210622012710Z', 'PTEX.Fullbanner': 'This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2', 'Producer': 'pdfTeX-1.40.21', 'Subject': '', 'Title': '', 'Trapped': 'False'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all PDFPlumberLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PDFPlumberLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
185
docs/docs/integrations/document_loaders/pymupdf.ipynb
Normal file
185
docs/docs/integrations/document_loaders/pymupdf.ipynb
Normal file
@@ -0,0 +1,185 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PyMuPDF\n",
|
||||
"\n",
|
||||
"`PyMuPDF` is optimized for speed, and contains detailed metadata about the PDF and its pages. It returns one document per page.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [PyMuPDFLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyMuPDFLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| PyMuPDFLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are needed to use the `PyMuPDFLoader`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community** and **pymupdf**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-community pymupdf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can initialize our loader and start loading documents. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import PyMuPDFLoader\n",
|
||||
"\n",
|
||||
"loader = PyMuPDFLoader(\"./example_data/layout-parser-paper.pdf\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load\n",
|
||||
"\n",
|
||||
"You can pass along any of the options from the [PyMuPDF documentation](https://pymupdf.readthedocs.io/en/latest/app1.html#plain-text/) as keyword arguments in the `load` call, and it will be pass along to the `get_text()` call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'file_path': './example_data/layout-parser-paper.pdf', 'page': 0, 'total_pages': 16, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'LaTeX with hyperref', 'producer': 'pdfTeX-1.40.21', 'creationDate': 'D:20210622012710Z', 'modDate': 'D:20210622012710Z', 'trapped': ''}, page_content='LayoutParser: A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1 (\\x00), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1 Allen Institute for AI\\nshannons@allenai.org\\n2 Brown University\\nruochen zhang@brown.edu\\n3 Harvard University\\n{melissadell,jacob carlson}@fas.harvard.edu\\n4 University of Washington\\nbcgl@cs.washington.edu\\n5 University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser, an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io.\\nKeywords: Document Image Analysis · Deep Learning · Layout Analysis\\n· Character Recognition · Open Source library · Toolkit.\\n1\\nIntroduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classification [11,\\narXiv:2103.15348v2 [cs.CV] 21 Jun 2021\\n')"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': './example_data/layout-parser-paper.pdf', 'file_path': './example_data/layout-parser-paper.pdf', 'page': 0, 'total_pages': 16, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'LaTeX with hyperref', 'producer': 'pdfTeX-1.40.21', 'creationDate': 'D:20210622012710Z', 'modDate': 'D:20210622012710Z', 'trapped': ''}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all PyMuPDFLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyMuPDFLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
187
docs/docs/integrations/document_loaders/pypdfdirectory.ipynb
Normal file
187
docs/docs/integrations/document_loaders/pypdfdirectory.ipynb
Normal file
@@ -0,0 +1,187 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PyPDFDirectoryLoader\n",
|
||||
"\n",
|
||||
"This loader loads all PDF files from a specific directory.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [PyPDFDirectoryLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFDirectoryLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| PyPDFDirectoryLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are needed for this loader."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import PyPDFDirectoryLoader\n",
|
||||
"\n",
|
||||
"directory_path = (\n",
|
||||
" \"../../docs/integrations/document_loaders/example_data/layout-parser-paper.pdf\"\n",
|
||||
")\n",
|
||||
"loader = PyPDFDirectoryLoader(\"example_data/\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': 'example_data/layout-parser-paper.pdf', 'page': 0}, page_content='LayoutParser : A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1( \\x00), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1Allen Institute for AI\\nshannons@allenai.org\\n2Brown University\\nruochen zhang@brown.edu\\n3Harvard University\\n{melissadell,jacob carlson }@fas.harvard.edu\\n4University of Washington\\nbcgl@cs.washington.edu\\n5University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser , an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io .\\nKeywords: Document Image Analysis ·Deep Learning ·Layout Analysis\\n·Character Recognition ·Open Source library ·Toolkit.\\n1 Introduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classification [ 11,arXiv:2103.15348v2 [cs.CV] 21 Jun 2021')"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': 'example_data/layout-parser-paper.pdf', 'page': 0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all PyPDFDirectoryLoader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFDirectoryLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
188
docs/docs/integrations/document_loaders/pypdfium2.ipynb
Normal file
188
docs/docs/integrations/document_loaders/pypdfium2.ipynb
Normal file
@@ -0,0 +1,188 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PyPDFium2Loader\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with PyPDFium2 [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all __ModuleName__Loader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFium2Loader.html).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [PyPDFium2Loader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFium2Loader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| PyPDFium2Loader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"To access PyPDFium2 document loader you'll need to install the `langchain-community` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are needed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"Install **langchain_community**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import PyPDFium2Loader\n",
|
||||
"\n",
|
||||
"file_path = \"./example_data/layout-parser-paper.pdf\"\n",
|
||||
"loader = PyPDFium2Loader(file_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'page': 0}, page_content='LayoutParser: A Unified Toolkit for Deep\\r\\nLearning Based Document Image Analysis\\r\\nZejiang Shen\\r\\n1\\r\\n(), Ruochen Zhang\\r\\n2\\r\\n, Melissa Dell\\r\\n3\\r\\n, Benjamin Charles Germain\\r\\nLee\\r\\n4\\r\\n, Jacob Carlson\\r\\n3\\r\\n, and Weining Li\\r\\n5\\r\\n1 Allen Institute for AI\\r\\nshannons@allenai.org 2 Brown University\\r\\nruochen zhang@brown.edu 3 Harvard University\\r\\n{melissadell,jacob carlson}@fas.harvard.edu\\r\\n4 University of Washington\\r\\nbcgl@cs.washington.edu 5 University of Waterloo\\r\\nw422li@uwaterloo.ca\\r\\nAbstract. Recent advances in document image analysis (DIA) have been\\r\\nprimarily driven by the application of neural networks. Ideally, research\\r\\noutcomes could be easily deployed in production and extended for further\\r\\ninvestigation. However, various factors like loosely organized codebases\\r\\nand sophisticated model configurations complicate the easy reuse of im\\x02portant innovations by a wide audience. Though there have been on-going\\r\\nefforts to improve reusability and simplify deep learning (DL) model\\r\\ndevelopment in disciplines like natural language processing and computer\\r\\nvision, none of them are optimized for challenges in the domain of DIA.\\r\\nThis represents a major gap in the existing toolkit, as DIA is central to\\r\\nacademic research across a wide range of disciplines in the social sciences\\r\\nand humanities. This paper introduces LayoutParser, an open-source\\r\\nlibrary for streamlining the usage of DL in DIA research and applica\\x02tions. The core LayoutParser library comes with a set of simple and\\r\\nintuitive interfaces for applying and customizing DL models for layout de\\x02tection, character recognition, and many other document processing tasks.\\r\\nTo promote extensibility, LayoutParser also incorporates a community\\r\\nplatform for sharing both pre-trained models and full document digiti\\x02zation pipelines. We demonstrate that LayoutParser is helpful for both\\r\\nlightweight and large-scale digitization pipelines in real-word use cases.\\r\\nThe library is publicly available at https://layout-parser.github.io.\\r\\nKeywords: Document Image Analysis· Deep Learning· Layout Analysis\\r\\n· Character Recognition· Open Source library· Toolkit.\\r\\n1 Introduction\\r\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\r\\ndocument image analysis (DIA) tasks including document image classification [11,\\r\\narXiv:2103.15348v2 [cs.CV] 21 Jun 2021\\n')"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': './example_data/layout-parser-paper.pdf', 'page': 0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all PyPDFium2Loader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFium2Loader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
202
docs/docs/integrations/document_loaders/pypdfloader.ipynb
Normal file
202
docs/docs/integrations/document_loaders/pypdfloader.ipynb
Normal file
@@ -0,0 +1,202 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# PyPDFLoader\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with `PyPDF` [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all DocumentLoader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFLoader.html).\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [PyPDFLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| PyPDFLoader | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are required to use `PyPDFLoader`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"To use `PyPDFLoader` you need to have the `langchain-community` python package downloaded:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain_community pypdf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.document_loaders import PyPDFLoader\n",
|
||||
"\n",
|
||||
"loader = PyPDFLoader(\n",
|
||||
" \"./example_data/layout-parser-paper.pdf\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'page': 0}, page_content='LayoutParser : A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1( \\x00), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1Allen Institute for AI\\nshannons@allenai.org\\n2Brown University\\nruochen zhang@brown.edu\\n3Harvard University\\n{melissadell,jacob carlson }@fas.harvard.edu\\n4University of Washington\\nbcgl@cs.washington.edu\\n5University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser , an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io .\\nKeywords: Document Image Analysis ·Deep Learning ·Layout Analysis\\n·Character Recognition ·Open Source library ·Toolkit.\\n1 Introduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classification [ 11,arXiv:2103.15348v2 [cs.CV] 21 Jun 2021')"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"docs = loader.load()\n",
|
||||
"docs[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': './example_data/layout-parser-paper.pdf', 'page': 0}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(docs[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy Load\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"6"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pages = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" pages.append(doc)\n",
|
||||
" if len(pages) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" pages = []\n",
|
||||
"len(pages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"LayoutParser : A Unified Toolkit for DL-Based DIA 11\n",
|
||||
"focuses on precision, efficiency, and robustness. \n",
|
||||
"{'source': './example_data/layout-parser-paper.pdf', 'page': 10}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(pages[0].page_content[:100])\n",
|
||||
"print(pages[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `PyPDFLoader` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFLoader.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -7,7 +7,18 @@
|
||||
"source": [
|
||||
"# Recursive URL\n",
|
||||
"\n",
|
||||
"The `RecursiveUrlLoader` lets you recursively scrape all child links from a root URL and parse them into Documents."
|
||||
"The `RecursiveUrlLoader` lets you recursively scrape all child links from a root URL and parse them into Documents.\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/web_loaders/recursive_url_loader/)|\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [RecursiveUrlLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.recursive_url_loader.RecursiveUrlLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ✅ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| RecursiveUrlLoader | ✅ | ❌ | \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -17,6 +28,12 @@
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"No credentials are required to use the `RecursiveUrlLoader`.\n",
|
||||
"\n",
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The `RecursiveUrlLoader` lives in the `langchain-community` package. There's no other required packages, though you will get richer default Document metadata if you have ``beautifulsoup4` installed as well."
|
||||
]
|
||||
},
|
||||
@@ -186,6 +203,50 @@
|
||||
"That certainly looks like HTML that comes from the url https://docs.python.org/3.9/, which is what we expected. Let's now look at some variations we can make to our basic example that can be helpful in different situations. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b17b7202",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy loading\n",
|
||||
"\n",
|
||||
"If we're loading a large number of Documents and our downstream operations can be done over subsets of all loaded Documents, we can lazily load our Documents one at a time to minimize our memory footprint:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4b13e4d1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/var/folders/4j/2rz3865x6qg07tx43146py8h0000gn/T/ipykernel_73962/2110507528.py:6: XMLParsedAsHTMLWarning: It looks like you're parsing an XML document using an HTML parser. If this really is an HTML document (maybe it's XHTML?), you can ignore or filter this warning. If it's XML, you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the lxml package installed, and pass the keyword argument `features=\"xml\"` into the BeautifulSoup constructor.\n",
|
||||
" soup = BeautifulSoup(html, \"lxml\")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pages = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" pages.append(doc)\n",
|
||||
" if len(pages) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" pages = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fb039682",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this example we never have more than 10 Documents loaded into memory at a time."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8f41cc89",
|
||||
@@ -256,50 +317,6 @@
|
||||
"You can similarly pass in a `metadata_extractor` to customize how Document metadata is extracted from the HTTP response. See the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.recursive_url_loader.RecursiveUrlLoader.html) for more on this."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1dddbc94",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Lazy loading\n",
|
||||
"\n",
|
||||
"If we're loading a large number of Documents and our downstream operations can be done over subsets of all loaded Documents, we can lazily load our Documents one at a time to minimize our memory footprint:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "7d0114fc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/var/folders/4j/2rz3865x6qg07tx43146py8h0000gn/T/ipykernel_73962/2110507528.py:6: XMLParsedAsHTMLWarning: It looks like you're parsing an XML document using an HTML parser. If this really is an HTML document (maybe it's XHTML?), you can ignore or filter this warning. If it's XML, you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the lxml package installed, and pass the keyword argument `features=\"xml\"` into the BeautifulSoup constructor.\n",
|
||||
" soup = BeautifulSoup(html, \"lxml\")\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"page = []\n",
|
||||
"for doc in loader.lazy_load():\n",
|
||||
" page.append(doc)\n",
|
||||
" if len(page) >= 10:\n",
|
||||
" # do some paged operation, e.g.\n",
|
||||
" # index.upsert(page)\n",
|
||||
"\n",
|
||||
" page = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f88a7c2f-35df-4c3a-b238-f91be2674b96",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this example we never have more than 10 Documents loaded into memory at a time."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3e4d1c8f",
|
||||
|
||||
File diff suppressed because one or more lines are too long
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user