mirror of https://github.com/hwchase17/langchain.git (synced 2026-02-09 10:41:52 +00:00)

Compare commits: langchain-...langchain- (48 commits)
| SHA1 |
|---|
| 252f0877d1 |
| 217a915b29 |
| 056c7c2983 |
| 1adc161642 |
| deb27d8970 |
| 5efd0fe9ae |
| 1c9917dfa2 |
| ccff1ba8b8 |
| 53ee5770d3 |
| 8626abf8b5 |
| 1af8456a2c |
| 0a3500808d |
| ee8a585791 |
| e77eeee6ee |
| 9927a4866d |
| 420534c8ca |
| 794f28d4e2 |
| f28ae20b81 |
| 9f0eda6a18 |
| 472527166f |
| 074fa0db73 |
| 4fd1efc48f |
| aa2722cbe2 |
| a82c0533f2 |
| bc60cddc1b |
| 43deed2a95 |
| 9cd608efb3 |
| fd546196ef |
| 6dd9f053e3 |
| ca9dcee940 |
| dadb6f1445 |
| b6f0174bb9 |
| c3ced4c6ce |
| bd6c31617e |
| 6e57aa7c36 |
| a2b4c33bd6 |
| 4825dc0d76 |
| 02300471be |
| 66b7206ab6 |
| c81c77b465 |
| 3b7437d184 |
| 91ea4b7449 |
| 652b3fa4a4 |
| 7040013140 |
| dc7423e88f |
| 25f2e25be1 |
| 786ef021a3 |
| 429a0ee7fd |
@@ -39,7 +39,7 @@
"metadata": {},
"outputs": [],
"source": [
"! pip install langchain langchain-chroma unstructured[all-docs] pydantic lxml langchainhub"
"! pip install langchain langchain-chroma \"unstructured[all-docs]\" pydantic lxml langchainhub"
]
},
{
@@ -59,7 +59,7 @@
"metadata": {},
"outputs": [],
"source": [
"! pip install langchain langchain-chroma unstructured[all-docs] pydantic lxml"
"! pip install langchain langchain-chroma \"unstructured[all-docs]\" pydantic lxml"
]
},
{
@@ -59,7 +59,7 @@
"metadata": {},
"outputs": [],
"source": [
"! pip install langchain langchain-chroma unstructured[all-docs] pydantic lxml"
"! pip install langchain langchain-chroma \"unstructured[all-docs]\" pydantic lxml"
]
},
{
@@ -28,7 +28,7 @@
"\n",
"You can use arbitrary functions as [Runnables](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable). This is useful for formatting or when you need functionality not provided by other LangChain components, and custom functions used as Runnables are called [`RunnableLambdas`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableLambda.html).\n",
"\n",
"Note that all inputs to these functions need to be a SINGLE argument. If you have a function that accepts multiple arguments, you should write a wrapper that accepts a single dict input and unpacks it into multiple argument.\n",
"Note that all inputs to these functions need to be a SINGLE argument. If you have a function that accepts multiple arguments, you should write a wrapper that accepts a single dict input and unpacks it into multiple arguments.\n",
"\n",
"This guide will cover:\n",
"\n",
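The single-argument rule this hunk corrects is easy to see concretely. A minimal sketch of the dict-unpacking wrapper the sentence describes, assuming only `langchain-core` is installed; the function and key names are our own:

```python
from langchain_core.runnables import RunnableLambda


def multiply(a: int, b: int) -> int:
    return a * b


# RunnableLambda passes exactly one input, so the wrapper accepts a single
# dict and unpacks it into the two arguments multiply() expects.
runnable = RunnableLambda(lambda inputs: multiply(inputs["a"], inputs["b"]))

print(runnable.invoke({"a": 3, "b": 4}))  # -> 12
```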
@@ -721,9 +721,9 @@
"metadata": {},
"outputs": [],
"source": [
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
"from langgraph.checkpoint.memory import MemorySaver\n",
"\n",
"memory = SqliteSaver.from_conn_string(\":memory:\")\n",
"memory = MemorySaver()\n",
"\n",
"agent_executor = create_react_agent(llm, tools, checkpointer=memory)"
]
@@ -890,9 +890,9 @@
"from langchain_community.document_loaders import WebBaseLoader\n",
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
"from langgraph.checkpoint.memory import MemorySaver\n",
"\n",
"memory = SqliteSaver.from_conn_string(\":memory:\")\n",
"memory = MemorySaver()\n",
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
"\n",
"\n",
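Both checkpointer hunks make the same swap: `SqliteSaver.from_conn_string(":memory:")` becomes `MemorySaver()`. A minimal sketch of the resulting pattern, using an empty tool list as a stand-in for the notebook's own `tools`:

```python
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent

# MemorySaver keeps checkpoints in process memory directly; unlike
# SqliteSaver.from_conn_string(":memory:"), it needs no connection handling.
memory = MemorySaver()

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
agent_executor = create_react_agent(llm, [], checkpointer=memory)

# The checkpointer keys saved conversation state by thread_id.
config = {"configurable": {"thread_id": "example-thread"}}
agent_executor.invoke({"messages": [("user", "hi!")]}, config)
```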
File diff suppressed because one or more lines are too long
docs/docs/integrations/document_loaders/pypdfloader.ipynb (Normal file, 182 lines)
@@ -0,0 +1,182 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# PyPDFLoader\n",
"\n",
"This notebook provides a quick overview for getting started with `PyPDF` [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all `PyPDFLoader` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFLoader.html).\n",
"\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"\n",
"| Class | Package | Local | Serializable | JS support|\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [PyPDFLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ |\n",
"### Loader features\n",
"| Source | Document Lazy Loading | Native Async Support |\n",
"| :---: | :---: | :---: |\n",
"| PyPDFLoader | ✅ | ❌ |\n",
"\n",
"## Setup\n",
"\n",
"### Credentials\n",
"\n",
"No credentials are required to use `PyPDFLoader`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"To use `PyPDFLoader` you need to have the `langchain-community` python package installed:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain_community"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialization\n",
"\n",
"Now we can instantiate our loader object and load documents:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders import PyPDFLoader\n",
"\n",
"loader = PyPDFLoader(\n",
"    \"./example_data/layout-parser-paper.pdf\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'page': 0}, page_content='LayoutParser : A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1( \\x00), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1Allen Institute for AI\\nshannons@allenai.org\\n2Brown University\\nruochen zhang@brown.edu\\n3Harvard University\\n{melissadell,jacob carlson }@fas.harvard.edu\\n4University of Washington\\nbcgl@cs.washington.edu\\n5University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser , an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io .\\nKeywords: Document Image Analysis ·Deep Learning ·Layout Analysis\\n·Character Recognition ·Open Source library ·Toolkit.\\n1 Introduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classification [ 11,arXiv:2103.15348v2 [cs.CV] 21 Jun 2021')"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = loader.load()\n",
"docs[0]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'source': './example_data/layout-parser-paper.pdf', 'page': 0}\n"
]
}
],
"source": [
"print(docs[0].metadata)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Lazy Load\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"6"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"page = []\n",
"for doc in loader.lazy_load():\n",
"    page.append(doc)\n",
"    if len(page) >= 10:\n",
"        # do some paged operation, e.g.\n",
"        # index.upsert(page)\n",
"\n",
"        page = []\n",
"len(page)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all `PyPDFLoader` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFLoader.html"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
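Read outside the JSON escaping, the lazy-load cell of this new notebook reduces to the loop below; a runnable sketch assuming the notebook's example PDF is available at the same relative path (the upsert call stays a commented placeholder, as in the notebook):

```python
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("./example_data/layout-parser-paper.pdf")

# Stream pages one at a time instead of materializing the whole PDF,
# flushing every 10 Documents so at most 10 are held in memory at once.
page = []
for doc in loader.lazy_load():
    page.append(doc)
    if len(page) >= 10:
        # do some paged operation here, e.g. index.upsert(page)
        page = []

print(len(page))  # pages left over after the last full batch
```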
@@ -7,7 +7,18 @@
"source": [
"# Recursive URL\n",
"\n",
"The `RecursiveUrlLoader` lets you recursively scrape all child links from a root URL and parse them into Documents."
"The `RecursiveUrlLoader` lets you recursively scrape all child links from a root URL and parse them into Documents.\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/web_loaders/recursive_url_loader/)|\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [RecursiveUrlLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.recursive_url_loader.RecursiveUrlLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ✅ |\n",
"### Loader features\n",
"| Source | Document Lazy Loading | Native Async Support |\n",
"| :---: | :---: | :---: |\n",
"| RecursiveUrlLoader | ✅ | ❌ |\n"
]
},
{
@@ -17,6 +28,12 @@
"source": [
"## Setup\n",
"\n",
"### Credentials\n",
"\n",
"No credentials are required to use the `RecursiveUrlLoader`.\n",
"\n",
"### Installation\n",
"\n",
"The `RecursiveUrlLoader` lives in the `langchain-community` package. There are no other required packages, though you will get richer default Document metadata if you have `beautifulsoup4` installed as well."
]
},
@@ -186,6 +203,50 @@
"That certainly looks like HTML that comes from the url https://docs.python.org/3.9/, which is what we expected. Let's now look at some variations we can make to our basic example that can be helpful in different situations. "
]
},
{
"cell_type": "markdown",
"id": "b17b7202",
"metadata": {},
"source": [
"## Lazy loading\n",
"\n",
"If we're loading a large number of Documents and our downstream operations can be done over subsets of all loaded Documents, we can lazily load our Documents one at a time to minimize our memory footprint:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b13e4d1",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/4j/2rz3865x6qg07tx43146py8h0000gn/T/ipykernel_73962/2110507528.py:6: XMLParsedAsHTMLWarning: It looks like you're parsing an XML document using an HTML parser. If this really is an HTML document (maybe it's XHTML?), you can ignore or filter this warning. If it's XML, you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the lxml package installed, and pass the keyword argument `features=\"xml\"` into the BeautifulSoup constructor.\n",
" soup = BeautifulSoup(html, \"lxml\")\n"
]
}
],
"source": [
"page = []\n",
"for doc in loader.lazy_load():\n",
"    page.append(doc)\n",
"    if len(page) >= 10:\n",
"        # do some paged operation, e.g.\n",
"        # index.upsert(page)\n",
"\n",
"        page = []"
]
},
{
"cell_type": "markdown",
"id": "fb039682",
"metadata": {},
"source": [
"In this example we never have more than 10 Documents loaded into memory at a time."
]
},
{
"cell_type": "markdown",
"id": "8f41cc89",
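The same paging idiom works for `RecursiveUrlLoader` itself; a minimal sketch, where the root URL follows the notebook's example and `max_depth=2` is an illustrative choice rather than something this diff sets:

```python
from langchain_community.document_loaders import RecursiveUrlLoader

loader = RecursiveUrlLoader("https://docs.python.org/3.9/", max_depth=2)

# Hold at most 10 Documents in memory; flush each full batch downstream.
page = []
for doc in loader.lazy_load():
    page.append(doc)
    if len(page) >= 10:
        # do some paged operation here, e.g. index.upsert(page)
        page = []
```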
@@ -256,50 +317,6 @@
"You can similarly pass in a `metadata_extractor` to customize how Document metadata is extracted from the HTTP response. See the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.recursive_url_loader.RecursiveUrlLoader.html) for more on this."
]
},
{
"cell_type": "markdown",
"id": "1dddbc94",
"metadata": {},
"source": [
"## Lazy loading\n",
"\n",
"If we're loading a large number of Documents and our downstream operations can be done over subsets of all loaded Documents, we can lazily load our Documents one at a time to minimize our memory footprint:"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "7d0114fc",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/4j/2rz3865x6qg07tx43146py8h0000gn/T/ipykernel_73962/2110507528.py:6: XMLParsedAsHTMLWarning: It looks like you're parsing an XML document using an HTML parser. If this really is an HTML document (maybe it's XHTML?), you can ignore or filter this warning. If it's XML, you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the lxml package installed, and pass the keyword argument `features=\"xml\"` into the BeautifulSoup constructor.\n",
" soup = BeautifulSoup(html, \"lxml\")\n"
]
}
],
"source": [
"page = []\n",
"for doc in loader.lazy_load():\n",
"    page.append(doc)\n",
"    if len(page) >= 10:\n",
"        # do some paged operation, e.g.\n",
"        # index.upsert(page)\n",
"\n",
"        page = []"
]
},
{
"cell_type": "markdown",
"id": "f88a7c2f-35df-4c3a-b238-f91be2674b96",
"metadata": {},
"source": [
"In this example we never have more than 10 Documents loaded into memory at a time."
]
},
{
"cell_type": "markdown",
"id": "3e4d1c8f",
@@ -7,20 +7,41 @@
"source": [
"# Unstructured\n",
"\n",
"This notebook covers how to use `Unstructured` package to load files of many types. `Unstructured` currently supports loading of text files, powerpoints, html, pdfs, images, and more.\n",
"This notebook covers how to use `Unstructured` [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders) to load files of many types. `Unstructured` currently supports loading of text files, powerpoints, html, pdfs, images, and more.\n",
"\n",
"Please see [this guide](/docs/integrations/providers/unstructured/) for more instructions on setting up Unstructured locally, including setting up required system dependencies."
"Please see [this guide](../../integrations/providers/unstructured.mdx) for more instructions on setting up Unstructured locally, including setting up required system dependencies.\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/file_loaders/unstructured/)|\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [UnstructuredLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_unstructured.document_loaders.UnstructuredLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/unstructured_api_reference.html) | ✅ | ❌ | ✅ |\n",
"### Loader features\n",
"| Source | Document Lazy Loading | Native Async Support |\n",
"| :---: | :---: | :---: |\n",
"| UnstructuredLoader | ✅ | ❌ |\n",
"\n",
"## Setup\n",
"\n",
"### Credentials\n",
"\n",
"By default, `langchain-unstructured` installs a smaller footprint that requires offloading of the partitioning logic to the Unstructured API, which requires an API key. If you use the local installation, you do not need an API key. To get your API key, head over to [this site](https://unstructured.io) and get an API key, and then set it in the cell below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "2886982e",
"metadata": {},
"outputs": [],
"source": [
"# Install package, compatible with API partitioning\n",
"%pip install --upgrade --quiet \"langchain-unstructured\""
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"UNSTRUCTURED_API_KEY\"] = getpass.getpass(\n",
"    \"Enter your Unstructured API key: \"\n",
")"
]
},
{
@@ -28,15 +49,32 @@
"id": "e75e2a6d",
"metadata": {},
"source": [
"### Local Partitioning (Optional)\n",
"### Installation\n",
"\n",
"By default, `langchain-unstructured` installs a smaller footprint that requires\n",
"offloading of the partitioning logic to the Unstructured API, which requires an `api_key`. For\n",
"partitioning using the API, refer to the Unstructured API section below.\n",
"#### Normal Installation\n",
"\n",
"If you would like to run the partitioning logic locally, you will need to install\n",
"a combination of system dependencies, as outlined in the \n",
"[Unstructured documentation here](https://docs.unstructured.io/open-source/installation/full-installation).\n",
"The following packages are required to run the rest of this notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d9de83b3",
"metadata": {},
"outputs": [],
"source": [
"# Install package, compatible with API partitioning\n",
"%pip install --upgrade --quiet langchain-unstructured unstructured-client unstructured \"unstructured[pdf]\" python-magic"
]
},
{
"cell_type": "markdown",
"id": "637eda35",
"metadata": {},
"source": [
"#### Installation for Local\n",
"\n",
"If you would like to run the partitioning logic locally, you will need to install a combination of system dependencies, as outlined in the [Unstructured documentation here](https://docs.unstructured.io/open-source/installation/full-installation).\n",
"\n",
"For example, on Macs you can install the required dependencies with:\n",
"\n",
@@ -48,7 +86,7 @@
"brew install libxml2 libxslt\n",
"```\n",
"\n",
"You can install the required `pip` dependencies with:\n",
"You can install the `pip` dependencies needed for local partitioning with:\n",
"\n",
"```bash\n",
"pip install \"langchain-unstructured[local]\"\n",
@@ -60,120 +98,117 @@
"id": "a9c1c775",
"metadata": {},
"source": [
"### Quickstart\n",
"## Initialization\n",
"\n",
"To simply load a file as a document, you can use the LangChain `DocumentLoader.load` \n",
"interface:"
"The `UnstructuredLoader` allows loading from a variety of different file types. To read all about the `unstructured` package please refer to their [documentation](https://docs.unstructured.io/open-source/introduction/overview). In this example, we show loading from both a text file and a PDF file."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "79d3e549",
"metadata": {},
"outputs": [],
"source": [
"from langchain_unstructured import UnstructuredLoader\n",
"\n",
"loader = UnstructuredLoader(\"./example_data/state_of_the_union.txt\")\n",
"file_paths = [\n",
"    \"./example_data/layout-parser-paper.pdf\",\n",
"    \"./example_data/state_of_the_union.txt\",\n",
"]\n",
"\n",
"docs = loader.load()"
"\n",
"loader = UnstructuredLoader(file_paths)"
]
},
{
"cell_type": "markdown",
"id": "b4ab0a79",
"id": "8b68dcab",
"metadata": {},
"source": [
"### Load list of files"
"## Load"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "092d9a0b",
"execution_count": 2,
"id": "8da59ef8",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO: NumExpr defaulting to 12 threads.\n",
"INFO: pikepdf C++ to Python logger bridge initialized\n"
]
},
{
"data": {
"text/plain": [
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}, page_content='1 2 0 2')"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = loader.load()\n",
"\n",
"docs[0]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "97f7aa1f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"whatsapp_chat.txt : 1/22/23, 6:30 PM - User 1: Hi! Im interested in your bag. Im offering $50. Let me know if you are in\n",
"state_of_the_union.txt : May God bless you all. May God protect our troops.\n"
"{'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}\n"
]
}
],
"source": [
"file_paths = [\n",
"    \"./example_data/whatsapp_chat.txt\",\n",
"    \"./example_data/state_of_the_union.txt\",\n",
"]\n",
"\n",
"loader = UnstructuredLoader(file_paths)\n",
"\n",
"docs = loader.load()\n",
"\n",
"print(docs[0].metadata.get(\"filename\"), \": \", docs[0].page_content[:100])\n",
"print(docs[-1].metadata.get(\"filename\"), \": \", docs[-1].page_content[:100])"
"print(docs[0].metadata)"
]
},
{
"cell_type": "markdown",
"id": "8de9ef16",
"id": "0d7f991b",
"metadata": {},
"source": [
"## PDF Example\n",
"\n",
"Processing PDF documents works exactly the same way. Unstructured detects the file type and extracts the same types of elements."
]
},
{
"cell_type": "markdown",
"id": "672733fd",
"metadata": {},
"source": [
"### Define a Partitioning Strategy\n",
"\n",
"Unstructured document loader allow users to pass in a `strategy` parameter that lets Unstructured\n",
"know how to partition pdf and other OCR'd documents. Currently supported strategies are `\"auto\"`,\n",
"`\"hi_res\"`, `\"ocr_only\"`, and `\"fast\"`. Learn more about the different strategies\n",
"[here](https://docs.unstructured.io/open-source/core-functionality/partitioning#partition-pdf). \n",
"\n",
"Not all document types have separate hi res and fast partitioning strategies. For those document types, the `strategy` kwarg is\n",
"ignored. In some cases, the high res strategy will fallback to fast if there is a dependency missing\n",
"(i.e. a model for document partitioning). You can see how to apply a strategy to an\n",
"`UnstructuredLoader` below."
"## Lazy Load"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "60685353",
"execution_count": 4,
"id": "b05604d2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 393.9), (16.34, 560.0), (36.34, 560.0), (36.34, 393.9)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'parent_id': '89565df026a24279aaea20dc08cedbec', 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'e9fa370aef7ee5c05744eb7bb7d9981b'}, page_content='2 v 8 4 3 5 1 . 3 0 1 2 : v i X r a'),\n",
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((157.62199999999999, 114.23496279999995), (157.62199999999999, 146.5141628), (457.7358962799999, 146.5141628), (457.7358962799999, 114.23496279999995)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'Title', 'element_id': 'bde0b230a1aa488e3ce837d33015181b'}, page_content='LayoutParser: A Unified Toolkit for Deep Learning Based Document Image Analysis'),\n",
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((134.809, 168.64029940800003), (134.809, 192.2517444), (480.5464199080001, 192.2517444), (480.5464199080001, 168.64029940800003)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'parent_id': 'bde0b230a1aa488e3ce837d33015181b', 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': '54700f902899f0c8c90488fa8d825bce'}, page_content='Zejiang Shen1 ((cid:0)), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain Lee4, Jacob Carlson3, and Weining Li5'),\n",
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((207.23000000000002, 202.57205439999996), (207.23000000000002, 311.8195408), (408.12676, 311.8195408), (408.12676, 202.57205439999996)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'parent_id': 'bde0b230a1aa488e3ce837d33015181b', 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'b650f5867bad9bb4e30384282c79bcfe'}, page_content='1 Allen Institute for AI shannons@allenai.org 2 Brown University ruochen zhang@brown.edu 3 Harvard University {melissadell,jacob carlson}@fas.harvard.edu 4 University of Washington bcgl@cs.washington.edu 5 University of Waterloo w422li@uwaterloo.ca'),\n",
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((162.779, 338.45008160000003), (162.779, 566.8455408), (454.0372021523199, 566.8455408), (454.0372021523199, 338.45008160000003)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'links': [{'text': ':// layout - parser . github . io', 'url': 'https://layout-parser.github.io', 'start_index': 1477}], 'page_number': 1, 'parent_id': 'bde0b230a1aa488e3ce837d33015181b', 'filetype': 'application/pdf', 'category': 'NarrativeText', 'element_id': 'cfc957c94fe63c8fd7c7f4bcb56e75a7'}, page_content='Abstract. Recent advances in document image analysis (DIA) have been primarily driven by the application of neural networks. Ideally, research outcomes could be easily deployed in production and extended for further investigation. However, various factors like loosely organized codebases and sophisticated model configurations complicate the easy reuse of im- portant innovations by a wide audience. Though there have been on-going efforts to improve reusability and simplify deep learning (DL) model development in disciplines like natural language processing and computer vision, none of them are optimized for challenges in the domain of DIA. This represents a major gap in the existing toolkit, as DIA is central to academic research across a wide range of disciplines in the social sciences and humanities. This paper introduces LayoutParser, an open-source library for streamlining the usage of DL in DIA research and applica- tions. The core LayoutParser library comes with a set of simple and intuitive interfaces for applying and customizing DL models for layout de- tection, character recognition, and many other document processing tasks. To promote extensibility, LayoutParser also incorporates a community platform for sharing both pre-trained models and full document digiti- zation pipelines. We demonstrate that LayoutParser is helpful for both lightweight and large-scale digitization pipelines in real-word use cases. The library is publicly available at https://layout-parser.github.io.')]"
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}, page_content='1 2 0 2')"
]
},
"execution_count": 6,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_unstructured import UnstructuredLoader\n",
"pages = []\n",
"for doc in loader.lazy_load():\n",
"    pages.append(doc)\n",
"\n",
"loader = UnstructuredLoader(\"./example_data/layout-parser-paper.pdf\", strategy=\"fast\")\n",
"\n",
"docs = loader.load()\n",
"\n",
"docs[5:10]"
"pages[0]"
]
},
{
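Stripped of JSON escaping, the reworked initialization and lazy-load cells amount to the following; a sketch that assumes `langchain-unstructured` is installed (locally or API-backed) and that the notebook's two example files exist:

```python
from langchain_unstructured import UnstructuredLoader

# One loader can fan out over several files of different types.
loader = UnstructuredLoader(
    [
        "./example_data/layout-parser-paper.pdf",
        "./example_data/state_of_the_union.txt",
    ]
)

# Elements stream one at a time; nothing forces the full set into memory.
pages = []
for doc in loader.lazy_load():
    pages.append(doc)

# Each element carries rich metadata such as 'category' and 'page_number'.
print(pages[0].metadata.get("category"), pages[0].page_content[:80])
```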
@@ -242,23 +277,6 @@
"if you’d like to self-host the Unstructured API or run it locally."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e5fde16",
"metadata": {},
"outputs": [],
"source": [
"# Install package\n",
"%pip install \"langchain-unstructured\"\n",
"%pip install \"unstructured-client\"\n",
"\n",
"# Set API key\n",
"import os\n",
"\n",
"os.environ[\"UNSTRUCTURED_API_KEY\"] = \"FAKE_API_KEY\""
]
},
{
"cell_type": "code",
"execution_count": 9,
@@ -496,6 +514,16 @@
"print(\"Number of LangChain documents:\", len(docs))\n",
"print(\"Length of text in the document:\", len(docs[0].page_content))"
]
},
{
"cell_type": "markdown",
"id": "ce01aa40",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all `UnstructuredLoader` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_unstructured.document_loaders.UnstructuredLoader.html"
]
}
],
"metadata": {
@@ -514,7 +542,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.11.9"
}
},
"nbformat": 4,
File diff suppressed because one or more lines are too long
@@ -243,7 +243,7 @@
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all `GmailToolkit` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.slack.toolkit.SlackToolkit.html)."
"For detailed documentation of all `GmailToolkit` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.gmail.toolkit.GmailToolkit.html)."
]
}
],
File diff suppressed because one or more lines are too long
@@ -34,7 +34,7 @@
},
"outputs": [],
"source": [
"%pip install -qU langchain-qdrant 'qdrant-client[fastembed]'"
"%pip install -qU langchain-qdrant"
]
},
{
@@ -628,7 +628,7 @@
"id": "525e3582",
"metadata": {},
"source": [
"For a full list of all the search functions available for a `QdrantVectorStore`, read the [API reference](https://api.python.langchain.com/en/latest/vectorstores/langchain_qdrant.vectorstores.Qdrant.html)\n",
"For a full list of all the search functions available for a `QdrantVectorStore`, read the [API reference](https://api.python.langchain.com/en/latest/qdrant/langchain_qdrant.qdrant.QdrantVectorStore.html)\n",
"\n",
"### Metadata filtering\n",
"\n",
@@ -814,7 +814,7 @@
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all `QdrantVectorStore` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/vectorstores/langchain_qdrant.vectorstores.Qdrant.html"
"For detailed documentation of all `QdrantVectorStore` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/qdrant/langchain_qdrant.qdrant.QdrantVectorStore.html"
]
}
],
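These hunks retarget the docs at the `QdrantVectorStore` class in `langchain-qdrant` and drop the `fastembed` extra from the install line. A minimal sketch of that class with an in-memory instance; the sample texts and collection name are our own:

```python
from langchain_openai import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore

# location=":memory:" spins up an ephemeral in-process Qdrant instance,
# so this sketch needs no running server and no fastembed extra.
store = QdrantVectorStore.from_texts(
    ["Qdrant stores vectors.", "LangChain wraps many vector stores."],
    OpenAIEmbeddings(),
    location=":memory:",
    collection_name="demo",
)

print(store.similarity_search("vector database", k=1))
```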
@@ -43,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -87,18 +87,18 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n",
" Document(page_content='Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \\n\\nIn this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \\n\\nLet each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. \\n\\nPlease rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \\n\\nThroughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \\n\\nThey keep moving. \\n\\nAnd the costs and the threats to America and the world keep rising. \\n\\nThat’s why the NATO Alliance was created to secure peace and stability in Europe after World War 2. \\n\\nThe United States is a member along with 29 other nations. \\n\\nIt matters. American diplomacy matters. American resolve matters.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n",
" Document(page_content='Putin’s latest attack on Ukraine was premeditated and unprovoked. \\n\\nHe rejected repeated efforts at diplomacy. \\n\\nHe thought the West and NATO wouldn’t respond. And he thought he could divide us at home. Putin was wrong. We were ready. Here is what we did. \\n\\nWe prepared extensively and carefully. \\n\\nWe spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin. \\n\\nI spent countless hours unifying our European allies. We shared with the world in advance what we knew Putin was planning and precisely how he would try to falsely justify his aggression. \\n\\nWe countered Russia’s lies with truth. \\n\\nAnd now that he has acted the free world is holding him accountable. \\n\\nAlong with twenty-seven members of the European Union including France, Germany, Italy, as well as countries like the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and many others, even Switzerland.', metadata={'source': '../../how_to/state_of_the_union.txt'})]"
"[Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.'),\n",
" Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \\n\\nIn this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \\n\\nLet each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. \\n\\nPlease rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \\n\\nThroughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \\n\\nThey keep moving. \\n\\nAnd the costs and the threats to America and the world keep rising. \\n\\nThat’s why the NATO Alliance was created to secure peace and stability in Europe after World War 2. \\n\\nThe United States is a member along with 29 other nations. \\n\\nIt matters. American diplomacy matters. American resolve matters.'),\n",
" Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='Putin’s latest attack on Ukraine was premeditated and unprovoked. \\n\\nHe rejected repeated efforts at diplomacy. \\n\\nHe thought the West and NATO wouldn’t respond. And he thought he could divide us at home. Putin was wrong. We were ready. Here is what we did. \\n\\nWe prepared extensively and carefully. \\n\\nWe spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin. \\n\\nI spent countless hours unifying our European allies. We shared with the world in advance what we knew Putin was planning and precisely how he would try to falsely justify his aggression. \\n\\nWe countered Russia’s lies with truth. \\n\\nAnd now that he has acted the free world is holding him accountable. \\n\\nAlong with twenty-seven members of the European Union including France, Germany, Italy, as well as countries like the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and many others, even Switzerland.')]"
]
},
"execution_count": 17,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -126,20 +126,20 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['82b3781b-817c-4a4d-8f8b-cbd07c1d005a',\n",
" 'a20e0a49-29d8-465e-8eae-0bc5ac3d24dc',\n",
" 'c19f4108-b652-4890-873e-d4cad00f1b1a',\n",
" '23d1fcf9-6ee1-4638-8c70-0f5030762301',\n",
" '2d775784-825d-4627-97a3-fee4539d8f58']"
"['247aa3ae-9be9-43e2-98e4-48f94f920749',\n",
" 'c4dfc886-0a2d-497c-b2b7-d923a5cb3832',\n",
" '0350761d-ca68-414e-b8db-7eca78cb0d18',\n",
" '902fe5eb-8543-486a-bd5f-79858a7a8af1',\n",
" '28875612-c672-4de4-b40a-3b658c72036a']"
]
},
"execution_count": 35,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -154,33 +154,116 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"store"
"## Querying\n",
"\n",
"The database can be queried using a vector or a text prompt.\n",
"If a text prompt is used, it's first converted into an embedding and then queried.\n",
"\n",
"The `k` parameter specifies how many results to return from the query."
]
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['fe1f7a7b-42e2-4828-88b0-5b449c49fe86',\n",
" '154a0021-a99c-427e-befb-f0b2b18ed83c',\n",
" 'a8218226-18a9-4ab5-ade5-5a71b19a7831',\n",
" '62b7ef97-83bf-4b6d-8c93-f471796244dc',\n",
" 'ab43fd2e-13df-46d4-8cf7-e6e16506e4bb',\n",
" '6841e7f9-adaa-41d9-af3d-0813ee52443f',\n",
" '45dda5a1-f0c1-4ac7-9acb-50253e4ee493']"
"[Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='If you travel 20 miles east of Columbus, Ohio, you’ll find 1,000 empty acres of land. \\n\\nIt won’t look like much, but if you stop and look closely, you’ll see a “Field of dreams,” the ground on which America’s future will be built. \\n\\nThis is where Intel, the American company that helped build Silicon Valley, is going to build its $20 billion semiconductor “mega site”. \\n\\nUp to eight state-of-the-art factories in one place. 10,000 new good-paying jobs. \\n\\nSome of the most sophisticated manufacturing in the world to make computer chips the size of a fingertip that power the world and our everyday lives. \\n\\nSmartphones. The Internet. Technology we have yet to invent. \\n\\nBut that’s just the beginning. \\n\\nIntel’s CEO, Pat Gelsinger, who is here tonight, told me they are ready to increase their investment from \\n$20 billion to $100 billion. \\n\\nThat would be one of the biggest investments in manufacturing in American history. \\n\\nAnd all they’re waiting for is for you to pass this bill.'),\n",
" Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='So let’s not wait any longer. Send it to my desk. I’ll sign it. \\n\\nAnd we will really take off. \\n\\nAnd Intel is not alone. \\n\\nThere’s something happening in America. \\n\\nJust look around and you’ll see an amazing story. \\n\\nThe rebirth of the pride that comes from stamping products “Made In America.” The revitalization of American manufacturing. \\n\\nCompanies are choosing to build new factories here, when just a few years ago, they would have built them overseas. \\n\\nThat’s what is happening. Ford is investing $11 billion to build electric vehicles, creating 11,000 jobs across the country. \\n\\nGM is making the largest investment in its history—$7 billion to build electric vehicles, creating 4,000 jobs in Michigan. \\n\\nAll told, we created 369,000 new manufacturing jobs in America just last year. \\n\\nPowered by people I’ve met like JoJo Burgess, from generations of union steelworkers from Pittsburgh, who’s here with us tonight.'),\n",
" Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='When we use taxpayer dollars to rebuild America – we are going to Buy American: buy American products to support American jobs. \\n\\nThe federal government spends about $600 Billion a year to keep the country safe and secure. \\n\\nThere’s been a law on the books for almost a century \\nto make sure taxpayers’ dollars support American jobs and businesses. \\n\\nEvery Administration says they’ll do it, but we are actually doing it. \\n\\nWe will buy American to make sure everything from the deck of an aircraft carrier to the steel on highway guardrails are made in America. \\n\\nBut to compete for the best jobs of the future, we also need to level the playing field with China and other competitors. \\n\\nThat’s why it is so important to pass the Bipartisan Innovation Act sitting in Congress that will make record investments in emerging technologies and American manufacturing. \\n\\nLet me give you one example of why it’s so important to pass it.'),\n",
" Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='Last month, I announced our plan to supercharge \\nthe Cancer Moonshot that President Obama asked me to lead six years ago. \\n\\nOur goal is to cut the cancer death rate by at least 50% over the next 25 years, turn more cancers from death sentences into treatable diseases. \\n\\nMore support for patients and families. \\n\\nTo get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health. \\n\\nIt’s based on DARPA—the Defense Department project that led to the Internet, GPS, and so much more. \\n\\nARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimer’s, diabetes, and more. \\n\\nA unity agenda for the nation. \\n\\nWe can do this. \\n\\nMy fellow Americans—tonight , we have gathered in a sacred space—the citadel of our democracy. \\n\\nIn this Capitol, generation after generation, Americans have debated great questions amid great strife, and have done great things. \\n\\nWe have fought for freedom, expanded liberty, defeated totalitarianism and terror.'),\n",
" Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='And based on the projections, more of the country will reach that point across the next couple of weeks. \\n\\nThanks to the progress we have made this past year, COVID-19 need no longer control our lives. \\n\\nI know some are talking about “living with COVID-19”. Tonight – I say that we will never just accept living with COVID-19. \\n\\nWe will continue to combat the virus as we do other diseases. And because this is a virus that mutates and spreads, we will stay on guard. \\n\\nHere are four common sense steps as we move forward safely. \\n\\nFirst, stay protected with vaccines and treatments. We know how incredibly effective vaccines are. If you’re vaccinated and boosted you have the highest degree of protection. \\n\\nWe will never give up on vaccinating more Americans. Now, I know parents with kids under 5 are eager to see a vaccine authorized for their children. \\n\\nThe scientists are working hard to get that done and we’ll be ready with plenty of vaccines when they do.')]"
]
},
"execution_count": 27,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"store.add_texts(\n",
"result = store.similarity_search(\"technology\", k=5)\n",
"result"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Querying with score\n",
"\n",
"The score of the query can be included for every result. \n",
"\n",
"> The score returned in the query requests is a normalized value between 0 and 1, where 1 indicates the highest similarity and 0 the lowest regardless of the similarity function used. For more information look at the [docs](https://upstash.com/docs/vector/overall/features#vector-similarity-functions)."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'source': '../../how_to/state_of_the_union.txt'} - 0.8968438\n",
"{'source': '../../how_to/state_of_the_union.txt'} - 0.8895128\n",
"{'source': '../../how_to/state_of_the_union.txt'} - 0.88626665\n",
"{'source': '../../how_to/state_of_the_union.txt'} - 0.88538057\n",
"{'source': '../../how_to/state_of_the_union.txt'} - 0.88432854\n"
]
}
],
"source": [
"result = store.similarity_search_with_score(\"technology\", k=5)\n",
"\n",
"for doc, score in result:\n",
"    print(f\"{doc.metadata} - {score}\")"
]
},
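Putting the new querying cells together outside the notebook JSON: a minimal sketch, assuming the `UPSTASH_VECTOR_REST_URL` and `UPSTASH_VECTOR_REST_TOKEN` environment variables are set as the notebook's setup requires:

```python
from langchain_community.vectorstores.upstash import UpstashVectorStore
from langchain_openai import OpenAIEmbeddings

# Credentials are picked up from the UPSTASH_VECTOR_REST_* env vars.
store = UpstashVectorStore(embedding=OpenAIEmbeddings())

# Scores are normalized to [0, 1]; 1 means most similar, regardless of
# the similarity function configured on the index.
for doc, score in store.similarity_search_with_score("technology", k=5):
    print(f"{doc.metadata} - {score}")
```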
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Namespaces\n",
"\n",
"Namespaces can be used to separate different types of documents. This can increase the efficiency of the queries since the search space is reduced. When no namespace is provided, the default namespace is used."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"store_books = UpstashVectorStore(embedding=embeddings, namespace=\"books\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['928a5f12-900f-40b7-9406-3861741cc9d6',\n",
" '4908670e-0b9c-455b-96b8-e0f83bc59204',\n",
" '7083ff98-d900-4435-a67c-d9690fc555ba',\n",
" 'b910f9b1-2be0-4e0a-8b6c-93ba9b367df5',\n",
" '7c40e950-4d2b-4293-9fb8-623a49e72607',\n",
" '25a70e79-4905-42af-8b08-09f13bd48512',\n",
" '695e2bcf-23d9-44d4-af26-a7b554c0c375']"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"store_books.add_texts(\n",
"    [\n",
"        \"A timeless tale set in the Jazz Age, this novel delves into the lives of affluent socialites, their pursuits of wealth, love, and the elusive American Dream. Amidst extravagant parties and glittering opulence, the story unravels the complexities of desire, ambition, and the consequences of obsession.\",\n",
"        \"Set in a small Southern town during the 1930s, this novel explores themes of racial injustice, moral growth, and empathy through the eyes of a young girl. It follows her father, a principled lawyer, as he defends a black man accused of assaulting a white woman, confronting deep-seated prejudices and challenging societal norms along the way.\",\n",
@@ -202,63 +285,26 @@
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Querying\n",
"\n",
"The database can be queried using a vector or a text prompt.\n",
"If a text prompt is used, it's first converted into embedding and then queried.\n",
"\n",
"The `k` parameter specifies how many results to return from the query."
]
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='And my report is this: the State of the Union is strong—because you, the American people, are strong. \\n\\nWe are stronger today than we were a year ago. \\n\\nAnd we will be stronger a year from now than we are today. \\n\\nNow is our moment to meet and overcome the challenges of our time. \\n\\nAnd we will, as one people. \\n\\nOne America. \\n\\nThe United States of America. \\n\\nMay God bless you all. May God protect our troops.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n",
" Document(page_content='And built the strongest, freest, and most prosperous nation the world has ever known. \\n\\nNow is the hour. \\n\\nOur moment of responsibility. \\n\\nOur test of resolve and conscience, of history itself. \\n\\nIt is in this moment that our character is formed. Our purpose is found. Our future is forged. \\n\\nWell I know this nation. \\n\\nWe will meet the test. \\n\\nTo protect freedom and liberty, to expand fairness and opportunity. \\n\\nWe will save democracy. \\n\\nAs hard as these times have been, I am more optimistic about America today than I have been my whole life. \\n\\nBecause I see the future that is within our grasp. \\n\\nBecause I know there is simply nothing beyond our capacity. \\n\\nWe are the only nation on Earth that has always turned every crisis we have faced into an opportunity. \\n\\nThe only nation that can be defined by a single word: possibilities. \\n\\nSo on this night, in our 245th year as a nation, I have come to report on the State of the Union.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n",
" Document(page_content='Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \\n\\nIn this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \\n\\nLet each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. \\n\\nPlease rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \\n\\nThroughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \\n\\nThey keep moving. \\n\\nAnd the costs and the threats to America and the world keep rising. \\n\\nThat’s why the NATO Alliance was created to secure peace and stability in Europe after World War 2. \\n\\nThe United States is a member along with 29 other nations. \\n\\nIt matters. American diplomacy matters. American resolve matters.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n",
" Document(page_content='When we use taxpayer dollars to rebuild America – we are going to Buy American: buy American products to support American jobs. \\n\\nThe federal government spends about $600 Billion a year to keep the country safe and secure. \\n\\nThere’s been a law on the books for almost a century \\nto make sure taxpayers’ dollars support American jobs and businesses. \\n\\nEvery Administration says they’ll do it, but we are actually doing it. \\n\\nWe will buy American to make sure everything from the deck of an aircraft carrier to the steel on highway guardrails are made in America. \\n\\nBut to compete for the best jobs of the future, we also need to level the playing field with China and other competitors. \\n\\nThat’s why it is so important to pass the Bipartisan Innovation Act sitting in Congress that will make record investments in emerging technologies and American manufacturing. \\n\\nLet me give you one example of why it’s so important to pass it.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n",
|
||||
" Document(page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.', metadata={'source': '../../how_to/state_of_the_union.txt'})]"
|
||||
"[Document(metadata={'title': '1984', 'author': 'George Orwell', 'year': 1949}, page_content='A chilling portrayal of a totalitarian regime, this dystopian novel offers a bleak vision of a future world dominated by surveillance, propaganda, and thought control. Through the eyes of a disillusioned protagonist, it explores the dangers of totalitarianism and the erosion of individual freedom in a society ruled by fear and oppression.'),\n",
|
||||
" Document(metadata={'title': 'The Road', 'author': 'Cormac McCarthy', 'year': 2006}, page_content='Set in a future world devastated by environmental collapse, this novel follows a group of survivors as they struggle to survive in a harsh, unforgiving landscape. Amidst scarcity and desperation, they must confront moral dilemmas and question the nature of humanity itself.'),\n",
|
||||
" Document(metadata={'title': 'Brave New World', 'author': 'Aldous Huxley', 'year': 1932}, page_content='In a society where emotion is suppressed and individuality is forbidden, one man dares to defy the oppressive regime. Through acts of rebellion and forbidden love, he discovers the power of human connection and the importance of free will.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 29,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = store.similarity_search(\"The United States of America\", k=5)\n",
|
||||
"result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A chilling portrayal of a totalitarian regime, this dystopian novel offers a bleak vision of a future world dominated by surveillance, propaganda, and thought control. Through the eyes of a disillusioned protagonist, it explores the dangers of totalitarianism and the erosion of individual freedom in a society ruled by fear and oppression.', metadata={'title': '1984', 'author': 'George Orwell', 'year': 1949}),\n",
|
||||
" Document(page_content='Narrated by a disillusioned teenager, this novel follows his journey of self-discovery and rebellion against the phoniness of the adult world. Through a series of encounters and reflections, it explores themes of alienation, identity, and the search for authenticity in a society marked by conformity and hypocrisy.', metadata={'title': 'The Catcher in the Rye', 'author': 'J.D. Salinger', 'year': 1951}),\n",
|
||||
" Document(page_content='Set in the English countryside during the early 19th century, this novel follows the lives of the Bennet sisters as they navigate the intricate social hierarchy of their time. Focusing on themes of marriage, class, and societal expectations, the story offers a witty and insightful commentary on the complexities of romantic relationships and the pursuit of happiness.', metadata={'title': 'Pride and Prejudice', 'author': 'Jane Austen', 'year': 1813})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = store.similarity_search(\"dystopia\", k=3, filter=\"year < 2000\")\n",
|
||||
"result = store_books.similarity_search(\"dystopia\", k=3)\n",
|
||||
"result"
|
||||
]
|
||||
},
|
||||
@@ -266,35 +312,63 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Querying with score\n",
|
||||
"## Metadata Filtering\n",
|
||||
"\n",
|
||||
"The score of the query can be included for every result. \n",
|
||||
"\n",
|
||||
"> The score returned in the query requests is a normalized value between 0 and 1, where 1 indicates the highest similarity and 0 the lowest regardless of the similarity function used. For more information look at the [docs](https://upstash.com/docs/vector/overall/features#vector-similarity-functions)."
|
||||
"Metadata can be used to filter the results of a query. You can refer to the [docs](https://upstash.com/docs/vector/features/filtering) to see more complex ways of filtering."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': '../../how_to/state_of_the_union.txt'} - 0.87391514\n",
|
||||
"{'source': '../../how_to/state_of_the_union.txt'} - 0.8549463\n",
|
||||
"{'source': '../../how_to/state_of_the_union.txt'} - 0.847913\n",
|
||||
"{'source': '../../how_to/state_of_the_union.txt'} - 0.84328896\n",
|
||||
"{'source': '../../how_to/state_of_the_union.txt'} - 0.832347\n"
|
||||
]
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(metadata={'title': '1984', 'author': 'George Orwell', 'year': 1949}, page_content='A chilling portrayal of a totalitarian regime, this dystopian novel offers a bleak vision of a future world dominated by surveillance, propaganda, and thought control. Through the eyes of a disillusioned protagonist, it explores the dangers of totalitarianism and the erosion of individual freedom in a society ruled by fear and oppression.'),\n",
|
||||
" Document(metadata={'title': 'Brave New World', 'author': 'Aldous Huxley', 'year': 1932}, page_content='In a society where emotion is suppressed and individuality is forbidden, one man dares to defy the oppressive regime. Through acts of rebellion and forbidden love, he discovers the power of human connection and the importance of free will.'),\n",
|
||||
" Document(metadata={'title': 'The Catcher in the Rye', 'author': 'J.D. Salinger', 'year': 1951}, page_content='Narrated by a disillusioned teenager, this novel follows his journey of self-discovery and rebellion against the phoniness of the adult world. Through a series of encounters and reflections, it explores themes of alienation, identity, and the search for authenticity in a society marked by conformity and hypocrisy.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = store.similarity_search_with_score(\"The United States of America\", k=5)\n",
|
||||
"result = store_books.similarity_search(\"dystopia\", k=3, filter=\"year < 2000\")\n",
|
||||
"result"
|
||||
]
|
||||
},
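For more complex conditions, the filter string supports boolean operators and comparisons, per the Upstash filtering docs linked above. A hedged sketch combining two conditions on the fields from the books example:

```python
# Compound filter (illustrative; see the Upstash filtering docs for the
# full expression syntax).
result = store_books.similarity_search(
    "dystopia",
    k=3,
    filter="year < 2000 AND author = 'George Orwell'",
)
```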
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Getting info about vector database\n",
|
||||
"\n",
|
||||
"for doc, score in result:\n",
|
||||
" print(f\"{doc.metadata} - {score}\")"
|
||||
"You can get information about your database like the distance metric dimension using the info function.\n",
|
||||
"\n",
|
||||
"> When an insert happens, the database an indexing takes place. While this is happening new vectors can not be queried. `pendingVectorCount` represents the number of vector that are currently being indexed. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"InfoResult(vector_count=49, pending_vector_count=0, index_size=2978163, dimension=1536, similarity_function='COSINE', namespaces={'': NamespaceInfo(vector_count=42, pending_vector_count=0), 'books': NamespaceInfo(vector_count=7, pending_vector_count=0)})"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"store.info()"
|
||||
]
|
||||
},
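If you need newly inserted vectors to be queryable before proceeding, one option is to poll `info()` until indexing settles. A small sketch using the `pending_vector_count` field shown above:

```python
import time

# Wait for the index to finish processing recent inserts (sketch).
while store.info().pending_vector_count > 0:
    time.sleep(1)
```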
|
||||
{
|
||||
@@ -308,7 +382,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -326,42 +400,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"store.delete(delete_all=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Getting info about vector database\n",
|
||||
"\n",
|
||||
"You can get information about your database like the distance metric dimension using the info function.\n",
|
||||
"\n",
|
||||
"> When an insert happens, the database an indexing takes place. While this is happening new vectors can not be queried. `pendingVectorCount` represents the number of vector that are currently being indexed. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"InfoResult(vector_count=42, pending_vector_count=0, index_size=6470, dimension=384, similarity_function='COSINE')"
|
||||
]
|
||||
},
|
||||
"execution_count": 36,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"store.info()"
|
||||
"store.delete(delete_all=True)\n",
|
||||
"store_books.delete(delete_all=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -381,7 +425,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
"version": "3.12.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -71,11 +71,11 @@
|
||||
"from langchain_anthropic import ChatAnthropic\n",
|
||||
"from langchain_community.tools.tavily_search import TavilySearchResults\n",
|
||||
"from langchain_core.messages import HumanMessage\n",
|
||||
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
|
||||
"from langgraph.checkpoint.memory import MemorySaver\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"# Create the agent\n",
|
||||
"memory = SqliteSaver.from_conn_string(\":memory:\")\n",
|
||||
"memory = MemorySaver()\n",
|
||||
"model = ChatAnthropic(model_name=\"claude-3-sonnet-20240229\")\n",
|
||||
"search = TavilySearchResults(max_results=2)\n",
|
||||
"tools = [search]\n",
|
||||
@@ -121,7 +121,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -U langchain-community langgraph langchain-anthropic tavily-python"
|
||||
"%pip install -U langchain-community langgraph langchain-anthropic tavily-python langgraph-checkpoint-sqlite"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -606,9 +606,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
|
||||
"from langgraph.checkpoint.memory import MemorySaver\n",
|
||||
"\n",
|
||||
"memory = SqliteSaver.from_conn_string(\":memory:\")"
|
||||
"memory = MemorySaver()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -857,9 +857,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
|
||||
"from langgraph.checkpoint.memory import MemorySaver\n",
|
||||
"\n",
|
||||
"memory = SqliteSaver.from_conn_string(\":memory:\")\n",
|
||||
"memory = MemorySaver()\n",
|
||||
"\n",
|
||||
"agent_executor = create_react_agent(llm, tools, checkpointer=memory)"
|
||||
]
|
||||
@@ -1012,20 +1012,15 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import bs4\n",
|
||||
"from langchain.agents import AgentExecutor, create_tool_calling_agent\n",
|
||||
"from langchain.tools.retriever import create_retriever_tool\n",
|
||||
"from langchain_chroma import Chroma\n",
|
||||
"from langchain_community.chat_message_histories import ChatMessageHistory\n",
|
||||
"from langchain_community.document_loaders import WebBaseLoader\n",
|
||||
"from langchain_core.chat_history import BaseChatMessageHistory\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
|
||||
"from langchain_core.runnables.history import RunnableWithMessageHistory\n",
|
||||
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
|
||||
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
|
||||
"from langgraph.checkpoint.memory import MemorySaver\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"memory = SqliteSaver.from_conn_string(\":memory:\")\n",
|
||||
"memory = MemorySaver()\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
|
||||
@@ -54,12 +54,9 @@
|
||||
"id": "00df631d-5121-4918-94aa-b88acce9b769",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"import { ColumnContainer, Column } from \"@theme/Columns\";\n",
|
||||
"## Legacy\n",
|
||||
"\n",
|
||||
"<ColumnContainer>\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### Legacy\n"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -111,12 +108,11 @@
|
||||
"id": "f8e36b0e-c7dc-4130-a51b-189d4b756c7f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</Column>\n",
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"## LCEL\n",
|
||||
"\n",
|
||||
"#### LCEL\n",
|
||||
"\n"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -174,10 +170,6 @@
|
||||
"id": "6b386ce6-895e-442c-88f3-7bec0ab9f401",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"</Column>\n",
|
||||
"</ColumnContainer>\n",
|
||||
"\n",
|
||||
"The above example uses the same `history` for all sessions. The example below shows how to use a different chat history for each session."
|
||||
]
|
||||
},
|
||||
@@ -230,6 +222,8 @@
|
||||
"id": "b2717810",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
"See [this tutorial](/docs/tutorials/chatbot) for a more end-to-end guide on building with [`RunnableWithMessageHistory`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html).\n",
|
||||
|
||||
@@ -83,13 +83,9 @@
|
||||
"id": "8bc06416",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"import { ColumnContainer, Column } from \"@theme/Columns\";\n",
|
||||
"## Legacy\n",
|
||||
"\n",
|
||||
"<ColumnContainer>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### Legacy"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -165,12 +161,11 @@
|
||||
"id": "43a8a23c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</Column>\n",
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"## LCEL\n",
|
||||
"\n",
|
||||
"#### LCEL\n",
|
||||
"\n"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -253,9 +248,7 @@
|
||||
"id": "b2717810",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</Column>\n",
|
||||
"\n",
|
||||
"</ColumnContainer>\n",
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
@@ -263,6 +256,14 @@
|
||||
"\n",
|
||||
"Next, check out the [LCEL conceptual docs](/docs/concepts/#langchain-expression-language-lcel) for more background information."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7bfc38bd-0ff8-40ee-83a3-9d7553364fd7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -2,33 +2,48 @@
|
||||
sidebar_position: 1
|
||||
---
|
||||
|
||||
# How to migrate chains to LCEL
|
||||
# How to migrate from v0.0 chains
|
||||
|
||||
:::info Prerequisites
|
||||
|
||||
This guide assumes familiarity with the following concepts:
|
||||
- [LangChain Expression Language](/docs/concepts#langchain-expression-language-lcel)
|
||||
|
||||
- [LangGraph](https://langchain-ai.github.io/langgraph/)
|
||||
:::
|
||||
|
||||
LCEL is designed to streamline the process of building useful apps with LLMs and combining related components. It does this by providing:
|
||||
LangChain maintains a number of legacy abstractions. Many of these can be reimplemented via short combinations of LCEL and LangGraph primitives.
|
||||
|
||||
### LCEL
|
||||
[LCEL](/docs/concepts/#langchain-expression-language-lcel) is designed to streamline the process of building useful apps with LLMs and combining related components. It does this by providing:
|
||||
|
||||
1. **A unified interface**: Every LCEL object implements the `Runnable` interface, which defines a common set of invocation methods (`invoke`, `batch`, `stream`, `ainvoke`, ...). This makes it possible to also automatically and consistently support useful operations like streaming of intermediate steps and batching, since every chain composed of LCEL objects is itself an LCEL object.
|
||||
2. **Composition primitives**: LCEL provides a number of primitives that make it easy to compose chains, parallelize components, add fallbacks, dynamically configure chain internals, and more.
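A minimal sketch of both points (the model name and prompt are illustrative; assumes `langchain-openai` is installed and `OPENAI_API_KEY` is set):

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Each component is a Runnable, and so is the composed chain.
chain = (
    ChatPromptTemplate.from_template("Tell me a fact about {topic}")
    | ChatOpenAI(model="gpt-4o-mini")
    | StrOutputParser()
)

chain.invoke({"topic": "otters"})  # single call
chain.batch([{"topic": "otters"}, {"topic": "owls"}])  # batched calls
for chunk in chain.stream({"topic": "otters"}):  # token streaming
    print(chunk, end="")
```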
|
||||
|
||||
LangChain maintains a number of legacy abstractions. Many of these can be reimplemented via short combinations of LCEL primitives. Doing so confers some general advantages:
|
||||
### LangGraph
|
||||
[LangGraph](https://langchain-ai.github.io/langgraph/), built on top of LCEL, allows for performant orchestration of application components while maintaining concise and readable code. It includes built-in persistence, support for cycles, and prioritizes controllability.
|
||||
If LCEL grows unwieldy for larger or more complex chains, they may benefit from a LangGraph implementation.
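A minimal sketch of the persistence point (the model name, tools, and thread id are illustrative; assumes `langgraph` and `langchain-openai` are installed):

```python
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent

agent = create_react_agent(
    ChatOpenAI(model="gpt-4o-mini"), tools=[], checkpointer=MemorySaver()
)

# The thread_id keys the stored chat history; later calls with the same
# id continue the conversation.
config = {"configurable": {"thread_id": "example-thread"}}
agent.invoke({"messages": [("user", "Hi, I'm Bob.")]}, config)
agent.invoke({"messages": [("user", "What's my name?")]}, config)
```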
|
||||
|
||||
### Advantages
|
||||
Using these frameworks for existing v0.0 chains confers some advantages:
|
||||
|
||||
- The resulting chains typically implement the full `Runnable` interface, including streaming and asynchronous support where appropriate;
|
||||
- The chains may be more easily extended or modified;
|
||||
- The parameters of the chain (e.g., prompts) are typically surfaced for easier customization, whereas previous versions tended to be subclasses with opaque parameters and internals.
|
||||
- If using LangGraph, the chain supports built-in persistence, allowing for conversational experiences via a "memory" of the chat history.
|
||||
- If using LangGraph, the steps of the chain can be streamed, allowing for greater control and customizability.
|
||||
|
||||
The LCEL implementations can be slightly more verbose, but there are significant benefits in transparency and customizability.
|
||||
|
||||
The below pages assist with migration from various specific chains to LCEL:
|
||||
The below pages assist with migration from various specific chains to LCEL and LangGraph:
|
||||
|
||||
- [LLMChain](/docs/versions/migrating_chains/llm_chain)
|
||||
- [ConversationChain](/docs/versions/migrating_chains/conversation_chain)
|
||||
- [RetrievalQA](/docs/versions/migrating_chains/retrieval_qa)
|
||||
- [ConversationalRetrievalChain](/docs/versions/migrating_chains/conversation_retrieval_chain)
|
||||
- [StuffDocumentsChain](/docs/versions/migrating_chains/stuff_docs_chain)
|
||||
- [MapReduceDocumentsChain](/docs/versions/migrating_chains/map_reduce_chain)
|
||||
- [MapRerankDocumentsChain](/docs/versions/migrating_chains/map_rerank_docs_chain)
|
||||
- [RefineDocumentsChain](/docs/versions/migrating_chains/refine_docs_chain)
|
||||
- [LLMRouterChain](/docs/versions/migrating_chains/llm_router_chain)
|
||||
- [MultiPromptChain](/docs/versions/migrating_chains/multi_prompt_chain)
|
||||
|
||||
Check out the [LCEL conceptual docs](/docs/concepts/#langchain-expression-language-lcel) for more background information.
|
||||
Check out the [LCEL conceptual docs](/docs/concepts/#langchain-expression-language-lcel) and [LangGraph docs](https://langchain-ai.github.io/langgraph/) for more background information.
|
||||
@@ -52,13 +52,9 @@
|
||||
"id": "e3621b62-a037-42b8-8faa-59575608bb8b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"import { ColumnContainer, Column } from \"@theme/Columns\";\n",
|
||||
"## Legacy\n",
|
||||
"\n",
|
||||
"<ColumnContainer>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### Legacy\n"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -98,13 +94,11 @@
|
||||
"id": "cdc3b527-c09e-4c77-9711-c3cc4506cd95",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"</Column>\n",
|
||||
"## LCEL\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### LCEL\n",
|
||||
"\n"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -143,10 +137,6 @@
|
||||
"id": "3c0b0513-77b8-4371-a20e-3e487cec7e7f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"</Column>\n",
|
||||
"</ColumnContainer>\n",
|
||||
"\n",
|
||||
"Note that `LLMChain` by default returns a `dict` containing both the input and the output. If this behavior is desired, we can replicate it using another LCEL primitive, [`RunnablePassthrough`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html):"
|
||||
]
|
||||
},
|
||||
@@ -181,6 +171,8 @@
|
||||
"id": "b2717810",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
"See [this tutorial](/docs/tutorials/llm_chain) for more detail on building with prompt templates, LLMs, and output parsers.\n",
|
||||
|
||||
283
docs/docs/versions/migrating_chains/llm_router_chain.ipynb
Normal file
@@ -0,0 +1,283 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "575befea-4d98-4941-8e55-1581b169a674",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"title: Migrating from LLMRouterChain\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "14625d35-efca-41cf-b203-be9f4c375700",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The [`LLMRouterChain`](https://api.python.langchain.com/en/latest/chains/langchain.chains.router.llm_router.LLMRouterChain.html) routed an input query to one of multiple destinations-- that is, given an input query, it used a LLM to select from a list of destination chains, and passed its inputs to the selected chain.\n",
|
||||
"\n",
|
||||
"`LLMRouterChain` does not support common [chat model](/docs/concepts/#chat-models) features, such as message roles and [tool calling](/docs/concepts/#functiontool-calling). Under the hood, `LLMRouterChain` routes a query by instructing the LLM to generate JSON-formatted text, and parsing out the intended destination.\n",
|
||||
"\n",
|
||||
"Consider an example from a [MultiPromptChain](/docs/versions/migrating_chains/multi_prompt_chain), which uses `LLMRouterChain`. Below is an (example) default prompt:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "364814a5-d15c-41bb-bf3f-581df51a4721",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Given a raw text input to a language model select the model prompt best suited for the input. You will be given the names of the available prompts and a description of what the prompt is best suited for. You may also revise the original input if you think that revising it will ultimately lead to a better response from the language model.\n",
|
||||
"\n",
|
||||
"<< FORMATTING >>\n",
|
||||
"Return a markdown code snippet with a JSON object formatted to look like:\n",
|
||||
"'''json\n",
|
||||
"{{\n",
|
||||
" \"destination\": string \\ name of the prompt to use or \"DEFAULT\"\n",
|
||||
" \"next_inputs\": string \\ a potentially modified version of the original input\n",
|
||||
"}}\n",
|
||||
"'''\n",
|
||||
"\n",
|
||||
"REMEMBER: \"destination\" MUST be one of the candidate prompt names specified below OR it can be \"DEFAULT\" if the input is not well suited for any of the candidate prompts.\n",
|
||||
"REMEMBER: \"next_inputs\" can just be the original input if you don't think any modifications are needed.\n",
|
||||
"\n",
|
||||
"<< CANDIDATE PROMPTS >>\n",
|
||||
"\n",
|
||||
"animals: prompt for animal expert\n",
|
||||
"vegetables: prompt for a vegetable expert\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"<< INPUT >>\n",
|
||||
"{input}\n",
|
||||
"\n",
|
||||
"<< OUTPUT (must include '''json at the start of the response) >>\n",
|
||||
"<< OUTPUT (must end with ''') >>\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains.router.multi_prompt import MULTI_PROMPT_ROUTER_TEMPLATE\n",
|
||||
"\n",
|
||||
"destinations = \"\"\"\n",
|
||||
"animals: prompt for animal expert\n",
|
||||
"vegetables: prompt for a vegetable expert\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(destinations=destinations)\n",
|
||||
"\n",
|
||||
"print(router_template.replace(\"`\", \"'\")) # for rendering purposes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "934937d1-fc0a-4d3f-b297-29f96e6a8f5e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Most of the behavior is determined via a single natural language prompt. Chat models that support [tool calling](/docs/how_to/tool_calling/) features confer a number of advantages for this task:\n",
|
||||
"\n",
|
||||
"- Supports chat prompt templates, including messages with `system` and other roles;\n",
|
||||
"- Tool-calling models are fine-tuned to generate structured output;\n",
|
||||
"- Support for runnable methods like streaming and async operations.\n",
|
||||
"\n",
|
||||
"Now let's look at `LLMRouterChain` side-by-side with an LCEL implementation that uses tool-calling. Note that for this guide we will `langchain-openai >= 0.1.20`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ed12b22b-5452-4776-aee3-b67d9f965082",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-core langchain-openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b0edbba1-a497-49ef-ade7-4fe7967360eb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5d4dc41c-3fdc-4093-ba5e-31a9ebb54e13",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Legacy\n",
|
||||
"\n",
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "c58c9269-5a1d-4234-88b5-7168944618bf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser\n",
|
||||
"from langchain_core.prompts import PromptTemplate\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-4o-mini\")\n",
|
||||
"\n",
|
||||
"router_prompt = PromptTemplate(\n",
|
||||
" # Note: here we use the prompt template from above. Generally this would need\n",
|
||||
" # to be customized.\n",
|
||||
" template=router_template,\n",
|
||||
" input_variables=[\"input\"],\n",
|
||||
" output_parser=RouterOutputParser(),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = LLMRouterChain.from_llm(llm, router_prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a22ebdca-5f53-459e-9cff-a97b2354ffe0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"vegetables\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = chain.invoke({\"input\": \"What color are carrots?\"})\n",
|
||||
"\n",
|
||||
"print(result[\"destination\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6fd48120-056f-4c58-a04f-da5198c23068",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"## LCEL\n",
|
||||
"\n",
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "5bbebac2-df19-4f59-8a69-f61cd7286e59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from operator import itemgetter\n",
|
||||
"from typing import Literal\n",
|
||||
"\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"from typing_extensions import TypedDict\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-4o-mini\")\n",
|
||||
"\n",
|
||||
"route_system = \"Route the user's query to either the animal or vegetable expert.\"\n",
|
||||
"route_prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", route_system),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Define schema for output:\n",
|
||||
"class RouteQuery(TypedDict):\n",
|
||||
" \"\"\"Route query to destination expert.\"\"\"\n",
|
||||
"\n",
|
||||
" destination: Literal[\"animal\", \"vegetable\"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Instead of writing formatting instructions into the prompt, we\n",
|
||||
"# leverage .with_structured_output to coerce the output into a simple\n",
|
||||
"# schema.\n",
|
||||
"chain = route_prompt | llm.with_structured_output(RouteQuery)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "88012e10-8def-44fa-833f-989935824182",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"vegetable\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = chain.invoke({\"input\": \"What color are carrots?\"})\n",
|
||||
"\n",
|
||||
"print(result[\"destination\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "baf7ba9e-65b4-48af-8a39-453c01a7b7cb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
"See [this tutorial](/docs/tutorials/llm_chain) for more detail on building with prompt templates, LLMs, and output parsers.\n",
|
||||
"\n",
|
||||
"Check out the [LCEL conceptual docs](/docs/concepts/#langchain-expression-language-lcel) for more background information."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "353e4bab-3b8a-4e89-89e2-200a8d8eb8dd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
706
docs/docs/versions/migrating_chains/map_reduce_chain.ipynb
Normal file
File diff suppressed because one or more lines are too long
341
docs/docs/versions/migrating_chains/map_rerank_docs_chain.ipynb
Normal file
File diff suppressed because one or more lines are too long
362
docs/docs/versions/migrating_chains/multi_prompt_chain.ipynb
Normal file
File diff suppressed because one or more lines are too long
452
docs/docs/versions/migrating_chains/refine_docs_chain.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -82,13 +82,9 @@
|
||||
"id": "c7e16438",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"import { ColumnContainer, Column } from \"@theme/Columns\";\n",
|
||||
"## Legacy\n",
|
||||
"\n",
|
||||
"<ColumnContainer>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### Legacy"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -128,12 +124,11 @@
|
||||
"id": "081948e5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</Column>\n",
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"## LCEL\n",
|
||||
"\n",
|
||||
"#### LCEL\n",
|
||||
"\n"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -184,9 +179,6 @@
|
||||
"id": "d6f44fe8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</Column>\n",
|
||||
"</ColumnContainer>\n",
|
||||
"\n",
|
||||
"The LCEL implementation exposes the internals of what's happening around retrieving, formatting documents, and passing them through a prompt to the LLM, but it is more verbose. You can customize and wrap this composition logic in a helper function, or use the higher-level [`create_retrieval_chain`](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval.create_retrieval_chain.html) and [`create_stuff_documents_chain`](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) helper method:"
|
||||
]
|
||||
},
|
||||
@@ -231,6 +223,8 @@
|
||||
"id": "b2717810",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
"Check out the [LCEL conceptual docs](/docs/concepts/#langchain-expression-language-lcel) for more background information."
|
||||
|
||||
281
docs/docs/versions/migrating_chains/stuff_docs_chain.ipynb
Normal file
@@ -0,0 +1,281 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ed78c53c-55ad-4ea2-9cc2-a39a1963c098",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"title: Migrating from StuffDocumentsChain\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"[StuffDocumentsChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.StuffDocumentsChain.html) combines documents by concatenating them into a single context window. It is a straightforward and effective strategy for combining documents for question-answering, summarization, and other purposes.\n",
|
||||
"\n",
|
||||
"[create_stuff_documents_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) is the recommended alternative. It functions the same as `StuffDocumentsChain`, with better support for streaming and batch functionality. Because it is a simple combination of [LCEL primitives](/docs/concepts/#langchain-expression-language-lcel), it is also easier to extend and incorporate into other LangChain applications.\n",
|
||||
"\n",
|
||||
"Below we will go through both `StuffDocumentsChain` and `create_stuff_documents_chain` on a simple example for illustrative purposes.\n",
|
||||
"\n",
|
||||
"Let's first load a chat model:\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
"\n",
|
||||
"<ChatModelTabs customVarName=\"llm\" />\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "dac0bef2-9453-46f2-a893-f7569b6a0170",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d4022d03-7b5e-4c81-98ff-5b82a2a4eaae",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example\n",
|
||||
"\n",
|
||||
"Let's go through an example where we analyze a set of documents. We first generate some simple documents for illustrative purposes:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "24fa0ba9-e245-47d1-bc2e-6286dd884117",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"documents = [\n",
|
||||
" Document(page_content=\"Apples are red\", metadata={\"title\": \"apple_book\"}),\n",
|
||||
" Document(page_content=\"Blueberries are blue\", metadata={\"title\": \"blueberry_book\"}),\n",
|
||||
" Document(page_content=\"Bananas are yelow\", metadata={\"title\": \"banana_book\"}),\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a769128-205f-417d-a25d-519e7cb03be7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Legacy\n",
|
||||
"\n",
|
||||
"<details open>\n",
|
||||
"\n",
|
||||
"Below we show an implementation with `StuffDocumentsChain`. We define the prompt template for a summarization task and instantiate a [LLMChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.llm.LLMChain.html) object for this purpose. We define how documents are formatted into the prompt and ensure consistency among the keys in the various prompts."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "9734c0f3-64e7-4ae6-8578-df03b3dabb26",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import LLMChain, StuffDocumentsChain\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate, PromptTemplate\n",
|
||||
"\n",
|
||||
"# This controls how each document will be formatted. Specifically,\n",
|
||||
"# it will be passed to `format_document` - see that function for more\n",
|
||||
"# details.\n",
|
||||
"document_prompt = PromptTemplate(\n",
|
||||
" input_variables=[\"page_content\"], template=\"{page_content}\"\n",
|
||||
")\n",
|
||||
"document_variable_name = \"context\"\n",
|
||||
"# The prompt here should take as an input variable the\n",
|
||||
"# `document_variable_name`\n",
|
||||
"prompt = ChatPromptTemplate.from_template(\"Summarize this content: {context}\")\n",
|
||||
"\n",
|
||||
"llm_chain = LLMChain(llm=llm, prompt=prompt)\n",
|
||||
"chain = StuffDocumentsChain(\n",
|
||||
" llm_chain=llm_chain,\n",
|
||||
" document_prompt=document_prompt,\n",
|
||||
" document_variable_name=document_variable_name,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0cb733bf-eb71-4fae-a8f4-d522924020cb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can now invoke our chain:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "d7d1ce10-bbee-4cb0-879d-7de4f69191c4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'This content describes the colors of different fruits: apples are red, blueberries are blue, and bananas are yellow.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = chain.invoke(documents)\n",
|
||||
"result[\"output_text\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "79b10d40-1521-433b-9026-6ec836ffeeb3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'input_documents': [Document(metadata={'title': 'apple_book'}, page_content='Apples are red'), Document(metadata={'title': 'blueberry_book'}, page_content='Blueberries are blue'), Document(metadata={'title': 'banana_book'}, page_content='Bananas are yelow')], 'output_text': 'This content describes the colors of different fruits: apples are red, blueberries are blue, and bananas are yellow.'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in chain.stream(documents):\n",
|
||||
" print(chunk)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b4cb6a5b-37ea-48cc-a096-b948d3ff7e9f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"### LCEL\n",
|
||||
"\n",
|
||||
"<details open>\n",
|
||||
"\n",
|
||||
"Below we show an implementation using `create_stuff_documents_chain`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "de38f27a-c648-44be-8c37-0a458c2920a9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.combine_documents import create_stuff_documents_chain\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(\"Summarize this content: {context}\")\n",
|
||||
"chain = create_stuff_documents_chain(llm, prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9d0e6996-9bf8-4097-9c1a-1c539eac3ed1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Invoking the chain, we obtain a similar result as before:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "f2d2bdfb-3a6a-464b-b4c2-e4252b2e53a0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'This content describes the colors of different fruits: apples are red, blueberries are blue, and bananas are yellow.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = chain.invoke({\"context\": documents})\n",
|
||||
"result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "493e6270-c61d-46c5-91b3-0cf7740a88f9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note that this implementation supports streaming of output tokens:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "b5adcabd-9bc1-4c91-a12b-7be82d64e457",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" | This | content | describes | the | colors | of | different | fruits | : | apples | are | red | , | blue | berries | are | blue | , | and | bananas | are | yellow | . | | "
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in chain.stream({\"context\": documents}):\n",
|
||||
" print(chunk, end=\" | \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "181c5633-38ea-4692-a869-32f4f78398e4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
"Check out the [LCEL conceptual docs](/docs/concepts/#langchain-expression-language-lcel) for more background information.\n",
|
||||
"\n",
|
||||
"See these [how-to guides](/docs/how_to/#qa-with-rag) for more on question-answering tasks with RAG.\n",
|
||||
"\n",
|
||||
"See [this tutorial](/docs/tutorials/summarization/) for more LLM-based summarization strategies."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -93,7 +93,7 @@ module.exports = {
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Migrating to LCEL",
|
||||
label: "Migrating from v0.0 chains",
|
||||
link: {type: 'doc', id: 'versions/migrating_chains/index'},
|
||||
collapsible: false,
|
||||
collapsed: false,
|
||||
|
||||
@@ -66,10 +66,6 @@
|
||||
"source": "/cookbook(/?)",
|
||||
"destination": "/v0.1/docs/cookbook/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/integrations/toolkits/document_comparison_toolkit(/?)",
|
||||
"destination": "/docs/tutorials/rag/"
|
||||
},
|
||||
{
|
||||
"source": "/v0.2/docs/how_to/migrate_chains(/?)",
|
||||
"destination": "/v0.2/docs/versions/migrating_chains"
|
||||
@@ -81,6 +77,14 @@
|
||||
{
|
||||
"source": "/v0.2/docs/integrations/toolkits/airbyte_structured_qa/",
|
||||
"destination": "/v0.2/docs/integrations/document_loaders/airbyte/"
|
||||
},
|
||||
{
|
||||
"source": "/v0.2/docs/integrations/toolkits/document_comparison_toolkit(/?)",
|
||||
"destination": "/v0.2/docs/tutorials/rag/"
|
||||
},
|
||||
{
|
||||
"source": "/v0.2/docs/integrations/toolkits/:path(.*/?)*",
|
||||
"destination": "/v0.2/docs/integrations/tools/:path*"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -9,3 +9,6 @@ langchain-nvidia-ai-endpoints
|
||||
langchain-elasticsearch
|
||||
urllib3==1.26.19
|
||||
nbconvert==7.16.4
|
||||
|
||||
# temp fix, uv fails to install 3.10.7
|
||||
orjson<=3.10.6
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
"\n",
|
||||
"- TODO: Make sure API reference link is correct.\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with __ModuleName__ [document loader](/docs/integrations/document_loaders/). For detailed documentation of all __ModuleName__Loader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.__module_name___loader.__ModuleName__Loader.html).\n",
|
||||
"This notebook provides a quick overview for getting started with __ModuleName__ [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all __ModuleName__Loader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.__module_name___loader.__ModuleName__Loader.html).\n",
|
||||
"\n",
|
||||
"- TODO: Add any other relevant links, like information about underlying API, etc.\n",
|
||||
"\n",
|
||||
@@ -32,7 +32,7 @@
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [__ModuleName__Loader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.__module_name__loader.__ModuleName__Loader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅/❌ | beta/❌ | ✅/❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Async Support\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| __ModuleName__Loader | ✅/❌ | ✅/❌ | \n",
|
||||
"\n",
|
||||
@@ -65,7 +65,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -102,7 +102,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents:\n",
|
||||
"\n",
|
||||
@@ -193,11 +193,6 @@
|
||||
"\n",
|
||||
"For detailed documentation of all __ModuleName__Loader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.__module_name___loader.__ModuleName__Loader.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -24,13 +24,9 @@
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"- TODO: Fill in table features.\n",
|
||||
"- TODO: Remove JS support link if not relevant, otherwise ensure link is correct.\n",
|
||||
"- TODO: Make sure API reference links are correct.\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/text_embedding/__package_name_short_snake__) | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [__ModuleName__Embeddings](https://api.python.langchain.com/en/latest/embeddings/__module_name__.embeddings.__ModuleName__Embeddings.html) | [__package_name__](https://api.python.langchain.com/en/latest/__package_name_short_snake___api_reference.html) | ✅/❌ | beta/❌ | ✅/❌ |  |  |\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"__ModuleName__\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
@@ -157,10 +153,10 @@
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_document = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_document.page_content"
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -210,7 +206,7 @@
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_queries([text, text2])\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
@@ -220,34 +216,10 @@
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Async Usage\n",
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"You can also use `aembed_query` and `aembed_documents` for producing embeddings asynchronously:\n"
|
||||
"For detailed documentation on `__ModuleName__Embeddings` features and configuration options, please refer to the [API reference](https://api.python.langchain.com/en/latest/embeddings/__module_name__.embeddings.__ModuleName__Embeddings.html).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4c3bef91",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"async def async_example():\n",
|
||||
" single_vector = await embeddings.embed_query(text)\n",
|
||||
" print(str(single_vector)[:100]) # Show the first 100 characters of the vector\n",
|
||||
"\n",
|
||||
"asyncio.run(async_example())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f1bd4396",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -44,7 +44,7 @@ class __ModuleName__Retriever(BaseRetriever):
|
||||
|
||||
retriever.invoke(query)
|
||||
|
||||
.. code-block:: python
|
||||
.. code-block:: none
|
||||
|
||||
# TODO: Example output.
|
||||
|
||||
@@ -67,7 +67,7 @@ class __ModuleName__Retriever(BaseRetriever):
|
||||
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
|
||||
|
||||
def format_docs(docs):
|
||||
return "\n\n".join(doc.page_content for doc in docs)
|
||||
return "\\n\\n".join(doc.page_content for doc in docs)
|
||||
|
||||
chain = (
|
||||
{"context": retriever | format_docs, "question": RunnablePassthrough()}
|
||||
@@ -78,7 +78,7 @@ class __ModuleName__Retriever(BaseRetriever):
|
||||
|
||||
chain.invoke("...")
|
||||
|
||||
.. code-block:: python
|
||||
.. code-block:: none
|
||||
|
||||
# TODO: Example output.
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ class __ModuleName__Toolkit(BaseToolKit):
|
||||
|
||||
toolkit.get_tools()
|
||||
|
||||
.. code-block:: python
|
||||
.. code-block:: none
|
||||
|
||||
# TODO: Example output.
|
||||
|
||||
@@ -61,7 +61,7 @@ class __ModuleName__Toolkit(BaseToolKit):
|
||||
for event in events:
|
||||
event["messages"][-1].pretty_print()
|
||||
|
||||
.. code-block:: python
|
||||
.. code-block:: none
|
||||
|
||||
# TODO: Example output.
|
||||
|
||||
|
||||
@@ -164,9 +164,105 @@ class GitHubToolkit(BaseToolkit):

See [Security](https://python.langchain.com/docs/security) for more information.

Setup:
See detailed installation instructions here:
https://python.langchain.com/v0.2/docs/integrations/tools/github/#installation

You will need to install ``pygithub`` and set the following environment
variables:

.. code-block:: bash

pip install -U pygithub
export GITHUB_APP_ID="your-app-id"
export GITHUB_APP_PRIVATE_KEY="path-to-private-key"
export GITHUB_REPOSITORY="your-github-repository"

Instantiate:
.. code-block:: python

from langchain_community.agent_toolkits.github.toolkit import GitHubToolkit
from langchain_community.utilities.github import GitHubAPIWrapper

github = GitHubAPIWrapper()
toolkit = GitHubToolkit.from_github_api_wrapper(github)

Tools:
.. code-block:: python

tools = toolkit.get_tools()
for tool in tools:
print(tool.name)

.. code-block:: none

Get Issues
Get Issue
Comment on Issue
List open pull requests (PRs)
Get Pull Request
Overview of files included in PR
Create Pull Request
List Pull Requests' Files
Create File
Read File
Update File
Delete File
Overview of existing files in Main branch
Overview of files in current working branch
List branches in this repository
Set active branch
Create a new branch
Get files from a directory
Search issues and pull requests
Search code
Create review request

Use within an agent:
.. code-block:: python

from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

# Select example tool
tools = [tool for tool in toolkit.get_tools() if tool.name == "Get Issue"]
assert len(tools) == 1
tools[0].name = "get_issue"

llm = ChatOpenAI(model="gpt-4o-mini")
agent_executor = create_react_agent(llm, tools)

example_query = "What is the title of issue 24888?"

events = agent_executor.stream(
{"messages": [("user", example_query)]},
stream_mode="values",
)
for event in events:
event["messages"][-1].pretty_print()

.. code-block:: none

================================ Human Message =================================

What is the title of issue 24888?
================================== Ai Message ==================================
Tool Calls:
get_issue (call_iSYJVaM7uchfNHOMJoVPQsOi)
Call ID: call_iSYJVaM7uchfNHOMJoVPQsOi
Args:
issue_number: 24888
================================= Tool Message =================================
Name: get_issue

{"number": 24888, "title": "Standardize KV-Store Docs", "body": "..."
================================== Ai Message ==================================

The title of issue 24888 is "Standardize KV-Store Docs".

Parameters:
tools: List[BaseTool]. The tools in the toolkit. Default is an empty list.
"""
""" # noqa: E501

tools: List[BaseTool] = []

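# Illustrative note, not part of the diff: GitHub tool names contain spaces
# ("Get Issue"), which OpenAI-style function names reject, hence the rename to
# "get_issue" in the docstring example above. A hedged, generic variant:
#
#     for tool in toolkit.get_tools():
#         tool.name = tool.name.lower().replace(" ", "_")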
@@ -39,9 +39,81 @@ class GmailToolkit(BaseToolkit):

See https://python.langchain.com/docs/security for more information.

Setup:
You will need a Google credentials.json file to use this toolkit.
See instructions here: https://python.langchain.com/v0.2/docs/integrations/tools/gmail/#setup

Key init args:
api_resource: Optional. The Google API resource. Default is None.

Instantiate:
.. code-block:: python

from langchain_google_community import GmailToolkit

toolkit = GmailToolkit()

Tools:
.. code-block:: python

toolkit.get_tools()

.. code-block:: none

[GmailCreateDraft(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),
GmailSendMessage(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),
GmailSearch(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),
GmailGetMessage(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),
GmailGetThread(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>)]

Use within an agent:
.. code-block:: python

from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

llm = ChatOpenAI(model="gpt-4o-mini")

agent_executor = create_react_agent(llm, tools)

example_query = "Draft an email to fake@fake.com thanking them for coffee."

events = agent_executor.stream(
{"messages": [("user", example_query)]},
stream_mode="values",
)
for event in events:
event["messages"][-1].pretty_print()

.. code-block:: none

================================ Human Message =================================

Draft an email to fake@fake.com thanking them for coffee.
================================== Ai Message ==================================
Tool Calls:
create_gmail_draft (call_slGkYKZKA6h3Mf1CraUBzs6M)
Call ID: call_slGkYKZKA6h3Mf1CraUBzs6M
Args:
message: Dear Fake,

I wanted to take a moment to thank you for the coffee yesterday. It was a pleasure catching up with you. Let's do it again soon!

Best regards,
[Your Name]
to: ['fake@fake.com']
subject: Thank You for the Coffee
================================= Tool Message =================================
Name: create_gmail_draft

Draft created. Draft Id: r-7233782721440261513
================================== Ai Message ==================================

I have drafted an email to fake@fake.com thanking them for the coffee. You can review and send it from your email draft with the subject "Thank You for the Coffee".

Parameters:
api_resource: Optional. The Google API resource. Default is None.
"""
""" # noqa: E501

api_resource: Resource = Field(default_factory=build_resource_service)

@@ -38,7 +38,125 @@ class RequestsToolkit(BaseToolkit):
what network access it has.

See https://python.langchain.com/docs/security for more information.
"""

Setup:
Install ``langchain-community``.

.. code-block:: bash

pip install -U langchain-community

Key init args:
requests_wrapper: langchain_community.utilities.requests.GenericRequestsWrapper
wrapper for executing requests.
allow_dangerous_requests: bool
Defaults to False. Must "opt-in" to using dangerous requests by setting to True.

Instantiate:
.. code-block:: python

from langchain_community.agent_toolkits.openapi.toolkit import RequestsToolkit
from langchain_community.utilities.requests import TextRequestsWrapper

toolkit = RequestsToolkit(
requests_wrapper=TextRequestsWrapper(headers={}),
allow_dangerous_requests=ALLOW_DANGEROUS_REQUEST,
)

Tools:
.. code-block:: python

tools = toolkit.get_tools()
tools

.. code-block:: none

[RequestsGetTool(requests_wrapper=TextRequestsWrapper(headers={}, aiosession=None, auth=None, response_content_type='text', verify=True), allow_dangerous_requests=True),
RequestsPostTool(requests_wrapper=TextRequestsWrapper(headers={}, aiosession=None, auth=None, response_content_type='text', verify=True), allow_dangerous_requests=True),
RequestsPatchTool(requests_wrapper=TextRequestsWrapper(headers={}, aiosession=None, auth=None, response_content_type='text', verify=True), allow_dangerous_requests=True),
RequestsPutTool(requests_wrapper=TextRequestsWrapper(headers={}, aiosession=None, auth=None, response_content_type='text', verify=True), allow_dangerous_requests=True),
RequestsDeleteTool(requests_wrapper=TextRequestsWrapper(headers={}, aiosession=None, auth=None, response_content_type='text', verify=True), allow_dangerous_requests=True)]

Use within an agent:
.. code-block:: python

from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent


api_spec = \"\"\"
openapi: 3.0.0
info:
title: JSONPlaceholder API
version: 1.0.0
servers:
- url: https://jsonplaceholder.typicode.com
paths:
/posts:
get:
summary: Get posts
parameters: &id001
- name: _limit
in: query
required: false
schema:
type: integer
example: 2
description: Limit the number of results
\"\"\"

system_message = \"\"\"
You have access to an API to help answer user queries.
Here is documentation on the API:
{api_spec}
\"\"\".format(api_spec=api_spec)

llm = ChatOpenAI(model="gpt-4o-mini")
agent_executor = create_react_agent(llm, tools, state_modifier=system_message)

example_query = "Fetch the top two posts. What are their titles?"

events = agent_executor.stream(
{"messages": [("user", example_query)]},
stream_mode="values",
)
for event in events:
event["messages"][-1].pretty_print()

.. code-block:: none

================================ Human Message =================================

Fetch the top two posts. What are their titles?
================================== Ai Message ==================================
Tool Calls:
requests_get (call_RV2SOyzCnV5h2sm4WPgG8fND)
Call ID: call_RV2SOyzCnV5h2sm4WPgG8fND
Args:
url: https://jsonplaceholder.typicode.com/posts?_limit=2
================================= Tool Message =================================
Name: requests_get

[
{
"userId": 1,
"id": 1,
"title": "sunt aut facere repellat provident occaecati excepturi optio reprehenderit",
"body": "quia et suscipit..."
},
{
"userId": 1,
"id": 2,
"title": "qui est esse",
"body": "est rerum tempore vitae..."
}
]
================================== Ai Message ==================================

The titles of the top two posts are:
1. "sunt aut facere repellat provident occaecati excepturi optio reprehenderit"
2. "qui est esse"
""" # noqa: E501

requests_wrapper: TextRequestsWrapper
"""The requests wrapper."""

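# Note (assumption, not from the source): ALLOW_DANGEROUS_REQUEST is expected
# to be defined by the caller, e.g. ALLOW_DANGEROUS_REQUEST = True, as an
# explicit opt-in to the security risks described above.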
@@ -21,7 +21,73 @@ class SlackToolkit(BaseToolkit):

Parameters:
client: The Slack client.
"""

Setup:
Install ``slack_sdk`` and set environment variable ``SLACK_USER_TOKEN``.

.. code-block:: bash

pip install -U slack_sdk
export SLACK_USER_TOKEN="your-user-token"

Key init args:
client: slack_sdk.WebClient
The Slack client.

Instantiate:
.. code-block:: python

from langchain_community.agent_toolkits import SlackToolkit

toolkit = SlackToolkit()

Tools:
.. code-block:: python

tools = toolkit.get_tools()
tools

.. code-block:: none

[SlackGetChannel(client=<slack_sdk.web.client.WebClient object at 0x113caa8c0>),
SlackGetMessage(client=<slack_sdk.web.client.WebClient object at 0x113caa4d0>),
SlackScheduleMessage(client=<slack_sdk.web.client.WebClient object at 0x113caa440>),
SlackSendMessage(client=<slack_sdk.web.client.WebClient object at 0x113caa410>)]

Use within an agent:
.. code-block:: python

from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

llm = ChatOpenAI(model="gpt-4o-mini")
agent_executor = create_react_agent(llm, tools)

example_query = "When was the #general channel created?"

events = agent_executor.stream(
{"messages": [("user", example_query)]},
stream_mode="values",
)
for event in events:
message = event["messages"][-1]
if message.type != "tool":  # mask sensitive information
event["messages"][-1].pretty_print()

.. code-block:: none

================================ Human Message =================================

When was the #general channel created?
================================== Ai Message ==================================
Tool Calls:
get_channelid_name_dict (call_NXDkALjoOx97uF1v0CoZTqtJ)
Call ID: call_NXDkALjoOx97uF1v0CoZTqtJ
Args:
================================== Ai Message ==================================

The #general channel was created on timestamp 1671043305.
""" # noqa: E501

client: WebClient = Field(default_factory=login)


@@ -206,7 +206,7 @@ class ChatBaichuan(BaseChatModel):

Key init args — client params:
api_key: Optional[str]
MiniMax API key. If not passed in will be read from env var BAICHUAN_API_KEY.
Baichuan API key. If not passed in will be read from env var BAICHUAN_API_KEY.
base_url: Optional[str]
Base URL for API requests.


@@ -200,7 +200,7 @@ class QianfanChatEndpoint(BaseChatModel):
("system", "你是一名专业的翻译家,可以将用户的中文翻译为英文。"),
("human", "我喜欢编程。"),
]
qianfan_chat.invoke(message)
qianfan_chat.invoke(messages)

.. code-block:: python

@@ -219,6 +219,7 @@ class QianfanChatEndpoint(BaseChatModel):

.. code-block:: python

stream = chat.stream(messages)
full = next(stream)
for chunk in stream:
full += chunk

@@ -167,7 +167,7 @@ class GPTRouter(BaseChatModel):
"""Number of chat completions to generate for each prompt."""
max_tokens: int = 256

@root_validator(allow_reuse=True)
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
values["gpt_router_api_base"] = get_from_dict_or_env(
values,
@@ -183,7 +183,10 @@ class GPTRouter(BaseChatModel):
"GPT_ROUTER_API_KEY",
)
)
return values

@root_validator(pre=True, skip_on_failure=True)
def post_init(cls, values: Dict) -> Dict:
try:
from gpt_router.client import GPTRouterClient


@@ -387,7 +387,7 @@ class MiniMaxChat(BaseChatModel):
class Config:
allow_population_by_field_name = True

@root_validator(pre=True, allow_reuse=True)
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""
values["minimax_api_key"] = convert_to_secret_str(

@@ -88,7 +88,7 @@ class ChatPerplexity(BaseChatModel):
def lc_secrets(self) -> Dict[str, str]:
return {"pplx_api_key": "PPLX_API_KEY"}

@root_validator(pre=True, allow_reuse=True)
@root_validator(pre=True)
def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
"""Build extra kwargs from additional params that were passed in."""
all_required_field_names = get_pydantic_field_names(cls)
@@ -114,7 +114,7 @@ class ChatPerplexity(BaseChatModel):
values["model_kwargs"] = extra
return values

@root_validator(allow_reuse=True)
@root_validator(pre=False, skip_on_failure=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""
values["pplx_api_key"] = get_from_dict_or_env(

@@ -126,9 +126,9 @@ class ChatSparkLLM(BaseChatModel):

from langchain_community.chat_models import ChatSparkLLM

chat = MiniMaxChat(
api_key=api_key,
api_secret=ak,
chat = ChatSparkLLM(
api_key="your-api-key",
api_secret="your-api-secret",
model='Spark4.0 Ultra',
# temperature=...,
# other params...

@@ -7,12 +7,25 @@ import logging
import time
from collections.abc import AsyncIterator, Iterator
from contextlib import asynccontextmanager, contextmanager
from typing import Any, Dict, List, Optional, Tuple, Type, Union
from operator import itemgetter
from typing import (
Any,
Callable,
Dict,
List,
Literal,
Optional,
Sequence,
Tuple,
Type,
Union,
)

from langchain_core.callbacks import (
AsyncCallbackManagerForLLMRun,
CallbackManagerForLLMRun,
)
from langchain_core.language_models import LanguageModelInput
from langchain_core.language_models.chat_models import (
BaseChatModel,
agenerate_from_stream,
@@ -30,9 +43,17 @@ from langchain_core.messages import (
SystemMessage,
SystemMessageChunk,
)
from langchain_core.output_parsers.base import OutputParserLike
from langchain_core.output_parsers.openai_tools import (
JsonOutputKeyToolsParser,
PydanticToolsParser,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.pydantic_v1 import BaseModel, Field, root_validator
from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough
from langchain_core.tools import BaseTool
from langchain_core.utils import get_from_dict_or_env
from langchain_core.utils.function_calling import convert_to_openai_tool

logger = logging.getLogger(__name__)

@@ -40,6 +61,10 @@ API_TOKEN_TTL_SECONDS = 3 * 60
ZHIPUAI_API_BASE = "https://open.bigmodel.cn/api/paas/v4/chat/completions"


def _is_pydantic_class(obj: Any) -> bool:
return isinstance(obj, type) and issubclass(obj, BaseModel)


@contextmanager
def connect_sse(client: Any, method: str, url: str, **kwargs: Any) -> Iterator:
"""Context manager for connecting to an SSE stream.
@@ -199,7 +224,7 @@ class ChatZhipuAI(BaseChatModel):

Key init args — completion params:
model: Optional[str]
Name of OpenAI model to use.
Name of ZhipuAI model to use.
temperature: float
Sampling temperature.
max_tokens: Optional[int]
@@ -207,9 +232,9 @@ class ChatZhipuAI(BaseChatModel):

Key init args — client params:
api_key: Optional[str]
ZhipuAI API key. If not passed in will be read from env var ZHIPUAI_API_KEY.
ZhipuAI API key. If not passed in will be read from env var ZHIPUAI_API_KEY.
api_base: Optional[str]
Base URL for API requests.
Base URL for API requests.

See full list of supported init args and their descriptions in the params section.

@@ -255,7 +280,7 @@ class ChatZhipuAI(BaseChatModel):

.. code-block:: python

stream = llm.stream(messages)
stream = zhipuai_chat.stream(messages)
full = next(stream)
for chunk in stream:
full += chunk
@@ -587,3 +612,178 @@ class ChatZhipuAI(BaseChatModel):

if finish_reason is not None:
break

def bind_tools(
self,
tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
*,
tool_choice: Optional[
Union[dict, str, Literal["auto", "any", "none"], bool]
] = None,
**kwargs: Any,
) -> Runnable[LanguageModelInput, BaseMessage]:
"""Bind tool-like objects to this chat model.
Args:
tools: A list of tool definitions to bind to this chat model.
Can be a dictionary, pydantic model, callable, or BaseTool. Pydantic
models, callables, and BaseTools will be automatically converted to
their schema dictionary representation.
tool_choice: Currently this can only be "auto" for this chat model.
**kwargs: Any additional parameters to pass to the
:class:`~langchain.runnable.Runnable` constructor.
"""
if self.model_name == "glm-4v":
raise ValueError("glm-4v currently does not support tool calling")

formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
if tool_choice and tool_choice != "auto":
raise ValueError("ChatZhipuAI currently only supports `auto` tool choice")
elif tool_choice and tool_choice == "auto":
kwargs["tool_choice"] = tool_choice
return self.bind(tools=formatted_tools, **kwargs)

def with_structured_output(
self,
schema: Optional[Union[Dict, Type[BaseModel]]] = None,
*,
method: Literal["function_calling", "json_mode"] = "function_calling",
include_raw: bool = False,
**kwargs: Any,
) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]:
"""Model wrapper that returns outputs formatted to match the given schema.

Args:
schema: The output schema as a dict or a Pydantic class. If a Pydantic class
then the model output will be an object of that class. If a dict then
the model output will be a dict. With a Pydantic class the returned
attributes will be validated, whereas with a dict they will not be. If
`method` is "function_calling" and `schema` is a dict, then the dict
must match the OpenAI function-calling spec.
method: The method for steering model generation, either "function_calling"
or "json_mode". ZhipuAI only supports "function_calling" which
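    # Illustrative sketch, not part of the diff: binding a Pydantic tool.
    # Assumes a valid ZHIPUAI_API_KEY is set; GetWeather is a hypothetical schema.
    #
    #     from langchain_core.pydantic_v1 import BaseModel, Field
    #
    #     class GetWeather(BaseModel):
    #         """Get the current weather for a city."""
    #         location: str = Field(..., description="City name, e.g. Beijing")
    #
    #     llm = ChatZhipuAI(model="glm-4")
    #     llm_with_tools = llm.bind_tools([GetWeather], tool_choice="auto")
    #     ai_msg = llm_with_tools.invoke("What is the weather like in Beijing?")
    #     ai_msg.tool_calls  # parsed GetWeather call(s), if the model chose the tool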
converts the schema to an OpenAI function and the model will make use of the
function-calling API.
include_raw: If False then only the parsed structured output is returned. If
an error occurs during model output parsing it will be raised. If True
then both the raw model response (a BaseMessage) and the parsed model
response will be returned. If an error occurs during output parsing it
will be caught and returned as well. The final output is always a dict
with keys "raw", "parsed", and "parsing_error".

Returns:
A Runnable that takes any ChatModel input and returns as output:

If include_raw is True then a dict with keys:
raw: BaseMessage
parsed: Optional[_DictOrPydantic]
parsing_error: Optional[BaseException]

If include_raw is False then just _DictOrPydantic is returned,
where _DictOrPydantic depends on the schema:

If schema is a Pydantic class then _DictOrPydantic is the Pydantic
class.

If schema is a dict then _DictOrPydantic is a dict.

Example: Function-calling, Pydantic schema (method="function_calling", include_raw=False):
.. code-block:: python

from langchain_community.chat_models import ChatZhipuAI
from langchain_core.pydantic_v1 import BaseModel

class AnswerWithJustification(BaseModel):
'''An answer to the user question along with justification for the answer.'''
answer: str
justification: str

llm = ChatZhipuAI(temperature=0)
structured_llm = llm.with_structured_output(AnswerWithJustification)

structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
# -> AnswerWithJustification(
#     answer='A pound of bricks and a pound of feathers weigh the same.'
#     justification="Both a pound of bricks and a pound of feathers have been defined to have the same weight. The 'pound' is a unit of weight, so any two things that are described as weighing a pound will weigh the same."
# )

Example: Function-calling, Pydantic schema (method="function_calling", include_raw=True):
.. code-block:: python

from langchain_community.chat_models import ChatZhipuAI
from langchain_core.pydantic_v1 import BaseModel

class AnswerWithJustification(BaseModel):
'''An answer to the user question along with justification for the answer.'''
answer: str
justification: str

llm = ChatZhipuAI(temperature=0)
structured_llm = llm.with_structured_output(AnswerWithJustification, include_raw=True)

structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
# -> {
#     'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_01htjn3cspevxbqc1d7nkk8wab', 'function': {'arguments': '{"answer": "A pound of bricks and a pound of feathers weigh the same.", "justification": "Both a pound of bricks and a pound of feathers have been defined to have the same weight. The \'pound\' is a unit of weight, so any two things that are described as weighing a pound will weigh the same.", "unit": "pounds"}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}, id='run-456beee6-65f6-4e80-88af-a6065480822c-0'),
#     'parsed': AnswerWithJustification(answer='A pound of bricks and a pound of feathers weigh the same.', justification="Both a pound of bricks and a pound of feathers have been defined to have the same weight. The 'pound' is a unit of weight, so any two things that are described as weighing a pound will weigh the same."),
#     'parsing_error': None
# }

Example: Function-calling, dict schema (method="function_calling", include_raw=False):
.. code-block:: python

from langchain_community.chat_models import ChatZhipuAI
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.utils.function_calling import convert_to_openai_tool

class AnswerWithJustification(BaseModel):
'''An answer to the user question along with justification for the answer.'''
answer: str
justification: str

dict_schema = convert_to_openai_tool(AnswerWithJustification)
llm = ChatZhipuAI(temperature=0)
structured_llm = llm.with_structured_output(dict_schema)

structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
# -> {
#     'answer': 'A pound of bricks and a pound of feathers weigh the same.',
#     'justification': "Both a pound of bricks and a pound of feathers have been defined to have the same weight. The 'pound' is a unit of weight, so any two things that are described as weighing a pound will weigh the same.", 'unit': 'pounds'}
# }

""" # noqa: E501
if kwargs:
raise ValueError(f"Received unsupported arguments {kwargs}")
is_pydantic_schema = _is_pydantic_class(schema)
if method == "function_calling":
if schema is None:
raise ValueError(
"schema must be specified when method is 'function_calling'. "
"Received None."
)
tool_name = convert_to_openai_tool(schema)["function"]["name"]
llm = self.bind_tools([schema], tool_choice="auto")
if is_pydantic_schema:
output_parser: OutputParserLike = PydanticToolsParser(
tools=[schema],  # type: ignore[list-item]
first_tool_only=True,  # type: ignore[list-item]
)
else:
output_parser = JsonOutputKeyToolsParser(
key_name=tool_name, first_tool_only=True
)
else:
raise ValueError(
f"""Unrecognized method argument. Expected 'function_calling'.
Received: '{method}'"""
)

if include_raw:
parser_assign = RunnablePassthrough.assign(
parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None
)
parser_none = RunnablePassthrough.assign(parsed=lambda _: None)
parser_with_fallback = parser_assign.with_fallbacks(
[parser_none], exception_key="parsing_error"
)
return RunnableMap(raw=llm) | parser_with_fallback
else:
return llm | output_parser

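A minimal, self-contained sketch of the ``include_raw`` fallback pattern used above (an illustration with a stand-in parser in place of the model and tool output parser):

.. code-block:: python

    from operator import itemgetter

    from langchain_core.runnables import RunnableLambda, RunnableMap, RunnablePassthrough

    parse = RunnableLambda(lambda msg: int(msg))  # stand-in for the tool output parser
    parser_assign = RunnablePassthrough.assign(
        parsed=itemgetter("raw") | parse, parsing_error=lambda _: None
    )
    parser_none = RunnablePassthrough.assign(parsed=lambda _: None)
    chain = RunnableMap(raw=RunnableLambda(lambda x: x)) | parser_assign.with_fallbacks(
        [parser_none], exception_key="parsing_error"
    )

    chain.invoke("42")    # {'raw': '42', 'parsed': 42, 'parsing_error': None}
    chain.invoke("oops")  # 'parsed' is None and 'parsing_error' holds the ValueError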
@@ -63,7 +63,10 @@ class FireCrawlLoader(BaseLoader):
f"Unrecognized mode '{self.mode}'. Expected one of 'crawl', 'scrape'."
)
for doc in firecrawl_docs:
yield Document(
page_content=doc.get("markdown", ""),
metadata=doc.get("metadata", {}),
)
metadata = doc.get("metadata", {})
if (self.params is not None) and self.params.get(
"extractorOptions", {}
).get("mode") == "llm-extraction":
metadata["llm_extraction"] = doc.get("llm_extraction")

yield Document(page_content=doc.get("markdown", ""), metadata=metadata)

@@ -21,7 +21,7 @@ class BaseGitHubLoader(BaseLoader, BaseModel, ABC):
github_api_url: str = "https://api.github.com"
"""URL of GitHub API"""

@root_validator(pre=True, allow_reuse=True)
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that access token exists in environment."""
values["access_token"] = get_from_dict_or_env(

@@ -3,7 +3,11 @@ from typing import Any, Dict, List, Optional
import requests
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr, root_validator
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
from langchain_core.utils import (
convert_to_secret_str,
get_from_dict_or_env,
secret_from_env,
)
from requests import RequestException

BAICHUAN_API_URL: str = "http://api.baichuan-ai.com/v1/embeddings"
@@ -53,7 +57,10 @@ class BaichuanTextEmbeddings(BaseModel, Embeddings):
session: Any  #: :meta private:
model_name: str = Field(default="Baichuan-Text-Embedding", alias="model")
"""The model used to embed the documents."""
baichuan_api_key: Optional[SecretStr] = Field(default=None, alias="api_key")
baichuan_api_key: Optional[SecretStr] = Field(
alias="api_key",
default_factory=secret_from_env("BAICHUAN_API_KEY", default=None),
)
"""Automatically inferred from env var `BAICHUAN_API_KEY` if not provided."""
chunk_size: int = 16
"""Chunk size when multiple texts are input"""
@@ -61,22 +68,21 @@ class BaichuanTextEmbeddings(BaseModel, Embeddings):
class Config:
allow_population_by_field_name = True

@root_validator(allow_reuse=True)
@root_validator(pre=False, skip_on_failure=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that auth token exists in environment."""
try:
if values["baichuan_api_key"] is None:
# This is likely here for some backwards compatibility with
# BAICHUAN_AUTH_TOKEN
baichuan_api_key = convert_to_secret_str(
get_from_dict_or_env(values, "baichuan_api_key", "BAICHUAN_API_KEY")
)
except ValueError as original_exc:
try:
baichuan_api_key = convert_to_secret_str(
get_from_dict_or_env(
values, "baichuan_auth_token", "BAICHUAN_AUTH_TOKEN"
)
get_from_dict_or_env(
values, "baichuan_auth_token", "BAICHUAN_AUTH_TOKEN"
)
except ValueError:
raise original_exc
)
values["baichuan_api_key"] = baichuan_api_key
else:
baichuan_api_key = values["baichuan_api_key"]

session = requests.Session()
session.headers.update(
{

@@ -56,7 +56,7 @@ class ClovaEmbeddings(BaseModel, Embeddings):
class Config:
extra = "forbid"

@root_validator(pre=True, allow_reuse=True)
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate api key exists in environment."""
values["clova_emb_api_key"] = convert_to_secret_str(

@@ -53,7 +53,7 @@ class GradientEmbeddings(BaseModel, Embeddings):
class Config:
extra = "forbid"

@root_validator(allow_reuse=True)
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""

@@ -65,8 +65,15 @@ class GradientEmbeddings(BaseModel, Embeddings):
)

values["gradient_api_url"] = get_from_dict_or_env(
values, "gradient_api_url", "GRADIENT_API_URL"
values,
"gradient_api_url",
"GRADIENT_API_URL",
default="https://api.gradient.ai/api",
)
return values

@root_validator(pre=False, skip_on_failure=True)
def post_init(cls, values: Dict) -> Dict:
try:
import gradientai
except ImportError:
@@ -85,7 +92,6 @@ class GradientEmbeddings(BaseModel, Embeddings):
host=values["gradient_api_url"],
)
values["client"] = gradient.get_embeddings_model(slug=values["model"])

return values

def embed_documents(self, texts: List[str]) -> List[List[float]]:

@@ -47,7 +47,7 @@ class InfinityEmbeddings(BaseModel, Embeddings):
class Config:
extra = "forbid"

@root_validator(allow_reuse=True)
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""


@@ -60,7 +60,7 @@ class InfinityEmbeddingsLocal(BaseModel, Embeddings):
class Config:
extra = "forbid"

@root_validator(allow_reuse=True)
@root_validator(pre=False, skip_on_failure=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""


@@ -12,8 +12,10 @@ from wsgiref.handlers import format_date_time
import numpy as np
import requests
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr, root_validator
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr
from langchain_core.utils import (
secret_from_env,
)
from numpy import ndarray

# SparkLLMTextEmbeddings is an embedding model provided by iFLYTEK Co., Ltd. (https://iflytek.com/en/).
@@ -102,11 +104,18 @@ class SparkLLMTextEmbeddings(BaseModel, Embeddings):
]
""" # noqa: E501

spark_app_id: Optional[SecretStr] = Field(default=None, alias="app_id")
spark_app_id: SecretStr = Field(
alias="app_id", default_factory=secret_from_env("SPARK_APP_ID")
)
"""Automatically inferred from env var `SPARK_APP_ID` if not provided."""
spark_api_key: Optional[SecretStr] = Field(default=None, alias="api_key")
spark_api_key: Optional[SecretStr] = Field(
alias="api_key", default_factory=secret_from_env("SPARK_API_KEY", default=None)
)
"""Automatically inferred from env var `SPARK_API_KEY` if not provided."""
spark_api_secret: Optional[SecretStr] = Field(default=None, alias="api_secret")
spark_api_secret: Optional[SecretStr] = Field(
alias="api_secret",
default_factory=secret_from_env("SPARK_API_SECRET", default=None),
)
"""Automatically inferred from env var `SPARK_API_SECRET` if not provided."""
base_url: str = Field(default="https://emb-cn-huabei-1.xf-yun.com/")
"""Base URL path for API requests"""
@@ -118,20 +127,6 @@ class SparkLLMTextEmbeddings(BaseModel, Embeddings):
class Config:
allow_population_by_field_name = True

@root_validator(allow_reuse=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that auth token exists in environment."""
values["spark_app_id"] = convert_to_secret_str(
get_from_dict_or_env(values, "spark_app_id", "SPARK_APP_ID")
)
values["spark_api_key"] = convert_to_secret_str(
get_from_dict_or_env(values, "spark_api_key", "SPARK_API_KEY")
)
values["spark_api_secret"] = convert_to_secret_str(
get_from_dict_or_env(values, "spark_api_secret", "SPARK_API_SECRET")
)
return values

def _embed(self, texts: List[str], host: str) -> Optional[List[List[float]]]:
"""Internal method to call Spark Embedding API and return embeddings.


@@ -1,4 +1,4 @@
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional

from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Field, root_validator
@@ -70,6 +70,11 @@ class ZhipuAIEmbeddings(BaseModel, Embeddings):
"""Model name"""
api_key: str
"""Automatically inferred from env var `ZHIPU_API_KEY` if not provided."""
dimensions: Optional[int] = None
"""The number of dimensions the resulting output embeddings should have.

Only supported in `embedding-3` and later models.
"""

@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
@@ -110,6 +115,13 @@ class ZhipuAIEmbeddings(BaseModel, Embeddings):
A list of embeddings for each document in the input list.
Each embedding is represented as a list of float values.
"""
resp = self.client.embeddings.create(model=self.model, input=texts)
if self.dimensions is not None:
resp = self.client.embeddings.create(
model=self.model,
input=texts,
dimensions=self.dimensions,
)
else:
resp = self.client.embeddings.create(model=self.model, input=texts)
embeddings = [r.embedding for r in resp.data]
return embeddings

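A short usage sketch for the ``dimensions`` branch above (assumes a valid ZhipuAI API key; ``embedding-3`` is the model family the docstring names as supporting the parameter):

.. code-block:: python

    from langchain_community.embeddings import ZhipuAIEmbeddings

    emb = ZhipuAIEmbeddings(model="embedding-3", api_key="...", dimensions=1024)
    vectors = emb.embed_documents(["hello", "world"])
    len(vectors[0])  # expected to equal 1024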
@@ -32,6 +32,7 @@ class CassandraGraphVectorStore(GraphVectorStore):
session: Optional[Session] = None,
keyspace: Optional[str] = None,
setup_mode: SetupMode = SetupMode.SYNC,
**kwargs: Any,
):
"""
Create the hybrid graph store.
@@ -74,6 +75,7 @@ class CassandraGraphVectorStore(GraphVectorStore):
session=session,
keyspace=keyspace,
setup_mode=_setup_mode,
**kwargs,
)

@property

@@ -77,7 +77,7 @@ class GradientLLM(BaseLLM):
allow_population_by_field_name = True
extra = "forbid"

@root_validator(allow_reuse=True)
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""

@@ -88,6 +88,26 @@ class GradientLLM(BaseLLM):
values, "gradient_workspace_id", "GRADIENT_WORKSPACE_ID"
)

values["gradient_api_url"] = get_from_dict_or_env(
values, "gradient_api_url", "GRADIENT_API_URL"
)
return values

@root_validator(pre=False, skip_on_failure=True)
def post_init(cls, values: Dict) -> Dict:
"""Post init validation."""
# Can be moved to post_init_validation
try:
import gradientai  # noqa
except ImportError:
logging.warning(
"DeprecationWarning: `GradientLLM` will use "
"`pip install gradientai` in future releases of langchain."
)
except Exception:
pass

# Can be moved to post_init_validation
if (
values["gradient_access_token"] is None
or len(values["gradient_access_token"]) < 10
@@ -114,20 +134,6 @@ class GradientLLM(BaseLLM):
if 0 >= kw.get("max_generated_token_count", 1):
raise ValueError("`max_generated_token_count` must be positive")

values["gradient_api_url"] = get_from_dict_or_env(
values, "gradient_api_url", "GRADIENT_API_URL"
)

try:
import gradientai  # noqa
except ImportError:
logging.warning(
"DeprecationWarning: `GradientLLM` will use "
"`pip install gradientai` in future releases of langchain."
)
except Exception:
pass

return values

@property

@@ -31,7 +31,7 @@ class _MinimaxEndpointClient(BaseModel):
api_key: SecretStr
api_url: str

@root_validator(pre=True, allow_reuse=True)
@root_validator(pre=True)
def set_api_url(cls, values: Dict[str, Any]) -> Dict[str, Any]:
if "api_url" not in values:
host = values["host"]

@@ -10,9 +10,76 @@ from langchain_community.utilities.arxiv import ArxivAPIWrapper
class ArxivRetriever(BaseRetriever, ArxivAPIWrapper):
"""`Arxiv` retriever.

It wraps load() to get_relevant_documents().
It uses all ArxivAPIWrapper arguments without any change.
"""
Setup:
Install ``arxiv``:

.. code-block:: bash

pip install -U arxiv

Key init args:
load_max_docs: int
maximum number of documents to load
get_full_documents: bool
whether to return full document text or snippets

Instantiate:
.. code-block:: python

from langchain_community.retrievers import ArxivRetriever

retriever = ArxivRetriever(
load_max_docs=2,
get_full_documents=True,
)

Usage:
.. code-block:: python

docs = retriever.invoke("What is the ImageBind model?")
docs[0].metadata

.. code-block:: none

{'Entry ID': 'http://arxiv.org/abs/2305.05665v2',
'Published': datetime.date(2023, 5, 31),
'Title': 'ImageBind: One Embedding Space To Bind Them All',
'Authors': 'Rohit Girdhar, Alaaeldin El-Nouby, Zhuang Liu, Mannat Singh, Kalyan Vasudev Alwala, Armand Joulin, Ishan Misra'}

Use within a chain:
.. code-block:: python

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template(
\"\"\"Answer the question based only on the context provided.

Context: {context}

Question: {question}\"\"\"
)

llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

def format_docs(docs):
return "\\n\\n".join(doc.page_content for doc in docs)

chain = (
{"context": retriever | format_docs, "question": RunnablePassthrough()}
| prompt
| llm
| StrOutputParser()
)

chain.invoke("What is the ImageBind model?")

.. code-block:: none

'The ImageBind model is an approach to learn a joint embedding across six different modalities - images, text, audio, depth, thermal, and IMU data...'
""" # noqa: E501

get_full_documents: bool = False


@@ -19,7 +19,71 @@ DEFAULT_URL_SUFFIX = "search.windows.net"


class AzureAISearchRetriever(BaseRetriever):
"""`Azure AI Search` service retriever."""
"""`Azure AI Search` service retriever.

Setup:
See here for more detail: https://python.langchain.com/v0.2/docs/integrations/retrievers/azure_ai_search/

We will need to install the below dependencies and set the required
environment variables:

.. code-block:: bash

pip install -U langchain-community azure-identity azure-search-documents
export AZURE_AI_SEARCH_SERVICE_NAME="<YOUR_SEARCH_SERVICE_NAME>"
export AZURE_AI_SEARCH_INDEX_NAME="<YOUR_SEARCH_INDEX_NAME>"
export AZURE_AI_SEARCH_API_KEY="<YOUR_API_KEY>"

Key init args:
content_key: str
top_k: int
index_name: str

Instantiate:
.. code-block:: python

from langchain_community.retrievers import AzureAISearchRetriever

retriever = AzureAISearchRetriever(
content_key="content", top_k=1, index_name="langchain-vector-demo"
)

Usage:
.. code-block:: python

retriever.invoke("here is my unstructured query string")

Use within a chain:
.. code-block:: python

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import AzureChatOpenAI

prompt = ChatPromptTemplate.from_template(
\"\"\"Answer the question based only on the context provided.

Context: {context}

Question: {question}\"\"\"
)

llm = AzureChatOpenAI(azure_deployment="gpt-35-turbo")

def format_docs(docs):
return "\\n\\n".join(doc.page_content for doc in docs)

chain = (
{"context": retriever | format_docs, "question": RunnablePassthrough()}
| prompt
| llm
| StrOutputParser()
)

chain.invoke("...")

""" # noqa: E501

service_name: str = ""
"""Name of Azure AI Search service"""

@@ -19,11 +19,18 @@ class RetrievalConfig(BaseModel, extra="allow"):  # type: ignore[call-arg]


class AmazonKnowledgeBasesRetriever(BaseRetriever):
"""`Amazon Bedrock Knowledge Bases` retrieval.
"""Amazon Bedrock Knowledge Bases retriever.

See https://aws.amazon.com/bedrock/knowledge-bases for more info.

Args:
Setup:
Install ``langchain-aws``:

.. code-block:: bash

pip install -U langchain-aws

Key init args:
knowledge_base_id: Knowledge Base ID.
region_name: The aws region e.g., `us-west-2`.
Fallback to AWS_DEFAULT_REGION env variable or region specified in
@@ -35,7 +42,7 @@ class AmazonKnowledgeBasesRetriever(BaseRetriever):
client: boto3 client for bedrock agent runtime.
retrieval_config: Configuration for retrieval.

Example:
Instantiate:
.. code-block:: python

from langchain_community.retrievers import AmazonKnowledgeBasesRetriever
@@ -48,7 +55,48 @@ class AmazonKnowledgeBasesRetriever(BaseRetriever):
}
},
)
"""

Usage:
.. code-block:: python

query = "..."

retriever.invoke(query)

Use within a chain:
.. code-block:: python

from langchain_aws import ChatBedrockConverse
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template(
\"\"\"Answer the question based only on the context provided.

Context: {context}

Question: {question}\"\"\"
)

llm = ChatBedrockConverse(
model_id="anthropic.claude-3-5-sonnet-20240620-v1:0"
)

def format_docs(docs):
return "\\n\\n".join(doc.page_content for doc in docs)

chain = (
{"context": retriever | format_docs, "question": RunnablePassthrough()}
| prompt
| llm
| StrOutputParser()
)

chain.invoke("...")

""" # noqa: E501

knowledge_base_id: str
region_name: Optional[str] = None

@@ -15,7 +15,73 @@ from langchain_community.vectorstores.milvus import Milvus


class MilvusRetriever(BaseRetriever):
"""`Milvus API` retriever."""
"""Milvus API retriever.

See detailed instructions here: https://python.langchain.com/v0.2/docs/integrations/retrievers/milvus_hybrid_search/

Setup:
Install ``langchain-milvus`` and other dependencies:

.. code-block:: bash

pip install -U pymilvus[model] langchain-milvus

Key init args:
collection: Milvus Collection

Instantiate:
.. code-block:: python

retriever = MilvusCollectionHybridSearchRetriever(collection=collection)

Usage:
.. code-block:: python

query = "What are the story about ventures?"

retriever.invoke(query)

.. code-block:: none

[Document(page_content="In 'The Lost Expedition' by Caspian Grey...", metadata={'doc_id': '449281835035545843'}),
Document(page_content="In 'The Phantom Pilgrim' by Rowan Welles...", metadata={'doc_id': '449281835035545845'}),
Document(page_content="In 'The Dreamwalker's Journey' by Lyra Snow..", metadata={'doc_id': '449281835035545846'})]

Use within a chain:
.. code-block:: python

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template(
\"\"\"Answer the question based only on the context provided.

Context: {context}

Question: {question}\"\"\"
)

llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

def format_docs(docs):
return "\\n\\n".join(doc.page_content for doc in docs)

chain = (
{"context": retriever | format_docs, "question": RunnablePassthrough()}
| prompt
| llm
| StrOutputParser()
)

chain.invoke("What novels has Lila written and what are their contents?")

.. code-block:: none

"Lila Rose has written 'The Memory Thief,' which follows a charismatic thief..."

""" # noqa: E501

embedding_function: Embeddings
collection_name: str = "LangChainCollection"

@@ -10,9 +10,66 @@ from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
class WikipediaRetriever(BaseRetriever, WikipediaAPIWrapper):
"""`Wikipedia API` retriever.

It wraps load() to get_relevant_documents().
It uses all WikipediaAPIWrapper arguments without any change.
"""
Setup:
Install the ``wikipedia`` dependency:

.. code-block:: bash

pip install -U wikipedia

Instantiate:
.. code-block:: python

from langchain_community.retrievers import WikipediaRetriever

retriever = WikipediaRetriever()

Usage:
.. code-block:: python

docs = retriever.invoke("TOKYO GHOUL")
print(docs[0].page_content[:100])

.. code-block:: none

Tokyo Ghoul (Japanese: 東京喰種(トーキョーグール), Hepburn: Tōkyō Gūru) is a Japanese dark fantasy

Use within a chain:
.. code-block:: python

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template(
\"\"\"Answer the question based only on the context provided.

Context: {context}

Question: {question}\"\"\"
)

llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

def format_docs(docs):
return "\\n\\n".join(doc.page_content for doc in docs)

chain = (
{"context": retriever | format_docs, "question": RunnablePassthrough()}
| prompt
| llm
| StrOutputParser()
)

chain.invoke(
"Who is the main character in `Tokyo Ghoul` and does he transform into a ghoul?"
)

.. code-block:: none

'The main character in Tokyo Ghoul is Ken Kaneki, who transforms into a ghoul after receiving an organ transplant from a ghoul named Rize.'
""" # noqa: E501

def _get_relevant_documents(
self, query: str, *, run_manager: CallbackManagerForRetrieverRun

@@ -29,8 +29,6 @@ class EdenAiSpeechToTextTool(EdenaiTool):
You can find your token here: https://app.edenai.run/admin/account/settings
"""

edenai_api_key: Optional[str] = None

name: str = "edenai_speech_to_text"
description = (
"A wrapper around edenai Services speech to text "

@@ -6,9 +6,9 @@ from typing import Any, Dict, List, Optional

import requests
from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import root_validator
from langchain_core.pydantic_v1 import Field, SecretStr
from langchain_core.tools import BaseTool
from langchain_core.utils import get_from_dict_or_env
from langchain_core.utils import secret_from_env

logger = logging.getLogger(__name__)

@@ -23,20 +23,14 @@ class EdenaiTool(BaseTool):

feature: str
subfeature: str
edenai_api_key: Optional[str] = None
edenai_api_key: SecretStr = Field(
default_factory=secret_from_env("EDENAI_API_KEY", default=None)
)
is_async: bool = False

providers: List[str]
"""provider to use for the API call."""

@root_validator(allow_reuse=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key exists in environment."""
values["edenai_api_key"] = get_from_dict_or_env(
values, "edenai_api_key", "EDENAI_API_KEY"
)
return values

@staticmethod
def get_user_agent() -> str:
from langchain_community import __version__
@@ -54,11 +48,8 @@ class EdenaiTool(BaseTool):
requests.Response: The response from the EdenAI API call.

"""
# make the API call
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.edenai_api_key}",
|
||||
"Authorization": f"Bearer {self.edenai_api_key.get_secret_value()}",
|
||||
"User-Agent": self.get_user_agent(),
|
||||
}
|
||||
|
||||
|
||||
@@ -48,7 +48,7 @@ class PowerBIDataset(BaseModel):
        """Fix the table names."""
        return [fix_table_name(table) for table in table_names]

    @root_validator(pre=True, allow_reuse=True)
    @root_validator(pre=True)
    def token_or_credential_present(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Validate that at least one of token and credentials is present."""
        if "token" in values or "credential" in values:
@@ -460,7 +460,45 @@ class ApertureDB(VectorStore):
        assert db.last_query_ok(), response
        return response[0]["FindDescriptorSet"]["entities"]

    @override
    def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
        """Add or update documents in the vectorstore.

        Args:
            documents: Documents to add to the vectorstore.
            kwargs: Additional keyword arguments.
                If kwargs contains ids and documents contain ids,
                the ids in the kwargs will take precedence.

        Returns:
            List of IDs of the added texts.

        Raises:
            ValueError: If the number of ids does not match the number of documents.
        """

        if "ids" in kwargs:
            ids = kwargs.pop("ids")
            if ids and len(ids) != len(documents):
                raise ValueError(
                    "The number of ids must match the number of documents. "
                    f"Got {len(ids)} ids and {len(documents)} documents."
                )

            documents_ = []

            for id_, document in zip(ids, documents):
                doc_with_id = Document(
                    page_content=document.page_content,
                    metadata=document.metadata,
                    id=id_,
                )
                documents_.append(doc_with_id)
        else:
            documents_ = documents

        # If upsert has been implemented, we can use it to add documents
        return self.upsert(documents_, **kwargs)["succeeded"]

    def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse:
        """Insert or update items
@@ -1288,7 +1288,7 @@ class FAISS(VectorStore):
        relevance_score_fn = self._select_relevance_score_fn()
        if relevance_score_fn is None:
            raise ValueError(
                "normalize_score_fn must be provided to"
                "relevance_score_fn must be provided to"
                " FAISS constructor to normalize scores"
            )
        docs_and_scores = self.similarity_search_with_score(

@@ -1317,7 +1317,7 @@ class FAISS(VectorStore):
        relevance_score_fn = self._select_relevance_score_fn()
        if relevance_score_fn is None:
            raise ValueError(
                "normalize_score_fn must be provided to"
                "relevance_score_fn must be provided to"
                " FAISS constructor to normalize scores"
            )
        docs_and_scores = await self.asimilarity_search_with_score(
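For context, this ValueError is raised on the relevance-score search path when no normalizer is configured, and the corrected message now names the actual constructor argument. A hedged sketch of supplying one (the lambda's scaling is illustrative, it assumes L2 distances on roughly unit-norm vectors; scores outside [0, 1] only trigger a warning):

.. code-block:: python

    import math

    from langchain_community.vectorstores import FAISS
    from langchain_core.embeddings import DeterministicFakeEmbedding

    vectorstore = FAISS.from_texts(
        ["foo", "bar"],
        embedding=DeterministicFakeEmbedding(size=8),
        # Map an L2 distance onto a [0, 1] relevance score.
        relevance_score_fn=lambda distance: 1.0 - distance / math.sqrt(2),
    )
    docs_and_scores = vectorstore.similarity_search_with_relevance_scores("foo", k=1)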
@@ -6,8 +6,6 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import root_validator
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
from langchain_core.vectorstores import VectorStore

@@ -164,18 +162,6 @@ class NeuralDBVectorStore(VectorStore):
        offset = self.db._savable_state.documents.get_source_by_id(source_id)[1]
        return [str(offset + i) for i in range(len(texts))]  # type: ignore[arg-type]

    @root_validator(allow_reuse=True)
    def validate_environments(cls, values: Dict) -> Dict:
        """Validate ThirdAI environment variables."""
        values["thirdai_key"] = convert_to_secret_str(
            get_from_dict_or_env(
                values,
                "thirdai_key",
                "THIRDAI_KEY",
            )
        )
        return values

    def insert(  # type: ignore[no-untyped-def, no-untyped-def]
        self,
        sources: List[Any],
@@ -18,3 +18,14 @@ def test_zhipuai_embedding_query() -> None:
    embedding = ZhipuAIEmbeddings()  # type: ignore[call-arg]
    res = embedding.embed_query(document)
    assert len(res) == 1024  # type: ignore[arg-type]


def test_zhipuai_embedding_dimensions() -> None:
    """Test ZhipuAI Text Embedding for query by assigning dimensions"""
    document = "This is a test query."
    embedding = ZhipuAIEmbeddings(
        model="embedding-3",
        dimensions=2048,
    )  # type: ignore[call-arg]
    res = embedding.embed_query(document)
    assert len(res) == 2048  # type: ignore[arg-type]
@@ -8,7 +8,7 @@ from langchain_community.embeddings import BaichuanTextEmbeddings

def test_sparkllm_initialization_by_alias() -> None:
    # Effective initialization
    embeddings = BaichuanTextEmbeddings(  # type: ignore[call-arg]
        model="embedding_model",  # type: ignore[arg-type]
        model="embedding_model",
        api_key="your-api-key",  # type: ignore[arg-type]
    )
    assert embeddings.model_name == "embedding_model"
@@ -2,7 +2,7 @@ import os
from typing import cast

import pytest
from langchain_core.pydantic_v1 import SecretStr, ValidationError
from langchain_core.pydantic_v1 import SecretStr

from langchain_community.embeddings import SparkLLMTextEmbeddings

@@ -43,5 +43,5 @@ def test_initialization_parameters_from_env() -> None:

    # Environment variable missing
    del os.environ["SPARK_APP_ID"]
    with pytest.raises(ValidationError):
    with pytest.raises(ValueError):
        SparkLLMTextEmbeddings()
@@ -74,6 +74,11 @@ async def test_fake_retriever_v1_upgrade_async(
    assert callbacks.retriever_errors == 0


def test_fake_retriever_v1_standard_params(fake_retriever_v1: BaseRetriever) -> None:
    ls_params = fake_retriever_v1._get_ls_params()
    assert ls_params == {"ls_retriever_name": "fakeretrieverv1"}


@pytest.fixture
def fake_retriever_v1_with_kwargs() -> BaseRetriever:
    # Test for things like the Weaviate V1 Retriever.

@@ -213,3 +218,8 @@ async def test_fake_retriever_v2_async(
    await fake_erroring_retriever_v2.ainvoke(
        "Foo", config={"callbacks": [callbacks]}
    )


def test_fake_retriever_v2_standard_params(fake_retriever_v2: BaseRetriever) -> None:
    ls_params = fake_retriever_v2._get_ls_params()
    assert ls_params == {"ls_retriever_name": "fakeretrieverv2"}
@@ -33,6 +33,11 @@ def test_create_client(amazon_retriever: AmazonKnowledgeBasesRetriever) -> None:
    amazon_retriever.create_client({})


def test_standard_params(amazon_retriever: AmazonKnowledgeBasesRetriever) -> None:
    ls_params = amazon_retriever._get_ls_params()
    assert ls_params == {"ls_retriever_name": "amazonknowledgebases"}


def test_get_relevant_documents(
    amazon_retriever: AmazonKnowledgeBasesRetriever, mock_client: MagicMock
) -> None:
@@ -6,7 +6,9 @@ import pytest
from langchain_community.tools.edenai import EdenAiTextModerationTool

tool = EdenAiTextModerationTool(  # type: ignore[call-arg]
    providers=["openai"], language="en", edenai_api_key="fake_key"
    providers=["openai"],
    language="en",
    edenai_api_key="fake_key",  # type: ignore[arg-type]
)
@@ -633,6 +633,28 @@ def test_similarity_score_threshold(index_details: dict, threshold: float) -> No
    assert len(search_result) == 0


@pytest.mark.requires("databricks", "databricks.vector_search")
def test_standard_params() -> None:
    index = mock_index(DIRECT_ACCESS_INDEX)
    vectorstore = default_databricks_vector_search(index)
    retriever = vectorstore.as_retriever()
    ls_params = retriever._get_ls_params()
    assert ls_params == {
        "ls_retriever_name": "vectorstore",
        "ls_vector_store_provider": "DatabricksVectorSearch",
        "ls_embedding_provider": "FakeEmbeddingsWithDimension",
    }

    index = mock_index(DELTA_SYNC_INDEX_MANAGED_EMBEDDINGS)
    vectorstore = default_databricks_vector_search(index)
    retriever = vectorstore.as_retriever()
    ls_params = retriever._get_ls_params()
    assert ls_params == {
        "ls_retriever_name": "vectorstore",
        "ls_vector_store_provider": "DatabricksVectorSearch",
    }


@pytest.mark.requires("databricks", "databricks.vector_search")
@pytest.mark.parametrize(
    "index_details", [DELTA_SYNC_INDEX_SELF_MANAGED_EMBEDDINGS, DIRECT_ACCESS_INDEX]
@@ -49,6 +49,15 @@ def test_faiss() -> None:
    output = docsearch.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo")]

    # Retriever standard params
    retriever = docsearch.as_retriever()
    ls_params = retriever._get_ls_params()
    assert ls_params == {
        "ls_retriever_name": "vectorstore",
        "ls_vector_store_provider": "FAISS",
        "ls_embedding_provider": "FakeEmbeddings",
    }


@pytest.mark.requires("faiss")
async def test_faiss_afrom_texts() -> None:
@@ -30,7 +30,8 @@ class LangChainPendingDeprecationWarning(PendingDeprecationWarning):
# PUBLIC API


T = TypeVar("T", bound=Union[Type, Callable[..., Any]])
# Last Any should be FieldInfoV1 but this leads to circular imports
T = TypeVar("T", bound=Union[Type, Callable[..., Any], Any])


def _validate_deprecation_params(

@@ -133,7 +134,7 @@ def deprecated(
        _package: str = package,
    ) -> T:
        """Implementation of the decorator returned by `deprecated`."""
        from pydantic.v1.fields import FieldInfo  # pydantic: ignore
        from langchain_core.utils.pydantic import FieldInfoV1

        def emit_warning() -> None:
            """Emit the warning."""

@@ -208,9 +209,7 @@ def deprecated(
            )
            return cast(T, obj)

        elif isinstance(obj, FieldInfo):
            from langchain_core.pydantic_v1 import Field

        elif isinstance(obj, FieldInfoV1):
            wrapped = None
            if not _obj_type:
                _obj_type = "attribute"

@@ -219,58 +218,64 @@ def deprecated(
            old_doc = obj.description

            def finalize(wrapper: Callable[..., Any], new_doc: str) -> T:
                return Field(
                    default=obj.default,
                    default_factory=obj.default_factory,
                    description=new_doc,
                    alias=obj.alias,
                    exclude=obj.exclude,
                return cast(
                    T,
                    FieldInfoV1(
                        default=obj.default,
                        default_factory=obj.default_factory,
                        description=new_doc,
                        alias=obj.alias,
                        exclude=obj.exclude,
                    ),
                )

        elif isinstance(obj, property):
            if not _obj_type:
                _obj_type = "attribute"
            wrapped = None
            _name = _name or obj.fget.__qualname__
            _name = _name or cast(Union[Type, Callable], obj.fget).__qualname__
            old_doc = obj.__doc__

            class _deprecated_property(property):
                """A deprecated property."""

                def __init__(self, fget=None, fset=None, fdel=None, doc=None):
                def __init__(self, fget=None, fset=None, fdel=None, doc=None):  # type: ignore[no-untyped-def]
                    super().__init__(fget, fset, fdel, doc)
                    self.__orig_fget = fget
                    self.__orig_fset = fset
                    self.__orig_fdel = fdel

                def __get__(self, instance, owner=None):
                def __get__(self, instance, owner=None):  # type: ignore[no-untyped-def]
                    if instance is not None or owner is not None:
                        emit_warning()
                    return self.fget(instance)

                def __set__(self, instance, value):
                def __set__(self, instance, value):  # type: ignore[no-untyped-def]
                    if instance is not None:
                        emit_warning()
                    return self.fset(instance, value)

                def __delete__(self, instance):
                def __delete__(self, instance):  # type: ignore[no-untyped-def]
                    if instance is not None:
                        emit_warning()
                    return self.fdel(instance)

                def __set_name__(self, owner, set_name):
                def __set_name__(self, owner, set_name):  # type: ignore[no-untyped-def]
                    nonlocal _name
                    if _name == "<lambda>":
                        _name = set_name

            def finalize(wrapper: Callable[..., Any], new_doc: str) -> Any:
            def finalize(wrapper: Callable[..., Any], new_doc: str) -> T:
                """Finalize the property."""
                return _deprecated_property(
                    fget=obj.fget, fset=obj.fset, fdel=obj.fdel, doc=new_doc
                return cast(
                    T,
                    _deprecated_property(
                        fget=obj.fget, fset=obj.fset, fdel=obj.fdel, doc=new_doc
                    ),
                )

        else:
            _name = _name or obj.__qualname__
            _name = _name or cast(Union[Type, Callable], obj).__qualname__
            if not _obj_type:
                # edge case: when a function is within another function
                # within a test, this will call it a "method" not a "function"
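The ``_deprecated_property`` hunk above is a descriptor trick: subclass ``property`` and emit the warning inside ``__get__``/``__set__``/``__delete__`` so every access path is covered. A standalone sketch of the same idea (all names here are illustrative):

.. code-block:: python

    import warnings


    class warn_on_access_property(property):
        """Property subclass that warns whenever the attribute is read."""

        def __get__(self, instance, owner=None):
            if instance is not None or owner is not None:
                warnings.warn("this attribute is deprecated", DeprecationWarning)
            return super().__get__(instance, owner)


    class Config:
        @warn_on_access_property
        def timeout(self) -> int:
            return 30


    assert Config().timeout == 30  # also emits a DeprecationWarning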
@@ -15,14 +15,36 @@ class FakeEmbeddings(Embeddings, BaseModel):

    Do not use this outside of testing, as it is not a real embedding model.

    Example:
    Instantiate:
        .. code-block:: python

            from langchain_core.embeddings import FakeEmbeddings
            embed = FakeEmbeddings(size=100)

            fake_embeddings = FakeEmbeddings(size=100)
            fake_embeddings.embed_documents(["hello world", "foo bar"])
    Embed single text:
        .. code-block:: python

            input_text = "The meaning of life is 42"
            vector = embed.embed_query(input_text)
            print(vector[:3])

        .. code-block:: python

            [-0.700234640213188, -0.581266257710429, -1.1328482266445354]

    Embed multiple texts:
        .. code-block:: python

            input_texts = ["Document 1...", "Document 2..."]
            vectors = embed.embed_documents(input_texts)
            print(len(vectors))
            # The first 3 coordinates for the first vector
            print(vectors[0][:3])

        .. code-block:: python

            2
            [-0.5670477847544458, -0.31403828652395727, -0.5840547508955257]
    """

    size: int

@@ -48,14 +70,36 @@ class DeterministicFakeEmbedding(Embeddings, BaseModel):

    Do not use this outside of testing, as it is not a real embedding model.

    Example:
    Instantiate:
        .. code-block:: python

            from langchain_core.embeddings import DeterministicFakeEmbedding
            embed = DeterministicFakeEmbedding(size=100)

            fake_embeddings = DeterministicFakeEmbedding(size=100)
            fake_embeddings.embed_documents(["hello world", "foo bar"])
    Embed single text:
        .. code-block:: python

            input_text = "The meaning of life is 42"
            vector = embed.embed_query(input_text)
            print(vector[:3])

        .. code-block:: python

            [-0.700234640213188, -0.581266257710429, -1.1328482266445354]

    Embed multiple texts:
        .. code-block:: python

            input_texts = ["Document 1...", "Document 2..."]
            vectors = embed.embed_documents(input_texts)
            print(len(vectors))
            # The first 3 coordinates for the first vector
            print(vectors[0][:3])

        .. code-block:: python

            2
            [-0.5670477847544458, -0.31403828652395727, -0.5840547508955257]
    """

    size: int
@@ -1177,7 +1177,7 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
                A message can be represented using the following formats:
                (1) BaseMessagePromptTemplate, (2) BaseMessage, (3) 2-tuple of
                (message type, template); e.g., ("human", "{user_input}"),
                (4) 2-tuple of (message class, template), (4) a string which is
                (4) 2-tuple of (message class, template), (5) a string which is
                shorthand for ("human", template); e.g., "{user_input}".
            template_format: format of the template. Defaults to "f-string".
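The renumbering above fixes a duplicated "(4)" in the enumeration. For reference, formats (3) and (5) look like this in use (a small illustrative sketch):

.. code-block:: python

    from langchain_core.prompts import ChatPromptTemplate

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a helpful assistant."),  # format (3): (type, template)
            "{user_input}",  # format (5): shorthand for ("human", "{user_input}")
        ]
    )
    messages = prompt.format_messages(user_input="Hello!")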
@@ -181,7 +181,7 @@ class InMemoryRateLimiter(BaseRateLimiter):
            the caller should try again later.
        """
        with self._consume_lock:
            now = time.time()
            now = time.monotonic()

            # initialize on first call to avoid a burst
            if self.last is None:
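The switch to the monotonic clock matters because ``time.time()`` can jump backwards under NTP or manual clock adjustments, which would make the elapsed-time delta negative and corrupt the token-bucket arithmetic. A minimal sketch of the pattern (rates and names are illustrative):

.. code-block:: python

    import time

    last = time.monotonic()
    # ... later, when tokens are consumed ...
    elapsed = time.monotonic() - last  # non-negative by construction
    new_tokens = elapsed * 10.0  # e.g., refill at 10 requests per second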
@@ -26,6 +26,8 @@ from abc import ABC, abstractmethod
from inspect import signature
from typing import TYPE_CHECKING, Any, Dict, List, Optional

from typing_extensions import TypedDict

from langchain_core._api import deprecated
from langchain_core.documents import Document
from langchain_core.load.dump import dumpd

@@ -50,6 +52,19 @@ RetrieverLike = Runnable[RetrieverInput, RetrieverOutput]
RetrieverOutputLike = Runnable[Any, RetrieverOutput]


class LangSmithRetrieverParams(TypedDict, total=False):
    """LangSmith parameters for tracing."""

    ls_retriever_name: str
    """Retriever name."""
    ls_vector_store_provider: Optional[str]
    """Vector store provider."""
    ls_embedding_provider: Optional[str]
    """Embedding provider."""
    ls_embedding_model: Optional[str]
    """Embedding model."""


class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
    """Abstract base class for a Document retrieval system.

@@ -167,6 +182,19 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
            len(set(parameters.keys()) - {"self", "query", "run_manager"}) > 0
        )

    def _get_ls_params(self, **kwargs: Any) -> LangSmithRetrieverParams:
        """Get standard params for tracing."""

        default_retriever_name = self.get_name()
        if default_retriever_name.startswith("Retriever"):
            default_retriever_name = default_retriever_name[9:]
        elif default_retriever_name.endswith("Retriever"):
            default_retriever_name = default_retriever_name[:-9]
        default_retriever_name = default_retriever_name.lower()

        ls_params = LangSmithRetrieverParams(ls_retriever_name=default_retriever_name)
        return ls_params
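Concretely, the normalization above turns a class name like ``AmazonKnowledgeBasesRetriever`` into ``amazonknowledgebases``, which is exactly what the community tests earlier in this diff assert. A hypothetical standalone version of the rule:

.. code-block:: python

    def strip_retriever_name(name: str) -> str:
        # Drop a leading or trailing "Retriever" (9 characters), then lowercase.
        if name.startswith("Retriever"):
            name = name[9:]
        elif name.endswith("Retriever"):
            name = name[:-9]
        return name.lower()


    assert strip_retriever_name("AmazonKnowledgeBasesRetriever") == "amazonknowledgebases"
    assert strip_retriever_name("VectorStoreRetriever") == "vectorstore"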
    def invoke(
        self, input: str, config: Optional[RunnableConfig] = None, **kwargs: Any
    ) -> List[Document]:

@@ -191,13 +219,17 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
        from langchain_core.callbacks.manager import CallbackManager

        config = ensure_config(config)
        inheritable_metadata = {
            **(config.get("metadata") or {}),
            **self._get_ls_params(**kwargs),
        }
        callback_manager = CallbackManager.configure(
            config.get("callbacks"),
            None,
            verbose=kwargs.get("verbose", False),
            inheritable_tags=config.get("tags"),
            local_tags=self.tags,
            inheritable_metadata=config.get("metadata"),
            inheritable_metadata=inheritable_metadata,
            local_metadata=self.metadata,
        )
        run_manager = callback_manager.on_retriever_start(

@@ -250,13 +282,17 @@ class BaseRetriever(RunnableSerializable[RetrieverInput, RetrieverOutput], ABC):
        from langchain_core.callbacks.manager import AsyncCallbackManager

        config = ensure_config(config)
        inheritable_metadata = {
            **(config.get("metadata") or {}),
            **self._get_ls_params(**kwargs),
        }
        callback_manager = AsyncCallbackManager.configure(
            config.get("callbacks"),
            None,
            verbose=kwargs.get("verbose", False),
            inheritable_tags=config.get("tags"),
            local_tags=self.tags,
            inheritable_metadata=config.get("metadata"),
            inheritable_metadata=inheritable_metadata,
            local_metadata=self.metadata,
        )
        run_manager = await callback_manager.on_retriever_start(
@@ -27,6 +27,7 @@ from langchain_core.utils.utils import (
    guard_import,
    mock_now,
    raise_for_status_with_text,
    secret_from_env,
    xor_args,
)

@@ -56,4 +57,5 @@ __all__ = [
    "batch_iterate",
    "abatch_iterate",
    "from_env",
    "secret_from_env",
]
@@ -26,9 +26,13 @@ PYDANTIC_MAJOR_VERSION = get_pydantic_major_version()


if PYDANTIC_MAJOR_VERSION == 1:
    from pydantic.fields import FieldInfo as FieldInfoV1

    PydanticBaseModel = pydantic.BaseModel
    TypeBaseModel = Type[BaseModel]
elif PYDANTIC_MAJOR_VERSION == 2:
    from pydantic.v1.fields import FieldInfo as FieldInfoV1  # type: ignore[assignment]

    # Union type needs to be last assignment to PydanticBaseModel to make mypy happy.
    PydanticBaseModel = Union[BaseModel, pydantic.BaseModel]  # type: ignore
    TypeBaseModel = Union[Type[BaseModel], Type[pydantic.BaseModel]]  # type: ignore

@@ -272,7 +276,6 @@ if PYDANTIC_MAJOR_VERSION == 2:
    from pydantic import BaseModel as BaseModelV2
    from pydantic.fields import FieldInfo as FieldInfoV2
    from pydantic.v1 import BaseModel as BaseModelV1
    from pydantic.v1.fields import FieldInfo as FieldInfoV1

    @overload
    def get_fields(model: Type[BaseModelV2]) -> Dict[str, FieldInfoV2]: ...

@@ -304,11 +307,10 @@ if PYDANTIC_MAJOR_VERSION == 2:
        raise TypeError(f"Expected a Pydantic model. Got {type(model)}")
elif PYDANTIC_MAJOR_VERSION == 1:
    from pydantic import BaseModel as BaseModelV1_
    from pydantic.fields import FieldInfo as FieldInfoV1_

    def get_fields(  # type: ignore[no-redef]
        model: Union[Type[BaseModelV1_], BaseModelV1_],
    ) -> Dict[str, FieldInfoV1_]:
    ) -> Dict[str, FieldInfoV1]:
        """Get the field names of a Pydantic model."""
        return model.__fields__  # type: ignore
else:
@@ -313,11 +313,11 @@ def from_env(
            This will be raised as a ValueError.
    """

    def get_from_env_fn() -> str:  # type: ignore
    def get_from_env_fn() -> Optional[str]:
        """Get a value from an environment variable."""
        if key in os.environ:
            return os.environ[key]
        elif isinstance(default, str):
        elif isinstance(default, (str, type(None))):
            return default
        else:
            if error_message:
@@ -330,3 +330,62 @@ def from_env(
        )

    return get_from_env_fn


@overload
def secret_from_env(key: str, /) -> Callable[[], SecretStr]: ...


@overload
def secret_from_env(key: str, /, *, default: str) -> Callable[[], SecretStr]: ...


@overload
def secret_from_env(
    key: str, /, *, default: None
) -> Callable[[], Optional[SecretStr]]: ...


@overload
def secret_from_env(key: str, /, *, error_message: str) -> Callable[[], SecretStr]: ...


def secret_from_env(
    key: str,
    /,
    *,
    default: Union[str, _NoDefaultType, None] = _NoDefault,
    error_message: Optional[str] = None,
) -> Union[Callable[[], Optional[SecretStr]], Callable[[], SecretStr]]:
    """Secret from env.

    Args:
        key: The environment variable to look up.
        default: The default value to return if the environment variable is not set.
        error_message: the error message which will be raised if the key is not found
            and no default value is provided.
            This will be raised as a ValueError.

    Returns:
        factory method that will look up the secret from the environment.
    """

    def get_secret_from_env() -> Optional[SecretStr]:
        """Get a value from an environment variable."""
        if key in os.environ:
            return SecretStr(os.environ[key])
        elif isinstance(default, str):
            return SecretStr(default)
        elif isinstance(default, type(None)):
            return None
        else:
            if error_message:
                raise ValueError(error_message)
            else:
                raise ValueError(
                    f"Did not find {key}, please add an environment variable"
                    f" `{key}` which contains it, or pass"
                    f" `{key}` as a named parameter."
                )

    return get_secret_from_env
@@ -29,30 +29,23 @@ from itertools import cycle
from typing import (
    TYPE_CHECKING,
    Any,
    AsyncIterable,
    AsyncIterator,
    Callable,
    ClassVar,
    Collection,
    Dict,
    Iterable,
    Iterator,
    List,
    Optional,
    Sequence,
    Tuple,
    Type,
    TypeVar,
    Union,
)

from langchain_core._api import beta
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import Field, root_validator
from langchain_core.retrievers import BaseRetriever
from langchain_core.retrievers import BaseRetriever, LangSmithRetrieverParams
from langchain_core.runnables.config import run_in_executor
from langchain_core.utils.aiter import abatch_iterate
from langchain_core.utils.iter import batch_iterate

if TYPE_CHECKING:
    from langchain_core.callbacks.manager import (

@@ -60,7 +53,6 @@ if TYPE_CHECKING:
        CallbackManagerForRetrieverRun,
    )
    from langchain_core.documents import Document
    from langchain_core.indexing import UpsertResponse

logger = logging.getLogger(__name__)
@@ -96,7 +88,7 @@ class VectorStore(ABC):
            ValueError: If the number of metadatas does not match the number of texts.
            ValueError: If the number of ids does not match the number of texts.
        """
        if type(self).upsert != VectorStore.upsert:
        if type(self).add_documents != VectorStore.add_documents:
            # Import document in local scope to avoid circular imports
            from langchain_core.documents import Document

@@ -109,190 +101,19 @@ class VectorStore(ABC):
            if metadatas and len(metadatas) != len(texts_):
                raise ValueError(
                    "The number of metadatas must match the number of texts."
                    "Got {len(metadatas)} metadatas and {len(texts_)} texts."
                    f"Got {len(metadatas)} metadatas and {len(texts_)} texts."
                )

            if "ids" in kwargs:
                ids = kwargs.pop("ids")
                if ids and len(ids) != len(texts_):
                    raise ValueError(
                        "The number of ids must match the number of texts."
                        "Got {len(ids)} ids and {len(texts_)} texts."
                    )
            else:
                ids = None

            metadatas_ = iter(metadatas) if metadatas else cycle([{}])
            ids_: Iterable[Union[str, None]] = ids if ids is not None else cycle([None])
            docs = [
                Document(page_content=text, metadata=metadata_, id=id_)
                for text, metadata_, id_ in zip(texts, metadatas_, ids_)
                Document(page_content=text, metadata=metadata_)
                for text, metadata_ in zip(texts, metadatas_)
            ]
            upsert_response = self.upsert(docs, **kwargs)
            return upsert_response["succeeded"]

            return self.add_documents(docs, **kwargs)
        raise NotImplementedError(
            f"`add_texts` has not been implemented for {self.__class__.__name__} "
        )
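One detail worth calling out in the hunk above: ``cycle([{}])`` pairs every text with an empty metadata dict without materializing a list, and ``zip`` stops when the texts are exhausted. A tiny illustrative sketch:

.. code-block:: python

    from itertools import cycle

    texts = ["foo", "bar", "baz"]
    metadatas = None  # caller supplied no metadata

    # Note: cycle reuses one dict object; that is fine here because it is never mutated.
    metadatas_ = iter(metadatas) if metadatas else cycle([{}])
    pairs = list(zip(texts, metadatas_))  # zip ends when texts is exhausted
    assert pairs == [("foo", {}), ("bar", {}), ("baz", {})]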
    # Developer guidelines:
    # Do not override streaming_upsert!
    @beta(message="Added in 0.2.11. The API is subject to change.")
    def streaming_upsert(
        self, items: Iterable[Document], /, batch_size: int, **kwargs: Any
    ) -> Iterator[UpsertResponse]:
        """Upsert documents in a streaming fashion.

        Args:
            items: Iterable of Documents to add to the vectorstore.
            batch_size: The size of each batch to upsert.
            kwargs: Additional keyword arguments.
                kwargs should only include parameters that are common to all
                documents. (e.g., timeout for indexing, retry policy, etc.)
                kwargs should not include ids to avoid ambiguous semantics.
                Instead, the ID should be provided as part of the Document object.

        Yields:
            UpsertResponse: A response object that contains the list of IDs that were
            successfully added or updated in the vectorstore and the list of IDs that
            failed to be added or updated.

        .. versionadded:: 0.2.11
        """
        # The default implementation of this method breaks the input into
        # batches of size `batch_size` and calls the `upsert` method on each batch.
        # Subclasses can override this method to provide a more efficient
        # implementation.
        for item_batch in batch_iterate(batch_size, items):
            yield self.upsert(item_batch, **kwargs)
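The default implementation above is just batching plus delegation. The batching helper it leans on can be sketched generically (a hypothetical stand-in for ``langchain_core.utils.iter.batch_iterate``, shown only to make the control flow concrete):

.. code-block:: python

    from itertools import islice
    from typing import Iterable, Iterator, List, TypeVar

    T = TypeVar("T")


    def batch_iterate_sketch(size: int, iterable: Iterable[T]) -> Iterator[List[T]]:
        """Yield successive lists of at most `size` items from `iterable`."""
        it = iter(iterable)
        while batch := list(islice(it, size)):
            yield batch


    assert list(batch_iterate_sketch(2, "abcde")) == [["a", "b"], ["c", "d"], ["e"]]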
    # Please note that we've added a new method `upsert` instead of re-using the
    # existing `add_documents` method.
    # This was done to resolve potential ambiguities around the behavior of **kwargs
    # in existing add_documents / add_texts methods which could include per document
    # information (e.g., the `ids` parameter).
    # Over time the `add_documents` could be denoted as legacy and deprecated
    # in favor of the `upsert` method.
    @beta(message="Added in 0.2.11. The API is subject to change.")
    def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse:
        """Add or update documents in the vectorstore.

        The upsert functionality should utilize the ID field of the Document object
        if it is provided. If the ID is not provided, the upsert method is free
        to generate an ID for the document.

        When an ID is specified and the document already exists in the vectorstore,
        the upsert method should update the document with the new data. If the document
        does not exist, the upsert method should add the document to the vectorstore.

        Args:
            items: Sequence of Documents to add to the vectorstore.
            kwargs: Additional keyword arguments.

        Returns:
            UpsertResponse: A response object that contains the list of IDs that were
            successfully added or updated in the vectorstore and the list of IDs that
            failed to be added or updated.

        .. versionadded:: 0.2.11
        """
        # Developer guidelines:
        #
        # Vectorstores implementations are free to extend `upsert` implementation
        # to take in additional data per document.
        #
        # This data **SHOULD NOT** be part of the **kwargs** parameter, instead
        # sub-classes can use a Union type on `documents` to include additional
        # supported formats for the input data stream.
        #
        # For example,
        #
        # .. code-block:: python
        # from typing import TypedDict
        #
        # class DocumentWithVector(TypedDict):
        #     document: Document
        #     vector: List[float]
        #
        # def upsert(
        #     self,
        #     documents: Union[Iterable[Document], Iterable[DocumentWithVector]],
        #     /,
        #     **kwargs
        # ) -> UpsertResponse:
        #     \"\"\"Add or update documents in the vectorstore.\"\"\"
        #     # Implementation should check if documents is an
        #     # iterable of DocumentWithVector or Document
        #     pass
        #
        # Implementations that override upsert should include a new doc-string
        # that explains the semantics of upsert and includes in code
        # examples of how to insert using the alternate data formats.

        # The implementation does not delegate to the `add_texts` method or
        # the `add_documents` method by default since those implementations
        raise NotImplementedError(
            f"upsert has not been implemented for {self.__class__.__name__}"
        )

    @beta(message="Added in 0.2.11. The API is subject to change.")
    async def astreaming_upsert(
        self,
        items: AsyncIterable[Document],
        /,
        batch_size: int,
        **kwargs: Any,
    ) -> AsyncIterator[UpsertResponse]:
        """Upsert documents in a streaming fashion. Async version of streaming_upsert.

        Args:
            items: Iterable of Documents to add to the vectorstore.
            batch_size: The size of each batch to upsert.
            kwargs: Additional keyword arguments.
                kwargs should only include parameters that are common to all
                documents. (e.g., timeout for indexing, retry policy, etc.)
                kwargs should not include ids to avoid ambiguous semantics.
                Instead the ID should be provided as part of the Document object.

        Yields:
            UpsertResponse: A response object that contains the list of IDs that were
            successfully added or updated in the vectorstore and the list of IDs that
            failed to be added or updated.

        .. versionadded:: 0.2.11
        """
        async for batch in abatch_iterate(batch_size, items):
            yield await self.aupsert(batch, **kwargs)

    @beta(message="Added in 0.2.11. The API is subject to change.")
    async def aupsert(
        self, items: Sequence[Document], /, **kwargs: Any
    ) -> UpsertResponse:
        """Add or update documents in the vectorstore. Async version of upsert.

        The upsert functionality should utilize the ID field of the Document object
        if it is provided. If the ID is not provided, the upsert method is free
        to generate an ID for the document.

        When an ID is specified and the document already exists in the vectorstore,
        the upsert method should update the document with the new data. If the document
        does not exist, the upsert method should add the document to the vectorstore.

        Args:
            items: Sequence of Documents to add to the vectorstore.
            kwargs: Additional keyword arguments.

        Returns:
            UpsertResponse: A response object that contains the list of IDs that were
            successfully added or updated in the vectorstore and the list of IDs that
            failed to be added or updated.

        .. versionadded:: 0.2.11
        """
        # Developer guidelines: See guidelines for the `upsert` method.
        # The implementation does not delegate to the `add_texts` method or
        # the `add_documents` method by default since those implementations
        return await run_in_executor(None, self.upsert, items, **kwargs)

    @property
    def embeddings(self) -> Optional[Embeddings]:
        """Access the query embedding object if available."""
@@ -407,7 +228,7 @@ class VectorStore(ABC):
            ValueError: If the number of metadatas does not match the number of texts.
            ValueError: If the number of ids does not match the number of texts.
        """
        if type(self).aupsert != VectorStore.aupsert:
        if type(self).aadd_documents != VectorStore.aadd_documents:
            # Import document in local scope to avoid circular imports
            from langchain_core.documents import Document

@@ -420,27 +241,16 @@ class VectorStore(ABC):
            if metadatas and len(metadatas) != len(texts_):
                raise ValueError(
                    "The number of metadatas must match the number of texts."
                    "Got {len(metadatas)} metadatas and {len(texts_)} texts."
                    f"Got {len(metadatas)} metadatas and {len(texts_)} texts."
                )

            if "ids" in kwargs:
                ids = kwargs.pop("ids")
                if ids and len(ids) != len(texts_):
                    raise ValueError(
                        "The number of ids must match the number of texts."
                        "Got {len(ids)} ids and {len(texts_)} texts."
                    )
            else:
                ids = None

            metadatas_ = iter(metadatas) if metadatas else cycle([{}])
            ids_: Iterable[Union[str, None]] = ids if ids is not None else cycle([None])

            docs = [
                Document(page_content=text, metadata=metadata_, id=id_)
                for text, metadata_, id_ in zip(texts, metadatas_, ids_)
                Document(page_content=text, metadata=metadata_)
                for text, metadata_ in zip(texts, metadatas_)
            ]
            upsert_response = await self.aupsert(docs, **kwargs)
            return upsert_response["succeeded"]

            return await self.aadd_documents(docs, **kwargs)
        return await run_in_executor(None, self.add_texts, texts, metadatas, **kwargs)

    def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:

@@ -458,37 +268,22 @@ class VectorStore(ABC):
        Raises:
            ValueError: If the number of ids does not match the number of documents.
        """
        if type(self).upsert != VectorStore.upsert:
            from langchain_core.documents import Document
        if type(self).add_texts != VectorStore.add_texts:
            if "ids" not in kwargs:
                ids = [doc.id for doc in documents]

            if "ids" in kwargs:
                ids = kwargs.pop("ids")
                if ids and len(ids) != len(documents):
                    raise ValueError(
                        "The number of ids must match the number of documents. "
                        "Got {len(ids)} ids and {len(documents)} documents."
                    )
                # If there's at least one valid ID, we'll assume that IDs
                # should be used.
                if any(ids):
                    kwargs["ids"] = ids

            documents_ = []

            for id_, document in zip(ids, documents):
                doc_with_id = Document(
                    page_content=document.page_content,
                    metadata=document.metadata,
                    id=id_,
                )
                documents_.append(doc_with_id)
            else:
                documents_ = documents

            # If upsert has been implemented, we can use it to add documents
            return self.upsert(documents_, **kwargs)["succeeded"]

            # Code path that delegates to add_text for backwards compatibility
            # TODO: Handle the case where the user doesn't provide ids on the Collection
            texts = [doc.page_content for doc in documents]
            metadatas = [doc.metadata for doc in documents]
            return self.add_texts(texts, metadatas, **kwargs)
            texts = [doc.page_content for doc in documents]
            metadatas = [doc.metadata for doc in documents]
            return self.add_texts(texts, metadatas, **kwargs)
        raise NotImplementedError(
            f"`add_documents` and `add_texts` has not been implemented "
            f"for {self.__class__.__name__} "
        )
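The new synchronous path above establishes the id-precedence rule that the unit tests at the bottom of this diff exercise: ids come from the documents themselves unless an explicit ``ids`` kwarg is passed, in which case the kwarg wins and the original documents are left unmodified. Illustratively (using the in-memory store whose rewrite appears later in this diff, so this reflects the post-change behavior):

.. code-block:: python

    from langchain_core.documents import Document
    from langchain_core.embeddings import DeterministicFakeEmbedding
    from langchain_core.vectorstores import InMemoryVectorStore

    store = InMemoryVectorStore(embedding=DeterministicFakeEmbedding(size=8))

    doc = Document(id="7", page_content="baz")
    assert store.add_documents([doc]) == ["7"]  # id taken from the document itself
    assert store.add_documents([doc], ids=["6"]) == ["6"]  # explicit kwarg wins
    assert doc.id == "7"  # the original document is not modified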
    async def aadd_documents(
        self, documents: List[Document], **kwargs: Any

@@ -506,41 +301,21 @@ class VectorStore(ABC):
        Raises:
            ValueError: If the number of IDs does not match the number of documents.
        """
        # If either upsert or aupsert has been implemented, we delegate to them!
        if (
            type(self).aupsert != VectorStore.aupsert
            or type(self).upsert != VectorStore.upsert
        ):
            # If aupsert has been implemented, we can use it to add documents
            from langchain_core.documents import Document
        # If the async method has been overridden, we'll use that.
        if type(self).aadd_texts != VectorStore.aadd_texts:
            if "ids" not in kwargs:
                ids = [doc.id for doc in documents]

            if "ids" in kwargs:
                ids = kwargs.pop("ids")
                if ids and len(ids) != len(documents):
                    raise ValueError(
                        "The number of ids must match the number of documents."
                        "Got {len(ids)} ids and {len(documents)} documents."
                    )
                # If there's at least one valid ID, we'll assume that IDs
                # should be used.
                if any(ids):
                    kwargs["ids"] = ids

            documents_ = []
            texts = [doc.page_content for doc in documents]
            metadatas = [doc.metadata for doc in documents]
            return await self.aadd_texts(texts, metadatas, **kwargs)

            for id_, document in zip(ids, documents):
                doc_with_id = Document(
                    page_content=document.page_content,
                    metadata=document.metadata,
                    id=id_,
                )
                documents_.append(doc_with_id)
            else:
                documents_ = documents

            # The default implementation of aupsert delegates to upsert.
            upsert_response = await self.aupsert(documents_, **kwargs)
            return upsert_response["succeeded"]

            texts = [doc.page_content for doc in documents]
            metadatas = [doc.metadata for doc in documents]
            return await self.aadd_texts(texts, metadatas, **kwargs)
        return await run_in_executor(None, self.add_documents, documents, **kwargs)

    def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]:
        """Return docs most similar to query using a specified search type.

@@ -1239,6 +1014,25 @@ class VectorStoreRetriever(BaseRetriever):
        )
        return values

    def _get_ls_params(self, **kwargs: Any) -> LangSmithRetrieverParams:
        """Get standard params for tracing."""

        ls_params = super()._get_ls_params(**kwargs)
        ls_params["ls_vector_store_provider"] = self.vectorstore.__class__.__name__

        if self.vectorstore.embeddings:
            ls_params["ls_embedding_provider"] = (
                self.vectorstore.embeddings.__class__.__name__
            )
        elif hasattr(self.vectorstore, "embedding") and isinstance(
            self.vectorstore.embedding, Embeddings
        ):
            ls_params["ls_embedding_provider"] = (
                self.vectorstore.embedding.__class__.__name__
            )

        return ls_params

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
@@ -8,12 +8,14 @@ from typing import (
    Any,
    Callable,
    Dict,
    Iterator,
    List,
    Optional,
    Sequence,
    Tuple,
)

from langchain_core._api import deprecated
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.load import dumpd, load
@@ -56,43 +58,71 @@ class InMemoryVectorStore(VectorStore):
    async def adelete(self, ids: Optional[Sequence[str]] = None, **kwargs: Any) -> None:
        self.delete(ids)

    def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse:
        vectors = self.embedding.embed_documents([item.page_content for item in items])
        ids = []
        for item, vector in zip(items, vectors):
            doc_id = item.id if item.id else str(uuid.uuid4())
            ids.append(doc_id)
            self.store[doc_id] = {
                "id": doc_id,
                "vector": vector,
                "text": item.page_content,
                "metadata": item.metadata,
            }
        return {
            "succeeded": ids,
            "failed": [],
        }
    def add_documents(
        self,
        documents: List[Document],
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Add documents to the store."""
        texts = [doc.page_content for doc in documents]
        vectors = self.embedding.embed_documents(texts)

    async def aupsert(
        self, items: Sequence[Document], /, **kwargs: Any
    ) -> UpsertResponse:
        vectors = await self.embedding.aembed_documents(
            [item.page_content for item in items]
        if ids and len(ids) != len(texts):
            raise ValueError(
                f"ids must be the same length as texts. "
                f"Got {len(ids)} ids and {len(texts)} texts."
            )

        id_iterator: Iterator[Optional[str]] = (
            iter(ids) if ids else iter(doc.id for doc in documents)
        )
        ids = []
        for item, vector in zip(items, vectors):
            doc_id = item.id if item.id else str(uuid.uuid4())
            ids.append(doc_id)
            self.store[doc_id] = {
                "id": doc_id,

        ids_ = []

        for doc, vector in zip(documents, vectors):
            doc_id = next(id_iterator)
            doc_id_ = doc_id if doc_id else str(uuid.uuid4())
            ids_.append(doc_id_)
            self.store[doc_id_] = {
                "id": doc_id_,
                "vector": vector,
                "text": item.page_content,
                "metadata": item.metadata,
                "text": doc.page_content,
                "metadata": doc.metadata,
            }
        return {
            "succeeded": ids,
            "failed": [],
        }

        return ids_

    async def aadd_documents(
        self, documents: List[Document], ids: Optional[List[str]] = None, **kwargs: Any
    ) -> List[str]:
        """Add documents to the store."""
        texts = [doc.page_content for doc in documents]
        vectors = await self.embedding.aembed_documents(texts)

        if ids and len(ids) != len(texts):
            raise ValueError(
                f"ids must be the same length as texts. "
                f"Got {len(ids)} ids and {len(texts)} texts."
            )

        id_iterator: Iterator[Optional[str]] = (
            iter(ids) if ids else iter(doc.id for doc in documents)
        )
        ids_: List[str] = []

        for doc, vector in zip(documents, vectors):
            doc_id = next(id_iterator)
            doc_id_ = doc_id if doc_id else str(uuid.uuid4())
            ids_.append(doc_id_)
            self.store[doc_id_] = {
                "id": doc_id_,
                "vector": vector,
                "text": doc.page_content,
                "metadata": doc.metadata,
            }

        return ids_
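The rewritten methods above share one idiom: an id iterator that prefers an explicit ``ids`` list, falls back to per-document ids, and mints a UUID only when both are missing. A compact illustrative sketch of just that resolution step:

.. code-block:: python

    import uuid
    from typing import Iterator, List, Optional

    from langchain_core.documents import Document


    def resolve_ids(documents: List[Document], ids: Optional[List[str]]) -> List[str]:
        # Prefer the explicit ids list; otherwise read each document's own id.
        id_iterator: Iterator[Optional[str]] = (
            iter(ids) if ids else iter(doc.id for doc in documents)
        )
        return [next(id_iterator) or str(uuid.uuid4()) for _ in documents]


    docs = [Document(id="a", page_content="x"), Document(page_content="y")]
    resolved = resolve_ids(docs, None)
    assert resolved[0] == "a" and len(resolved[1]) == 36  # second id is a fresh UUID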
    def get_by_ids(self, ids: Sequence[str], /) -> List[Document]:
        """Get documents by their ids.

@@ -117,6 +147,62 @@ class InMemoryVectorStore(VectorStore):
        )
        return documents

    @deprecated(
        alternative="VectorStore.add_documents",
        message=(
            "This was a beta API that was added in 0.2.11. "
            "It'll be removed in 0.3.0."
        ),
        since="0.2.29",
        removal="0.3.0",
    )
    def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse:
        vectors = self.embedding.embed_documents([item.page_content for item in items])
        ids = []
        for item, vector in zip(items, vectors):
            doc_id = item.id if item.id else str(uuid.uuid4())
            ids.append(doc_id)
            self.store[doc_id] = {
                "id": doc_id,
                "vector": vector,
                "text": item.page_content,
                "metadata": item.metadata,
            }
        return {
            "succeeded": ids,
            "failed": [],
        }

    @deprecated(
        alternative="VectorStore.aadd_documents",
        message=(
            "This was a beta API that was added in 0.2.11. "
            "It'll be removed in 0.3.0."
        ),
        since="0.2.29",
        removal="0.3.0",
    )
    async def aupsert(
        self, items: Sequence[Document], /, **kwargs: Any
    ) -> UpsertResponse:
        vectors = await self.embedding.aembed_documents(
            [item.page_content for item in items]
        )
        ids = []
        for item, vector in zip(items, vectors):
            doc_id = item.id if item.id else str(uuid.uuid4())
            ids.append(doc_id)
            self.store[doc_id] = {
                "id": doc_id,
                "vector": vector,
                "text": item.page_content,
                "metadata": item.metadata,
            }
        return {
            "succeeded": ids,
            "failed": [],
        }

    async def aget_by_ids(self, ids: Sequence[str], /) -> List[Document]:
        """Async get documents by their ids.
libs/core/poetry.lock: 949 changed lines (generated file; diff suppressed because it is too large)
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "langchain-core"
version = "0.2.29"
version = "0.2.30"
description = "Building applications with LLMs through composability"
authors = []
license = "MIT"

@@ -74,17 +74,20 @@ optional = true
[tool.poetry.group.lint.dependencies]
ruff = "^0.5"


[tool.poetry.group.typing.dependencies]
mypy = ">=1.10,<1.11"
types-pyyaml = "^6.0.12.2"
types-requests = "^2.28.11.5"
types-jinja2 = "^2.11.9"


[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"
setuptools = "^67.6.1"
grandalf = "^0.8"


[tool.poetry.group.test.dependencies]
pytest = "^7.3.0"
freezegun = "^1.2.2"

@@ -103,12 +106,15 @@ python = "<3.12"
version = "^1.26.0"
python = ">=3.12"


[tool.poetry.group.test_integration.dependencies]


[tool.poetry.group.typing.dependencies.langchain-text-splitters]
path = "../text-splitters"
develop = true


[tool.poetry.group.test.dependencies.langchain-standard-tests]
path = "../standard-tests"
develop = true
@@ -26,6 +26,7 @@ EXPECTED_ALL = [
    "stringify_value",
    "pre_init",
    "from_env",
    "secret_from_env",
]
@@ -1,12 +1,13 @@
import os
import re
from contextlib import AbstractContextManager, nullcontext
from typing import Any, Dict, Optional, Tuple, Type, Union
from typing import Any, Callable, Dict, Optional, Tuple, Type, Union
from unittest.mock import patch

import pytest

from langchain_core import utils
from langchain_core.pydantic_v1 import SecretStr
from langchain_core.utils import (
    check_package_version,
    from_env,

@@ -15,6 +16,7 @@ from langchain_core.utils import (
)
from langchain_core.utils._merge import merge_dicts
from langchain_core.utils.pydantic import PYDANTIC_MAJOR_VERSION
from langchain_core.utils.utils import secret_from_env


@pytest.mark.parametrize(
@pytest.mark.parametrize(
|
||||
@@ -254,3 +256,110 @@ def test_from_env_with_default_error_message() -> None:
|
||||
get_value = from_env(key)
|
||||
with pytest.raises(ValueError, match=f"Did not find {key}"):
|
||||
get_value()
|
||||
|
||||
|
||||
def test_secret_from_env_with_env_variable(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
# Set the environment variable
|
||||
monkeypatch.setenv("TEST_KEY", "secret_value")
|
||||
|
||||
# Get the function
|
||||
get_secret: Callable[[], Optional[SecretStr]] = secret_from_env("TEST_KEY")
|
||||
|
||||
# Assert that it returns the correct value
|
||||
assert get_secret() == SecretStr("secret_value")
|
||||
|
||||
|
||||
def test_secret_from_env_with_default_value(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
# Unset the environment variable
|
||||
monkeypatch.delenv("TEST_KEY", raising=False)
|
||||
|
||||
# Get the function with a default value
|
||||
get_secret: Callable[[], SecretStr] = secret_from_env(
|
||||
"TEST_KEY", default="default_value"
|
||||
)
|
||||
|
||||
# Assert that it returns the default value
|
||||
assert get_secret() == SecretStr("default_value")
|
||||
|
||||
|
||||
def test_secret_from_env_with_none_default(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
# Unset the environment variable
|
||||
monkeypatch.delenv("TEST_KEY", raising=False)
|
||||
|
||||
# Get the function with a default value of None
|
||||
get_secret: Callable[[], Optional[SecretStr]] = secret_from_env(
|
||||
"TEST_KEY", default=None
|
||||
)
|
||||
|
||||
# Assert that it returns None
|
||||
assert get_secret() is None
|
||||
|
||||
|
||||
def test_secret_from_env_without_default_raises_error(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
# Unset the environment variable
|
||||
monkeypatch.delenv("TEST_KEY", raising=False)
|
||||
|
||||
# Get the function without a default value
|
||||
get_secret: Callable[[], SecretStr] = secret_from_env("TEST_KEY")
|
||||
|
||||
# Assert that it raises a ValueError with the correct message
|
||||
with pytest.raises(ValueError, match="Did not find TEST_KEY"):
|
||||
get_secret()
|
||||
|
||||
|
||||
def test_secret_from_env_with_custom_error_message(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
# Unset the environment variable
|
||||
monkeypatch.delenv("TEST_KEY", raising=False)
|
||||
|
||||
# Get the function without a default value but with a custom error message
|
||||
get_secret: Callable[[], SecretStr] = secret_from_env(
|
||||
"TEST_KEY", error_message="Custom error message"
|
||||
)
|
||||
|
||||
# Assert that it raises a ValueError with the custom message
|
||||
with pytest.raises(ValueError, match="Custom error message"):
|
||||
get_secret()
|
||||
|
||||
|
||||
def test_using_secret_from_env_as_default_factory(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
from langchain_core.pydantic_v1 import BaseModel, Field
|
||||
|
||||
class Foo(BaseModel):
|
||||
secret: SecretStr = Field(default_factory=secret_from_env("TEST_KEY"))
|
||||
|
||||
# Pass the secret as a parameter
|
||||
foo = Foo(secret="super_secret") # type: ignore[arg-type]
|
||||
assert foo.secret.get_secret_value() == "super_secret"
|
||||
|
||||
# Set the environment variable
|
||||
monkeypatch.setenv("TEST_KEY", "secret_value")
|
||||
assert Foo().secret.get_secret_value() == "secret_value"
|
||||
|
||||
class Bar(BaseModel):
|
||||
secret: Optional[SecretStr] = Field(
|
||||
default_factory=secret_from_env("TEST_KEY_2", default=None)
|
||||
)
|
||||
|
||||
assert Bar().secret is None
|
||||
|
||||
class Buzz(BaseModel):
|
||||
secret: Optional[SecretStr] = Field(
|
||||
default_factory=secret_from_env("TEST_KEY_2", default="hello")
|
||||
)
|
||||
|
||||
# We know it will be SecretStr rather than Optional[SecretStr]
|
||||
assert Buzz().secret.get_secret_value() == "hello" # type: ignore
|
||||
|
||||
class OhMy(BaseModel):
|
||||
secret: Optional[SecretStr] = Field(
|
||||
default_factory=secret_from_env("FOOFOOFOOBAR")
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="Did not find FOOFOOFOOBAR"):
|
||||
OhMy()
|
||||
|
||||
@@ -1,69 +1,50 @@
 """Set of tests that complement the standard tests for vectorstore.

 These tests verify that the base abstraction does appropriate delegation to
 the relevant methods.
 """

 from __future__ import annotations

 import uuid
-from typing import Any, Dict, List, Optional, Sequence, Union
-
-from typing_extensions import TypedDict
+from typing import Any, Dict, Iterable, List, Optional, Sequence

 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
-from langchain_core.indexing import UpsertResponse
 from langchain_core.vectorstores import VectorStore


-def test_custom_upsert_type() -> None:
-    """Test that we can override the signature of the upsert method
-    of the VectorStore class without creating typing issues by violating
-    the Liskov Substitution Principle.
-    """
-
-    class ByVector(TypedDict):
-        document: Document
-        vector: List[float]
-
-    class CustomVectorStore(VectorStore):
-        def upsert(
-            # This unit test verifies that the signature of the upsert method
-            # specifically the items parameter can be overridden without
-            # violating the Liskov Substitution Principle (and getting
-            # typing errors).
-            self,
-            items: Union[Sequence[Document], Sequence[ByVector]],
-            /,
-            **kwargs: Any,
-        ) -> UpsertResponse:
-            raise NotImplementedError()


-class CustomSyncVectorStore(VectorStore):
-    """A vectorstore that only implements the synchronous methods."""
+class CustomAddTextsVectorstore(VectorStore):
+    """A vectorstore that only implements add texts."""

     def __init__(self) -> None:
         self.store: Dict[str, Document] = {}

-    def upsert(
+    def add_texts(
         self,
-        items: Sequence[Document],
-        /,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        # One of the kwargs should be `ids` which is a list of ids
+        # associated with the texts.
+        # This is not yet enforced in the type signature for backwards compatibility
+        # with existing implementations.
+        ids: Optional[List[str]] = None,
         **kwargs: Any,
-    ) -> UpsertResponse:
-        ids = []
-        for item in items:
-            if item.id is None:
-                new_item = item.copy()
-                id_: str = str(uuid.uuid4())
-                new_item.id = id_
-            else:
-                id_ = item.id
-                new_item = item
+    ) -> List[str]:
+        if not isinstance(texts, list):
+            texts = list(texts)
+        ids_iter = iter(ids or [])

-            self.store[id_] = new_item
-            ids.append(id_)
+        ids_ = []

-        return {
-            "succeeded": ids,
-            "failed": [],
-        }
+        metadatas_ = metadatas or [{} for _ in texts]

+        for text, metadata in zip(texts, metadatas_ or []):
+            next_id = next(ids_iter, None)
+            id_ = next_id or str(uuid.uuid4())
+            self.store[id_] = Document(page_content=text, metadata=metadata, id=id_)
+            ids_.append(id_)
+        return ids_

     def get_by_ids(self, ids: Sequence[str], /) -> List[Document]:
         return [self.store[id] for id in ids if id in self.store]
@@ -74,8 +55,8 @@ class CustomSyncVectorStore(VectorStore):
         embedding: Embeddings,
         metadatas: Optional[List[dict]] = None,
         **kwargs: Any,
-    ) -> CustomSyncVectorStore:
-        vectorstore = CustomSyncVectorStore()
+    ) -> CustomAddTextsVectorstore:
+        vectorstore = CustomAddTextsVectorstore()
         vectorstore.add_texts(texts, metadatas=metadatas, **kwargs)
         return vectorstore

@@ -85,30 +66,38 @@ class CustomSyncVectorStore(VectorStore):
         raise NotImplementedError()


-def test_implement_upsert() -> None:
+def test_default_add_documents() -> None:
     """Test that we can implement the upsert method of the CustomVectorStore
     class without violating the Liskov Substitution Principle.
     """

-    store = CustomSyncVectorStore()
+    store = CustomAddTextsVectorstore()

-    # Check upsert with id
-    assert store.upsert([Document(id="1", page_content="hello")]) == {
-        "succeeded": ["1"],
-        "failed": [],
-    }
+    assert store.add_documents([Document(id="1", page_content="hello")]) == ["1"]

     assert store.get_by_ids(["1"]) == [Document(id="1", page_content="hello")]

-    # Check upsert without id
-    response = store.upsert([Document(page_content="world")])
-    assert len(response["succeeded"]) == 1
-    id_ = response["succeeded"][0]
-    assert id_ is not None
-    assert store.get_by_ids([id_]) == [Document(id=id_, page_content="world")]
+    ids = store.add_documents([Document(page_content="world")])
+    assert len(ids) == 1
+    assert store.get_by_ids(ids) == [Document(id=ids[0], page_content="world")]

+    # Check that add_documents works
+    assert store.add_documents([Document(id="5", page_content="baz")]) == ["5"]

+    # Test add documents with id specified in both document and ids
+    original_document = Document(id="7", page_content="baz")
+    assert store.add_documents([original_document], ids=["6"]) == ["6"]
+    assert original_document.id == "7"  # original document should not be modified
+    assert store.get_by_ids(["6"]) == [Document(id="6", page_content="baz")]


+def test_default_add_texts() -> None:
+    store = CustomAddTextsVectorstore()
     # Check that default implementation of add_texts works
     assert store.add_texts(["hello", "world"], ids=["3", "4"]) == ["3", "4"]

     assert store.get_by_ids(["3", "4"]) == [
         Document(id="3", page_content="hello"),
         Document(id="4", page_content="world"),
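Note that `test_default_add_documents` above passes `Document` objects to a store that only implements `add_texts`, relying on the default `VectorStore.add_documents`, which unpacks the documents and delegates. A sketch of that delegation (the exact `langchain_core` code also reconciles an explicit `ids` kwarg against ids carried on the documents):

.. code-block:: python

    from typing import Any, List

    from langchain_core.documents import Document
    from langchain_core.vectorstores import VectorStore


    def add_documents_via_add_texts(
        store: VectorStore, documents: List[Document], **kwargs: Any
    ) -> List[str]:
        """Sketch of the default add_documents -> add_texts delegation."""
        texts = [doc.page_content for doc in documents]
        metadatas = [doc.metadata for doc in documents]
        if "ids" not in kwargs and any(doc.id for doc in documents):
            # ids carried on the documents themselves are forwarded to add_texts
            kwargs["ids"] = [doc.id for doc in documents]
        return store.add_texts(texts, metadatas=metadatas, **kwargs)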
@@ -130,39 +119,37 @@ def test_implement_upsert() -> None:
         Document(id=ids_2[1], page_content="bar", metadata={"foo": "bar"}),
     ]

-    # Check that add_documents works
-    assert store.add_documents([Document(id="5", page_content="baz")]) == ["5"]
-
-    # Test add documents with id specified in both document and ids
-    original_document = Document(id="7", page_content="baz")
-    assert store.add_documents([original_document], ids=["6"]) == ["6"]
-    assert original_document.id == "7"  # original document should not be modified
-    assert store.get_by_ids(["6"]) == [Document(id="6", page_content="baz")]


-async def test_aupsert_delegation_to_upsert() -> None:
-    """Test delegation to the synchronous upsert method in async execution
-    if async methods are not implemented.
-    """
-    store = CustomSyncVectorStore()
+async def test_default_aadd_documents() -> None:
+    """Test delegation to the synchronous method."""
+    store = CustomAddTextsVectorstore()

-    # Check upsert with id
-    assert await store.aupsert([Document(id="1", page_content="hello")]) == {
-        "succeeded": ["1"],
-        "failed": [],
-    }
+    assert await store.aadd_documents([Document(id="1", page_content="hello")]) == ["1"]

     assert await store.aget_by_ids(["1"]) == [Document(id="1", page_content="hello")]

-    # Check upsert without id
-    response = await store.aupsert([Document(page_content="world")])
-    assert len(response["succeeded"]) == 1
-    id_ = response["succeeded"][0]
-    assert id_ is not None
-    assert await store.aget_by_ids([id_]) == [Document(id=id_, page_content="world")]
+    ids = await store.aadd_documents([Document(page_content="world")])
+    assert len(ids) == 1
+    assert await store.aget_by_ids(ids) == [Document(id=ids[0], page_content="world")]

+    # Check that add_documents works
+    assert await store.aadd_documents([Document(id="5", page_content="baz")]) == ["5"]

+    # Test add documents with id specified in both document and ids
+    original_document = Document(id="7", page_content="baz")
+    assert await store.aadd_documents([original_document], ids=["6"]) == ["6"]
+    assert original_document.id == "7"  # original document should not be modified
+    assert await store.aget_by_ids(["6"]) == [Document(id="6", page_content="baz")]


+async def test_default_aadd_texts() -> None:
+    """Test delegation to the synchronous method."""
+    store = CustomAddTextsVectorstore()
     # Check that default implementation of add_texts works
     assert await store.aadd_texts(["hello", "world"], ids=["3", "4"]) == ["3", "4"]

     assert await store.aget_by_ids(["3", "4"]) == [
         Document(id="3", page_content="hello"),
         Document(id="4", page_content="world"),
@@ -183,12 +170,3 @@ async def test_aupsert_delegation_to_upsert() -> None:
         Document(id=ids_2[0], page_content="foo", metadata={"foo": "bar"}),
         Document(id=ids_2[1], page_content="bar", metadata={"foo": "bar"}),
     ]
-
-    # Check that add_documents works
-    assert await store.aadd_documents([Document(id="5", page_content="baz")]) == ["5"]
-
-    # Test add documents with id specified in both document and ids
-    original_document = Document(id="7", page_content="baz")
-    assert await store.aadd_documents([original_document], ids=["6"]) == ["6"]
-    assert original_document.id == "7"  # original document should not be modified
-    assert await store.aget_by_ids(["6"]) == [Document(id="6", page_content="baz")]

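The async tests above call `aadd_documents`/`aadd_texts`/`aget_by_ids` on a store that defines no async methods; the base class falls back to running the sync counterpart in an executor so the event loop is not blocked. A sketch of that fallback pattern, using `run_in_executor` as the base class does (the class here is a stand-in, not the real abstraction):

.. code-block:: python

    from typing import Iterable, List

    from langchain_core.runnables.config import run_in_executor


    class SyncOnlyStoreSketch:
        def __init__(self) -> None:
            self.texts: List[str] = []

        def add_texts(self, texts: Iterable[str]) -> List[str]:
            # Blocking, synchronous implementation
            self.texts.extend(texts)
            return list(self.texts)

        async def aadd_texts(self, texts: Iterable[str]) -> List[str]:
            # Delegate to the sync method on the default executor
            return await run_in_executor(None, self.add_texts, texts)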
@@ -2,6 +2,7 @@

 from typing import Any, Dict, Optional

+from langchain_core._api import deprecated
 from langchain_core.callbacks.base import BaseCallbackManager
 from langchain_core.language_models import BaseLanguageModel

@@ -15,6 +16,16 @@ from langchain.agents.mrkl.base import ZeroShotAgent
 from langchain.chains.llm import LLMChain


+@deprecated(
+    since="0.2.13",
+    removal="1.0",
+    message=(
+        "See API reference for this function for a replacement implementation: "
+        "https://api.python.langchain.com/en/latest/agents/langchain.agents.agent_toolkits.vectorstore.base.create_vectorstore_agent.html "  # noqa: E501
+        "Read more here on how to create agents that query vector stores: "
+        "https://python.langchain.com/v0.2/docs/how_to/qa_chat_history_how_to/#agents"
+    ),
+)
 def create_vectorstore_agent(
     llm: BaseLanguageModel,
     toolkit: VectorStoreToolkit,
@@ -26,6 +37,44 @@ def create_vectorstore_agent(
 ) -> AgentExecutor:
     """Construct a VectorStore agent from an LLM and tools.

+    Note: this class is deprecated. See below for a replacement that uses tool
+    calling methods and LangGraph. Install LangGraph with:
+
+    .. code-block:: bash
+
+        pip install -U langgraph
+
+    .. code-block:: python
+
+        from langchain_core.tools import create_retriever_tool
+        from langchain_core.vectorstores import InMemoryVectorStore
+        from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+        from langgraph.prebuilt import create_react_agent
+
+        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+
+        vector_store = InMemoryVectorStore.from_texts(
+            [
+                "Dogs are great companions, known for their loyalty and friendliness.",
+                "Cats are independent pets that often enjoy their own space.",
+            ],
+            OpenAIEmbeddings(),
+        )
+
+        tool = create_retriever_tool(
+            vector_store.as_retriever(),
+            "pet_information_retriever",
+            "Fetches information about pets.",
+        )
+
+        agent = create_react_agent(llm, [tool])
+
+        for step in agent.stream(
+            {"messages": [("human", "What are dogs known for?")]},
+            stream_mode="values",
+        ):
+            step["messages"][-1].pretty_print()
+
     Args:
         llm (BaseLanguageModel): LLM that will be used by the agent
         toolkit (VectorStoreToolkit): Set of tools for the agent
@@ -56,6 +105,16 @@ def create_vectorstore_agent(
     )


+@deprecated(
+    since="0.2.13",
+    removal="1.0",
+    message=(
+        "See API reference for this function for a replacement implementation: "
+        "https://api.python.langchain.com/en/latest/agents/langchain.agents.agent_toolkits.vectorstore.base.create_vectorstore_router_agent.html "  # noqa: E501
+        "Read more here on how to create agents that query vector stores: "
+        "https://python.langchain.com/v0.2/docs/how_to/qa_chat_history_how_to/#agents"
+    ),
+)
 def create_vectorstore_router_agent(
     llm: BaseLanguageModel,
     toolkit: VectorStoreRouterToolkit,
@@ -67,6 +126,59 @@ def create_vectorstore_router_agent(
 ) -> AgentExecutor:
     """Construct a VectorStore router agent from an LLM and tools.

+    Note: this class is deprecated. See below for a replacement that uses tool
+    calling methods and LangGraph. Install LangGraph with:
+
+    .. code-block:: bash
+
+        pip install -U langgraph
+
+    .. code-block:: python
+
+        from langchain_core.tools import create_retriever_tool
+        from langchain_core.vectorstores import InMemoryVectorStore
+        from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+        from langgraph.prebuilt import create_react_agent
+
+        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+
+        pet_vector_store = InMemoryVectorStore.from_texts(
+            [
+                "Dogs are great companions, known for their loyalty and friendliness.",
+                "Cats are independent pets that often enjoy their own space.",
+            ],
+            OpenAIEmbeddings(),
+        )
+
+        food_vector_store = InMemoryVectorStore.from_texts(
+            [
+                "Carrots are orange and delicious.",
+                "Apples are red and delicious.",
+            ],
+            OpenAIEmbeddings(),
+        )
+
+        tools = [
+            create_retriever_tool(
+                pet_vector_store.as_retriever(),
+                "pet_information_retriever",
+                "Fetches information about pets.",
+            ),
+            create_retriever_tool(
+                food_vector_store.as_retriever(),
+                "food_information_retriever",
+                "Fetches information about food.",
+            )
+        ]
+
+        agent = create_react_agent(llm, tools)
+
+        for step in agent.stream(
+            {"messages": [("human", "Tell me about carrots.")]},
+            stream_mode="values",
+        ):
+            step["messages"][-1].pretty_print()
+
     Args:
         llm (BaseLanguageModel): LLM that will be used by the agent
         toolkit (VectorStoreRouterToolkit): Set of tools for the agent which have routing capability with multiple vector stores
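The `@deprecated(...)` decorator applied throughout this diff leaves the wrapped function or class fully functional and emits a `LangChainDeprecationWarning` (built from `since`, `removal`, and `message`) when it is used. A hedged usage sketch (the decorator accepts more parameters than shown here):

.. code-block:: python

    import warnings

    from langchain_core._api import deprecated


    @deprecated(since="0.2.13", removal="1.0", message="Use new_thing instead.")
    def old_thing() -> str:
        return "still works"


    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        assert old_thing() == "still works"  # behavior is unchanged
    print(caught[0].category.__name__)  # LangChainDeprecationWarning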
@@ -59,6 +59,7 @@ _module_lookup = {
     "OpenAIModerationChain": "langchain.chains.moderation",
     "NatBotChain": "langchain.chains.natbot.base",
     "create_citation_fuzzy_match_chain": "langchain.chains.openai_functions",
+    "create_citation_fuzzy_match_runnable": "langchain.chains.openai_functions",
     "create_extraction_chain": "langchain.chains.openai_functions",
     "create_extraction_chain_pydantic": "langchain.chains.openai_functions",
     "create_qa_with_sources_chain": "langchain.chains.openai_functions",
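`_module_lookup` maps each exported name to the module that defines it; `langchain.chains` resolves attributes lazily through a module-level `__getattr__` (PEP 562). A simplified sketch of the mechanism as it would appear in a package `__init__.py`:

.. code-block:: python

    import importlib
    from typing import Any

    _module_lookup = {
        "create_citation_fuzzy_match_runnable": "langchain.chains.openai_functions",
    }


    def __getattr__(name: str) -> Any:
        # Import the defining module only on first attribute access
        if name in _module_lookup:
            module = importlib.import_module(_module_lookup[name])
            return getattr(module, name)
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")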
@@ -5,6 +5,7 @@ from __future__ import annotations
 from typing import Any, Dict, List, Optional, Sequence, Tuple
 from urllib.parse import urlparse

+from langchain_core._api import deprecated
 from langchain_core.callbacks import (
     AsyncCallbackManagerForChainRun,
     CallbackManagerForChainRun,
@@ -53,6 +54,15 @@ def _check_in_allowed_domain(url: str, limit_to_domains: Sequence[str]) -> bool:
 try:
     from langchain_community.utilities.requests import TextRequestsWrapper

+    @deprecated(
+        since="0.2.13",
+        message=(
+            "This class is deprecated and will be removed in langchain 1.0. "
+            "See API reference for replacement: "
+            "https://api.python.langchain.com/en/latest/chains/langchain.chains.api.base.APIChain.html"  # noqa: E501
+        ),
+        removal="1.0",
+    )
     class APIChain(Chain):
         """Chain that makes API calls and summarizes the responses to answer a question.

@@ -69,7 +79,117 @@ try:
             what network access it has.

             See https://python.langchain.com/docs/security for more information.
-        """
+
+        Note: this class is deprecated. See below for a replacement implementation
+        using LangGraph. The benefits of this implementation are:
+
+        - Uses LLM tool calling features to encourage properly-formatted API requests;
+        - Support for both token-by-token and step-by-step streaming;
+        - Support for checkpointing and memory of chat history;
+        - Easier to modify or extend (e.g., with additional tools, structured responses, etc.)
+
+        Install LangGraph with:
+
+        .. code-block:: bash
+
+            pip install -U langgraph
+
+        .. code-block:: python
+
+            from typing import Annotated, Sequence
+            from typing_extensions import TypedDict
+
+            from langchain.chains.api.prompt import API_URL_PROMPT
+            from langchain_community.agent_toolkits.openapi.toolkit import RequestsToolkit
+            from langchain_community.utilities.requests import TextRequestsWrapper
+            from langchain_core.messages import BaseMessage
+            from langchain_core.prompts import ChatPromptTemplate
+            from langchain_openai import ChatOpenAI
+            from langchain_core.runnables import RunnableConfig
+            from langgraph.graph import END, StateGraph
+            from langgraph.graph.message import add_messages
+            from langgraph.prebuilt.tool_node import ToolNode
+
+            # NOTE: There are inherent risks in giving models discretion
+            # to execute real-world actions. We must "opt-in" to these
+            # risks by setting allow_dangerous_request=True to use these tools.
+            # This can be dangerous for calling unwanted requests. Please make
+            # sure your custom OpenAPI spec (yaml) is safe and that permissions
+            # associated with the tools are narrowly-scoped.
+            ALLOW_DANGEROUS_REQUESTS = True
+
+            # Subset of spec for https://jsonplaceholder.typicode.com
+            api_spec = \"\"\"
+            openapi: 3.0.0
+            info:
+              title: JSONPlaceholder API
+              version: 1.0.0
+            servers:
+              - url: https://jsonplaceholder.typicode.com
+            paths:
+              /posts:
+                get:
+                  summary: Get posts
+                  parameters: &id001
+                    - name: _limit
+                      in: query
+                      required: false
+                      schema:
+                        type: integer
+                        example: 2
+                      description: Limit the number of results
+            \"\"\"
+
+            llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+            toolkit = RequestsToolkit(
+                requests_wrapper=TextRequestsWrapper(headers={}),  # no auth required
+                allow_dangerous_requests=ALLOW_DANGEROUS_REQUESTS,
+            )
+            tools = toolkit.get_tools()
+
+            api_request_chain = (
+                API_URL_PROMPT.partial(api_docs=api_spec)
+                | llm.bind_tools(tools, tool_choice="any")
+            )
+
+            class ChainState(TypedDict):
+                \"\"\"LangGraph state.\"\"\"
+
+                messages: Annotated[Sequence[BaseMessage], add_messages]
+
+
+            async def acall_request_chain(state: ChainState, config: RunnableConfig):
+                last_message = state["messages"][-1]
+                response = await api_request_chain.ainvoke(
+                    {"question": last_message.content}, config
+                )
+                return {"messages": [response]}
+
+            async def acall_model(state: ChainState, config: RunnableConfig):
+                response = await llm.ainvoke(state["messages"], config)
+                return {"messages": [response]}
+
+            graph_builder = StateGraph(ChainState)
+            graph_builder.add_node("call_tool", acall_request_chain)
+            graph_builder.add_node("execute_tool", ToolNode(tools))
+            graph_builder.add_node("call_model", acall_model)
+            graph_builder.set_entry_point("call_tool")
+            graph_builder.add_edge("call_tool", "execute_tool")
+            graph_builder.add_edge("execute_tool", "call_model")
+            graph_builder.add_edge("call_model", END)
+            chain = graph_builder.compile()
+
+        .. code-block:: python
+
+            example_query = "Fetch the top two posts. What are their titles?"
+
+            events = chain.astream(
+                {"messages": [("user", example_query)]},
+                stream_mode="values",
+            )
+            async for event in events:
+                event["messages"][-1].pretty_print()
+        """  # noqa: E501

         api_request_chain: LLMChain
         api_answer_chain: LLMChain
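The replacement graph in the docstring above defines its nodes as async functions, so it should be driven through the async Runnable surface. When streaming is not needed, a sketch of collecting only the final state (reusing `chain` and `example_query` from the docstring example; run it inside an async context):

.. code-block:: python

    # e.g. inside `async def main()` driven by asyncio.run(main())
    result = await chain.ainvoke({"messages": [("user", example_query)]})
    result["messages"][-1].pretty_print()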
@@ -25,7 +25,7 @@ class MapRerankDocumentsChain(BaseCombineDocumentsChain):
     Example:
         .. code-block:: python

-            from langchain.chains import StuffDocumentsChain, LLMChain
+            from langchain.chains import MapRerankDocumentsChain, LLMChain
             from langchain_core.prompts import PromptTemplate
             from langchain_community.llms import OpenAI
             from langchain.output_parsers.regex import RegexParser
@@ -39,7 +39,7 @@ class MapRerankDocumentsChain(BaseCombineDocumentsChain):
             prompt_template = (
                 "Use the following context to tell me the chemical formula "
                 "for water. Output both your answer and a score of how confident "
-                "you are. Context: {content}"
+                "you are. Context: {context}"
             )
             output_parser = RegexParser(
                 regex=r"(.*?)\nScore: (.*)",

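The hunk above is cut off right after `regex=...`. For context, a `RegexParser` configured this way also names its capture groups via `output_keys`; the rank key (`score`) is what `MapRerankDocumentsChain` sorts on. A plausible completion of the truncated snippet (illustrative, not taken from this diff):

.. code-block:: python

    from langchain.output_parsers.regex import RegexParser

    output_parser = RegexParser(
        regex=r"(.*?)\nScore: (.*)",
        # first capture group -> "answer", second -> "score"
        output_keys=["answer", "score"],
    )
    assert output_parser.parse("H2O\nScore: 95") == {"answer": "H2O", "score": "95"}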
@@ -2,6 +2,7 @@

 from typing import Any, Dict, List, Optional, Tuple

+from langchain_core._api import deprecated
 from langchain_core.callbacks import Callbacks
 from langchain_core.documents import Document
 from langchain_core.language_models import LanguageModelLike
@@ -95,6 +96,15 @@ def create_stuff_documents_chain(
     ).with_config(run_name="stuff_documents_chain")


+@deprecated(
+    since="0.2.13",
+    removal="1.0",
+    message=(
+        "This class is deprecated. Use the `create_stuff_documents_chain` constructor "
+        "instead. See migration guide here: "
+        "https://python.langchain.com/v0.2/docs/versions/migrating_chains/stuff_docs_chain/"  # noqa: E501
+    ),
+)
 class StuffDocumentsChain(BaseCombineDocumentsChain):
     """Chain that combines documents by stuffing into context.

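For readers following the migration pointer in the deprecation message, `create_stuff_documents_chain` returns an LCEL Runnable that formats the documents into the prompt and parses the model output. A hedged usage sketch (model and prompt are illustrative):

.. code-block:: python

    from langchain.chains.combine_documents import create_stuff_documents_chain
    from langchain_core.documents import Document
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI

    prompt = ChatPromptTemplate.from_messages(
        [("system", "Summarize this content: {context}")]
    )
    chain = create_stuff_documents_chain(ChatOpenAI(model="gpt-4o-mini"), prompt)

    # Documents are expected under the "context" key by default
    summary = chain.invoke(
        {"context": [Document(page_content="LangChain is a framework for LLM apps.")]}
    )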
@@ -6,14 +6,14 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional

 from langchain_core.callbacks import CallbackManagerForChainRun
 from langchain_core.language_models import BaseLanguageModel
-from langchain_core.output_parsers import BaseLLMOutputParser
+from langchain_core.output_parsers import BaseOutputParser, StrOutputParser
 from langchain_core.output_parsers.json import SimpleJsonOutputParser
 from langchain_core.prompts import BasePromptTemplate
 from langchain_core.pydantic_v1 import root_validator
+from langchain_core.runnables import Runnable

 from langchain.chains.base import Chain
 from langchain.chains.elasticsearch_database.prompts import ANSWER_PROMPT, DSL_PROMPT
-from langchain.chains.llm import LLMChain

 if TYPE_CHECKING:
     from elasticsearch import Elasticsearch
@@ -35,9 +35,9 @@ class ElasticsearchDatabaseChain(Chain):
         db_chain = ElasticsearchDatabaseChain.from_llm(OpenAI(), database)
     """

-    query_chain: LLMChain
+    query_chain: Runnable
     """Chain for creating the ES query."""
-    answer_chain: LLMChain
+    answer_chain: Runnable
     """Chain for answering the user question."""
     database: Any
     """Elasticsearch database to connect to of type elasticsearch.Elasticsearch."""
@@ -135,9 +135,9 @@ class ElasticsearchDatabaseChain(Chain):
         intermediate_steps: List = []
         try:
             intermediate_steps.append(query_inputs)  # input: es generation
-            es_cmd = self.query_chain.run(
-                callbacks=_run_manager.get_child(),
-                **query_inputs,
+            es_cmd = self.query_chain.invoke(
+                query_inputs,
+                config={"callbacks": _run_manager.get_child()},
             )

             _run_manager.on_text(es_cmd, color="green", verbose=self.verbose)
@@ -154,9 +154,9 @@ class ElasticsearchDatabaseChain(Chain):
             _run_manager.on_text("\nAnswer:", verbose=self.verbose)
             answer_inputs: dict = {"data": result, "input": input_text}
             intermediate_steps.append(answer_inputs)  # input: final answer
-            final_result = self.answer_chain.run(
-                callbacks=_run_manager.get_child(),
-                **answer_inputs,
+            final_result = self.answer_chain.invoke(
+                answer_inputs,
+                config={"callbacks": _run_manager.get_child()},
             )

             intermediate_steps.append(final_result)  # output: final answer
@@ -183,7 +183,7 @@ class ElasticsearchDatabaseChain(Chain):
         *,
         query_prompt: Optional[BasePromptTemplate] = None,
         answer_prompt: Optional[BasePromptTemplate] = None,
-        query_output_parser: Optional[BaseLLMOutputParser] = None,
+        query_output_parser: Optional[BaseOutputParser] = None,
         **kwargs: Any,
     ) -> ElasticsearchDatabaseChain:
         """Convenience method to construct ElasticsearchDatabaseChain from an LLM.
@@ -199,11 +199,9 @@ class ElasticsearchDatabaseChain(Chain):
         """
         query_prompt = query_prompt or DSL_PROMPT
         query_output_parser = query_output_parser or SimpleJsonOutputParser()
-        query_chain = LLMChain(
-            llm=llm, prompt=query_prompt, output_parser=query_output_parser
-        )
+        query_chain = query_prompt | llm | query_output_parser
         answer_prompt = answer_prompt or ANSWER_PROMPT
-        answer_chain = LLMChain(llm=llm, prompt=answer_prompt)
+        answer_chain = answer_prompt | llm | StrOutputParser()
         return cls(
             query_chain=query_chain,
             answer_chain=answer_chain,
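The pattern in the hunks above is the general `LLMChain` to LCEL migration: `chain.run(callbacks=..., **inputs)` becomes `runnable.invoke(inputs, config={"callbacks": ...})`, with an explicit output parser fixing the result type. A self-contained sketch using a fake model as a stand-in:

.. code-block:: python

    from langchain_core.language_models import FakeListLLM
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import PromptTemplate

    prompt = PromptTemplate.from_template("Question: {question}")
    llm = FakeListLLM(responses=["42"])  # stand-in model for the sketch

    # Before: LLMChain(llm=llm, prompt=prompt).run(callbacks=cb, question="...")
    # After: explicit pipeline, dict input, callbacks passed via config
    chain = prompt | llm | StrOutputParser()
    text = chain.invoke({"question": "meaning of life?"}, config={"callbacks": []})
    assert text == "42"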
@@ -1,11 +1,10 @@
 from typing import List

 from langchain_core.language_models import BaseLanguageModel
+from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts.few_shot import FewShotPromptTemplate
 from langchain_core.prompts.prompt import PromptTemplate

-from langchain.chains.llm import LLMChain
-
 TEST_GEN_TEMPLATE_SUFFIX = "Add another example."


@@ -19,5 +18,5 @@ def generate_example(
         input_variables=[],
         example_prompt=prompt_template,
     )
-    chain = LLMChain(llm=llm, prompt=prompt)
-    return chain.predict()
+    chain = prompt | llm | StrOutputParser()
+    return chain.invoke({})

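The public signature of `generate_example` is unchanged by this migration; only its internals moved from `LLMChain.predict()` to an LCEL pipeline. A hedged usage sketch (example dicts and template are illustrative; a fake model stands in for a real LLM):

.. code-block:: python

    from langchain.chains.example_generator import generate_example
    from langchain_core.language_models import FakeListLLM
    from langchain_core.prompts.prompt import PromptTemplate

    prompt_template = PromptTemplate.from_template("Q: {question}\nA: {answer}")
    llm = FakeListLLM(responses=["Q: 1+2\nA: 3"])

    new_example = generate_example(
        [{"question": "2+2", "answer": "4"}],
        llm,
        prompt_template,
    )
    print(new_example)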
@@ -5,6 +5,7 @@ from __future__ import annotations
 import warnings
 from typing import Any, Dict, List, Optional

+from langchain_core._api import deprecated
 from langchain_core.callbacks import CallbackManagerForChainRun
 from langchain_core.language_models import BaseLanguageModel
 from langchain_core.prompts import PromptTemplate
@@ -63,6 +64,15 @@ def _load_question_to_checked_assertions_chain(
     return question_to_checked_assertions_chain


+@deprecated(
+    since="0.2.13",
+    message=(
+        "See LangGraph guides for a variety of self-reflection and corrective "
+        "strategies for question-answering and other tasks: "
+        "https://langchain-ai.github.io/langgraph/tutorials/rag/langgraph_self_rag/"
+    ),
+    removal="1.0",
+)
 class LLMCheckerChain(Chain):
     """Chain for question-answering with self-verification.

@@ -6,6 +6,7 @@ import warnings
 from pathlib import Path
 from typing import Any, Dict, List, Optional

+from langchain_core._api import deprecated
 from langchain_core.callbacks import CallbackManagerForChainRun
 from langchain_core.language_models import BaseLanguageModel
 from langchain_core.prompts.prompt import PromptTemplate
@@ -65,6 +66,15 @@ def _load_sequential_chain(
     return chain


+@deprecated(
+    since="0.2.13",
+    message=(
+        "See LangGraph guides for a variety of self-reflection and corrective "
+        "strategies for question-answering and other tasks: "
+        "https://langchain-ai.github.io/langgraph/tutorials/rag/langgraph_self_rag/"
+    ),
+    removal="1.0",
+)
 class LLMSummarizationCheckerChain(Chain):
     """Chain for question-answering with self-verification.

Some files were not shown because too many files have changed in this diff.