Mirror of https://github.com/hwchase17/langchain.git (synced 2026-02-13 06:16:26 +00:00)

Compare commits (48 commits): eugene/cle ... langchain=
| SHA1 |
|---|
| 06f8bd9946 |
| 252f0877d1 |
| 217a915b29 |
| 056c7c2983 |
| 1adc161642 |
| deb27d8970 |
| 5efd0fe9ae |
| 1c9917dfa2 |
| ccff1ba8b8 |
| 53ee5770d3 |
| 8626abf8b5 |
| 1af8456a2c |
| 0a3500808d |
| ee8a585791 |
| e77eeee6ee |
| 9927a4866d |
| 420534c8ca |
| 794f28d4e2 |
| f28ae20b81 |
| 9f0eda6a18 |
| 472527166f |
| 074fa0db73 |
| 4fd1efc48f |
| aa2722cbe2 |
| a82c0533f2 |
| bc60cddc1b |
| 43deed2a95 |
| 9cd608efb3 |
| fd546196ef |
| 6dd9f053e3 |
| ca9dcee940 |
| dadb6f1445 |
| b6f0174bb9 |
| c3ced4c6ce |
| bd6c31617e |
| 6e57aa7c36 |
| a2b4c33bd6 |
| 4825dc0d76 |
| 02300471be |
| 66b7206ab6 |
| c81c77b465 |
| 3b7437d184 |
| 91ea4b7449 |
| 652b3fa4a4 |
| 7040013140 |
| dc7423e88f |
| 25f2e25be1 |
| 786ef021a3 |
@@ -39,7 +39,7 @@
"metadata": {},
"outputs": [],
"source": [
"! pip install langchain langchain-chroma unstructured[all-docs] pydantic lxml langchainhub"
"! pip install langchain langchain-chroma \"unstructured[all-docs]\" pydantic lxml langchainhub"
]
},
{
@@ -59,7 +59,7 @@
"metadata": {},
"outputs": [],
"source": [
"! pip install langchain langchain-chroma unstructured[all-docs] pydantic lxml"
"! pip install langchain langchain-chroma \"unstructured[all-docs]\" pydantic lxml"
]
},
{
@@ -59,7 +59,7 @@
"metadata": {},
"outputs": [],
"source": [
"! pip install langchain langchain-chroma unstructured[all-docs] pydantic lxml"
"! pip install langchain langchain-chroma \"unstructured[all-docs]\" pydantic lxml"
]
},
{
@@ -28,7 +28,7 @@
"\n",
"You can use arbitrary functions as [Runnables](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.Runnable.html#langchain_core.runnables.base.Runnable). This is useful for formatting or when you need functionality not provided by other LangChain components, and custom functions used as Runnables are called [`RunnableLambdas`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableLambda.html).\n",
"\n",
"Note that all inputs to these functions need to be a SINGLE argument. If you have a function that accepts multiple arguments, you should write a wrapper that accepts a single dict input and unpacks it into multiple argument.\n",
"Note that all inputs to these functions need to be a SINGLE argument. If you have a function that accepts multiple arguments, you should write a wrapper that accepts a single dict input and unpacks it into multiple arguments.\n",
"\n",
"This guide will cover:\n",
"\n",
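The hunk above fixes a typo in the RunnableLambda guide; for context, a minimal sketch of the single-dict-argument convention it describes (the `multiply` helper and its inputs are illustrative, not part of the diff):

```python
from langchain_core.runnables import RunnableLambda


# RunnableLambda wraps a plain function, which must accept a SINGLE argument.
def multiply(inputs: dict) -> int:
    # Unpack one dict input into the multiple values the computation needs.
    return inputs["a"] * inputs["b"]


runnable = RunnableLambda(multiply)
print(runnable.invoke({"a": 3, "b": 4}))  # -> 12
```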
@@ -721,9 +721,9 @@
"metadata": {},
"outputs": [],
"source": [
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
"from langgraph.checkpoint.memory import MemorySaver\n",
"\n",
"memory = SqliteSaver.from_conn_string(\":memory:\")\n",
"memory = MemorySaver()\n",
"\n",
"agent_executor = create_react_agent(llm, tools, checkpointer=memory)"
]
@@ -890,9 +890,9 @@
"from langchain_community.document_loaders import WebBaseLoader\n",
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
"from langgraph.checkpoint.memory import MemorySaver\n",
"\n",
"memory = SqliteSaver.from_conn_string(\":memory:\")\n",
"memory = MemorySaver()\n",
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
"\n",
"\n",
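Both hunks above replace the SQLite-backed checkpointer with the in-memory one; a minimal sketch of the resulting pattern, assuming an OpenAI chat model and an empty tool list purely for illustration:

```python
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent

# MemorySaver stores checkpoints in process memory, replacing the
# SqliteSaver.from_conn_string(":memory:") pattern removed above.
memory = MemorySaver()
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
agent_executor = create_react_agent(llm, [], checkpointer=memory)

# The checkpointer keys conversation state by thread_id.
config = {"configurable": {"thread_id": "abc123"}}
agent_executor.invoke({"messages": [("user", "hi!")]}, config)
```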
File diff suppressed because one or more lines are too long
docs/docs/integrations/document_loaders/pypdfloader.ipynb (new file, 182 lines)
@@ -0,0 +1,182 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# PyPDFLoader\n",
"\n",
"This notebook provides a quick overview for getting started with `PyPDF` [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all `PyPDFLoader` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFLoader.html).\n",
"\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"\n",
"| Class | Package | Local | Serializable | JS support|\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [PyPDFLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ | \n",
"### Loader features\n",
"| Source | Document Lazy Loading | Native Async Support |\n",
"| :---: | :---: | :---: | \n",
"| PyPDFLoader | ✅ | ❌ | \n",
"\n",
"## Setup\n",
"\n",
"### Credentials\n",
"\n",
"No credentials are required to use `PyPDFLoader`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Installation\n",
"\n",
"To use `PyPDFLoader` you need to have the `langchain-community` python package installed:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain_community"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialization\n",
"\n",
"Now we can instantiate our model object and load documents:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders import PyPDFLoader\n",
"\n",
"loader = PyPDFLoader(\n",
"    \"./example_data/layout-parser-paper.pdf\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'page': 0}, page_content='LayoutParser : A Unified Toolkit for Deep\\nLearning Based Document Image Analysis\\nZejiang Shen1( \\x00), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain\\nLee4, Jacob Carlson3, and Weining Li5\\n1Allen Institute for AI\\nshannons@allenai.org\\n2Brown University\\nruochen zhang@brown.edu\\n3Harvard University\\n{melissadell,jacob carlson }@fas.harvard.edu\\n4University of Washington\\nbcgl@cs.washington.edu\\n5University of Waterloo\\nw422li@uwaterloo.ca\\nAbstract. Recent advances in document image analysis (DIA) have been\\nprimarily driven by the application of neural networks. Ideally, research\\noutcomes could be easily deployed in production and extended for further\\ninvestigation. However, various factors like loosely organized codebases\\nand sophisticated model configurations complicate the easy reuse of im-\\nportant innovations by a wide audience. Though there have been on-going\\nefforts to improve reusability and simplify deep learning (DL) model\\ndevelopment in disciplines like natural language processing and computer\\nvision, none of them are optimized for challenges in the domain of DIA.\\nThis represents a major gap in the existing toolkit, as DIA is central to\\nacademic research across a wide range of disciplines in the social sciences\\nand humanities. This paper introduces LayoutParser , an open-source\\nlibrary for streamlining the usage of DL in DIA research and applica-\\ntions. The core LayoutParser library comes with a set of simple and\\nintuitive interfaces for applying and customizing DL models for layout de-\\ntection, character recognition, and many other document processing tasks.\\nTo promote extensibility, LayoutParser also incorporates a community\\nplatform for sharing both pre-trained models and full document digiti-\\nzation pipelines. We demonstrate that LayoutParser is helpful for both\\nlightweight and large-scale digitization pipelines in real-word use cases.\\nThe library is publicly available at https://layout-parser.github.io .\\nKeywords: Document Image Analysis ·Deep Learning ·Layout Analysis\\n·Character Recognition ·Open Source library ·Toolkit.\\n1 Introduction\\nDeep Learning(DL)-based approaches are the state-of-the-art for a wide range of\\ndocument image analysis (DIA) tasks including document image classification [ 11,arXiv:2103.15348v2 [cs.CV] 21 Jun 2021')"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = loader.load()\n",
"docs[0]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'source': './example_data/layout-parser-paper.pdf', 'page': 0}\n"
]
}
],
"source": [
"print(docs[0].metadata)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Lazy Load\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"6"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"page = []\n",
"for doc in loader.lazy_load():\n",
"    page.append(doc)\n",
"    if len(page) >= 10:\n",
"        # do some paged operation, e.g.\n",
"        # index.upsert(page)\n",
"\n",
"        page = []\n",
"len(page)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all `PyPDFLoader` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.pdf.PyPDFLoader.html"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -7,7 +7,18 @@
"source": [
"# Recursive URL\n",
"\n",
"The `RecursiveUrlLoader` lets you recursively scrape all child links from a root URL and parse them into Documents."
"The `RecursiveUrlLoader` lets you recursively scrape all child links from a root URL and parse them into Documents.\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/web_loaders/recursive_url_loader/)|\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [RecursiveUrlLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.recursive_url_loader.RecursiveUrlLoader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ✅ | \n",
"### Loader features\n",
"| Source | Document Lazy Loading | Native Async Support |\n",
"| :---: | :---: | :---: | \n",
"| RecursiveUrlLoader | ✅ | ❌ | \n"
]
},
{
@@ -17,6 +28,12 @@
"source": [
"## Setup\n",
"\n",
"### Credentials\n",
"\n",
"No credentials are required to use the `RecursiveUrlLoader`.\n",
"\n",
"### Installation\n",
"\n",
"The `RecursiveUrlLoader` lives in the `langchain-community` package. There are no other required packages, though you will get richer default Document metadata if you have `beautifulsoup4` installed as well."
]
},
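The hunks above add overview and setup sections for `RecursiveUrlLoader`; a minimal sketch of constructing the loader they document, reusing the `https://docs.python.org/3.9/` root URL from later in the notebook (the `max_depth` value is illustrative):

```python
from langchain_community.document_loaders import RecursiveUrlLoader

# Recursively fetch child links under the root URL, up to max_depth levels.
loader = RecursiveUrlLoader(
    "https://docs.python.org/3.9/",
    max_depth=2,
)
docs = loader.load()
print(docs[0].metadata)
```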
@@ -186,6 +203,50 @@
"That certainly looks like HTML that comes from the url https://docs.python.org/3.9/, which is what we expected. Let's now look at some variations we can make to our basic example that can be helpful in different situations. "
]
},
{
"cell_type": "markdown",
"id": "b17b7202",
"metadata": {},
"source": [
"## Lazy loading\n",
"\n",
"If we're loading a large number of Documents and our downstream operations can be done over subsets of all loaded Documents, we can lazily load our Documents one at a time to minimize our memory footprint:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b13e4d1",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/4j/2rz3865x6qg07tx43146py8h0000gn/T/ipykernel_73962/2110507528.py:6: XMLParsedAsHTMLWarning: It looks like you're parsing an XML document using an HTML parser. If this really is an HTML document (maybe it's XHTML?), you can ignore or filter this warning. If it's XML, you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the lxml package installed, and pass the keyword argument `features=\"xml\"` into the BeautifulSoup constructor.\n",
"  soup = BeautifulSoup(html, \"lxml\")\n"
]
}
],
"source": [
"page = []\n",
"for doc in loader.lazy_load():\n",
"    page.append(doc)\n",
"    if len(page) >= 10:\n",
"        # do some paged operation, e.g.\n",
"        # index.upsert(page)\n",
"\n",
"        page = []"
]
},
{
"cell_type": "markdown",
"id": "fb039682",
"metadata": {},
"source": [
"In this example we never have more than 10 Documents loaded into memory at a time."
]
},
{
"cell_type": "markdown",
"id": "8f41cc89",
@@ -256,50 +317,6 @@
"You can similarly pass in a `metadata_extractor` to customize how Document metadata is extracted from the HTTP response. See the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.recursive_url_loader.RecursiveUrlLoader.html) for more on this."
]
},
{
"cell_type": "markdown",
"id": "1dddbc94",
"metadata": {},
"source": [
"## Lazy loading\n",
"\n",
"If we're loading a large number of Documents and our downstream operations can be done over subsets of all loaded Documents, we can lazily load our Documents one at a time to minimize our memory footprint:"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "7d0114fc",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/4j/2rz3865x6qg07tx43146py8h0000gn/T/ipykernel_73962/2110507528.py:6: XMLParsedAsHTMLWarning: It looks like you're parsing an XML document using an HTML parser. If this really is an HTML document (maybe it's XHTML?), you can ignore or filter this warning. If it's XML, you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the lxml package installed, and pass the keyword argument `features=\"xml\"` into the BeautifulSoup constructor.\n",
"  soup = BeautifulSoup(html, \"lxml\")\n"
]
}
],
"source": [
"page = []\n",
"for doc in loader.lazy_load():\n",
"    page.append(doc)\n",
"    if len(page) >= 10:\n",
"        # do some paged operation, e.g.\n",
"        # index.upsert(page)\n",
"\n",
"        page = []"
]
},
{
"cell_type": "markdown",
"id": "f88a7c2f-35df-4c3a-b238-f91be2674b96",
"metadata": {},
"source": [
"In this example we never have more than 10 Documents loaded into memory at a time."
]
},
{
"cell_type": "markdown",
"id": "3e4d1c8f",
@@ -7,20 +7,41 @@
"source": [
"# Unstructured\n",
"\n",
"This notebook covers how to use `Unstructured` package to load files of many types. `Unstructured` currently supports loading of text files, powerpoints, html, pdfs, images, and more.\n",
"This notebook covers how to use `Unstructured` [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders) to load files of many types. `Unstructured` currently supports loading of text files, powerpoints, html, pdfs, images, and more.\n",
"\n",
"Please see [this guide](/docs/integrations/providers/unstructured/) for more instructions on setting up Unstructured locally, including setting up required system dependencies."
"Please see [this guide](../../integrations/providers/unstructured.mdx) for more instructions on setting up Unstructured locally, including setting up required system dependencies.\n",
"\n",
"## Overview\n",
"### Integration details\n",
"\n",
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/file_loaders/unstructured/)|\n",
"| :--- | :--- | :---: | :---: | :---: |\n",
"| [UnstructuredLoader](https://api.python.langchain.com/en/latest/document_loaders/langchain_unstructured.document_loaders.UnstructuredLoader.html) | [langchain_unstructured](https://api.python.langchain.com/en/latest/unstructured_api_reference.html) | ✅ | ❌ | ✅ | \n",
"### Loader features\n",
"| Source | Document Lazy Loading | Native Async Support |\n",
"| :---: | :---: | :---: | \n",
"| UnstructuredLoader | ✅ | ❌ | \n",
"\n",
"## Setup\n",
"\n",
"### Credentials\n",
"\n",
"By default, `langchain-unstructured` installs a smaller footprint that requires offloading of the partitioning logic to the Unstructured API, which requires an API key. If you use the local installation, you do not need an API key. To get an API key, head over to [this site](https://unstructured.io), and then set it in the cell below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "2886982e",
"metadata": {},
"outputs": [],
"source": [
"# Install package, compatible with API partitioning\n",
"%pip install --upgrade --quiet \"langchain-unstructured\""
"import getpass\n",
"import os\n",
"\n",
"os.environ[\"UNSTRUCTURED_API_KEY\"] = getpass.getpass(\n",
"    \"Enter your Unstructured API key: \"\n",
")"
]
},
{
@@ -28,15 +49,32 @@
"id": "e75e2a6d",
"metadata": {},
"source": [
"### Local Partitioning (Optional)\n",
"### Installation\n",
"\n",
"By default, `langchain-unstructured` installs a smaller footprint that requires\n",
"offloading of the partitioning logic to the Unstructured API, which requires an `api_key`. For\n",
"partitioning using the API, refer to the Unstructured API section below.\n",
"#### Normal Installation\n",
"\n",
"If you would like to run the partitioning logic locally, you will need to install\n",
"a combination of system dependencies, as outlined in the \n",
"[Unstructured documentation here](https://docs.unstructured.io/open-source/installation/full-installation).\n",
"The following packages are required to run the rest of this notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d9de83b3",
"metadata": {},
"outputs": [],
"source": [
"# Install package, compatible with API partitioning\n",
"%pip install --upgrade --quiet langchain-unstructured unstructured-client unstructured \"unstructured[pdf]\" python-magic"
]
},
{
"cell_type": "markdown",
"id": "637eda35",
"metadata": {},
"source": [
"#### Installation for Local\n",
"\n",
"If you would like to run the partitioning logic locally, you will need to install a combination of system dependencies, as outlined in the [Unstructured documentation here](https://docs.unstructured.io/open-source/installation/full-installation).\n",
"\n",
"For example, on Macs you can install the required dependencies with:\n",
"\n",
@@ -48,7 +86,7 @@
"brew install libxml2 libxslt\n",
"```\n",
"\n",
"You can install the required `pip` dependencies with:\n",
"You can install the required `pip` dependencies needed for local partitioning with:\n",
"\n",
"```bash\n",
"pip install \"langchain-unstructured[local]\"\n",
@@ -60,120 +98,117 @@
"id": "a9c1c775",
"metadata": {},
"source": [
"### Quickstart\n",
"## Initialization\n",
"\n",
"To simply load a file as a document, you can use the LangChain `DocumentLoader.load` \n",
"interface:"
"The `UnstructuredLoader` allows loading from a variety of different file types. To read all about the `unstructured` package please refer to their [documentation](https://docs.unstructured.io/open-source/introduction/overview). In this example, we show loading from both a text file and a PDF file."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "79d3e549",
"metadata": {},
"outputs": [],
"source": [
"from langchain_unstructured import UnstructuredLoader\n",
"\n",
"loader = UnstructuredLoader(\"./example_data/state_of_the_union.txt\")\n",
"file_paths = [\n",
"    \"./example_data/layout-parser-paper.pdf\",\n",
"    \"./example_data/state_of_the_union.txt\",\n",
"]\n",
"\n",
"docs = loader.load()"
"\n",
"loader = UnstructuredLoader(file_paths)"
]
},
{
"cell_type": "markdown",
"id": "b4ab0a79",
"id": "8b68dcab",
"metadata": {},
"source": [
"### Load list of files"
"## Load"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "092d9a0b",
"execution_count": 2,
"id": "8da59ef8",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO: NumExpr defaulting to 12 threads.\n",
"INFO: pikepdf C++ to Python logger bridge initialized\n"
]
},
{
"data": {
"text/plain": [
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}, page_content='1 2 0 2')"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = loader.load()\n",
"\n",
"docs[0]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "97f7aa1f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"whatsapp_chat.txt : 1/22/23, 6:30 PM - User 1: Hi! Im interested in your bag. Im offering $50. Let me know if you are in\n",
"state_of_the_union.txt : May God bless you all. May God protect our troops.\n"
"{'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}\n"
]
}
],
"source": [
"file_paths = [\n",
"    \"./example_data/whatsapp_chat.txt\",\n",
"    \"./example_data/state_of_the_union.txt\",\n",
"]\n",
"\n",
"loader = UnstructuredLoader(file_paths)\n",
"\n",
"docs = loader.load()\n",
"\n",
"print(docs[0].metadata.get(\"filename\"), \": \", docs[0].page_content[:100])\n",
"print(docs[-1].metadata.get(\"filename\"), \": \", docs[-1].page_content[:100])"
"print(docs[0].metadata)"
]
},
{
"cell_type": "markdown",
"id": "8de9ef16",
"id": "0d7f991b",
"metadata": {},
"source": [
"## PDF Example\n",
"\n",
"Processing PDF documents works exactly the same way. Unstructured detects the file type and extracts the same types of elements."
]
},
{
"cell_type": "markdown",
"id": "672733fd",
"metadata": {},
"source": [
"### Define a Partitioning Strategy\n",
"\n",
"Unstructured document loader allow users to pass in a `strategy` parameter that lets Unstructured\n",
"know how to partition pdf and other OCR'd documents. Currently supported strategies are `\"auto\"`,\n",
"`\"hi_res\"`, `\"ocr_only\"`, and `\"fast\"`. Learn more about the different strategies\n",
"[here](https://docs.unstructured.io/open-source/core-functionality/partitioning#partition-pdf). \n",
"\n",
"Not all document types have separate hi res and fast partitioning strategies. For those document types, the `strategy` kwarg is\n",
"ignored. In some cases, the high res strategy will fallback to fast if there is a dependency missing\n",
"(i.e. a model for document partitioning). You can see how to apply a strategy to an\n",
"`UnstructuredLoader` below."
"## Lazy Load"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "60685353",
"execution_count": 4,
"id": "b05604d2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 393.9), (16.34, 560.0), (36.34, 560.0), (36.34, 393.9)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'parent_id': '89565df026a24279aaea20dc08cedbec', 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'e9fa370aef7ee5c05744eb7bb7d9981b'}, page_content='2 v 8 4 3 5 1 . 3 0 1 2 : v i X r a'),\n",
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((157.62199999999999, 114.23496279999995), (157.62199999999999, 146.5141628), (457.7358962799999, 146.5141628), (457.7358962799999, 114.23496279999995)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'Title', 'element_id': 'bde0b230a1aa488e3ce837d33015181b'}, page_content='LayoutParser: A Unified Toolkit for Deep Learning Based Document Image Analysis'),\n",
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((134.809, 168.64029940800003), (134.809, 192.2517444), (480.5464199080001, 192.2517444), (480.5464199080001, 168.64029940800003)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'parent_id': 'bde0b230a1aa488e3ce837d33015181b', 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': '54700f902899f0c8c90488fa8d825bce'}, page_content='Zejiang Shen1 ((cid:0)), Ruochen Zhang2, Melissa Dell3, Benjamin Charles Germain Lee4, Jacob Carlson3, and Weining Li5'),\n",
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((207.23000000000002, 202.57205439999996), (207.23000000000002, 311.8195408), (408.12676, 311.8195408), (408.12676, 202.57205439999996)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'page_number': 1, 'parent_id': 'bde0b230a1aa488e3ce837d33015181b', 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'b650f5867bad9bb4e30384282c79bcfe'}, page_content='1 Allen Institute for AI shannons@allenai.org 2 Brown University ruochen zhang@brown.edu 3 Harvard University {melissadell,jacob carlson}@fas.harvard.edu 4 University of Washington bcgl@cs.washington.edu 5 University of Waterloo w422li@uwaterloo.ca'),\n",
" Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((162.779, 338.45008160000003), (162.779, 566.8455408), (454.0372021523199, 566.8455408), (454.0372021523199, 338.45008160000003)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-02-27T15:49:27', 'links': [{'text': ':// layout - parser . github . io', 'url': 'https://layout-parser.github.io', 'start_index': 1477}], 'page_number': 1, 'parent_id': 'bde0b230a1aa488e3ce837d33015181b', 'filetype': 'application/pdf', 'category': 'NarrativeText', 'element_id': 'cfc957c94fe63c8fd7c7f4bcb56e75a7'}, page_content='Abstract. Recent advances in document image analysis (DIA) have been primarily driven by the application of neural networks. Ideally, research outcomes could be easily deployed in production and extended for further investigation. However, various factors like loosely organized codebases and sophisticated model configurations complicate the easy reuse of im- portant innovations by a wide audience. Though there have been on-going efforts to improve reusability and simplify deep learning (DL) model development in disciplines like natural language processing and computer vision, none of them are optimized for challenges in the domain of DIA. This represents a major gap in the existing toolkit, as DIA is central to academic research across a wide range of disciplines in the social sciences and humanities. This paper introduces LayoutParser, an open-source library for streamlining the usage of DL in DIA research and applica- tions. The core LayoutParser library comes with a set of simple and intuitive interfaces for applying and customizing DL models for layout de- tection, character recognition, and many other document processing tasks. To promote extensibility, LayoutParser also incorporates a community platform for sharing both pre-trained models and full document digiti- zation pipelines. We demonstrate that LayoutParser is helpful for both lightweight and large-scale digitization pipelines in real-word use cases. The library is publicly available at https://layout-parser.github.io.')]"
"Document(metadata={'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}, page_content='1 2 0 2')"
]
},
"execution_count": 6,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_unstructured import UnstructuredLoader\n",
"pages = []\n",
"for doc in loader.lazy_load():\n",
"    pages.append(doc)\n",
"\n",
"loader = UnstructuredLoader(\"./example_data/layout-parser-paper.pdf\", strategy=\"fast\")\n",
"\n",
"docs = loader.load()\n",
"\n",
"docs[5:10]"
"pages[0]"
]
},
{
@@ -242,23 +277,6 @@
"if you’d like to self-host the Unstructured API or run it locally."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e5fde16",
"metadata": {},
"outputs": [],
"source": [
"# Install package\n",
"%pip install \"langchain-unstructured\"\n",
"%pip install \"unstructured-client\"\n",
"\n",
"# Set API key\n",
"import os\n",
"\n",
"os.environ[\"UNSTRUCTURED_API_KEY\"] = \"FAKE_API_KEY\""
]
},
{
"cell_type": "code",
"execution_count": 9,
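The hunk above removes the inline `FAKE_API_KEY` cell in favor of the `getpass` prompt added earlier; a minimal sketch of how the stored key can be used for API-side partitioning, assuming `UnstructuredLoader`'s `partition_via_api` and `api_key` parameters:

```python
import os

from langchain_unstructured import UnstructuredLoader

# Offload partitioning to the Unstructured API using the key set via getpass.
loader = UnstructuredLoader(
    "./example_data/layout-parser-paper.pdf",
    partition_via_api=True,
    api_key=os.environ["UNSTRUCTURED_API_KEY"],
)
docs = loader.load()
```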
@@ -496,6 +514,16 @@
"print(\"Number of LangChain documents:\", len(docs))\n",
"print(\"Length of text in the document:\", len(docs[0].page_content))"
]
},
{
"cell_type": "markdown",
"id": "ce01aa40",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all `UnstructuredLoader` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_unstructured.document_loaders.UnstructuredLoader.html"
]
}
],
"metadata": {
@@ -514,7 +542,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.11.9"
}
},
"nbformat": 4,
File diff suppressed because one or more lines are too long
@@ -243,7 +243,7 @@
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all `GmailToolkit` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.slack.toolkit.SlackToolkit.html)."
"For detailed documentation of all `GmailToolkit` features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/agent_toolkits/langchain_community.agent_toolkits.gmail.toolkit.GmailToolkit.html)."
]
}
],
File diff suppressed because one or more lines are too long
@@ -34,7 +34,7 @@
},
"outputs": [],
"source": [
"%pip install -qU langchain-qdrant 'qdrant-client[fastembed]'"
"%pip install -qU langchain-qdrant"
]
},
{
@@ -628,7 +628,7 @@
"id": "525e3582",
"metadata": {},
"source": [
"For a full list of all the search functions available for a `QdrantVectorStore`, read the [API reference](https://api.python.langchain.com/en/latest/vectorstores/langchain_qdrant.vectorstores.Qdrant.html)\n",
"For a full list of all the search functions available for a `QdrantVectorStore`, read the [API reference](https://api.python.langchain.com/en/latest/qdrant/langchain_qdrant.qdrant.QdrantVectorStore.html)\n",
"\n",
"### Metadata filtering\n",
"\n",
@@ -814,7 +814,7 @@
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all `QdrantVectorStore` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/vectorstores/langchain_qdrant.vectorstores.Qdrant.html"
"For detailed documentation of all `QdrantVectorStore` features and configurations head to the API reference: https://api.python.langchain.com/en/latest/qdrant/langchain_qdrant.qdrant.QdrantVectorStore.html"
]
}
],
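Both link fixes above point at the relocated `QdrantVectorStore` reference; a minimal sketch of the search call those pages document, assuming an existing collection and `from_existing_collection` as the entry point (collection name, URL, and query are illustrative):

```python
from langchain_openai import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore

# Connect to an already-populated Qdrant collection.
store = QdrantVectorStore.from_existing_collection(
    embedding=OpenAIEmbeddings(),
    collection_name="demo_collection",
    url="http://localhost:6333",
)

# Basic similarity search over the collection.
results = store.similarity_search("What did the president say?", k=3)
for doc in results:
    print(doc.page_content[:80])
```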
@@ -43,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -87,18 +87,18 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n",
" Document(page_content='Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \\n\\nIn this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \\n\\nLet each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. \\n\\nPlease rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \\n\\nThroughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \\n\\nThey keep moving. \\n\\nAnd the costs and the threats to America and the world keep rising. \\n\\nThat’s why the NATO Alliance was created to secure peace and stability in Europe after World War 2. \\n\\nThe United States is a member along with 29 other nations. \\n\\nIt matters. American diplomacy matters. American resolve matters.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n",
" Document(page_content='Putin’s latest attack on Ukraine was premeditated and unprovoked. \\n\\nHe rejected repeated efforts at diplomacy. \\n\\nHe thought the West and NATO wouldn’t respond. And he thought he could divide us at home. Putin was wrong. We were ready. Here is what we did. \\n\\nWe prepared extensively and carefully. \\n\\nWe spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin. \\n\\nI spent countless hours unifying our European allies. We shared with the world in advance what we knew Putin was planning and precisely how he would try to falsely justify his aggression. \\n\\nWe countered Russia’s lies with truth. \\n\\nAnd now that he has acted the free world is holding him accountable. \\n\\nAlong with twenty-seven members of the European Union including France, Germany, Italy, as well as countries like the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and many others, even Switzerland.', metadata={'source': '../../how_to/state_of_the_union.txt'})]"
"[Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.'),\n",
" Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \\n\\nIn this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \\n\\nLet each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. \\n\\nPlease rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \\n\\nThroughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \\n\\nThey keep moving. \\n\\nAnd the costs and the threats to America and the world keep rising. \\n\\nThat’s why the NATO Alliance was created to secure peace and stability in Europe after World War 2. \\n\\nThe United States is a member along with 29 other nations. \\n\\nIt matters. American diplomacy matters. American resolve matters.'),\n",
" Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='Putin’s latest attack on Ukraine was premeditated and unprovoked. \\n\\nHe rejected repeated efforts at diplomacy. \\n\\nHe thought the West and NATO wouldn’t respond. And he thought he could divide us at home. Putin was wrong. We were ready. Here is what we did. \\n\\nWe prepared extensively and carefully. \\n\\nWe spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin. \\n\\nI spent countless hours unifying our European allies. We shared with the world in advance what we knew Putin was planning and precisely how he would try to falsely justify his aggression. \\n\\nWe countered Russia’s lies with truth. \\n\\nAnd now that he has acted the free world is holding him accountable. \\n\\nAlong with twenty-seven members of the European Union including France, Germany, Italy, as well as countries like the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and many others, even Switzerland.')]"
]
},
"execution_count": 17,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -126,20 +126,20 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['82b3781b-817c-4a4d-8f8b-cbd07c1d005a',\n",
" 'a20e0a49-29d8-465e-8eae-0bc5ac3d24dc',\n",
" 'c19f4108-b652-4890-873e-d4cad00f1b1a',\n",
" '23d1fcf9-6ee1-4638-8c70-0f5030762301',\n",
" '2d775784-825d-4627-97a3-fee4539d8f58']"
"['247aa3ae-9be9-43e2-98e4-48f94f920749',\n",
" 'c4dfc886-0a2d-497c-b2b7-d923a5cb3832',\n",
" '0350761d-ca68-414e-b8db-7eca78cb0d18',\n",
" '902fe5eb-8543-486a-bd5f-79858a7a8af1',\n",
" '28875612-c672-4de4-b40a-3b658c72036a']"
]
},
"execution_count": 35,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -154,33 +154,116 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"store"
"## Querying\n",
"\n",
"The database can be queried using a vector or a text prompt.\n",
"If a text prompt is used, it's first converted into an embedding and then queried.\n",
"\n",
"The `k` parameter specifies how many results to return from the query."
]
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['fe1f7a7b-42e2-4828-88b0-5b449c49fe86',\n",
" '154a0021-a99c-427e-befb-f0b2b18ed83c',\n",
" 'a8218226-18a9-4ab5-ade5-5a71b19a7831',\n",
" '62b7ef97-83bf-4b6d-8c93-f471796244dc',\n",
" 'ab43fd2e-13df-46d4-8cf7-e6e16506e4bb',\n",
" '6841e7f9-adaa-41d9-af3d-0813ee52443f',\n",
" '45dda5a1-f0c1-4ac7-9acb-50253e4ee493']"
"[Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='If you travel 20 miles east of Columbus, Ohio, you’ll find 1,000 empty acres of land. \\n\\nIt won’t look like much, but if you stop and look closely, you’ll see a “Field of dreams,” the ground on which America’s future will be built. \\n\\nThis is where Intel, the American company that helped build Silicon Valley, is going to build its $20 billion semiconductor “mega site”. \\n\\nUp to eight state-of-the-art factories in one place. 10,000 new good-paying jobs. \\n\\nSome of the most sophisticated manufacturing in the world to make computer chips the size of a fingertip that power the world and our everyday lives. \\n\\nSmartphones. The Internet. Technology we have yet to invent. \\n\\nBut that’s just the beginning. \\n\\nIntel’s CEO, Pat Gelsinger, who is here tonight, told me they are ready to increase their investment from \\n$20 billion to $100 billion. \\n\\nThat would be one of the biggest investments in manufacturing in American history. \\n\\nAnd all they’re waiting for is for you to pass this bill.'),\n",
" Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='So let’s not wait any longer. Send it to my desk. I’ll sign it. \\n\\nAnd we will really take off. \\n\\nAnd Intel is not alone. \\n\\nThere’s something happening in America. \\n\\nJust look around and you’ll see an amazing story. \\n\\nThe rebirth of the pride that comes from stamping products “Made In America.” The revitalization of American manufacturing. \\n\\nCompanies are choosing to build new factories here, when just a few years ago, they would have built them overseas. \\n\\nThat’s what is happening. Ford is investing $11 billion to build electric vehicles, creating 11,000 jobs across the country. \\n\\nGM is making the largest investment in its history—$7 billion to build electric vehicles, creating 4,000 jobs in Michigan. \\n\\nAll told, we created 369,000 new manufacturing jobs in America just last year. \\n\\nPowered by people I’ve met like JoJo Burgess, from generations of union steelworkers from Pittsburgh, who’s here with us tonight.'),\n",
" Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='When we use taxpayer dollars to rebuild America – we are going to Buy American: buy American products to support American jobs. \\n\\nThe federal government spends about $600 Billion a year to keep the country safe and secure. \\n\\nThere’s been a law on the books for almost a century \\nto make sure taxpayers’ dollars support American jobs and businesses. \\n\\nEvery Administration says they’ll do it, but we are actually doing it. \\n\\nWe will buy American to make sure everything from the deck of an aircraft carrier to the steel on highway guardrails are made in America. \\n\\nBut to compete for the best jobs of the future, we also need to level the playing field with China and other competitors. \\n\\nThat’s why it is so important to pass the Bipartisan Innovation Act sitting in Congress that will make record investments in emerging technologies and American manufacturing. \\n\\nLet me give you one example of why it’s so important to pass it.'),\n",
" Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='Last month, I announced our plan to supercharge \\nthe Cancer Moonshot that President Obama asked me to lead six years ago. \\n\\nOur goal is to cut the cancer death rate by at least 50% over the next 25 years, turn more cancers from death sentences into treatable diseases. \\n\\nMore support for patients and families. \\n\\nTo get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health. \\n\\nIt’s based on DARPA—the Defense Department project that led to the Internet, GPS, and so much more. \\n\\nARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimer’s, diabetes, and more. \\n\\nA unity agenda for the nation. \\n\\nWe can do this. \\n\\nMy fellow Americans—tonight , we have gathered in a sacred space—the citadel of our democracy. \\n\\nIn this Capitol, generation after generation, Americans have debated great questions amid great strife, and have done great things. \\n\\nWe have fought for freedom, expanded liberty, defeated totalitarianism and terror.'),\n",
" Document(metadata={'source': '../../how_to/state_of_the_union.txt'}, page_content='And based on the projections, more of the country will reach that point across the next couple of weeks. \\n\\nThanks to the progress we have made this past year, COVID-19 need no longer control our lives. \\n\\nI know some are talking about “living with COVID-19”. Tonight – I say that we will never just accept living with COVID-19. \\n\\nWe will continue to combat the virus as we do other diseases. And because this is a virus that mutates and spreads, we will stay on guard. \\n\\nHere are four common sense steps as we move forward safely. \\n\\nFirst, stay protected with vaccines and treatments. We know how incredibly effective vaccines are. If you’re vaccinated and boosted you have the highest degree of protection. \\n\\nWe will never give up on vaccinating more Americans. Now, I know parents with kids under 5 are eager to see a vaccine authorized for their children. \\n\\nThe scientists are working hard to get that done and we’ll be ready with plenty of vaccines when they do.')]"
]
},
"execution_count": 27,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"store.add_texts(\n",
"result = store.similarity_search(\"technology\", k=5)\n",
"result"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Querying with score\n",
"\n",
"The score of the query can be included for every result. \n",
"\n",
"> The score returned in the query requests is a normalized value between 0 and 1, where 1 indicates the highest similarity and 0 the lowest regardless of the similarity function used. For more information look at the [docs](https://upstash.com/docs/vector/overall/features#vector-similarity-functions)."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'source': '../../how_to/state_of_the_union.txt'} - 0.8968438\n",
"{'source': '../../how_to/state_of_the_union.txt'} - 0.8895128\n",
"{'source': '../../how_to/state_of_the_union.txt'} - 0.88626665\n",
"{'source': '../../how_to/state_of_the_union.txt'} - 0.88538057\n",
"{'source': '../../how_to/state_of_the_union.txt'} - 0.88432854\n"
]
}
],
"source": [
"result = store.similarity_search_with_score(\"technology\", k=5)\n",
"\n",
"for doc, score in result:\n",
"    print(f\"{doc.metadata} - {score}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Namespaces\n",
"\n",
"Namespaces can be used to separate different types of documents. This can increase the efficiency of the queries since the search space is reduced. When no namespace is provided, the default namespace is used."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"store_books = UpstashVectorStore(embedding=embeddings, namespace=\"books\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['928a5f12-900f-40b7-9406-3861741cc9d6',\n",
" '4908670e-0b9c-455b-96b8-e0f83bc59204',\n",
" '7083ff98-d900-4435-a67c-d9690fc555ba',\n",
" 'b910f9b1-2be0-4e0a-8b6c-93ba9b367df5',\n",
" '7c40e950-4d2b-4293-9fb8-623a49e72607',\n",
" '25a70e79-4905-42af-8b08-09f13bd48512',\n",
" '695e2bcf-23d9-44d4-af26-a7b554c0c375']"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"store_books.add_texts(\n",
"    [\n",
"        \"A timeless tale set in the Jazz Age, this novel delves into the lives of affluent socialites, their pursuits of wealth, love, and the elusive American Dream. Amidst extravagant parties and glittering opulence, the story unravels the complexities of desire, ambition, and the consequences of obsession.\",\n",
"        \"Set in a small Southern town during the 1930s, this novel explores themes of racial injustice, moral growth, and empathy through the eyes of a young girl. It follows her father, a principled lawyer, as he defends a black man accused of assaulting a white woman, confronting deep-seated prejudices and challenging societal norms along the way.\",\n",
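Since the new section demonstrates namespaces, a minimal sketch of querying the `books` namespace populated above (the query text is illustrative):

```python
# Queries on store_books only search vectors written to the "books" namespace.
result = store_books.similarity_search("a novel about racial injustice", k=2)
for doc in result:
    print(doc.page_content[:80])
```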
@@ -202,63 +285,26 @@
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Querying\n",
"\n",
"The database can be queried using a vector or a text prompt.\n",
"If a text prompt is used, it's first converted into embedding and then queried.\n",
"\n",
"The `k` parameter specifies how many results to return from the query."
]
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='And my report is this: the State of the Union is strong—because you, the American people, are strong. \\n\\nWe are stronger today than we were a year ago. \\n\\nAnd we will be stronger a year from now than we are today. \\n\\nNow is our moment to meet and overcome the challenges of our time. \\n\\nAnd we will, as one people. \\n\\nOne America. \\n\\nThe United States of America. \\n\\nMay God bless you all. May God protect our troops.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n",
|
||||
" Document(page_content='And built the strongest, freest, and most prosperous nation the world has ever known. \\n\\nNow is the hour. \\n\\nOur moment of responsibility. \\n\\nOur test of resolve and conscience, of history itself. \\n\\nIt is in this moment that our character is formed. Our purpose is found. Our future is forged. \\n\\nWell I know this nation. \\n\\nWe will meet the test. \\n\\nTo protect freedom and liberty, to expand fairness and opportunity. \\n\\nWe will save democracy. \\n\\nAs hard as these times have been, I am more optimistic about America today than I have been my whole life. \\n\\nBecause I see the future that is within our grasp. \\n\\nBecause I know there is simply nothing beyond our capacity. \\n\\nWe are the only nation on Earth that has always turned every crisis we have faced into an opportunity. \\n\\nThe only nation that can be defined by a single word: possibilities. \\n\\nSo on this night, in our 245th year as a nation, I have come to report on the State of the Union.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n",
|
||||
" Document(page_content='Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \\n\\nIn this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \\n\\nLet each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. \\n\\nPlease rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \\n\\nThroughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \\n\\nThey keep moving. \\n\\nAnd the costs and the threats to America and the world keep rising. \\n\\nThat’s why the NATO Alliance was created to secure peace and stability in Europe after World War 2. \\n\\nThe United States is a member along with 29 other nations. \\n\\nIt matters. American diplomacy matters. American resolve matters.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n",
|
||||
" Document(page_content='When we use taxpayer dollars to rebuild America – we are going to Buy American: buy American products to support American jobs. \\n\\nThe federal government spends about $600 Billion a year to keep the country safe and secure. \\n\\nThere’s been a law on the books for almost a century \\nto make sure taxpayers’ dollars support American jobs and businesses. \\n\\nEvery Administration says they’ll do it, but we are actually doing it. \\n\\nWe will buy American to make sure everything from the deck of an aircraft carrier to the steel on highway guardrails are made in America. \\n\\nBut to compete for the best jobs of the future, we also need to level the playing field with China and other competitors. \\n\\nThat’s why it is so important to pass the Bipartisan Innovation Act sitting in Congress that will make record investments in emerging technologies and American manufacturing. \\n\\nLet me give you one example of why it’s so important to pass it.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n",
|
||||
" Document(page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world.', metadata={'source': '../../how_to/state_of_the_union.txt'})]"
|
||||
"[Document(metadata={'title': '1984', 'author': 'George Orwell', 'year': 1949}, page_content='A chilling portrayal of a totalitarian regime, this dystopian novel offers a bleak vision of a future world dominated by surveillance, propaganda, and thought control. Through the eyes of a disillusioned protagonist, it explores the dangers of totalitarianism and the erosion of individual freedom in a society ruled by fear and oppression.'),\n",
|
||||
" Document(metadata={'title': 'The Road', 'author': 'Cormac McCarthy', 'year': 2006}, page_content='Set in a future world devastated by environmental collapse, this novel follows a group of survivors as they struggle to survive in a harsh, unforgiving landscape. Amidst scarcity and desperation, they must confront moral dilemmas and question the nature of humanity itself.'),\n",
|
||||
" Document(metadata={'title': 'Brave New World', 'author': 'Aldous Huxley', 'year': 1932}, page_content='In a society where emotion is suppressed and individuality is forbidden, one man dares to defy the oppressive regime. Through acts of rebellion and forbidden love, he discovers the power of human connection and the importance of free will.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 29,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = store.similarity_search(\"The United States of America\", k=5)\n",
|
||||
"result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='A chilling portrayal of a totalitarian regime, this dystopian novel offers a bleak vision of a future world dominated by surveillance, propaganda, and thought control. Through the eyes of a disillusioned protagonist, it explores the dangers of totalitarianism and the erosion of individual freedom in a society ruled by fear and oppression.', metadata={'title': '1984', 'author': 'George Orwell', 'year': 1949}),\n",
|
||||
" Document(page_content='Narrated by a disillusioned teenager, this novel follows his journey of self-discovery and rebellion against the phoniness of the adult world. Through a series of encounters and reflections, it explores themes of alienation, identity, and the search for authenticity in a society marked by conformity and hypocrisy.', metadata={'title': 'The Catcher in the Rye', 'author': 'J.D. Salinger', 'year': 1951}),\n",
|
||||
" Document(page_content='Set in the English countryside during the early 19th century, this novel follows the lives of the Bennet sisters as they navigate the intricate social hierarchy of their time. Focusing on themes of marriage, class, and societal expectations, the story offers a witty and insightful commentary on the complexities of romantic relationships and the pursuit of happiness.', metadata={'title': 'Pride and Prejudice', 'author': 'Jane Austen', 'year': 1813})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = store.similarity_search(\"dystopia\", k=3, filter=\"year < 2000\")\n",
|
||||
"result = store_books.similarity_search(\"dystopia\", k=3)\n",
|
||||
"result"
|
||||
]
|
||||
},
|
||||
@@ -266,35 +312,63 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Querying with score\n",
|
||||
"## Metadata Filtering\n",
|
||||
"\n",
|
||||
"The score of the query can be included for every result. \n",
|
||||
"\n",
|
||||
"> The score returned in the query requests is a normalized value between 0 and 1, where 1 indicates the highest similarity and 0 the lowest regardless of the similarity function used. For more information look at the [docs](https://upstash.com/docs/vector/overall/features#vector-similarity-functions)."
|
||||
"Metadata can be used to filter the results of a query. You can refer to the [docs](https://upstash.com/docs/vector/features/filtering) to see more complex ways of filtering."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'source': '../../how_to/state_of_the_union.txt'} - 0.87391514\n",
|
||||
"{'source': '../../how_to/state_of_the_union.txt'} - 0.8549463\n",
|
||||
"{'source': '../../how_to/state_of_the_union.txt'} - 0.847913\n",
|
||||
"{'source': '../../how_to/state_of_the_union.txt'} - 0.84328896\n",
|
||||
"{'source': '../../how_to/state_of_the_union.txt'} - 0.832347\n"
|
||||
]
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(metadata={'title': '1984', 'author': 'George Orwell', 'year': 1949}, page_content='A chilling portrayal of a totalitarian regime, this dystopian novel offers a bleak vision of a future world dominated by surveillance, propaganda, and thought control. Through the eyes of a disillusioned protagonist, it explores the dangers of totalitarianism and the erosion of individual freedom in a society ruled by fear and oppression.'),\n",
|
||||
" Document(metadata={'title': 'Brave New World', 'author': 'Aldous Huxley', 'year': 1932}, page_content='In a society where emotion is suppressed and individuality is forbidden, one man dares to defy the oppressive regime. Through acts of rebellion and forbidden love, he discovers the power of human connection and the importance of free will.'),\n",
|
||||
" Document(metadata={'title': 'The Catcher in the Rye', 'author': 'J.D. Salinger', 'year': 1951}, page_content='Narrated by a disillusioned teenager, this novel follows his journey of self-discovery and rebellion against the phoniness of the adult world. Through a series of encounters and reflections, it explores themes of alienation, identity, and the search for authenticity in a society marked by conformity and hypocrisy.')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = store.similarity_search_with_score(\"The United States of America\", k=5)\n",
|
||||
"result = store_books.similarity_search(\"dystopia\", k=3, filter=\"year < 2000\")\n",
|
||||
"result"
|
||||
]
|
||||
},
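The filter above is a single comparison. Upstash's filter strings also support SQL-like boolean operators, so conditions can be combined; the query below is a hedged sketch of such a compound filter (the filter string is illustrative rather than taken from this notebook; see the filtering docs linked above for the full operator set).

```python
# A sketch of a compound metadata filter (hypothetical example):
result = store_books.similarity_search(
    "dystopia",
    k=3,
    filter="year < 2000 AND author != 'George Orwell'",
)
```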
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Getting info about vector database\n",
|
||||
"\n",
|
||||
"for doc, score in result:\n",
|
||||
" print(f\"{doc.metadata} - {score}\")"
|
||||
"You can get information about your database like the distance metric dimension using the info function.\n",
|
||||
"\n",
|
||||
"> When an insert happens, the database an indexing takes place. While this is happening new vectors can not be queried. `pendingVectorCount` represents the number of vector that are currently being indexed. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"InfoResult(vector_count=49, pending_vector_count=0, index_size=2978163, dimension=1536, similarity_function='COSINE', namespaces={'': NamespaceInfo(vector_count=42, pending_vector_count=0), 'books': NamespaceInfo(vector_count=7, pending_vector_count=0)})"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"store.info()"
|
||||
]
|
||||
},
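Because newly inserted vectors are not queryable until indexing completes, scripts that insert and immediately query may want to wait for `pending_vector_count` to reach zero. Below is a minimal sketch of such a helper; it assumes only the `pending_vector_count` field visible in the `InfoResult` above, and the helper name and polling interval are made up for illustration.

```python
import time


def wait_for_indexing(store, poll_interval: float = 1.0) -> None:
    # Block until the index reports no pending vectors, so that
    # subsequent similarity searches can see everything we inserted.
    while store.info().pending_vector_count > 0:
        time.sleep(poll_interval)


wait_for_indexing(store)
```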
|
||||
{
|
||||
@@ -308,7 +382,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -326,42 +400,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"store.delete(delete_all=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Getting info about vector database\n",
|
||||
"\n",
|
||||
"You can get information about your database like the distance metric dimension using the info function.\n",
|
||||
"\n",
|
||||
"> When an insert happens, the database an indexing takes place. While this is happening new vectors can not be queried. `pendingVectorCount` represents the number of vector that are currently being indexed. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 36,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"InfoResult(vector_count=42, pending_vector_count=0, index_size=6470, dimension=384, similarity_function='COSINE')"
|
||||
]
|
||||
},
|
||||
"execution_count": 36,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"store.info()"
|
||||
"store.delete(delete_all=True)\n",
|
||||
"store_books.delete(delete_all=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -381,7 +425,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
"version": "3.12.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -71,11 +71,11 @@
|
||||
"from langchain_anthropic import ChatAnthropic\n",
|
||||
"from langchain_community.tools.tavily_search import TavilySearchResults\n",
|
||||
"from langchain_core.messages import HumanMessage\n",
|
||||
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
|
||||
"from langgraph.checkpoint.memory import MemorySaver\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"# Create the agent\n",
|
||||
"memory = SqliteSaver.from_conn_string(\":memory:\")\n",
|
||||
"memory = MemorySaver()\n",
|
||||
"model = ChatAnthropic(model_name=\"claude-3-sonnet-20240229\")\n",
|
||||
"search = TavilySearchResults(max_results=2)\n",
|
||||
"tools = [search]\n",
|
||||
@@ -121,7 +121,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -U langchain-community langgraph langchain-anthropic tavily-python"
|
||||
"%pip install -U langchain-community langgraph langchain-anthropic tavily-python langgraph-checkpoint-sqlite"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -606,9 +606,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
|
||||
"from langgraph.checkpoint.memory import MemorySaver\n",
|
||||
"\n",
|
||||
"memory = SqliteSaver.from_conn_string(\":memory:\")"
|
||||
"memory = MemorySaver()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -857,9 +857,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
|
||||
"from langgraph.checkpoint.memory import MemorySaver\n",
|
||||
"\n",
|
||||
"memory = SqliteSaver.from_conn_string(\":memory:\")\n",
|
||||
"memory = MemorySaver()\n",
|
||||
"\n",
|
||||
"agent_executor = create_react_agent(llm, tools, checkpointer=memory)"
|
||||
]
|
||||
@@ -1012,20 +1012,15 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import bs4\n",
|
||||
"from langchain.agents import AgentExecutor, create_tool_calling_agent\n",
|
||||
"from langchain.tools.retriever import create_retriever_tool\n",
|
||||
"from langchain_chroma import Chroma\n",
|
||||
"from langchain_community.chat_message_histories import ChatMessageHistory\n",
|
||||
"from langchain_community.document_loaders import WebBaseLoader\n",
|
||||
"from langchain_core.chat_history import BaseChatMessageHistory\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
|
||||
"from langchain_core.runnables.history import RunnableWithMessageHistory\n",
|
||||
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
|
||||
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
|
||||
"from langgraph.checkpoint.sqlite import SqliteSaver\n",
|
||||
"from langgraph.checkpoint.memory import MemorySaver\n",
|
||||
"from langgraph.prebuilt import create_react_agent\n",
|
||||
"\n",
|
||||
"memory = SqliteSaver.from_conn_string(\":memory:\")\n",
|
||||
"memory = MemorySaver()\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
|
||||
@@ -54,12 +54,9 @@
|
||||
"id": "00df631d-5121-4918-94aa-b88acce9b769",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"import { ColumnContainer, Column } from \"@theme/Columns\";\n",
|
||||
"## Legacy\n",
|
||||
"\n",
|
||||
"<ColumnContainer>\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### Legacy\n"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -111,12 +108,11 @@
|
||||
"id": "f8e36b0e-c7dc-4130-a51b-189d4b756c7f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</Column>\n",
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"## LCEL\n",
|
||||
"\n",
|
||||
"#### LCEL\n",
|
||||
"\n"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -174,10 +170,6 @@
|
||||
"id": "6b386ce6-895e-442c-88f3-7bec0ab9f401",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"</Column>\n",
|
||||
"</ColumnContainer>\n",
|
||||
"\n",
|
||||
"The above example uses the same `history` for all sessions. The example below shows how to use a different chat history for each session."
|
||||
]
|
||||
},
|
||||
@@ -230,6 +222,8 @@
|
||||
"id": "b2717810",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
"See [this tutorial](/docs/tutorials/chatbot) for a more end-to-end guide on building with [`RunnableWithMessageHistory`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html).\n",
|
||||
|
||||
@@ -83,13 +83,9 @@
|
||||
"id": "8bc06416",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"import { ColumnContainer, Column } from \"@theme/Columns\";\n",
|
||||
"## Legacy\n",
|
||||
"\n",
|
||||
"<ColumnContainer>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### Legacy"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -165,12 +161,11 @@
|
||||
"id": "43a8a23c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</Column>\n",
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"## LCEL\n",
|
||||
"\n",
|
||||
"#### LCEL\n",
|
||||
"\n"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -253,9 +248,7 @@
|
||||
"id": "b2717810",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</Column>\n",
|
||||
"\n",
|
||||
"</ColumnContainer>\n",
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
@@ -263,6 +256,14 @@
|
||||
"\n",
|
||||
"Next, check out the [LCEL conceptual docs](/docs/concepts/#langchain-expression-language-lcel) for more background information."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7bfc38bd-0ff8-40ee-83a3-9d7553364fd7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -2,33 +2,48 @@
|
||||
sidebar_position: 1
|
||||
---
|
||||
|
||||
# How to migrate chains to LCEL
|
||||
# How to migrate from v0.0 chains
|
||||
|
||||
:::info Prerequisites
|
||||
|
||||
This guide assumes familiarity with the following concepts:
|
||||
- [LangChain Expression Language](/docs/concepts#langchain-expression-language-lcel)
|
||||
|
||||
- [LangGraph](https://langchain-ai.github.io/langgraph/)
|
||||
:::
|
||||
|
||||
LCEL is designed to streamline the process of building useful apps with LLMs and combining related components. It does this by providing:
|
||||
LangChain maintains a number of legacy abstractions. Many of these can be reimplemented via short combinations of LCEL and LangGraph primitives.
|
||||
|
||||
### LCEL
|
||||
[LCEL](/docs/concepts/#langchain-expression-language-lcel) is designed to streamline the process of building useful apps with LLMs and combining related components. It does this by providing:
|
||||
|
||||
1. **A unified interface**: Every LCEL object implements the `Runnable` interface, which defines a common set of invocation methods (`invoke`, `batch`, `stream`, `ainvoke`, ...). This makes it possible to also automatically and consistently support useful operations like streaming of intermediate steps and batching, since every chain composed of LCEL objects is itself an LCEL object.
|
||||
2. **Composition primitives**: LCEL provides a number of primitives that make it easy to compose chains, parallelize components, add fallbacks, dynamically configure chain internals, and more.
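As a minimal sketch of both points (the prompt, model, and parser here are illustrative, not prescribed):

```python
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Composition with the `|` operator; the result is itself a Runnable.
chain = (
    ChatPromptTemplate.from_template("Tell me a joke about {topic}")
    | ChatOpenAI(model="gpt-4o-mini")
    | StrOutputParser()
)

chain.invoke({"topic": "bears"})  # also supports .batch, .stream, .ainvoke, ...
```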
|
||||
|
||||
LangChain maintains a number of legacy abstractions. Many of these can be reimplemented via short combinations of LCEL primitives. Doing so confers some general advantages:
|
||||
### LangGraph
|
||||
[LangGraph](https://langchain-ai.github.io/langgraph/), built on top of LCEL, allows for performant orchestration of application components while maintaining concise and readable code. It includes built-in persistence, supports cycles, and prioritizes controllability.
|
||||
If LCEL grows unwieldy for larger or more complex chains, they may benefit from a LangGraph implementation.
|
||||
|
||||
### Advantages
|
||||
Using these frameworks for existing v0.0 chains confers some advantages:
|
||||
|
||||
- The resulting chains typically implement the full `Runnable` interface, including streaming and asynchronous support where appropriate;
|
||||
- The chains may be more easily extended or modified;
|
||||
- The parameters of the chain are typically surfaced for easier customization (e.g., prompts) over previous versions, which tended to be subclasses and had opaque parameters and internals.
|
||||
- If using LangGraph, the chain supports built-in persistence, allowing for conversational experiences via a "memory" of the chat history.
|
||||
- If using LangGraph, the steps of the chain can be streamed, allowing for greater control and customizability.
|
||||
|
||||
The LCEL implementations can be slightly more verbose, but there are significant benefits in transparency and customizability.
|
||||
|
||||
The below pages assist with migration from various specific chains to LCEL:
|
||||
The below pages assist with migration from various specific chains to LCEL and LangGraph:
|
||||
|
||||
- [LLMChain](/docs/versions/migrating_chains/llm_chain)
|
||||
- [ConversationChain](/docs/versions/migrating_chains/conversation_chain)
|
||||
- [RetrievalQA](/docs/versions/migrating_chains/retrieval_qa)
|
||||
- [ConversationalRetrievalChain](/docs/versions/migrating_chains/conversation_retrieval_chain)
|
||||
- [StuffDocumentsChain](/docs/versions/migrating_chains/stuff_docs_chain)
|
||||
- [MapReduceDocumentsChain](/docs/versions/migrating_chains/map_reduce_chain)
|
||||
- [MapRerankDocumentsChain](/docs/versions/migrating_chains/map_rerank_docs_chain)
|
||||
- [RefineDocumentsChain](/docs/versions/migrating_chains/refine_docs_chain)
|
||||
- [LLMRouterChain](/docs/versions/migrating_chains/llm_router_chain)
|
||||
- [MultiPromptChain](/docs/versions/migrating_chains/multi_prompt_chain)
|
||||
|
||||
Check out the [LCEL conceptual docs](/docs/concepts/#langchain-expression-language-lcel) for more background information.
|
||||
Check out the [LCEL conceptual docs](/docs/concepts/#langchain-expression-language-lcel) and [LangGraph docs](https://langchain-ai.github.io/langgraph/) for more background information.
|
||||
@@ -52,13 +52,9 @@
|
||||
"id": "e3621b62-a037-42b8-8faa-59575608bb8b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"import { ColumnContainer, Column } from \"@theme/Columns\";\n",
|
||||
"## Legacy\n",
|
||||
"\n",
|
||||
"<ColumnContainer>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### Legacy\n"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -98,13 +94,11 @@
|
||||
"id": "cdc3b527-c09e-4c77-9711-c3cc4506cd95",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"</Column>\n",
|
||||
"## LCEL\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### LCEL\n",
|
||||
"\n"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -143,10 +137,6 @@
|
||||
"id": "3c0b0513-77b8-4371-a20e-3e487cec7e7f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"</Column>\n",
|
||||
"</ColumnContainer>\n",
|
||||
"\n",
|
||||
"Note that `LLMChain` by default returns a `dict` containing both the input and the output. If this behavior is desired, we can replicate it using another LCEL primitive, [`RunnablePassthrough`](https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html):"
|
||||
]
|
||||
},
|
||||
@@ -181,6 +171,8 @@
|
||||
"id": "b2717810",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
"See [this tutorial](/docs/tutorials/llm_chain) for more detail on building with prompt templates, LLMs, and output parsers.\n",
|
||||
|
||||
283
docs/docs/versions/migrating_chains/llm_router_chain.ipynb
Normal file
@@ -0,0 +1,283 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "575befea-4d98-4941-8e55-1581b169a674",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"title: Migrating from LLMRouterChain\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "14625d35-efca-41cf-b203-be9f4c375700",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The [`LLMRouterChain`](https://api.python.langchain.com/en/latest/chains/langchain.chains.router.llm_router.LLMRouterChain.html) routed an input query to one of multiple destinations-- that is, given an input query, it used a LLM to select from a list of destination chains, and passed its inputs to the selected chain.\n",
|
||||
"\n",
|
||||
"`LLMRouterChain` does not support common [chat model](/docs/concepts/#chat-models) features, such as message roles and [tool calling](/docs/concepts/#functiontool-calling). Under the hood, `LLMRouterChain` routes a query by instructing the LLM to generate JSON-formatted text, and parsing out the intended destination.\n",
|
||||
"\n",
|
||||
"Consider an example from a [MultiPromptChain](/docs/versions/migrating_chains/multi_prompt_chain), which uses `LLMRouterChain`. Below is an (example) default prompt:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "364814a5-d15c-41bb-bf3f-581df51a4721",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Given a raw text input to a language model select the model prompt best suited for the input. You will be given the names of the available prompts and a description of what the prompt is best suited for. You may also revise the original input if you think that revising it will ultimately lead to a better response from the language model.\n",
|
||||
"\n",
|
||||
"<< FORMATTING >>\n",
|
||||
"Return a markdown code snippet with a JSON object formatted to look like:\n",
|
||||
"'''json\n",
|
||||
"{{\n",
|
||||
" \"destination\": string \\ name of the prompt to use or \"DEFAULT\"\n",
|
||||
" \"next_inputs\": string \\ a potentially modified version of the original input\n",
|
||||
"}}\n",
|
||||
"'''\n",
|
||||
"\n",
|
||||
"REMEMBER: \"destination\" MUST be one of the candidate prompt names specified below OR it can be \"DEFAULT\" if the input is not well suited for any of the candidate prompts.\n",
|
||||
"REMEMBER: \"next_inputs\" can just be the original input if you don't think any modifications are needed.\n",
|
||||
"\n",
|
||||
"<< CANDIDATE PROMPTS >>\n",
|
||||
"\n",
|
||||
"animals: prompt for animal expert\n",
|
||||
"vegetables: prompt for a vegetable expert\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"<< INPUT >>\n",
|
||||
"{input}\n",
|
||||
"\n",
|
||||
"<< OUTPUT (must include '''json at the start of the response) >>\n",
|
||||
"<< OUTPUT (must end with ''') >>\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.chains.router.multi_prompt import MULTI_PROMPT_ROUTER_TEMPLATE\n",
|
||||
"\n",
|
||||
"destinations = \"\"\"\n",
|
||||
"animals: prompt for animal expert\n",
|
||||
"vegetables: prompt for a vegetable expert\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(destinations=destinations)\n",
|
||||
"\n",
|
||||
"print(router_template.replace(\"`\", \"'\")) # for rendering purposes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "934937d1-fc0a-4d3f-b297-29f96e6a8f5e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Most of the behavior is determined via a single natural language prompt. Chat models that support [tool calling](/docs/how_to/tool_calling/) features confer a number of advantages for this task:\n",
|
||||
"\n",
|
||||
"- Supports chat prompt templates, including messages with `system` and other roles;\n",
|
||||
"- Tool-calling models are fine-tuned to generate structured output;\n",
|
||||
"- Support for runnable methods like streaming and async operations.\n",
|
||||
"\n",
|
||||
"Now let's look at `LLMRouterChain` side-by-side with an LCEL implementation that uses tool-calling. Note that for this guide we will `langchain-openai >= 0.1.20`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ed12b22b-5452-4776-aee3-b67d9f965082",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-core langchain-openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b0edbba1-a497-49ef-ade7-4fe7967360eb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5d4dc41c-3fdc-4093-ba5e-31a9ebb54e13",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Legacy\n",
|
||||
"\n",
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "c58c9269-5a1d-4234-88b5-7168944618bf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser\n",
|
||||
"from langchain_core.prompts import PromptTemplate\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-4o-mini\")\n",
|
||||
"\n",
|
||||
"router_prompt = PromptTemplate(\n",
|
||||
" # Note: here we use the prompt template from above. Generally this would need\n",
|
||||
" # to be customized.\n",
|
||||
" template=router_template,\n",
|
||||
" input_variables=[\"input\"],\n",
|
||||
" output_parser=RouterOutputParser(),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = LLMRouterChain.from_llm(llm, router_prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a22ebdca-5f53-459e-9cff-a97b2354ffe0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"vegetables\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = chain.invoke({\"input\": \"What color are carrots?\"})\n",
|
||||
"\n",
|
||||
"print(result[\"destination\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6fd48120-056f-4c58-a04f-da5198c23068",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"## LCEL\n",
|
||||
"\n",
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "5bbebac2-df19-4f59-8a69-f61cd7286e59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from operator import itemgetter\n",
|
||||
"from typing import Literal\n",
|
||||
"\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"from typing_extensions import TypedDict\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-4o-mini\")\n",
|
||||
"\n",
|
||||
"route_system = \"Route the user's query to either the animal or vegetable expert.\"\n",
|
||||
"route_prompt = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", route_system),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Define schema for output:\n",
|
||||
"class RouteQuery(TypedDict):\n",
|
||||
" \"\"\"Route query to destination expert.\"\"\"\n",
|
||||
"\n",
|
||||
" destination: Literal[\"animal\", \"vegetable\"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Instead of writing formatting instructions into the prompt, we\n",
|
||||
"# leverage .with_structured_output to coerce the output into a simple\n",
|
||||
"# schema.\n",
|
||||
"chain = route_prompt | llm.with_structured_output(RouteQuery)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "88012e10-8def-44fa-833f-989935824182",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"vegetable\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = chain.invoke({\"input\": \"What color are carrots?\"})\n",
|
||||
"\n",
|
||||
"print(result[\"destination\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "baf7ba9e-65b4-48af-8a39-453c01a7b7cb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
"See [this tutorial](/docs/tutorials/llm_chain) for more detail on building with prompt templates, LLMs, and output parsers.\n",
|
||||
"\n",
|
||||
"Check out the [LCEL conceptual docs](/docs/concepts/#langchain-expression-language-lcel) for more background information."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "353e4bab-3b8a-4e89-89e2-200a8d8eb8dd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
706
docs/docs/versions/migrating_chains/map_reduce_chain.ipynb
Normal file
File diff suppressed because one or more lines are too long
341
docs/docs/versions/migrating_chains/map_rerank_docs_chain.ipynb
Normal file
File diff suppressed because one or more lines are too long
362
docs/docs/versions/migrating_chains/multi_prompt_chain.ipynb
Normal file
File diff suppressed because one or more lines are too long
452
docs/docs/versions/migrating_chains/refine_docs_chain.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -82,13 +82,9 @@
|
||||
"id": "c7e16438",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"import { ColumnContainer, Column } from \"@theme/Columns\";\n",
|
||||
"## Legacy\n",
|
||||
"\n",
|
||||
"<ColumnContainer>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"\n",
|
||||
"#### Legacy"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -128,12 +124,11 @@
|
||||
"id": "081948e5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</Column>\n",
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"<Column>\n",
|
||||
"## LCEL\n",
|
||||
"\n",
|
||||
"#### LCEL\n",
|
||||
"\n"
|
||||
"<details open>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -184,9 +179,6 @@
|
||||
"id": "d6f44fe8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</Column>\n",
|
||||
"</ColumnContainer>\n",
|
||||
"\n",
|
||||
"The LCEL implementation exposes the internals of what's happening around retrieving, formatting documents, and passing them through a prompt to the LLM, but it is more verbose. You can customize and wrap this composition logic in a helper function, or use the higher-level [`create_retrieval_chain`](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval.create_retrieval_chain.html) and [`create_stuff_documents_chain`](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) helper method:"
|
||||
]
|
||||
},
|
||||
@@ -231,6 +223,8 @@
|
||||
"id": "b2717810",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
"Check out the [LCEL conceptual docs](/docs/concepts/#langchain-expression-language-lcel) for more background information."
|
||||
|
||||
281
docs/docs/versions/migrating_chains/stuff_docs_chain.ipynb
Normal file
@@ -0,0 +1,281 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ed78c53c-55ad-4ea2-9cc2-a39a1963c098",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"title: Migrating from StuffDocumentsChain\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"[StuffDocumentsChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.StuffDocumentsChain.html) combines documents by concatenating them into a single context window. It is a straightforward and effective strategy for combining documents for question-answering, summarization, and other purposes.\n",
|
||||
"\n",
|
||||
"[create_stuff_documents_chain](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html) is the recommended alternative. It functions the same as `StuffDocumentsChain`, with better support for streaming and batch functionality. Because it is a simple combination of [LCEL primitives](/docs/concepts/#langchain-expression-language-lcel), it is also easier to extend and incorporate into other LangChain applications.\n",
|
||||
"\n",
|
||||
"Below we will go through both `StuffDocumentsChain` and `create_stuff_documents_chain` on a simple example for illustrative purposes.\n",
|
||||
"\n",
|
||||
"Let's first load a chat model:\n",
|
||||
"\n",
|
||||
"```{=mdx}\n",
|
||||
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
|
||||
"\n",
|
||||
"<ChatModelTabs customVarName=\"llm\" />\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "dac0bef2-9453-46f2-a893-f7569b6a0170",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# | output: false\n",
|
||||
"# | echo: false\n",
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d4022d03-7b5e-4c81-98ff-5b82a2a4eaae",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example\n",
|
||||
"\n",
|
||||
"Let's go through an example where we analyze a set of documents. We first generate some simple documents for illustrative purposes:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "24fa0ba9-e245-47d1-bc2e-6286dd884117",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.documents import Document\n",
|
||||
"\n",
|
||||
"documents = [\n",
|
||||
" Document(page_content=\"Apples are red\", metadata={\"title\": \"apple_book\"}),\n",
|
||||
" Document(page_content=\"Blueberries are blue\", metadata={\"title\": \"blueberry_book\"}),\n",
|
||||
" Document(page_content=\"Bananas are yelow\", metadata={\"title\": \"banana_book\"}),\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a769128-205f-417d-a25d-519e7cb03be7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Legacy\n",
|
||||
"\n",
|
||||
"<details open>\n",
|
||||
"\n",
|
||||
"Below we show an implementation with `StuffDocumentsChain`. We define the prompt template for a summarization task and instantiate a [LLMChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.llm.LLMChain.html) object for this purpose. We define how documents are formatted into the prompt and ensure consistency among the keys in the various prompts."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "9734c0f3-64e7-4ae6-8578-df03b3dabb26",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains import LLMChain, StuffDocumentsChain\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate, PromptTemplate\n",
|
||||
"\n",
|
||||
"# This controls how each document will be formatted. Specifically,\n",
|
||||
"# it will be passed to `format_document` - see that function for more\n",
|
||||
"# details.\n",
|
||||
"document_prompt = PromptTemplate(\n",
|
||||
" input_variables=[\"page_content\"], template=\"{page_content}\"\n",
|
||||
")\n",
|
||||
"document_variable_name = \"context\"\n",
|
||||
"# The prompt here should take as an input variable the\n",
|
||||
"# `document_variable_name`\n",
|
||||
"prompt = ChatPromptTemplate.from_template(\"Summarize this content: {context}\")\n",
|
||||
"\n",
|
||||
"llm_chain = LLMChain(llm=llm, prompt=prompt)\n",
|
||||
"chain = StuffDocumentsChain(\n",
|
||||
" llm_chain=llm_chain,\n",
|
||||
" document_prompt=document_prompt,\n",
|
||||
" document_variable_name=document_variable_name,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0cb733bf-eb71-4fae-a8f4-d522924020cb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can now invoke our chain:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "d7d1ce10-bbee-4cb0-879d-7de4f69191c4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'This content describes the colors of different fruits: apples are red, blueberries are blue, and bananas are yellow.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 19,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = chain.invoke(documents)\n",
|
||||
"result[\"output_text\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "79b10d40-1521-433b-9026-6ec836ffeeb3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'input_documents': [Document(metadata={'title': 'apple_book'}, page_content='Apples are red'), Document(metadata={'title': 'blueberry_book'}, page_content='Blueberries are blue'), Document(metadata={'title': 'banana_book'}, page_content='Bananas are yelow')], 'output_text': 'This content describes the colors of different fruits: apples are red, blueberries are blue, and bananas are yellow.'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in chain.stream(documents):\n",
|
||||
" print(chunk)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b4cb6a5b-37ea-48cc-a096-b948d3ff7e9f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"### LCEL\n",
|
||||
"\n",
|
||||
"<details open>\n",
|
||||
"\n",
|
||||
"Below we show an implementation using `create_stuff_documents_chain`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "de38f27a-c648-44be-8c37-0a458c2920a9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chains.combine_documents import create_stuff_documents_chain\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate.from_template(\"Summarize this content: {context}\")\n",
|
||||
"chain = create_stuff_documents_chain(llm, prompt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9d0e6996-9bf8-4097-9c1a-1c539eac3ed1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Invoking the chain, we obtain a similar result as before:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "f2d2bdfb-3a6a-464b-b4c2-e4252b2e53a0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'This content describes the colors of different fruits: apples are red, blueberries are blue, and bananas are yellow.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = chain.invoke({\"context\": documents})\n",
|
||||
"result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "493e6270-c61d-46c5-91b3-0cf7740a88f9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note that this implementation supports streaming of output tokens:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "b5adcabd-9bc1-4c91-a12b-7be82d64e457",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" | This | content | describes | the | colors | of | different | fruits | : | apples | are | red | , | blue | berries | are | blue | , | and | bananas | are | yellow | . | | "
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for chunk in chain.stream({\"context\": documents}):\n",
|
||||
" print(chunk, end=\" | \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "181c5633-38ea-4692-a869-32f4f78398e4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"</details>\n",
|
||||
"\n",
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
"Check out the [LCEL conceptual docs](/docs/concepts/#langchain-expression-language-lcel) for more background information.\n",
|
||||
"\n",
|
||||
"See these [how-to guides](/docs/how_to/#qa-with-rag) for more on question-answering tasks with RAG.\n",
|
||||
"\n",
|
||||
"See [this tutorial](/docs/tutorials/summarization/) for more LLM-based summarization strategies."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -93,7 +93,7 @@ module.exports = {
|
||||
},
|
||||
{
|
||||
type: "category",
|
||||
label: "Migrating to LCEL",
|
||||
label: "Migrating from v0.0 chains",
|
||||
link: {type: 'doc', id: 'versions/migrating_chains/index'},
|
||||
collapsible: false,
|
||||
collapsed: false,
|
||||
|
||||
@@ -66,10 +66,6 @@
|
||||
"source": "/cookbook(/?)",
|
||||
"destination": "/v0.1/docs/cookbook/"
|
||||
},
|
||||
{
|
||||
"source": "/docs/integrations/toolkits/document_comparison_toolkit(/?)",
|
||||
"destination": "/docs/tutorials/rag/"
|
||||
},
|
||||
{
|
||||
"source": "/v0.2/docs/how_to/migrate_chains(/?)",
|
||||
"destination": "/v0.2/docs/versions/migrating_chains"
|
||||
@@ -81,6 +77,14 @@
|
||||
{
|
||||
"source": "/v0.2/docs/integrations/toolkits/airbyte_structured_qa/",
|
||||
"destination": "/v0.2/docs/integrations/document_loaders/airbyte/"
|
||||
},
|
||||
{
|
||||
"source": "/v0.2/docs/integrations/toolkits/document_comparison_toolkit(/?)",
|
||||
"destination": "/v0.2/docs/tutorials/rag/"
|
||||
},
|
||||
{
|
||||
"source": "/v0.2/docs/integrations/toolkits/:path(.*/?)*",
|
||||
"destination": "/v0.2/docs/integrations/tools/:path*"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -9,3 +9,6 @@ langchain-nvidia-ai-endpoints
|
||||
langchain-elasticsearch
|
||||
urllib3==1.26.19
|
||||
nbconvert==7.16.4
|
||||
|
||||
# temp fix, uv fails to install 3.10.7
|
||||
orjson<=3.10.6
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
"\n",
|
||||
"- TODO: Make sure API reference link is correct.\n",
|
||||
"\n",
|
||||
"This notebook provides a quick overview for getting started with __ModuleName__ [document loader](/docs/integrations/document_loaders/). For detailed documentation of all __ModuleName__Loader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.__module_name___loader.__ModuleName__Loader.html).\n",
|
||||
"This notebook provides a quick overview for getting started with __ModuleName__ [document loader](https://python.langchain.com/v0.2/docs/concepts/#document-loaders). For detailed documentation of all __ModuleName__Loader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.__module_name___loader.__ModuleName__Loader.html).\n",
|
||||
"\n",
|
||||
"- TODO: Add any other relevant links, like information about underlying API, etc.\n",
|
||||
"\n",
|
||||
@@ -32,7 +32,7 @@
|
||||
"| :--- | :--- | :---: | :---: | :---: |\n",
|
||||
"| [__ModuleName__Loader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.__module_name__loader.__ModuleName__Loader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅/❌ | beta/❌ | ✅/❌ | \n",
|
||||
"### Loader features\n",
|
||||
"| Source | Document Lazy Loading | Async Support\n",
|
||||
"| Source | Document Lazy Loading | Native Async Support\n",
|
||||
"| :---: | :---: | :---: | \n",
|
||||
"| __ModuleName__Loader | ✅/❌ | ✅/❌ | \n",
|
||||
"\n",
|
||||
@@ -65,7 +65,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
"If you want to get automated best in-class tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -102,7 +102,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"## Initialization\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and load documents:\n",
|
||||
"\n",
|
||||
@@ -193,11 +193,6 @@
|
||||
"\n",
|
||||
"For detailed documentation of all __ModuleName__Loader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.__module_name___loader.__ModuleName__Loader.html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -24,13 +24,9 @@
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"- TODO: Fill in table features.\n",
|
||||
"- TODO: Remove JS support link if not relevant, otherwise ensure link is correct.\n",
|
||||
"- TODO: Make sure API reference links are correct.\n",
|
||||
"import { ItemTable } from \"@theme/FeatureTables\";\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/text_embedding/__package_name_short_snake__) | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [__ModuleName__Embeddings](https://api.python.langchain.com/en/latest/embeddings/__module_name__.embeddings.__ModuleName__Embeddings.html) | [__package_name__](https://api.python.langchain.com/en/latest/__package_name_short_snake___api_reference.html) | ✅/❌ | beta/❌ | ✅/❌ |  |  |\n",
|
||||
"<ItemTable category=\"text_embedding\" item=\"__ModuleName__\" />\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
@@ -157,10 +153,10 @@
|
||||
"retriever = vectorstore.as_retriever()\n",
|
||||
"\n",
|
||||
"# Retrieve the most similar text\n",
|
||||
"retrieved_document = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
|
||||
"\n",
|
||||
"# show the retrieved document's content\n",
|
||||
"retrieved_document.page_content"
|
||||
"retrieved_documents[0].page_content"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -210,7 +206,7 @@
|
||||
"text2 = (\n",
|
||||
" \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
|
||||
")\n",
|
||||
"two_vectors = embeddings.embed_queries([text, text2])\n",
|
||||
"two_vectors = embeddings.embed_documents([text, text2])\n",
|
||||
"for vector in two_vectors:\n",
|
||||
" print(str(vector)[:100]) # Show the first 100 characters of the vector"
|
||||
]
|
||||
@@ -220,34 +216,10 @@
|
||||
"id": "98785c12",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Async Usage\n",
|
||||
"## API Reference\n",
|
||||
"\n",
|
||||
"You can also use `aembed_query` and `aembed_documents` for producing embeddings asynchronously:\n"
|
||||
"For detailed documentation on `__ModuleName__Embeddings` features and configuration options, please refer to the [API reference](https://api.python.langchain.com/en/latest/embeddings/__module_name__.embeddings.__ModuleName__Embeddings.html).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4c3bef91",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"async def async_example():\n",
|
||||
" single_vector = await embeddings.embed_query(text)\n",
|
||||
" print(str(single_vector)[:100]) # Show the first 100 characters of the vector\n",
|
||||
"\n",
|
||||
"asyncio.run(async_example())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f1bd4396",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -44,7 +44,7 @@ class __ModuleName__Retriever(BaseRetriever):

retriever.invoke(query)

.. code-block:: python
.. code-block:: none

# TODO: Example output.

@@ -67,7 +67,7 @@ class __ModuleName__Retriever(BaseRetriever):
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

def format_docs(docs):
return "\n\n".join(doc.page_content for doc in docs)
return "\\n\\n".join(doc.page_content for doc in docs)

chain = (
{"context": retriever | format_docs, "question": RunnablePassthrough()}
@@ -78,7 +78,7 @@ class __ModuleName__Retriever(BaseRetriever):

chain.invoke("...")

.. code-block:: python
.. code-block:: none

# TODO: Example output.


@@ -41,7 +41,7 @@ class __ModuleName__Toolkit(BaseToolKit):

toolkit.get_tools()

.. code-block:: python
.. code-block:: none

# TODO: Example output.

@@ -61,7 +61,7 @@ class __ModuleName__Toolkit(BaseToolKit):
for event in events:
event["messages"][-1].pretty_print()

.. code-block:: python
.. code-block:: none

# TODO: Example output.


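The `format_docs` change above is an escaping fix, not a behavior change: inside a regular (non-raw) Python docstring, `\n` becomes a literal newline, so the rendered example would break across lines instead of showing the escape sequence. A tiny illustration of the difference:

```python
plain = "return \"\n\n\".join(docs)"      # \n is a real newline here
escaped = "return \"\\n\\n\".join(docs)"  # renders as the characters \n\n
print(plain)    # the quoted join string breaks across three lines
print(escaped)  # prints: return "\n\n".join(docs)
```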
@@ -164,9 +164,105 @@ class GitHubToolkit(BaseToolkit):

See [Security](https://python.langchain.com/docs/security) for more information.

Setup:
See detailed installation instructions here:
https://python.langchain.com/v0.2/docs/integrations/tools/github/#installation

You will need to install ``pygithub`` and set the following environment
variables:

.. code-block:: bash

pip install -U pygithub
export GITHUB_APP_ID="your-app-id"
export GITHUB_APP_PRIVATE_KEY="path-to-private-key"
export GITHUB_REPOSITORY="your-github-repository"

Instantiate:
.. code-block:: python

from langchain_community.agent_toolkits.github.toolkit import GitHubToolkit
from langchain_community.utilities.github import GitHubAPIWrapper

github = GitHubAPIWrapper()
toolkit = GitHubToolkit.from_github_api_wrapper(github)

Tools:
.. code-block:: python

tools = toolkit.get_tools()
for tool in tools:
print(tool.name)

.. code-block:: none

Get Issues
Get Issue
Comment on Issue
List open pull requests (PRs)
Get Pull Request
Overview of files included in PR
Create Pull Request
List Pull Requests' Files
Create File
Read File
Update File
Delete File
Overview of existing files in Main branch
Overview of files in current working branch
List branches in this repository
Set active branch
Create a new branch
Get files from a directory
Search issues and pull requests
Search code
Create review request

Use within an agent:
.. code-block:: python

from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

# Select example tool
tools = [tool for tool in toolkit.get_tools() if tool.name == "Get Issue"]
assert len(tools) == 1
tools[0].name = "get_issue"

llm = ChatOpenAI(model="gpt-4o-mini")
agent_executor = create_react_agent(llm, tools)

example_query = "What is the title of issue 24888?"

events = agent_executor.stream(
{"messages": [("user", example_query)]},
stream_mode="values",
)
for event in events:
event["messages"][-1].pretty_print()

.. code-block:: none

================================ Human Message =================================

What is the title of issue 24888?
================================== Ai Message ==================================
Tool Calls:
get_issue (call_iSYJVaM7uchfNHOMJoVPQsOi)
Call ID: call_iSYJVaM7uchfNHOMJoVPQsOi
Args:
issue_number: 24888
================================= Tool Message =================================
Name: get_issue

{"number": 24888, "title": "Standardize KV-Store Docs", "body": "..."
================================== Ai Message ==================================

The title of issue 24888 is "Standardize KV-Store Docs".

Parameters:
tools: List[BaseTool]. The tools in the toolkit. Default is an empty list.
"""
""" # noqa: E501

tools: List[BaseTool] = []

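One detail worth noting in the agent example above: GitHub tools ship with human-readable names like "Get Issue", but OpenAI-style tool calling only accepts names made of letters, digits, underscores, and dashes, hence the rename to `get_issue` before binding. A hedged sketch generalizing the same idea to several tools (the regex normalization is illustrative, not part of the toolkit):

```python
import re

def normalize_tool_names(tools):
    # OpenAI function names may not contain spaces; snake_case them instead
    for tool in tools:
        tool.name = re.sub(r"[^a-zA-Z0-9_-]+", "_", tool.name).strip("_").lower()
    return tools
```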
@@ -39,9 +39,81 @@ class GmailToolkit(BaseToolkit):

See https://python.langchain.com/docs/security for more information.

Setup:
You will need a Google credentials.json file to use this toolkit.
See instructions here: https://python.langchain.com/v0.2/docs/integrations/tools/gmail/#setup

Key init args:
api_resource: Optional. The Google API resource. Default is None.

Instantiate:
.. code-block:: python

from langchain_google_community import GmailToolkit

toolkit = GmailToolkit()

Tools:
.. code-block:: python

toolkit.get_tools()

.. code-block:: none

[GmailCreateDraft(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),
GmailSendMessage(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),
GmailSearch(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),
GmailGetMessage(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>),
GmailGetThread(api_resource=<googleapiclient.discovery.Resource object at 0x1094509d0>)]

Use within an agent:
.. code-block:: python

from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

llm = ChatOpenAI(model="gpt-4o-mini")

agent_executor = create_react_agent(llm, tools)

example_query = "Draft an email to fake@fake.com thanking them for coffee."

events = agent_executor.stream(
{"messages": [("user", example_query)]},
stream_mode="values",
)
for event in events:
event["messages"][-1].pretty_print()

.. code-block:: none

================================ Human Message =================================

Draft an email to fake@fake.com thanking them for coffee.
================================== Ai Message ==================================
Tool Calls:
create_gmail_draft (call_slGkYKZKA6h3Mf1CraUBzs6M)
Call ID: call_slGkYKZKA6h3Mf1CraUBzs6M
Args:
message: Dear Fake,

I wanted to take a moment to thank you for the coffee yesterday. It was a pleasure catching up with you. Let's do it again soon!

Best regards,
[Your Name]
to: ['fake@fake.com']
subject: Thank You for the Coffee
================================= Tool Message =================================
Name: create_gmail_draft

Draft created. Draft Id: r-7233782721440261513
================================== Ai Message ==================================

I have drafted an email to fake@fake.com thanking them for the coffee. You can review and send it from your email draft with the subject "Thank You for the Coffee".

Parameters:
api_resource: Optional. The Google API resource. Default is None.
"""
""" # noqa: E501

api_resource: Resource = Field(default_factory=build_resource_service)


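For context on the `api_resource` default above: `build_resource_service` constructs the Google API client from local credentials. A hedged sketch of supplying a custom resource instead, following the utility helpers the Gmail integration docs describe (treat the exact import path and file names as assumptions):

```python
from langchain_google_community import GmailToolkit
from langchain_google_community.gmail.utils import (
    build_resource_service,
    get_gmail_credentials,
)

# assumption: credentials.json / token.json live in the working directory
credentials = get_gmail_credentials(
    token_file="token.json",
    scopes=["https://mail.google.com/"],
    client_secrets_file="credentials.json",
)
api_resource = build_resource_service(credentials=credentials)
toolkit = GmailToolkit(api_resource=api_resource)
```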
@@ -38,7 +38,125 @@ class RequestsToolkit(BaseToolkit):
what network access it has.

See https://python.langchain.com/docs/security for more information.
"""

Setup:
Install ``langchain-community``.

.. code-block:: bash

pip install -U langchain-community

Key init args:
requests_wrapper: langchain_community.utilities.requests.GenericRequestsWrapper
wrapper for executing requests.
allow_dangerous_requests: bool
Defaults to False. Must "opt-in" to using dangerous requests by setting to True.

Instantiate:
.. code-block:: python

from langchain_community.agent_toolkits.openapi.toolkit import RequestsToolkit
from langchain_community.utilities.requests import TextRequestsWrapper

toolkit = RequestsToolkit(
requests_wrapper=TextRequestsWrapper(headers={}),
allow_dangerous_requests=ALLOW_DANGEROUS_REQUEST,
)

Tools:
.. code-block:: python

tools = toolkit.get_tools()
tools

.. code-block:: none

[RequestsGetTool(requests_wrapper=TextRequestsWrapper(headers={}, aiosession=None, auth=None, response_content_type='text', verify=True), allow_dangerous_requests=True),
RequestsPostTool(requests_wrapper=TextRequestsWrapper(headers={}, aiosession=None, auth=None, response_content_type='text', verify=True), allow_dangerous_requests=True),
RequestsPatchTool(requests_wrapper=TextRequestsWrapper(headers={}, aiosession=None, auth=None, response_content_type='text', verify=True), allow_dangerous_requests=True),
RequestsPutTool(requests_wrapper=TextRequestsWrapper(headers={}, aiosession=None, auth=None, response_content_type='text', verify=True), allow_dangerous_requests=True),
RequestsDeleteTool(requests_wrapper=TextRequestsWrapper(headers={}, aiosession=None, auth=None, response_content_type='text', verify=True), allow_dangerous_requests=True)]

Use within an agent:
.. code-block:: python

from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent


api_spec = \"\"\"
openapi: 3.0.0
info:
title: JSONPlaceholder API
version: 1.0.0
servers:
- url: https://jsonplaceholder.typicode.com
paths:
/posts:
get:
summary: Get posts
parameters: &id001
- name: _limit
in: query
required: false
schema:
type: integer
example: 2
description: Limit the number of results
\"\"\"

system_message = \"\"\"
You have access to an API to help answer user queries.
Here is documentation on the API:
{api_spec}
\"\"\".format(api_spec=api_spec)

llm = ChatOpenAI(model="gpt-4o-mini")
agent_executor = create_react_agent(llm, tools, state_modifier=system_message)

example_query = "Fetch the top two posts. What are their titles?"

events = agent_executor.stream(
{"messages": [("user", example_query)]},
stream_mode="values",
)
for event in events:
event["messages"][-1].pretty_print()

.. code-block:: none

================================ Human Message =================================

Fetch the top two posts. What are their titles?
================================== Ai Message ==================================
Tool Calls:
requests_get (call_RV2SOyzCnV5h2sm4WPgG8fND)
Call ID: call_RV2SOyzCnV5h2sm4WPgG8fND
Args:
url: https://jsonplaceholder.typicode.com/posts?_limit=2
================================= Tool Message =================================
Name: requests_get

[
{
"userId": 1,
"id": 1,
"title": "sunt aut facere repellat provident occaecati excepturi optio reprehenderit",
"body": "quia et suscipit..."
},
{
"userId": 1,
"id": 2,
"title": "qui est esse",
"body": "est rerum tempore vitae..."
}
]
================================== Ai Message ==================================

The titles of the top two posts are:
1. "sunt aut facere repellat provident occaecati excepturi optio reprehenderit"
2. "qui est esse"
""" # noqa: E501

requests_wrapper: TextRequestsWrapper
"""The requests wrapper."""

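The `ALLOW_DANGEROUS_REQUEST` name in the instantiation example above is assumed to be defined by the caller; the toolkit refuses to operate unless the opt-in is explicit. A minimal sketch:

```python
from langchain_community.agent_toolkits.openapi.toolkit import RequestsToolkit
from langchain_community.utilities.requests import TextRequestsWrapper

# Explicit opt-in: these tools can issue arbitrary GET/POST/PATCH/PUT/DELETE
# requests, so only enable them against endpoints you trust.
ALLOW_DANGEROUS_REQUEST = True

toolkit = RequestsToolkit(
    requests_wrapper=TextRequestsWrapper(headers={}),
    allow_dangerous_requests=ALLOW_DANGEROUS_REQUEST,
)
```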
@@ -21,7 +21,73 @@ class SlackToolkit(BaseToolkit):

Parameters:
client: The Slack client.
"""

Setup:
Install ``slack_sdk`` and set environment variable ``SLACK_USER_TOKEN``.

.. code-block:: bash

pip install -U slack_sdk
export SLACK_USER_TOKEN="your-user-token"

Key init args:
client: slack_sdk.WebClient
The Slack client.

Instantiate:
.. code-block:: python

from langchain_community.agent_toolkits import SlackToolkit

toolkit = SlackToolkit()

Tools:
.. code-block:: python

tools = toolkit.get_tools()
tools

.. code-block:: none

[SlackGetChannel(client=<slack_sdk.web.client.WebClient object at 0x113caa8c0>),
SlackGetMessage(client=<slack_sdk.web.client.WebClient object at 0x113caa4d0>),
SlackScheduleMessage(client=<slack_sdk.web.client.WebClient object at 0x113caa440>),
SlackSendMessage(client=<slack_sdk.web.client.WebClient object at 0x113caa410>)]

Use within an agent:
.. code-block:: python

from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

llm = ChatOpenAI(model="gpt-4o-mini")
agent_executor = create_react_agent(llm, tools)

example_query = "When was the #general channel created?"

events = agent_executor.stream(
{"messages": [("user", example_query)]},
stream_mode="values",
)
for event in events:
message = event["messages"][-1]
if message.type != "tool": # mask sensitive information
event["messages"][-1].pretty_print()

.. code-block:: none

================================ Human Message =================================

When was the #general channel created?
================================== Ai Message ==================================
Tool Calls:
get_channelid_name_dict (call_NXDkALjoOx97uF1v0CoZTqtJ)
Call ID: call_NXDkALjoOx97uF1v0CoZTqtJ
Args:
================================== Ai Message ==================================

The #general channel was created on timestamp 1671043305.
""" # noqa: E501

client: WebClient = Field(default_factory=login)


@@ -206,7 +206,7 @@ class ChatBaichuan(BaseChatModel):

Key init args — client params:
api_key: Optional[str]
MiniMax API key. If not passed in will be read from env var BAICHUAN_API_KEY.
Baichuan API key. If not passed in will be read from env var BAICHUAN_API_KEY.
base_url: Optional[str]
Base URL for API requests.


@@ -200,7 +200,7 @@ class QianfanChatEndpoint(BaseChatModel):
("system", "你是一名专业的翻译家,可以将用户的中文翻译为英文。"),
("human", "我喜欢编程。"),
]
qianfan_chat.invoke(message)
qianfan_chat.invoke(messages)

.. code-block:: python

@@ -219,6 +219,7 @@ class QianfanChatEndpoint(BaseChatModel):

.. code-block:: python

stream = chat.stream(messages)
full = next(stream)
for chunk in stream:
full += chunk

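The streaming snippet in the Qianfan hunk is the standard LangChain idiom: message chunks support `+`, so a full `AIMessageChunk` can be accumulated incrementally. A self-contained sketch, using the fake chat model from `langchain_core` as a stand-in for Qianfan:

```python
from langchain_core.language_models import FakeListChatModel

chat = FakeListChatModel(responses=["Hello from a stub model"])  # stand-in

stream = chat.stream("Say hello")
full = next(stream)            # seed with the first chunk
for chunk in stream:
    full += chunk              # chunk addition concatenates content
print(full.content)
```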
@@ -167,7 +167,7 @@ class GPTRouter(BaseChatModel):
"""Number of chat completions to generate for each prompt."""
max_tokens: int = 256

@root_validator(allow_reuse=True)
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
values["gpt_router_api_base"] = get_from_dict_or_env(
values,
@@ -183,7 +183,10 @@ class GPTRouter(BaseChatModel):
"GPT_ROUTER_API_KEY",
)
)
return values

@root_validator(pre=True, skip_on_failure=True)
def post_init(cls, values: Dict) -> Dict:
try:
from gpt_router.client import GPTRouterClient


@@ -21,7 +21,6 @@ from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResu
from langchain_core.pydantic_v1 import Field, SecretStr, root_validator
from langchain_core.utils import (
convert_to_secret_str,
from_env,
get_from_dict_or_env,
get_pydantic_field_names,
pre_init,
@@ -99,13 +98,9 @@ class ChatHunyuan(BaseChatModel):
def lc_serializable(self) -> bool:
return True

hunyuan_app_id: Optional[int] = Field(
default_factory=from_env("HUNYUAN_APP_ID", default=None)
)
hunyuan_app_id: Optional[int] = None
"""Hunyuan App ID"""
hunyuan_secret_id: Optional[str] = Field(
default_factory=from_env("HUNYUAN_SECRET_ID", default=None)
)
hunyuan_secret_id: Optional[str] = None
"""Hunyuan Secret ID"""
hunyuan_secret_key: Optional[SecretStr] = None
"""Hunyuan Secret Key"""
@@ -168,6 +163,16 @@ class ChatHunyuan(BaseChatModel):

@pre_init
def validate_environment(cls, values: Dict) -> Dict:
values["hunyuan_app_id"] = get_from_dict_or_env(
values,
"hunyuan_app_id",
"HUNYUAN_APP_ID",
)
values["hunyuan_secret_id"] = get_from_dict_or_env(
values,
"hunyuan_secret_id",
"HUNYUAN_SECRET_ID",
)
values["hunyuan_secret_key"] = convert_to_secret_str(
get_from_dict_or_env(
values,

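A pattern recurs across these hunks: `Field(default_factory=from_env(...))` defaults are rolled back in favor of resolving environment variables inside a validator with `get_from_dict_or_env`. A minimal sketch of the two styles side by side, on a hypothetical model (names are illustrative, not from the diff):

```python
from typing import Dict, Optional

from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.utils import from_env, get_from_dict_or_env, pre_init


class StyleA(BaseModel):
    # env var resolved at field-construction time via a default factory
    api_base: Optional[str] = Field(
        default_factory=from_env("EXAMPLE_API_BASE", default=None)
    )


class StyleB(BaseModel):
    # env var resolved in a validator, after explicit kwargs are considered
    api_base: Optional[str] = None

    @pre_init
    def validate_environment(cls, values: Dict) -> Dict:
        values["api_base"] = get_from_dict_or_env(
            values, "api_base", "EXAMPLE_API_BASE", default=None
        )
        return values
```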
@@ -55,7 +55,7 @@ from langchain_core.outputs import (
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool
from langchain_core.utils import from_env, get_from_dict_or_env, pre_init
from langchain_core.utils import get_from_dict_or_env, pre_init
from langchain_core.utils.function_calling import convert_to_openai_tool

logger = logging.getLogger(__name__)
@@ -219,24 +219,12 @@ class ChatLiteLLM(BaseChatModel):
model: str = "gpt-3.5-turbo"
model_name: Optional[str] = None
"""Model name to use."""
openai_api_key: Optional[str] = Field(
default_factory=from_env("OPENAI_API_KEY", default="")
)
azure_api_key: Optional[str] = Field(
default_factory=from_env("AZURE_API_KEY", default="")
)
anthropic_api_key: Optional[str] = Field(
default_factory=from_env("ANTHROPIC_API_KEY", default="")
)
replicate_api_key: Optional[str] = Field(
default_factory=from_env("REPLICATE_API_KEY", default="")
)
cohere_api_key: Optional[str] = Field(
default_factory=from_env("COHERE_API_KEY", default="")
)
openrouter_api_key: Optional[str] = Field(
default_factory=from_env("OPENROUTER_API_KEY", default="")
)
openai_api_key: Optional[str] = None
azure_api_key: Optional[str] = None
anthropic_api_key: Optional[str] = None
replicate_api_key: Optional[str] = None
cohere_api_key: Optional[str] = None
openrouter_api_key: Optional[str] = None
streaming: bool = False
api_base: Optional[str] = None
organization: Optional[str] = None
@@ -314,6 +302,24 @@ class ChatLiteLLM(BaseChatModel):
"Please install it with `pip install litellm`"
)

values["openai_api_key"] = get_from_dict_or_env(
values, "openai_api_key", "OPENAI_API_KEY", default=""
)
values["azure_api_key"] = get_from_dict_or_env(
values, "azure_api_key", "AZURE_API_KEY", default=""
)
values["anthropic_api_key"] = get_from_dict_or_env(
values, "anthropic_api_key", "ANTHROPIC_API_KEY", default=""
)
values["replicate_api_key"] = get_from_dict_or_env(
values, "replicate_api_key", "REPLICATE_API_KEY", default=""
)
values["openrouter_api_key"] = get_from_dict_or_env(
values, "openrouter_api_key", "OPENROUTER_API_KEY", default=""
)
values["cohere_api_key"] = get_from_dict_or_env(
values, "cohere_api_key", "COHERE_API_KEY", default=""
)
values["huggingface_api_key"] = get_from_dict_or_env(
values, "huggingface_api_key", "HUGGINGFACE_API_KEY", default=""
)

@@ -387,7 +387,7 @@ class MiniMaxChat(BaseChatModel):
class Config:
allow_population_by_field_name = True

@root_validator(pre=True, allow_reuse=True)
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""
values["minimax_api_key"] = convert_to_secret_str(

@@ -3,12 +3,7 @@
from typing import Dict

from langchain_core.pydantic_v1 import Field, SecretStr
from langchain_core.utils import (
convert_to_secret_str,
from_env,
get_from_dict_or_env,
pre_init,
)
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env, pre_init

from langchain_community.chat_models.openai import ChatOpenAI
from langchain_community.utils.openai import is_openai_v1
@@ -36,13 +31,9 @@ class ChatOctoAI(ChatOpenAI):
chat = ChatOctoAI(model_name="mixtral-8x7b-instruct")
"""

octoai_api_base: str = Field(
default_factory=from_env("OCTOAI_API_BASE", default=DEFAULT_API_BASE)
)
octoai_api_base: str = Field(default=DEFAULT_API_BASE)
octoai_api_token: SecretStr = Field(default=None)
model_name: str = Field(
default_factory=from_env("MODEL_NAME", default=DEFAULT_MODEL)
)
model_name: str = Field(default=DEFAULT_MODEL)

@property
def _llm_type(self) -> str:
@@ -60,9 +51,21 @@ class ChatOctoAI(ChatOpenAI):
@pre_init
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""
values["octoai_api_base"] = get_from_dict_or_env(
values,
"octoai_api_base",
"OCTOAI_API_BASE",
default=DEFAULT_API_BASE,
)
values["octoai_api_token"] = convert_to_secret_str(
get_from_dict_or_env(values, "octoai_api_token", "OCTOAI_API_TOKEN")
)
values["model_name"] = get_from_dict_or_env(
values,
"model_name",
"MODEL_NAME",
default=DEFAULT_MODEL,
)

try:
import openai

@@ -47,7 +47,7 @@ from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResu
from langchain_core.pydantic_v1 import BaseModel, Field, root_validator
from langchain_core.runnables import Runnable
from langchain_core.utils import (
from_env,
get_from_dict_or_env,
get_pydantic_field_names,
pre_init,
)
@@ -205,9 +205,7 @@ class ChatOpenAI(BaseChatModel):
# When updating this to use a SecretStr
# Check for classes that derive from this class (as some of them
# may assume openai_api_key is a str)
openai_api_key: Optional[str] = Field(
default_factory=from_env("OPENAI_API_KEY", default=None), alias="api_key"
)
openai_api_key: Optional[str] = Field(default=None, alias="api_key")
"""Automatically inferred from env var `OPENAI_API_KEY` if not provided."""
openai_api_base: Optional[str] = Field(default=None, alias="base_url")
"""Base URL path for API requests, leave blank if not using a proxy or service
@@ -215,9 +213,7 @@ class ChatOpenAI(BaseChatModel):
openai_organization: Optional[str] = Field(default=None, alias="organization")
"""Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""
# to support explicit proxy for OpenAI
openai_proxy: Optional[str] = Field(
default_factory=from_env("OPENAI_PROXY", default="")
)
openai_proxy: Optional[str] = None
request_timeout: Union[float, Tuple[float, float], Any, None] = Field(
default=None, alias="timeout"
)
@@ -285,6 +281,9 @@ class ChatOpenAI(BaseChatModel):
if values["n"] > 1 and values["streaming"]:
raise ValueError("n must be 1 when streaming.")

values["openai_api_key"] = get_from_dict_or_env(
values, "openai_api_key", "OPENAI_API_KEY"
)
# Check OPENAI_ORGANIZATION for backwards compatibility.
values["openai_organization"] = (
values["openai_organization"]
@@ -294,6 +293,12 @@ class ChatOpenAI(BaseChatModel):
values["openai_api_base"] = values["openai_api_base"] or os.getenv(
"OPENAI_API_BASE"
)
values["openai_proxy"] = get_from_dict_or_env(
values,
"openai_proxy",
"OPENAI_PROXY",
default="",
)
try:
import openai


@@ -88,7 +88,7 @@ class ChatPerplexity(BaseChatModel):
def lc_secrets(self) -> Dict[str, str]:
return {"pplx_api_key": "PPLX_API_KEY"}

@root_validator(pre=True, allow_reuse=True)
@root_validator(pre=True)
def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
"""Build extra kwargs from additional params that were passed in."""
all_required_field_names = get_pydantic_field_names(cls)
@@ -114,7 +114,7 @@ class ChatPerplexity(BaseChatModel):
values["model_kwargs"] = extra
return values

@root_validator(allow_reuse=True)
@root_validator(pre=False, skip_on_failure=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""
values["pplx_api_key"] = get_from_dict_or_env(

@@ -14,7 +14,6 @@ from langchain_core.outputs import ChatGeneration, ChatResult
from langchain_core.pydantic_v1 import Field, SecretStr, root_validator
from langchain_core.utils import (
convert_to_secret_str,
from_env,
get_from_dict_or_env,
get_pydantic_field_names,
pre_init,
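The `allow_reuse` removals and the `pre=False, skip_on_failure=True` rewrites above follow pydantic v1 semantics: `pre=True` validators run before field validation, while post-validators should set `skip_on_failure=True` so they are skipped entirely when a field already failed, instead of receiving a partial `values` dict. A hedged sketch on an illustrative model:

```python
from typing import Dict

from langchain_core.pydantic_v1 import BaseModel, root_validator


class Example(BaseModel):
    api_key: str

    @root_validator(pre=True)
    def fill_defaults(cls, values: Dict) -> Dict:
        # runs before field validation; may inject missing values
        values.setdefault("api_key", "from-env-placeholder")
        return values

    @root_validator(pre=False, skip_on_failure=True)
    def check_consistency(cls, values: Dict) -> Dict:
        # runs after field validation; skipped if any field failed
        assert values["api_key"], "api_key must be non-empty"
        return values
```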
@@ -112,31 +111,19 @@ class ChatSnowflakeCortex(BaseChatModel):
cumulative probabilities. Value should be ranging between 0.0 and 1.0.
"""

snowflake_username: Optional[str] = Field(
default_factory=from_env("SNOWFLAKE_USERNAME", default=None), alias="username"
)
snowflake_username: Optional[str] = Field(default=None, alias="username")
"""Automatically inferred from env var `SNOWFLAKE_USERNAME` if not provided."""
snowflake_password: Optional[SecretStr] = Field(default=None, alias="password")
"""Automatically inferred from env var `SNOWFLAKE_PASSWORD` if not provided."""
snowflake_account: Optional[str] = Field(
default_factory=from_env("SNOWFLAKE_ACCOUNT", default=None), alias="account"
)
snowflake_account: Optional[str] = Field(default=None, alias="account")
"""Automatically inferred from env var `SNOWFLAKE_ACCOUNT` if not provided."""
snowflake_database: Optional[str] = Field(
default_factory=from_env("SNOWFLAKE_DATABASE", default=None), alias="database"
)
snowflake_database: Optional[str] = Field(default=None, alias="database")
"""Automatically inferred from env var `SNOWFLAKE_DATABASE` if not provided."""
snowflake_schema: Optional[str] = Field(
default_factory=from_env("SNOWFLAKE_SCHEMA", default=None), alias="schema"
)
snowflake_schema: Optional[str] = Field(default=None, alias="schema")
"""Automatically inferred from env var `SNOWFLAKE_SCHEMA` if not provided."""
snowflake_warehouse: Optional[str] = Field(
default_factory=from_env("SNOWFLAKE_WAREHOUSE", default=None), alias="warehouse"
)
snowflake_warehouse: Optional[str] = Field(default=None, alias="warehouse")
"""Automatically inferred from env var `SNOWFLAKE_WAREHOUSE` if not provided."""
snowflake_role: Optional[str] = Field(
default_factory=from_env("SNOWFLAKE_ROLE", default=None), alias="role"
)
snowflake_role: Optional[str] = Field(default=None, alias="role")
"""Automatically inferred from env var `SNOWFLAKE_ROLE` if not provided."""

@root_validator(pre=True)
@@ -159,9 +146,27 @@ class ChatSnowflakeCortex(BaseChatModel):
"`pip install snowflake-snowpark-python`"
)

values["snowflake_username"] = get_from_dict_or_env(
values, "snowflake_username", "SNOWFLAKE_USERNAME"
)
values["snowflake_password"] = convert_to_secret_str(
get_from_dict_or_env(values, "snowflake_password", "SNOWFLAKE_PASSWORD")
)
values["snowflake_account"] = get_from_dict_or_env(
values, "snowflake_account", "SNOWFLAKE_ACCOUNT"
)
values["snowflake_database"] = get_from_dict_or_env(
values, "snowflake_database", "SNOWFLAKE_DATABASE"
)
values["snowflake_schema"] = get_from_dict_or_env(
values, "snowflake_schema", "SNOWFLAKE_SCHEMA"
)
values["snowflake_warehouse"] = get_from_dict_or_env(
values, "snowflake_warehouse", "SNOWFLAKE_WAREHOUSE"
)
values["snowflake_role"] = get_from_dict_or_env(
values, "snowflake_role", "SNOWFLAKE_ROLE"
)

connection_params = {
"account": values["snowflake_account"],

@@ -126,9 +126,9 @@ class ChatSparkLLM(BaseChatModel):

from langchain_community.chat_models import ChatSparkLLM

chat = MiniMaxChat(
api_key=api_key,
api_secret=ak,
chat = ChatSparkLLM(
api_key="your-api-key",
api_secret="your-api-secret",
model='Spark4.0 Ultra',
# temperature=...,
# other params...

@@ -7,12 +7,25 @@ import logging
import time
from collections.abc import AsyncIterator, Iterator
from contextlib import asynccontextmanager, contextmanager
from typing import Any, Dict, List, Optional, Tuple, Type, Union
from operator import itemgetter
from typing import (
Any,
Callable,
Dict,
List,
Literal,
Optional,
Sequence,
Tuple,
Type,
Union,
)

from langchain_core.callbacks import (
AsyncCallbackManagerForLLMRun,
CallbackManagerForLLMRun,
)
from langchain_core.language_models import LanguageModelInput
from langchain_core.language_models.chat_models import (
BaseChatModel,
agenerate_from_stream,
@@ -30,9 +43,17 @@ from langchain_core.messages import (
SystemMessage,
SystemMessageChunk,
)
from langchain_core.output_parsers.base import OutputParserLike
from langchain_core.output_parsers.openai_tools import (
JsonOutputKeyToolsParser,
PydanticToolsParser,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.pydantic_v1 import BaseModel, Field, root_validator
from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough
from langchain_core.tools import BaseTool
from langchain_core.utils import get_from_dict_or_env
from langchain_core.utils.function_calling import convert_to_openai_tool

logger = logging.getLogger(__name__)

@@ -40,6 +61,10 @@ API_TOKEN_TTL_SECONDS = 3 * 60
ZHIPUAI_API_BASE = "https://open.bigmodel.cn/api/paas/v4/chat/completions"


def _is_pydantic_class(obj: Any) -> bool:
return isinstance(obj, type) and issubclass(obj, BaseModel)


@contextmanager
def connect_sse(client: Any, method: str, url: str, **kwargs: Any) -> Iterator:
"""Context manager for connecting to an SSE stream.
@@ -199,7 +224,7 @@ class ChatZhipuAI(BaseChatModel):

Key init args — completion params:
model: Optional[str]
Name of OpenAI model to use.
Name of ZhipuAI model to use.
temperature: float
Sampling temperature.
max_tokens: Optional[int]
@@ -207,9 +232,9 @@ class ChatZhipuAI(BaseChatModel):

Key init args — client params:
api_key: Optional[str]
ZhipuAI API key. If not passed in will be read from env var ZHIPUAI_API_KEY.
ZhipuAI API key. If not passed in will be read from env var ZHIPUAI_API_KEY.
api_base: Optional[str]
Base URL for API requests.
Base URL for API requests.

See full list of supported init args and their descriptions in the params section.

@@ -255,7 +280,7 @@ class ChatZhipuAI(BaseChatModel):

.. code-block:: python

stream = llm.stream(messages)
stream = zhipuai_chat.stream(messages)
full = next(stream)
for chunk in stream:
full += chunk

@@ -587,3 +612,178 @@ class ChatZhipuAI(BaseChatModel):

if finish_reason is not None:
break

def bind_tools(
self,
tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]],
*,
tool_choice: Optional[
Union[dict, str, Literal["auto", "any", "none"], bool]
] = None,
**kwargs: Any,
) -> Runnable[LanguageModelInput, BaseMessage]:
"""Bind tool-like objects to this chat model.
Args:
tools: A list of tool definitions to bind to this chat model.
Can be a dictionary, pydantic model, callable, or BaseTool. Pydantic
models, callables, and BaseTools will be automatically converted to
their schema dictionary representation.
tool_choice: Currently this can only be auto for this chat model.
**kwargs: Any additional parameters to pass to the
:class:`~langchain.runnable.Runnable` constructor.
"""
if self.model_name == "glm-4v":
raise ValueError("glm-4v currently does not support tool calling")

formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
if tool_choice and tool_choice != "auto":
raise ValueError("ChatZhipuAI currently only supports `auto` tool choice")
elif tool_choice and tool_choice == "auto":
kwargs["tool_choice"] = tool_choice
return self.bind(tools=formatted_tools, **kwargs)

def with_structured_output(
self,
schema: Optional[Union[Dict, Type[BaseModel]]] = None,
*,
method: Literal["function_calling", "json_mode"] = "function_calling",
include_raw: bool = False,
**kwargs: Any,
) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]:
"""Model wrapper that returns outputs formatted to match the given schema.

Args:
schema: The output schema as a dict or a Pydantic class. If a Pydantic class
then the model output will be an object of that class. If a dict then
the model output will be a dict. With a Pydantic class the returned
attributes will be validated, whereas with a dict they will not be. If
`method` is "function_calling" and `schema` is a dict, then the dict
must match the OpenAI function-calling spec.
method: The method for steering model generation, either "function_calling"
or "json_mode". ZhipuAI only supports "function_calling" which
converts the schema to an OpenAI function and the model will make use of the
function-calling API.
include_raw: If False then only the parsed structured output is returned. If
an error occurs during model output parsing it will be raised. If True
then both the raw model response (a BaseMessage) and the parsed model
response will be returned. If an error occurs during output parsing it
will be caught and returned as well. The final output is always a dict
with keys "raw", "parsed", and "parsing_error".

Returns:
A Runnable that takes any ChatModel input and returns as output:

If include_raw is True then a dict with keys:
raw: BaseMessage
parsed: Optional[_DictOrPydantic]
parsing_error: Optional[BaseException]

If include_raw is False then just _DictOrPydantic is returned,
where _DictOrPydantic depends on the schema:

If schema is a Pydantic class then _DictOrPydantic is the Pydantic
class.

If schema is a dict then _DictOrPydantic is a dict.

Example: Function-calling, Pydantic schema (method="function_calling", include_raw=False):
.. code-block:: python

from langchain_community.chat_models import ChatZhipuAI
from langchain_core.pydantic_v1 import BaseModel

class AnswerWithJustification(BaseModel):
'''An answer to the user question along with justification for the answer.'''
answer: str
justification: str

llm = ChatZhipuAI(temperature=0)
structured_llm = llm.with_structured_output(AnswerWithJustification)

structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
# -> AnswerWithJustification(
# answer='A pound of bricks and a pound of feathers weigh the same.'
# justification="Both a pound of bricks and a pound of feathers have been defined to have the same weight. The 'pound' is a unit of weight, so any two things that are described as weighing a pound will weigh the same."
# )

Example: Function-calling, Pydantic schema (method="function_calling", include_raw=True):
.. code-block:: python

from langchain_community.chat_models import ChatZhipuAI
from langchain_core.pydantic_v1 import BaseModel

class AnswerWithJustification(BaseModel):
'''An answer to the user question along with justification for the answer.'''
answer: str
justification: str

llm = ChatZhipuAI(temperature=0)
structured_llm = llm.with_structured_output(AnswerWithJustification, include_raw=True)

structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
# -> {
# 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_01htjn3cspevxbqc1d7nkk8wab', 'function': {'arguments': '{"answer": "A pound of bricks and a pound of feathers weigh the same.", "justification": "Both a pound of bricks and a pound of feathers have been defined to have the same weight. The \'pound\' is a unit of weight, so any two things that are described as weighing a pound will weigh the same.", "unit": "pounds"}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}, id='run-456beee6-65f6-4e80-88af-a6065480822c-0'),
# 'parsed': AnswerWithJustification(answer='A pound of bricks and a pound of feathers weigh the same.', justification="Both a pound of bricks and a pound of feathers have been defined to have the same weight. The 'pound' is a unit of weight, so any two things that are described as weighing a pound will weigh the same."),
# 'parsing_error': None
# }

Example: Function-calling, dict schema (method="function_calling", include_raw=False):
.. code-block:: python

from langchain_community.chat_models import ChatZhipuAI
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.utils.function_calling import convert_to_openai_tool

class AnswerWithJustification(BaseModel):
'''An answer to the user question along with justification for the answer.'''
answer: str
justification: str

dict_schema = convert_to_openai_tool(AnswerWithJustification)
llm = ChatZhipuAI(temperature=0)
structured_llm = llm.with_structured_output(dict_schema)

structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers")
# -> {
# 'answer': 'A pound of bricks and a pound of feathers weigh the same.',
# 'justification': "Both a pound of bricks and a pound of feathers have been defined to have the same weight. The 'pound' is a unit of weight, so any two things that are described as weighing a pound will weigh the same.", 'unit': 'pounds'}
# }

""" # noqa: E501
if kwargs:
raise ValueError(f"Received unsupported arguments {kwargs}")
is_pydantic_schema = _is_pydantic_class(schema)
if method == "function_calling":
if schema is None:
raise ValueError(
"schema must be specified when method is 'function_calling'. "
"Received None."
)
tool_name = convert_to_openai_tool(schema)["function"]["name"]
llm = self.bind_tools([schema], tool_choice="auto")
if is_pydantic_schema:
output_parser: OutputParserLike = PydanticToolsParser(
tools=[schema], # type: ignore[list-item]
first_tool_only=True, # type: ignore[list-item]
)
else:
output_parser = JsonOutputKeyToolsParser(
key_name=tool_name, first_tool_only=True
)
else:
raise ValueError(
f"""Unrecognized method argument. Expected 'function_calling'.
Received: '{method}'"""
)

if include_raw:
parser_assign = RunnablePassthrough.assign(
parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None
)
parser_none = RunnablePassthrough.assign(parsed=lambda _: None)
parser_with_fallback = parser_assign.with_fallbacks(
[parser_none], exception_key="parsing_error"
)
return RunnableMap(raw=llm) | parser_with_fallback
else:
return llm | output_parser

@@ -63,7 +63,10 @@ class FireCrawlLoader(BaseLoader):
f"Unrecognized mode '{self.mode}'. Expected one of 'crawl', 'scrape'."
)
for doc in firecrawl_docs:
yield Document(
page_content=doc.get("markdown", ""),
metadata=doc.get("metadata", {}),
)
metadata = doc.get("metadata", {})
if (self.params is not None) and self.params.get(
"extractorOptions", {}
).get("mode") == "llm-extraction":
metadata["llm_extraction"] = doc.get("llm_extraction")

yield Document(page_content=doc.get("markdown", ""), metadata=metadata)

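The change above surfaces FireCrawl's LLM-extraction output: when the loader was configured with `extractorOptions.mode == "llm-extraction"`, the extracted structure is copied into document metadata under `llm_extraction`. A hedged usage sketch (the extraction prompt and key are illustrative; a real FireCrawl API key is required):

```python
from langchain_community.document_loaders import FireCrawlLoader

loader = FireCrawlLoader(
    url="https://example.com",
    api_key="fc-...",  # assumption: your FireCrawl API key
    mode="scrape",
    params={
        "extractorOptions": {
            "mode": "llm-extraction",
            "extractionPrompt": "Extract the page title and author.",
        }
    },
)
docs = loader.load()
print(docs[0].metadata.get("llm_extraction"))
```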
@@ -21,7 +21,7 @@ class BaseGitHubLoader(BaseLoader, BaseModel, ABC):
github_api_url: str = "https://api.github.com"
"""URL of GitHub API"""

@root_validator(pre=True, allow_reuse=True)
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that access token exists in environment."""
values["access_token"] = get_from_dict_or_env(

@@ -5,12 +5,7 @@ from __future__ import annotations
from typing import Dict

from langchain_core.pydantic_v1 import Field, SecretStr
from langchain_core.utils import (
convert_to_secret_str,
from_env,
get_from_dict_or_env,
pre_init,
)
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env, pre_init

from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_community.utils.openai import is_openai_v1
@@ -26,9 +21,7 @@ class AnyscaleEmbeddings(OpenAIEmbeddings):
"""AnyScale Endpoints API keys."""
model: str = Field(default=DEFAULT_MODEL)
"""Model name to use."""
anyscale_api_base: str = Field(
default_factory=from_env("ANYSCALE_API_BASE", default=DEFAULT_API_BASE)
)
anyscale_api_base: str = Field(default=DEFAULT_API_BASE)
"""Base URL path for API requests."""
tiktoken_enabled: bool = False
"""Set this to False for non-OpenAI implementations of the embeddings API"""
@@ -51,6 +44,12 @@ class AnyscaleEmbeddings(OpenAIEmbeddings):
"ANYSCALE_API_KEY",
)
)
values["anyscale_api_base"] = get_from_dict_or_env(
values,
"anyscale_api_base",
"ANYSCALE_API_BASE",
default=DEFAULT_API_BASE,
)
try:
import openai


@@ -3,7 +3,11 @@ from typing import Any, Dict, List, Optional
import requests
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr, root_validator
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
from langchain_core.utils import (
convert_to_secret_str,
get_from_dict_or_env,
secret_from_env,
)
from requests import RequestException

BAICHUAN_API_URL: str = "http://api.baichuan-ai.com/v1/embeddings"
@@ -53,7 +57,10 @@ class BaichuanTextEmbeddings(BaseModel, Embeddings):
session: Any #: :meta private:
model_name: str = Field(default="Baichuan-Text-Embedding", alias="model")
"""The model used to embed the documents."""
baichuan_api_key: Optional[SecretStr] = Field(default=None, alias="api_key")
baichuan_api_key: Optional[SecretStr] = Field(
alias="api_key",
default_factory=secret_from_env("BAICHUAN_API_KEY", default=None),
)
"""Automatically inferred from env var `BAICHUAN_API_KEY` if not provided."""
chunk_size: int = 16
"""Chunk size when multiple texts are input"""
@@ -61,22 +68,21 @@ class BaichuanTextEmbeddings(BaseModel, Embeddings):
class Config:
allow_population_by_field_name = True

@root_validator(allow_reuse=True)
@root_validator(pre=False, skip_on_failure=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that auth token exists in environment."""
try:
if values["baichuan_api_key"] is None:
# This is likely here for some backwards compatibility with
# BAICHUAN_AUTH_TOKEN
baichuan_api_key = convert_to_secret_str(
get_from_dict_or_env(values, "baichuan_api_key", "BAICHUAN_API_KEY")
)
except ValueError as original_exc:
try:
baichuan_api_key = convert_to_secret_str(
get_from_dict_or_env(
values, "baichuan_auth_token", "BAICHUAN_AUTH_TOKEN"
)
get_from_dict_or_env(
values, "baichuan_auth_token", "BAICHUAN_AUTH_TOKEN"
)
except ValueError:
raise original_exc
)
values["baichuan_api_key"] = baichuan_api_key
else:
baichuan_api_key = values["baichuan_api_key"]

session = requests.Session()
session.headers.update(
{

@@ -56,7 +56,7 @@ class ClovaEmbeddings(BaseModel, Embeddings):
class Config:
extra = "forbid"

@root_validator(pre=True, allow_reuse=True)
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate api key exists in environment."""
values["clova_emb_api_key"] = convert_to_secret_str(

@@ -6,9 +6,9 @@ from typing import Dict, List, Optional
import requests
from langchain_core._api.deprecation import deprecated
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.runnables.config import run_in_executor
from langchain_core.utils import from_env, pre_init
from langchain_core.utils import get_from_dict_or_env, pre_init

logger = logging.getLogger(__name__)

@@ -20,15 +20,9 @@ logger = logging.getLogger(__name__)
class ErnieEmbeddings(BaseModel, Embeddings):
"""`Ernie Embeddings V1` embedding models."""

ernie_api_base: Optional[str] = Field(
default_factory=from_env("ERNIE_API_BASE", default="https://aip.baidubce.com")
)
ernie_client_id: Optional[str] = Field(
default_factory=from_env("ERNIE_CLIENT_ID", default=None)
)
ernie_client_secret: Optional[str] = Field(
default_factory=from_env("ERNIE_CLIENT_SECRET", default=None)
)
ernie_api_base: Optional[str] = None
ernie_client_id: Optional[str] = None
ernie_client_secret: Optional[str] = None
access_token: Optional[str] = None

chunk_size: int = 16
@@ -39,6 +33,19 @@ class ErnieEmbeddings(BaseModel, Embeddings):

@pre_init
def validate_environment(cls, values: Dict) -> Dict:
values["ernie_api_base"] = get_from_dict_or_env(
values, "ernie_api_base", "ERNIE_API_BASE", "https://aip.baidubce.com"
)
values["ernie_client_id"] = get_from_dict_or_env(
values,
"ernie_client_id",
"ERNIE_CLIENT_ID",
)
values["ernie_client_secret"] = get_from_dict_or_env(
values,
"ernie_client_secret",
"ERNIE_CLIENT_SECRET",
)
return values

def _embedding(self, json: object) -> dict:

@@ -53,7 +53,7 @@ class GradientEmbeddings(BaseModel, Embeddings):
class Config:
extra = "forbid"

@root_validator(allow_reuse=True)
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""

@@ -65,8 +65,15 @@ class GradientEmbeddings(BaseModel, Embeddings):
)

values["gradient_api_url"] = get_from_dict_or_env(
values, "gradient_api_url", "GRADIENT_API_URL"
values,
"gradient_api_url",
"GRADIENT_API_URL",
default="https://api.gradient.ai/api",
)
return values

@root_validator(pre=False, skip_on_failure=True)
def post_init(cls, values: Dict) -> Dict:
try:
import gradientai
except ImportError:
@@ -85,7 +92,6 @@ class GradientEmbeddings(BaseModel, Embeddings):
host=values["gradient_api_url"],
)
values["client"] = gradient.get_embeddings_model(slug=values["model"])

return values

def embed_documents(self, texts: List[str]) -> List[List[float]]:

@@ -47,7 +47,7 @@ class InfinityEmbeddings(BaseModel, Embeddings):
class Config:
extra = "forbid"

@root_validator(allow_reuse=True)
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""


@@ -60,7 +60,7 @@ class InfinityEmbeddingsLocal(BaseModel, Embeddings):
class Config:
extra = "forbid"

@root_validator(allow_reuse=True)
@root_validator(pre=False, skip_on_failure=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""


@@ -1,12 +1,7 @@
from typing import Dict

from langchain_core.pydantic_v1 import Field, SecretStr
from langchain_core.utils import (
convert_to_secret_str,
from_env,
get_from_dict_or_env,
pre_init,
)
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env, pre_init

from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_community.utils.openai import is_openai_v1
@@ -27,11 +22,9 @@ class OctoAIEmbeddings(OpenAIEmbeddings):

octoai_api_token: SecretStr = Field(default=None)
"""OctoAI Endpoints API keys."""
endpoint_url: str = Field(
default_factory=from_env("ENDPOINT_URL", default=DEFAULT_API_BASE)
)
endpoint_url: str = Field(default=DEFAULT_API_BASE)
"""Base URL path for API requests."""
model: str = Field(default_factory=from_env("MODEL", default=DEFAULT_MODEL))
model: str = Field(default=DEFAULT_MODEL)
"""Model name to use."""
tiktoken_enabled: bool = False
"""Set this to False for non-OpenAI implementations of the embeddings API"""
@@ -48,9 +41,21 @@ class OctoAIEmbeddings(OpenAIEmbeddings):
@pre_init
def validate_environment(cls, values: dict) -> dict:
"""Validate that api key and python package exists in environment."""
values["endpoint_url"] = get_from_dict_or_env(
values,
"endpoint_url",
"ENDPOINT_URL",
default=DEFAULT_API_BASE,
)
values["octoai_api_token"] = convert_to_secret_str(
get_from_dict_or_env(values, "octoai_api_token", "OCTOAI_API_TOKEN")
)
values["model"] = get_from_dict_or_env(
values,
"model",
"MODEL",
default=DEFAULT_MODEL,
)

try:
import openai

@@ -23,7 +23,6 @@ from langchain_core._api.deprecation import deprecated
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Field, root_validator
from langchain_core.utils import (
from_env,
get_from_dict_or_env,
get_pydantic_field_names,
pre_init,
@@ -203,18 +202,12 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
"""Base URL path for API requests, leave blank if not using a proxy or service
emulator."""
# to support Azure OpenAI Service custom endpoints
openai_api_type: Optional[str] = Field(
default_factory=from_env("OPENAI_API_TYPE", default="")
)
openai_api_type: Optional[str] = None
# to support explicit proxy for OpenAI
openai_proxy: Optional[str] = Field(
default_factory=from_env("OPENAI_PROXY", default="")
)
openai_proxy: Optional[str] = None
embedding_ctx_length: int = 8191
"""The maximum number of tokens to embed at once."""
openai_api_key: Optional[str] = Field(
default_factory=from_env("OPENAI_API_KEY"), alias="api_key"
)
openai_api_key: Optional[str] = Field(default=None, alias="api_key")
"""Automatically inferred from env var `OPENAI_API_KEY` if not provided."""
openai_organization: Optional[str] = Field(default=None, alias="organization")
"""Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""
@@ -294,9 +287,24 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
@pre_init
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""
values["openai_api_key"] = get_from_dict_or_env(
values, "openai_api_key", "OPENAI_API_KEY"
)
values["openai_api_base"] = values["openai_api_base"] or os.getenv(
"OPENAI_API_BASE"
)
values["openai_api_type"] = get_from_dict_or_env(
values,
"openai_api_type",
"OPENAI_API_TYPE",
default="",
)
values["openai_proxy"] = get_from_dict_or_env(
values,
"openai_proxy",
"OPENAI_PROXY",
default="",
)
if values["openai_api_type"] in ("azure", "azure_ad", "azuread"):
default_api_version = "2023-05-15"
# Azure OpenAI embedding models allow a maximum of 16 texts

@@ -3,8 +3,8 @@ from typing import Dict, Generator, List, Optional

import requests
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.utils import from_env, get_from_dict_or_env, pre_init
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.utils import get_from_dict_or_env, pre_init


class SambaStudioEmbeddings(BaseModel, Embeddings):
@@ -43,27 +43,19 @@ class SambaStudioEmbeddings(BaseModel, Embeddings):
)
"""

sambastudio_embeddings_base_url: str = Field(
default_factory=from_env("SAMBASTUDIO_EMBEDDINGS_BASE_URL", default="")
)
sambastudio_embeddings_base_url: str = ""
"""Base url to use"""

sambastudio_embeddings_base_uri: str = ""
"""endpoint base uri"""

sambastudio_embeddings_project_id: str = Field(
default_factory=from_env("SAMBASTUDIO_EMBEDDINGS_PROJECT_ID", default="")
)
sambastudio_embeddings_project_id: str = ""
"""Project id on sambastudio for model"""

sambastudio_embeddings_endpoint_id: str = Field(
default_factory=from_env("SAMBASTUDIO_EMBEDDINGS_ENDPOINT_ID", default="")
)
sambastudio_embeddings_endpoint_id: str = ""
"""endpoint id on sambastudio for model"""

sambastudio_embeddings_api_key: str = Field(
default_factory=from_env("SAMBASTUDIO_EMBEDDINGS_API_KEY", default="")
)
sambastudio_embeddings_api_key: str = ""
"""sambastudio api key"""

model_kwargs: dict = {}
@@ -75,12 +67,28 @@ class SambaStudioEmbeddings(BaseModel, Embeddings):
@pre_init
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and python package exists in environment."""
values["sambastudio_embeddings_base_url"] = get_from_dict_or_env(
values, "sambastudio_embeddings_base_url", "SAMBASTUDIO_EMBEDDINGS_BASE_URL"
)
values["sambastudio_embeddings_base_uri"] = get_from_dict_or_env(
values,
"sambastudio_embeddings_base_uri",
"SAMBASTUDIO_EMBEDDINGS_BASE_URI",
default="api/predict/generic",
)
values["sambastudio_embeddings_project_id"] = get_from_dict_or_env(
values,
"sambastudio_embeddings_project_id",
"SAMBASTUDIO_EMBEDDINGS_PROJECT_ID",
)
values["sambastudio_embeddings_endpoint_id"] = get_from_dict_or_env(
values,
"sambastudio_embeddings_endpoint_id",
"SAMBASTUDIO_EMBEDDINGS_ENDPOINT_ID",
)
values["sambastudio_embeddings_api_key"] = get_from_dict_or_env(
values, "sambastudio_embeddings_api_key", "SAMBASTUDIO_EMBEDDINGS_API_KEY"
)
return values

def _get_tuning_params(self) -> str:

@@ -12,8 +12,10 @@ from wsgiref.handlers import format_date_time
import numpy as np
import requests
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr, root_validator
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr
from langchain_core.utils import (
    secret_from_env,
)
from numpy import ndarray

# SparkLLMTextEmbeddings is an embedding model provided by iFLYTEK Co., Ltd. (https://iflytek.com/en/).
@@ -102,11 +104,18 @@ class SparkLLMTextEmbeddings(BaseModel, Embeddings):
    ]
    """ # noqa: E501

    spark_app_id: Optional[SecretStr] = Field(default=None, alias="app_id")
    spark_app_id: SecretStr = Field(
        alias="app_id", default_factory=secret_from_env("SPARK_APP_ID")
    )
    """Automatically inferred from env var `SPARK_APP_ID` if not provided."""
    spark_api_key: Optional[SecretStr] = Field(default=None, alias="api_key")
    spark_api_key: Optional[SecretStr] = Field(
        alias="api_key", default_factory=secret_from_env("SPARK_API_KEY", default=None)
    )
    """Automatically inferred from env var `SPARK_API_KEY` if not provided."""
    spark_api_secret: Optional[SecretStr] = Field(default=None, alias="api_secret")
    spark_api_secret: Optional[SecretStr] = Field(
        alias="api_secret",
        default_factory=secret_from_env("SPARK_API_SECRET", default=None),
    )
    """Automatically inferred from env var `SPARK_API_SECRET` if not provided."""
    base_url: str = Field(default="https://emb-cn-huabei-1.xf-yun.com/")
    """Base URL path for API requests"""
@@ -118,20 +127,6 @@ class SparkLLMTextEmbeddings(BaseModel, Embeddings):
    class Config:
        allow_population_by_field_name = True

    @root_validator(allow_reuse=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that auth token exists in environment."""
        values["spark_app_id"] = convert_to_secret_str(
            get_from_dict_or_env(values, "spark_app_id", "SPARK_APP_ID")
        )
        values["spark_api_key"] = convert_to_secret_str(
            get_from_dict_or_env(values, "spark_api_key", "SPARK_API_KEY")
        )
        values["spark_api_secret"] = convert_to_secret_str(
            get_from_dict_or_env(values, "spark_api_secret", "SPARK_API_SECRET")
        )
        return values

    def _embed(self, texts: List[str], host: str) -> Optional[List[List[float]]]:
        """Internal method to call Spark Embedding API and return embeddings.

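This hunk moves the Spark credentials to `secret_from_env` default factories and drops the root validator. A small sketch of the behavior difference between a required and an optional secret, with a made-up `Demo` model (the env-var names are the real ones from the hunk):

    import os
    from typing import Optional

    from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr
    from langchain_core.utils import secret_from_env


    class Demo(BaseModel):
        # No default: construction raises a ValueError if SPARK_APP_ID is unset.
        app_id: SecretStr = Field(default_factory=secret_from_env("SPARK_APP_ID"))
        # default=None: quietly falls back to None when SPARK_API_KEY is unset.
        api_key: Optional[SecretStr] = Field(
            default_factory=secret_from_env("SPARK_API_KEY", default=None)
        )


    os.environ["SPARK_APP_ID"] = "my-app-id"
    print(Demo().app_id)  # prints ********** because SecretStr masks its value

This is also why the unit test further down switches from expecting `ValidationError` to `ValueError` when `SPARK_APP_ID` is missing.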
@@ -4,8 +4,8 @@ import logging
from typing import Any, Dict, List, Optional

from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.utils import from_env, pre_init
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.utils import get_from_dict_or_env, pre_init

logger = logging.getLogger(__name__)

@@ -13,15 +13,11 @@ logger = logging.getLogger(__name__)
class VolcanoEmbeddings(BaseModel, Embeddings):
    """`Volcengine Embeddings` embedding models."""

    volcano_ak: Optional[str] = Field(
        default_factory=from_env("VOLC_ACCESSKEY", default=None)
    )
    volcano_ak: Optional[str] = None
    """volcano access key
    learn more from: https://www.volcengine.com/docs/6459/76491#ak-sk"""

    volcano_sk: Optional[str] = Field(
        default_factory=from_env("VOLC_SECRETKEY", default=None)
    )
    volcano_sk: Optional[str] = None
    """volcano secret key
    learn more from: https://www.volcengine.com/docs/6459/76491#ak-sk"""

@@ -70,6 +66,16 @@ class VolcanoEmbeddings(BaseModel, Embeddings):
            ValueError: volcengine package not found, please install it with
                `pip install volcengine`
        """
        values["volcano_ak"] = get_from_dict_or_env(
            values,
            "volcano_ak",
            "VOLC_ACCESSKEY",
        )
        values["volcano_sk"] = get_from_dict_or_env(
            values,
            "volcano_sk",
            "VOLC_SECRETKEY",
        )

        try:
            from volcengine.maas import MaasService
@@ -1,4 +1,4 @@
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional

from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, Field, root_validator
@@ -70,6 +70,11 @@ class ZhipuAIEmbeddings(BaseModel, Embeddings):
    """Model name"""
    api_key: str
    """Automatically inferred from env var `ZHIPU_API_KEY` if not provided."""
    dimensions: Optional[int] = None
    """The number of dimensions the resulting output embeddings should have.

    Only supported in `embedding-3` and later models.
    """

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
@@ -110,6 +115,13 @@ class ZhipuAIEmbeddings(BaseModel, Embeddings):
            A list of embeddings for each document in the input list.
            Each embedding is represented as a list of float values.
        """
        resp = self.client.embeddings.create(model=self.model, input=texts)
        if self.dimensions is not None:
            resp = self.client.embeddings.create(
                model=self.model,
                input=texts,
                dimensions=self.dimensions,
            )
        else:
            resp = self.client.embeddings.create(model=self.model, input=texts)
        embeddings = [r.embedding for r in resp.data]
        return embeddings
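The new `dimensions` parameter is threaded through to the ZhipuAI embeddings call. A usage sketch, assuming the `zhipuai` package is installed and `ZHIPU_API_KEY` is set in the environment:

    from langchain_community.embeddings import ZhipuAIEmbeddings

    # dimensions is only honored by embedding-3 and later models.
    emb = ZhipuAIEmbeddings(model="embedding-3", dimensions=2048)
    vector = emb.embed_query("This is a test query.")
    assert len(vector) == 2048

The matching integration test near the end of this change set exercises exactly this path.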
@@ -32,6 +32,7 @@ class CassandraGraphVectorStore(GraphVectorStore):
        session: Optional[Session] = None,
        keyspace: Optional[str] = None,
        setup_mode: SetupMode = SetupMode.SYNC,
        **kwargs: Any,
    ):
        """
        Create the hybrid graph store.
@@ -74,6 +75,7 @@ class CassandraGraphVectorStore(GraphVectorStore):
            session=session,
            keyspace=keyspace,
            setup_mode=_setup_mode,
            **kwargs,
        )

    @property
@@ -23,7 +23,6 @@ from langchain_core.prompt_values import PromptValue
from langchain_core.pydantic_v1 import Field, SecretStr, root_validator
from langchain_core.utils import (
    check_package_version,
    from_env,
    get_from_dict_or_env,
    get_pydantic_field_names,
    pre_init,
@@ -58,11 +57,7 @@ class _AnthropicCommon(BaseLanguageModel):
    max_retries: int = 2
    """Number of retries allowed for requests sent to the Anthropic Completion API."""

    anthropic_api_url: Optional[str] = Field(
        default_factory=from_env(
            "ANTHROPIC_API_URL", default="https://api.anthropic.com"
        )
    )
    anthropic_api_url: Optional[str] = None

    anthropic_api_key: Optional[SecretStr] = None

@@ -87,6 +82,12 @@ class _AnthropicCommon(BaseLanguageModel):
            get_from_dict_or_env(values, "anthropic_api_key", "ANTHROPIC_API_KEY")
        )
        # Get custom api url from environment.
        values["anthropic_api_url"] = get_from_dict_or_env(
            values,
            "anthropic_api_url",
            "ANTHROPIC_API_URL",
            default="https://api.anthropic.com",
        )

        try:
            import anthropic

@@ -15,12 +15,7 @@ from langchain_core.callbacks import (
)
from langchain_core.outputs import Generation, GenerationChunk, LLMResult
from langchain_core.pydantic_v1 import Field, SecretStr
from langchain_core.utils import (
    convert_to_secret_str,
    from_env,
    get_from_dict_or_env,
    pre_init,
)
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env, pre_init

from langchain_community.llms.openai import (
    BaseOpenAI,
@@ -88,13 +83,9 @@ class Anyscale(BaseOpenAI):
    """

    """Key word arguments to pass to the model."""
    anyscale_api_base: str = Field(
        default_factory=from_env("ANYSCALE_API_BASE", default=DEFAULT_BASE_URL)
    )
    anyscale_api_base: str = Field(default=DEFAULT_BASE_URL)
    anyscale_api_key: SecretStr = Field(default=None)
    model_name: str = Field(
        default_factory=from_env("MODEL_NAME", default=DEFAULT_MODEL)
    )
    model_name: str = Field(default=DEFAULT_MODEL)

    prefix_messages: List = Field(default_factory=list)

@@ -105,9 +96,21 @@ class Anyscale(BaseOpenAI):
    @pre_init
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment."""
        values["anyscale_api_base"] = get_from_dict_or_env(
            values,
            "anyscale_api_base",
            "ANYSCALE_API_BASE",
            default=DEFAULT_BASE_URL,
        )
        values["anyscale_api_key"] = convert_to_secret_str(
            get_from_dict_or_env(values, "anyscale_api_key", "ANYSCALE_API_KEY")
        )
        values["model_name"] = get_from_dict_or_env(
            values,
            "model_name",
            "MODEL_NAME",
            default=DEFAULT_MODEL,
        )

        try:
            import openai

@@ -8,12 +8,7 @@ import requests
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.pydantic_v1 import Field, SecretStr
from langchain_core.utils import (
    convert_to_secret_str,
    from_env,
    get_from_dict_or_env,
    pre_init,
)
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env, pre_init

from langchain_community.llms.utils import enforce_stop_tokens

@@ -33,12 +28,7 @@ class BaichuanLLM(LLM):
    timeout: int = 60
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)

    baichuan_api_host: Optional[str] = Field(
        default_factory=from_env(
            "BAICHUAN_API_HOST",
            default="https://api.baichuan-ai.com/v1/chat/completions",
        )
    )
    baichuan_api_host: Optional[str] = None
    baichuan_api_key: Optional[SecretStr] = None

    @pre_init
@@ -46,6 +36,12 @@ class BaichuanLLM(LLM):
        values["baichuan_api_key"] = convert_to_secret_str(
            get_from_dict_or_env(values, "baichuan_api_key", "BAICHUAN_API_KEY")
        )
        values["baichuan_api_host"] = get_from_dict_or_env(
            values,
            "baichuan_api_host",
            "BAICHUAN_API_HOST",
            default="https://api.baichuan-ai.com/v1/chat/completions",
        )
        return values

    @property

@@ -10,7 +10,7 @@ from langchain_core.callbacks import (
)
from langchain_core.language_models.llms import LLM
from langchain_core.pydantic_v1 import Field, root_validator
from langchain_core.utils import from_env, pre_init
from langchain_core.utils import get_from_dict_or_env, pre_init
from langchain_core.utils.pydantic import get_fields

from langchain_community.llms.utils import enforce_stop_tokens
@@ -34,9 +34,7 @@ class EdenAI(LLM):

    base_url: str = "https://api.edenai.run/v2"

    edenai_api_key: Optional[str] = Field(
        default_factory=from_env("EDENAI_API_KEY", default=None)
    )
    edenai_api_key: Optional[str] = None

    feature: Literal["text", "image"] = "text"
    """Which generative feature to use, use text by default"""
@@ -77,6 +75,9 @@ class EdenAI(LLM):
    @pre_init
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key exists in environment."""
        values["edenai_api_key"] = get_from_dict_or_env(
            values, "edenai_api_key", "EDENAI_API_KEY"
        )
        return values

    @root_validator(pre=True)

@@ -77,7 +77,7 @@ class GradientLLM(BaseLLM):
        allow_population_by_field_name = True
        extra = "forbid"

    @root_validator(allow_reuse=True)
    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment."""

@@ -88,6 +88,26 @@ class GradientLLM(BaseLLM):
            values, "gradient_workspace_id", "GRADIENT_WORKSPACE_ID"
        )

        values["gradient_api_url"] = get_from_dict_or_env(
            values, "gradient_api_url", "GRADIENT_API_URL"
        )
        return values

    @root_validator(pre=False, skip_on_failure=True)
    def post_init(cls, values: Dict) -> Dict:
        """Post init validation."""
        # Can be moved to post_init_validation
        try:
            import gradientai # noqa
        except ImportError:
            logging.warning(
                "DeprecationWarning: `GradientLLM` will use "
                "`pip install gradientai` in future releases of langchain."
            )
        except Exception:
            pass

        # Can be moved to post_init_validation
        if (
            values["gradient_access_token"] is None
            or len(values["gradient_access_token"]) < 10
@@ -114,20 +134,6 @@ class GradientLLM(BaseLLM):
        if 0 >= kw.get("max_generated_token_count", 1):
            raise ValueError("`max_generated_token_count` must be positive")

        values["gradient_api_url"] = get_from_dict_or_env(
            values, "gradient_api_url", "GRADIENT_API_URL"
        )

        try:
            import gradientai # noqa
        except ImportError:
            logging.warning(
                "DeprecationWarning: `GradientLLM` will use "
                "`pip install gradientai` in future releases of langchain."
            )
        except Exception:
            pass

        return values

    @property

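The Gradient hunk splits one validator into a `pre=True` environment step and a `pre=False, skip_on_failure=True` post-init step. A minimal sketch of the ordering semantics in pydantic v1, on a throwaway `Demo` model:

    from typing import Dict

    from langchain_core.pydantic_v1 import BaseModel, root_validator


    class Demo(BaseModel):
        name: str = "unset"

        @root_validator(pre=True)
        def fill_defaults(cls, values: Dict) -> Dict:
            # Runs on the raw input dict, before any field validation.
            values.setdefault("name", "from-pre")
            return values

        @root_validator(pre=False, skip_on_failure=True)
        def post_init(cls, values: Dict) -> Dict:
            # Runs after field validation; skip_on_failure=True means it is
            # not invoked at all when an earlier validator already failed.
            assert isinstance(values["name"], str)
            return values


    print(Demo().name)  # "from-pre"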
@@ -16,12 +16,7 @@ from langchain_core.callbacks import (
)
from langchain_core.language_models.llms import LLM
from langchain_core.pydantic_v1 import BaseModel, Field, SecretStr, root_validator
from langchain_core.utils import (
    convert_to_secret_str,
    from_env,
    get_from_dict_or_env,
    pre_init,
)
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env, pre_init

from langchain_community.llms.utils import enforce_stop_tokens

@@ -36,7 +31,7 @@ class _MinimaxEndpointClient(BaseModel):
    api_key: SecretStr
    api_url: str

    @root_validator(pre=True, allow_reuse=True)
    @root_validator(pre=True)
    def set_api_url(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        if "api_url" not in values:
            host = values["host"]
@@ -73,12 +68,8 @@ class MinimaxCommon(BaseModel):
    """Total probability mass of tokens to consider at each step."""
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Holds any model parameters valid for `create` call not explicitly specified."""
    minimax_api_host: Optional[str] = Field(
        default_factory=from_env("MINIMAX_API_HOST", default="https://api.minimax.chat")
    )
    minimax_group_id: Optional[str] = Field(
        default_factory=from_env("MINIMAX_GROUP_ID", default=None)
    )
    minimax_api_host: Optional[str] = None
    minimax_group_id: Optional[str] = None
    minimax_api_key: Optional[SecretStr] = None

    @pre_init
@@ -87,7 +78,16 @@ class MinimaxCommon(BaseModel):
        values["minimax_api_key"] = convert_to_secret_str(
            get_from_dict_or_env(values, "minimax_api_key", "MINIMAX_API_KEY")
        )
        values["minimax_group_id"] = get_from_dict_or_env(
            values, "minimax_group_id", "MINIMAX_GROUP_ID"
        )
        # Get custom api url from environment.
        values["minimax_api_host"] = get_from_dict_or_env(
            values,
            "minimax_api_host",
            "MINIMAX_API_HOST",
            default="https://api.minimax.chat",
        )
        values["_client"] = _MinimaxEndpointClient( # type: ignore[call-arg]
            host=values["minimax_api_host"],
            api_key=values["minimax_api_key"],

@@ -5,7 +5,7 @@ import requests
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.pydantic_v1 import Field
from langchain_core.utils import from_env, pre_init
from langchain_core.utils import get_from_dict_or_env, pre_init

logger = logging.getLogger(__name__)

@@ -35,7 +35,7 @@ class OCIModelDeploymentLLM(LLM):
    p: float = 0.75
    """Total probability mass of tokens to consider at each step."""

    endpoint: str = Field(default_factory=from_env("OCI_LLM_ENDPOINT", default=""))
    endpoint: str = ""
    """The uri of the endpoint from the deployed Model Deployment model."""

    best_of: int = 1
@@ -62,6 +62,11 @@ class OCIModelDeploymentLLM(LLM):
        ) from ex
        if not values.get("auth", None):
            values["auth"] = ads.common.auth.default_signer()
        values["endpoint"] = get_from_dict_or_env(
            values,
            "endpoint",
            "OCI_LLM_ENDPOINT",
        )
        return values

    @property

@@ -1,12 +1,7 @@
from typing import Any, Dict

from langchain_core.pydantic_v1 import Field, SecretStr
from langchain_core.utils import (
    convert_to_secret_str,
    from_env,
    get_from_dict_or_env,
    pre_init,
)
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env, pre_init

from langchain_community.llms.openai import BaseOpenAI
from langchain_community.utils.openai import is_openai_v1
@@ -40,13 +35,9 @@ class OctoAIEndpoint(BaseOpenAI):
    """

    """Key word arguments to pass to the model."""
    octoai_api_base: str = Field(
        default_factory=from_env("OCTOAI_API_BASE", default=DEFAULT_BASE_URL)
    )
    octoai_api_base: str = Field(default=DEFAULT_BASE_URL)
    octoai_api_token: SecretStr = Field(default=None)
    model_name: str = Field(
        default_factory=from_env("MODEL_NAME", default=DEFAULT_MODEL)
    )
    model_name: str = Field(default=DEFAULT_MODEL)

    @classmethod
    def is_lc_serializable(cls) -> bool:
@@ -78,9 +69,21 @@ class OctoAIEndpoint(BaseOpenAI):
    @pre_init
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment."""
        values["octoai_api_base"] = get_from_dict_or_env(
            values,
            "octoai_api_base",
            "OCTOAI_API_BASE",
            default=DEFAULT_BASE_URL,
        )
        values["octoai_api_token"] = convert_to_secret_str(
            get_from_dict_or_env(values, "octoai_api_token", "OCTOAI_API_TOKEN")
        )
        values["model_name"] = get_from_dict_or_env(
            values,
            "model_name",
            "MODEL_NAME",
            default=DEFAULT_MODEL,
        )

        try:
            import openai

@@ -30,7 +30,6 @@ from langchain_core.language_models.llms import BaseLLM, create_base_retry_decor
from langchain_core.outputs import Generation, GenerationChunk, LLMResult
from langchain_core.pydantic_v1 import Field, root_validator
from langchain_core.utils import (
    from_env,
    get_from_dict_or_env,
    get_pydantic_field_names,
    pre_init,
@@ -203,9 +202,7 @@ class BaseOpenAI(BaseLLM):
    # When updating this to use a SecretStr
    # Check for classes that derive from this class (as some of them
    # may assume openai_api_key is a str)
    openai_api_key: Optional[str] = Field(
        default_factory=from_env("OPENAI_API_KEY", default=None), alias="api_key"
    )
    openai_api_key: Optional[str] = Field(default=None, alias="api_key")
    """Automatically inferred from env var `OPENAI_API_KEY` if not provided."""
    openai_api_base: Optional[str] = Field(default=None, alias="base_url")
    """Base URL path for API requests, leave blank if not using a proxy or service
@@ -213,9 +210,7 @@ class BaseOpenAI(BaseLLM):
    openai_organization: Optional[str] = Field(default=None, alias="organization")
    """Automatically inferred from env var `OPENAI_ORG_ID` if not provided."""
    # to support explicit proxy for OpenAI
    openai_proxy: Optional[str] = Field(
        default_factory=from_env("OPENAI_PROXY", default="")
    )
    openai_proxy: Optional[str] = None
    batch_size: int = 20
    """Batch size to use when passing multiple documents to generate."""
    request_timeout: Union[float, Tuple[float, float], Any, None] = Field(
@@ -287,9 +282,18 @@ class BaseOpenAI(BaseLLM):
        if values["streaming"] and values["best_of"] > 1:
            raise ValueError("Cannot stream results when best_of > 1.")

        values["openai_api_key"] = get_from_dict_or_env(
            values, "openai_api_key", "OPENAI_API_KEY"
        )
        values["openai_api_base"] = values["openai_api_base"] or os.getenv(
            "OPENAI_API_BASE"
        )
        values["openai_proxy"] = get_from_dict_or_env(
            values,
            "openai_proxy",
            "OPENAI_PROXY",
            default="",
        )
        values["openai_organization"] = (
            values["openai_organization"]
            or os.getenv("OPENAI_ORG_ID")

@@ -6,7 +6,7 @@ import requests
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.utils import pre_init
from langchain_core.utils import get_from_dict_or_env, pre_init

from langchain_community.llms.utils import enforce_stop_tokens

@@ -54,6 +54,12 @@ class PaiEasEndpoint(LLM):
    @pre_init
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment."""
        values["eas_service_url"] = get_from_dict_or_env(
            values, "eas_service_url", "EAS_SERVICE_URL"
        )
        values["eas_service_token"] = get_from_dict_or_env(
            values, "eas_service_token", "EAS_SERVICE_TOKEN"
        )

        return values

@@ -5,8 +5,7 @@ import requests
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.pydantic_v1 import Field
from langchain_core.utils import from_env, get_from_dict_or_env, pre_init
from langchain_core.utils import get_from_dict_or_env, pre_init


class SVEndpointHandler:
@@ -198,14 +197,10 @@ class Sambaverse(LLM):
    sambaverse_url: str = ""
    """Sambaverse url to use"""

    sambaverse_api_key: str = Field(
        default_factory=from_env("SAMBAVERSE_API_KEY", default="")
    )
    sambaverse_api_key: str = ""
    """sambaverse api key"""

    sambaverse_model_name: Optional[str] = Field(
        default_factory=from_env("SAMBAVERSE_MODEL_NAME", default=None)
    )
    sambaverse_model_name: Optional[str] = None
    """sambaverse expert model to use"""

    model_kwargs: Optional[dict] = None
@@ -230,6 +225,12 @@ class Sambaverse(LLM):
            "SAMBAVERSE_URL",
            default="https://sambaverse.sambanova.ai",
        )
        values["sambaverse_api_key"] = get_from_dict_or_env(
            values, "sambaverse_api_key", "SAMBAVERSE_API_KEY"
        )
        values["sambaverse_model_name"] = get_from_dict_or_env(
            values, "sambaverse_model_name", "SAMBAVERSE_MODEL_NAME"
        )
        return values

    @property
@@ -690,27 +691,19 @@ class SambaStudio(LLM):
    )
    """

    sambastudio_base_url: str = Field(
        default_factory=from_env("SAMBASTUDIO_BASE_URL", default="")
    )
    sambastudio_base_url: str = ""
    """Base url to use"""

    sambastudio_base_uri: str = ""
    """endpoint base uri"""

    sambastudio_project_id: str = Field(
        default_factory=from_env("SAMBASTUDIO_PROJECT_ID", default="")
    )
    sambastudio_project_id: str = ""
    """Project id on sambastudio for model"""

    sambastudio_endpoint_id: str = Field(
        default_factory=from_env("SAMBASTUDIO_ENDPOINT_ID", default="")
    )
    sambastudio_endpoint_id: str = ""
    """endpoint id on sambastudio for model"""

    sambastudio_api_key: str = Field(
        default_factory=from_env("SAMBASTUDIO_API_KEY", default="")
    )
    sambastudio_api_key: str = ""
    """sambastudio api key"""

    model_kwargs: Optional[dict] = None
@@ -739,12 +732,24 @@ class SambaStudio(LLM):
    @pre_init
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment."""
        values["sambastudio_base_url"] = get_from_dict_or_env(
            values, "sambastudio_base_url", "SAMBASTUDIO_BASE_URL"
        )
        values["sambastudio_base_uri"] = get_from_dict_or_env(
            values,
            "sambastudio_base_uri",
            "SAMBASTUDIO_BASE_URI",
            default="api/predict/generic",
        )
        values["sambastudio_project_id"] = get_from_dict_or_env(
            values, "sambastudio_project_id", "SAMBASTUDIO_PROJECT_ID"
        )
        values["sambastudio_endpoint_id"] = get_from_dict_or_env(
            values, "sambastudio_endpoint_id", "SAMBASTUDIO_ENDPOINT_ID"
        )
        values["sambastudio_api_key"] = get_from_dict_or_env(
            values, "sambastudio_api_key", "SAMBASTUDIO_API_KEY"
        )
        return values

    def _get_tuning_params(self, stop: Optional[List[str]]) -> str:

@@ -18,7 +18,7 @@ from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk
from langchain_core.pydantic_v1 import Field
from langchain_core.utils import from_env, pre_init
from langchain_core.utils import get_from_dict_or_env, pre_init

logger = logging.getLogger(__name__)

@@ -42,23 +42,11 @@ class SparkLLM(LLM):
    """

    client: Any = None #: :meta private:
    spark_app_id: Optional[str] = Field(
        default_factory=from_env("IFLYTEK_SPARK_APP_ID", default=None)
    )
    spark_api_key: Optional[str] = Field(
        default_factory=from_env("IFLYTEK_SPARK_API_KEY", default=None)
    )
    spark_api_secret: Optional[str] = Field(
        default_factory=from_env("IFLYTEK_SPARK_API_SECRET", default=None)
    )
    spark_api_url: Optional[str] = Field(
        default_factory=from_env(
            "IFLYTEK_SPARK_API_URL", default="wss://spark-api.xf-yun.com/v3.1/chat"
        )
    )
    spark_llm_domain: Optional[str] = Field(
        default_factory=from_env("IFLYTEK_SPARK_LLM_DOMAIN", default="generalv3")
    )
    spark_app_id: Optional[str] = None
    spark_api_key: Optional[str] = None
    spark_api_secret: Optional[str] = None
    spark_api_url: Optional[str] = None
    spark_llm_domain: Optional[str] = None
    spark_user_id: str = "lc_user"
    streaming: bool = False
    request_timeout: int = 30
@@ -68,6 +56,33 @@ class SparkLLM(LLM):

    @pre_init
    def validate_environment(cls, values: Dict) -> Dict:
        values["spark_app_id"] = get_from_dict_or_env(
            values,
            "spark_app_id",
            "IFLYTEK_SPARK_APP_ID",
        )
        values["spark_api_key"] = get_from_dict_or_env(
            values,
            "spark_api_key",
            "IFLYTEK_SPARK_API_KEY",
        )
        values["spark_api_secret"] = get_from_dict_or_env(
            values,
            "spark_api_secret",
            "IFLYTEK_SPARK_API_SECRET",
        )
        values["spark_api_url"] = get_from_dict_or_env(
            values,
            "spark_api_url",
            "IFLYTEK_SPARK_API_URL",
            "wss://spark-api.xf-yun.com/v3.1/chat",
        )
        values["spark_llm_domain"] = get_from_dict_or_env(
            values,
            "spark_llm_domain",
            "IFLYTEK_SPARK_LLM_DOMAIN",
            "generalv3",
        )
        # put extra params into model_kwargs
        values["model_kwargs"]["temperature"] = values["temperature"] or cls.temperature
        values["model_kwargs"]["top_k"] = values["top_k"] or cls.top_k

@@ -2,14 +2,9 @@ from typing import Any, Dict, List, Optional

from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.pydantic_v1 import Field, SecretStr
from langchain_core.pydantic_v1 import SecretStr
from langchain_core.retrievers import BaseRetriever
from langchain_core.utils import (
    convert_to_secret_str,
    from_env,
    get_from_dict_or_env,
    pre_init,
)
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env, pre_init

from langchain_community.utilities.arcee import ArceeWrapper, DALMFilter

@@ -42,19 +37,13 @@ class ArceeRetriever(BaseRetriever):
    model: str
    """Arcee DALM name"""

    arcee_api_url: str = Field(
        default_factory=from_env("ARCEE_API_URL", default="https://api.arcee.ai")
    )
    arcee_api_url: str = "https://api.arcee.ai"
    """Arcee API URL"""

    arcee_api_version: str = Field(
        default_factory=from_env("ARCEE_API_VERSION", default="v2")
    )
    arcee_api_version: str = "v2"
    """Arcee API Version"""

    arcee_app_url: str = Field(
        default_factory=from_env("ARCEE_APP_URL", default="https://app.arcee.ai")
    )
    arcee_app_url: str = "https://app.arcee.ai"
    """Arcee App URL"""

    model_kwargs: Optional[Dict[str, Any]] = None
@@ -92,6 +81,24 @@ class ArceeRetriever(BaseRetriever):
            )
        )

        values["arcee_api_url"] = get_from_dict_or_env(
            values,
            "arcee_api_url",
            "ARCEE_API_URL",
        )

        values["arcee_app_url"] = get_from_dict_or_env(
            values,
            "arcee_app_url",
            "ARCEE_APP_URL",
        )

        values["arcee_api_version"] = get_from_dict_or_env(
            values,
            "arcee_api_version",
            "ARCEE_API_VERSION",
        )

        # validate model kwargs
        if values["model_kwargs"]:
            kw = values["model_kwargs"]

@@ -10,9 +10,76 @@ from langchain_community.utilities.arxiv import ArxivAPIWrapper
class ArxivRetriever(BaseRetriever, ArxivAPIWrapper):
    """`Arxiv` retriever.

    It wraps load() to get_relevant_documents().
    It uses all ArxivAPIWrapper arguments without any change.
    """
    Setup:
        Install ``arxiv``:

        .. code-block:: bash

            pip install -U arxiv

    Key init args:
        load_max_docs: int
            maximum number of documents to load
        get_full_documents: bool
            whether to return full document text or snippets

    Instantiate:
        .. code-block:: python

            from langchain_community.retrievers import ArxivRetriever

            retriever = ArxivRetriever(
                load_max_docs=2,
                get_full_documents=True,
            )

    Usage:
        .. code-block:: python

            docs = retriever.invoke("What is the ImageBind model?")
            docs[0].metadata

        .. code-block:: none

            {'Entry ID': 'http://arxiv.org/abs/2305.05665v2',
             'Published': datetime.date(2023, 5, 31),
             'Title': 'ImageBind: One Embedding Space To Bind Them All',
             'Authors': 'Rohit Girdhar, Alaaeldin El-Nouby, Zhuang Liu, Mannat Singh, Kalyan Vasudev Alwala, Armand Joulin, Ishan Misra'}

    Use within a chain:
        .. code-block:: python

            from langchain_core.output_parsers import StrOutputParser
            from langchain_core.prompts import ChatPromptTemplate
            from langchain_core.runnables import RunnablePassthrough
            from langchain_openai import ChatOpenAI

            prompt = ChatPromptTemplate.from_template(
                \"\"\"Answer the question based only on the context provided.

            Context: {context}

            Question: {question}\"\"\"
            )

            llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

            def format_docs(docs):
                return "\\n\\n".join(doc.page_content for doc in docs)

            chain = (
                {"context": retriever | format_docs, "question": RunnablePassthrough()}
                | prompt
                | llm
                | StrOutputParser()
            )

            chain.invoke("What is the ImageBind model?")

        .. code-block:: none

            'The ImageBind model is an approach to learn a joint embedding across six different modalities - images, text, audio, depth, thermal, and IMU data...'
    """ # noqa: E501

    get_full_documents: bool = False

@@ -19,7 +19,71 @@ DEFAULT_URL_SUFFIX = "search.windows.net"


class AzureAISearchRetriever(BaseRetriever):
    """`Azure AI Search` service retriever."""
    """`Azure AI Search` service retriever.

    Setup:
        See here for more detail: https://python.langchain.com/v0.2/docs/integrations/retrievers/azure_ai_search/

        We will need to install the below dependencies and set the required
        environment variables:

        .. code-block:: bash

            pip install -U langchain-community azure-identity azure-search-documents
            export AZURE_AI_SEARCH_SERVICE_NAME="<YOUR_SEARCH_SERVICE_NAME>"
            export AZURE_AI_SEARCH_INDEX_NAME="<YOUR_SEARCH_INDEX_NAME>"
            export AZURE_AI_SEARCH_API_KEY="<YOUR_API_KEY>"

    Key init args:
        content_key: str
        top_k: int
        index_name: str

    Instantiate:
        .. code-block:: python

            from langchain_community.retrievers import AzureAISearchRetriever

            retriever = AzureAISearchRetriever(
                content_key="content", top_k=1, index_name="langchain-vector-demo"
            )

    Usage:
        .. code-block:: python

            retriever.invoke("here is my unstructured query string")

    Use within a chain:
        .. code-block:: python

            from langchain_core.output_parsers import StrOutputParser
            from langchain_core.prompts import ChatPromptTemplate
            from langchain_core.runnables import RunnablePassthrough
            from langchain_openai import AzureChatOpenAI

            prompt = ChatPromptTemplate.from_template(
                \"\"\"Answer the question based only on the context provided.

            Context: {context}

            Question: {question}\"\"\"
            )

            llm = AzureChatOpenAI(azure_deployment="gpt-35-turbo")

            def format_docs(docs):
                return "\\n\\n".join(doc.page_content for doc in docs)

            chain = (
                {"context": retriever | format_docs, "question": RunnablePassthrough()}
                | prompt
                | llm
                | StrOutputParser()
            )

            chain.invoke("...")

    """ # noqa: E501

    service_name: str = ""
    """Name of Azure AI Search service"""

@@ -19,11 +19,18 @@ class RetrievalConfig(BaseModel, extra="allow"): # type: ignore[call-arg]


class AmazonKnowledgeBasesRetriever(BaseRetriever):
    """`Amazon Bedrock Knowledge Bases` retrieval.
    """Amazon Bedrock Knowledge Bases retriever.

    See https://aws.amazon.com/bedrock/knowledge-bases for more info.

    Args:
    Setup:
        Install ``langchain-aws``:

        .. code-block:: bash

            pip install -U langchain-aws

    Key init args:
        knowledge_base_id: Knowledge Base ID.
        region_name: The aws region e.g., `us-west-2`.
            Fallback to AWS_DEFAULT_REGION env variable or region specified in
@@ -35,7 +42,7 @@ class AmazonKnowledgeBasesRetriever(BaseRetriever):
        client: boto3 client for bedrock agent runtime.
        retrieval_config: Configuration for retrieval.

    Example:
    Instantiate:
        .. code-block:: python

            from langchain_community.retrievers import AmazonKnowledgeBasesRetriever
@@ -48,7 +55,48 @@ class AmazonKnowledgeBasesRetriever(BaseRetriever):
                    }
                },
            )
    """

    Usage:
        .. code-block:: python

            query = "..."

            retriever.invoke(query)

    Use within a chain:
        .. code-block:: python

            from langchain_aws import ChatBedrockConverse
            from langchain_core.output_parsers import StrOutputParser
            from langchain_core.prompts import ChatPromptTemplate
            from langchain_core.runnables import RunnablePassthrough

            prompt = ChatPromptTemplate.from_template(
                \"\"\"Answer the question based only on the context provided.

            Context: {context}

            Question: {question}\"\"\"
            )

            llm = ChatBedrockConverse(
                model_id="anthropic.claude-3-5-sonnet-20240620-v1:0"
            )

            def format_docs(docs):
                return "\\n\\n".join(doc.page_content for doc in docs)

            chain = (
                {"context": retriever | format_docs, "question": RunnablePassthrough()}
                | prompt
                | llm
                | StrOutputParser()
            )

            chain.invoke("...")

    """ # noqa: E501

    knowledge_base_id: str
    region_name: Optional[str] = None

@@ -6,7 +6,7 @@ from langchain_core._api.deprecation import deprecated
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from langchain_core.utils import pre_init
from langchain_core.utils import get_from_dict_or_env, pre_init

from langchain_community.utilities.vertexai import get_client_info

@@ -58,6 +58,9 @@ class GoogleDocumentAIWarehouseRetriever(BaseRetriever):
            "Please install it with pip install google-cloud-contentwarehouse"
        ) from exc

        values["project_number"] = get_from_dict_or_env(
            values, "project_number", "PROJECT_NUMBER"
        )
        values["client"] = DocumentServiceClient(
            client_info=get_client_info(module="document-ai-warehouse")
        )

@@ -15,7 +15,73 @@ from langchain_community.vectorstores.milvus import Milvus


class MilvusRetriever(BaseRetriever):
    """`Milvus API` retriever."""
    """Milvus API retriever.

    See detailed instructions here: https://python.langchain.com/v0.2/docs/integrations/retrievers/milvus_hybrid_search/

    Setup:
        Install ``langchain-milvus`` and other dependencies:

        .. code-block:: bash

            pip install -U pymilvus[model] langchain-milvus

    Key init args:
        collection: Milvus Collection

    Instantiate:
        .. code-block:: python

            retriever = MilvusCollectionHybridSearchRetriever(collection=collection)

    Usage:
        .. code-block:: python

            query = "What are the stories about ventures?"

            retriever.invoke(query)

        .. code-block:: none

            [Document(page_content="In 'The Lost Expedition' by Caspian Grey...", metadata={'doc_id': '449281835035545843'}),
            Document(page_content="In 'The Phantom Pilgrim' by Rowan Welles...", metadata={'doc_id': '449281835035545845'}),
            Document(page_content="In 'The Dreamwalker's Journey' by Lyra Snow..", metadata={'doc_id': '449281835035545846'})]

    Use within a chain:
        .. code-block:: python

            from langchain_core.output_parsers import StrOutputParser
            from langchain_core.prompts import ChatPromptTemplate
            from langchain_core.runnables import RunnablePassthrough
            from langchain_openai import ChatOpenAI

            prompt = ChatPromptTemplate.from_template(
                \"\"\"Answer the question based only on the context provided.

            Context: {context}

            Question: {question}\"\"\"
            )

            llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

            def format_docs(docs):
                return "\\n\\n".join(doc.page_content for doc in docs)

            chain = (
                {"context": retriever | format_docs, "question": RunnablePassthrough()}
                | prompt
                | llm
                | StrOutputParser()
            )

            chain.invoke("What novels has Lila written and what are their contents?")

        .. code-block:: none

            "Lila Rose has written 'The Memory Thief,' which follows a charismatic thief..."

    """ # noqa: E501

    embedding_function: Embeddings
    collection_name: str = "LangChainCollection"

@@ -10,9 +10,66 @@ from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
class WikipediaRetriever(BaseRetriever, WikipediaAPIWrapper):
    """`Wikipedia API` retriever.

    It wraps load() to get_relevant_documents().
    It uses all WikipediaAPIWrapper arguments without any change.
    """
    Setup:
        Install the ``wikipedia`` dependency:

        .. code-block:: bash

            pip install -U wikipedia

    Instantiate:
        .. code-block:: python

            from langchain_community.retrievers import WikipediaRetriever

            retriever = WikipediaRetriever()

    Usage:
        .. code-block:: python

            docs = retriever.invoke("TOKYO GHOUL")
            print(docs[0].page_content[:100])

        .. code-block:: none

            Tokyo Ghoul (Japanese: 東京喰種(トーキョーグール), Hepburn: Tōkyō Gūru) is a Japanese dark fantasy

    Use within a chain:
        .. code-block:: python

            from langchain_core.output_parsers import StrOutputParser
            from langchain_core.prompts import ChatPromptTemplate
            from langchain_core.runnables import RunnablePassthrough
            from langchain_openai import ChatOpenAI

            prompt = ChatPromptTemplate.from_template(
                \"\"\"Answer the question based only on the context provided.

            Context: {context}

            Question: {question}\"\"\"
            )

            llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

            def format_docs(docs):
                return "\\n\\n".join(doc.page_content for doc in docs)

            chain = (
                {"context": retriever | format_docs, "question": RunnablePassthrough()}
                | prompt
                | llm
                | StrOutputParser()
            )

            chain.invoke(
                "Who is the main character in `Tokyo Ghoul` and does he transform into a ghoul?"
            )

        .. code-block:: none

            'The main character in Tokyo Ghoul is Ken Kaneki, who transforms into a ghoul after receiving an organ transplant from a ghoul named Rize.'
    """ # noqa: E501

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun

@@ -29,8 +29,6 @@ class EdenAiSpeechToTextTool(EdenaiTool):
    You can find your token here: https://app.edenai.run/admin/account/settings
    """

    edenai_api_key: Optional[str] = None

    name: str = "edenai_speech_to_text"
    description = (
        "A wrapper around edenai Services speech to text "

@@ -6,9 +6,9 @@ from typing import Any, Dict, List, Optional

import requests
from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import root_validator
from langchain_core.pydantic_v1 import Field, SecretStr
from langchain_core.tools import BaseTool
from langchain_core.utils import get_from_dict_or_env
from langchain_core.utils import secret_from_env

logger = logging.getLogger(__name__)

@@ -23,20 +23,14 @@ class EdenaiTool(BaseTool):

    feature: str
    subfeature: str
    edenai_api_key: Optional[str] = None
    edenai_api_key: SecretStr = Field(
        default_factory=secret_from_env("EDENAI_API_KEY", default=None)
    )
    is_async: bool = False

    providers: List[str]
    """provider to use for the API call."""

    @root_validator(allow_reuse=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key exists in environment."""
        values["edenai_api_key"] = get_from_dict_or_env(
            values, "edenai_api_key", "EDENAI_API_KEY"
        )
        return values

    @staticmethod
    def get_user_agent() -> str:
        from langchain_community import __version__
@@ -54,11 +48,8 @@ class EdenaiTool(BaseTool):
            requests.Response: The response from the EdenAI API call.

        """

        # make the API call

        headers = {
            "Authorization": f"Bearer {self.edenai_api_key}",
            "Authorization": f"Bearer {self.edenai_api_key.get_secret_value()}",
            "User-Agent": self.get_user_agent(),
        }

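The header fix above matters because pydantic's `SecretStr` masks its value when interpolated into a string; only `get_secret_value()` yields the raw key. A short illustration with a fake key:

    from langchain_core.pydantic_v1 import SecretStr

    key = SecretStr("fake_key")
    print(f"Bearer {key}")                     # Bearer **********
    print(f"Bearer {key.get_secret_value()}")  # Bearer fake_key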
@@ -48,7 +48,7 @@ class PowerBIDataset(BaseModel):
        """Fix the table names."""
        return [fix_table_name(table) for table in table_names]

    @root_validator(pre=True, allow_reuse=True)
    @root_validator(pre=True)
    def token_or_credential_present(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Validate that at least one of token and credentials is present."""
        if "token" in values or "credential" in values:

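For reference, a minimal sketch of the one-of-two-fields check this `pre=True` validator performs, on a stand-in `Auth` model:

    from typing import Any, Dict, Optional

    from langchain_core.pydantic_v1 import BaseModel, root_validator


    class Auth(BaseModel):
        token: Optional[str] = None
        credential: Optional[Any] = None

        @root_validator(pre=True)
        def token_or_credential_present(cls, values: Dict[str, Any]) -> Dict[str, Any]:
            # Accept the raw input only if at least one auth mechanism is given.
            if "token" in values or "credential" in values:
                return values
            raise ValueError("Please provide either a token or a credential.")


    Auth(token="abc")  # ok
    # Auth()           # would raise a ValidationError wrapping the ValueError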
@@ -460,7 +460,45 @@ class ApertureDB(VectorStore):
        assert db.last_query_ok(), response
        return response[0]["FindDescriptorSet"]["entities"]

    @override
    def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
        """Add or update documents in the vectorstore.

        Args:
            documents: Documents to add to the vectorstore.
            kwargs: Additional keyword arguments.
                if kwargs contains ids and documents contain ids,
                the ids in the kwargs will receive precedence.

        Returns:
            List of IDs of the added texts.

        Raises:
            ValueError: If the number of ids does not match the number of documents.
        """

        if "ids" in kwargs:
            ids = kwargs.pop("ids")
            if ids and len(ids) != len(documents):
                raise ValueError(
                    "The number of ids must match the number of documents. "
                    f"Got {len(ids)} ids and {len(documents)} documents."
                )

            documents_ = []

            for id_, document in zip(ids, documents):
                doc_with_id = Document(
                    page_content=document.page_content,
                    metadata=document.metadata,
                    id=id_,
                )
                documents_.append(doc_with_id)
        else:
            documents_ = documents

        # If upsert has been implemented, we can use it to add documents
        return self.upsert(documents_, **kwargs)["succeeded"]

    def upsert(self, items: Sequence[Document], /, **kwargs: Any) -> UpsertResponse:
        """Insert or update items

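A short usage sketch of the `ids` precedence rule above, assuming `store` is an already-constructed ApertureDB vector store:

    from langchain_core.documents import Document

    docs = [
        Document(page_content="alpha", id="doc-a"),
        Document(page_content="beta", id="doc-b"),
    ]
    # The ids kwarg wins over the ids carried on the documents themselves.
    returned = store.add_documents(docs, ids=["id-1", "id-2"])
    # store.add_documents(docs, ids=["id-1"]) would raise a ValueError
    # (1 id for 2 documents).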
@@ -1288,7 +1288,7 @@ class FAISS(VectorStore):
        relevance_score_fn = self._select_relevance_score_fn()
        if relevance_score_fn is None:
            raise ValueError(
                "normalize_score_fn must be provided to"
                "relevance_score_fn must be provided to"
                " FAISS constructor to normalize scores"
            )
        docs_and_scores = self.similarity_search_with_score(
@@ -1317,7 +1317,7 @@ class FAISS(VectorStore):
        relevance_score_fn = self._select_relevance_score_fn()
        if relevance_score_fn is None:
            raise ValueError(
                "normalize_score_fn must be provided to"
                "relevance_score_fn must be provided to"
                " FAISS constructor to normalize scores"
            )
        docs_and_scores = await self.asimilarity_search_with_score(

@@ -6,8 +6,6 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import root_validator
from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
from langchain_core.vectorstores import VectorStore


@@ -164,18 +162,6 @@ class NeuralDBVectorStore(VectorStore):
        offset = self.db._savable_state.documents.get_source_by_id(source_id)[1]
        return [str(offset + i) for i in range(len(texts))] # type: ignore[arg-type]

    @root_validator(allow_reuse=True)
    def validate_environments(cls, values: Dict) -> Dict:
        """Validate ThirdAI environment variables."""
        values["thirdai_key"] = convert_to_secret_str(
            get_from_dict_or_env(
                values,
                "thirdai_key",
                "THIRDAI_KEY",
            )
        )
        return values

    def insert( # type: ignore[no-untyped-def, no-untyped-def]
        self,
        sources: List[Any],

@@ -18,3 +18,14 @@ def test_zhipuai_embedding_query() -> None:
    embedding = ZhipuAIEmbeddings() # type: ignore[call-arg]
    res = embedding.embed_query(document)
    assert len(res) == 1024 # type: ignore[arg-type]


def test_zhipuai_embedding_dimensions() -> None:
    """Test ZhipuAI Text Embedding for query by assigning dimensions"""
    document = "This is a test query."
    embedding = ZhipuAIEmbeddings(
        model="embedding-3",
        dimensions=2048,
    ) # type: ignore[call-arg]
    res = embedding.embed_query(document)
    assert len(res) == 2048 # type: ignore[arg-type]

@@ -8,7 +8,7 @@ from langchain_community.embeddings import BaichuanTextEmbeddings
def test_sparkllm_initialization_by_alias() -> None:
    # Effective initialization
    embeddings = BaichuanTextEmbeddings( # type: ignore[call-arg]
        model="embedding_model", # type: ignore[arg-type]
        model="embedding_model",
        api_key="your-api-key", # type: ignore[arg-type]
    )
    assert embeddings.model_name == "embedding_model"

@@ -2,7 +2,7 @@ import os
from typing import cast

import pytest
from langchain_core.pydantic_v1 import SecretStr, ValidationError
from langchain_core.pydantic_v1 import SecretStr

from langchain_community.embeddings import SparkLLMTextEmbeddings

@@ -43,5 +43,5 @@ def test_initialization_parameters_from_env() -> None:

    # Environment variable missing
    del os.environ["SPARK_APP_ID"]
    with pytest.raises(ValidationError):
    with pytest.raises(ValueError):
        SparkLLMTextEmbeddings()

@@ -74,6 +74,11 @@ async def test_fake_retriever_v1_upgrade_async(
    assert callbacks.retriever_errors == 0


def test_fake_retriever_v1_standard_params(fake_retriever_v1: BaseRetriever) -> None:
    ls_params = fake_retriever_v1._get_ls_params()
    assert ls_params == {"ls_retriever_name": "fakeretrieverv1"}


@pytest.fixture
def fake_retriever_v1_with_kwargs() -> BaseRetriever:
    # Test for things like the Weaviate V1 Retriever.
@@ -213,3 +218,8 @@ async def test_fake_retriever_v2_async(
    await fake_erroring_retriever_v2.ainvoke(
        "Foo", config={"callbacks": [callbacks]}
    )


def test_fake_retriever_v2_standard_params(fake_retriever_v2: BaseRetriever) -> None:
    ls_params = fake_retriever_v2._get_ls_params()
    assert ls_params == {"ls_retriever_name": "fakeretrieverv2"}

@@ -33,6 +33,11 @@ def test_create_client(amazon_retriever: AmazonKnowledgeBasesRetriever) -> None:
    amazon_retriever.create_client({})


def test_standard_params(amazon_retriever: AmazonKnowledgeBasesRetriever) -> None:
    ls_params = amazon_retriever._get_ls_params()
    assert ls_params == {"ls_retriever_name": "amazonknowledgebases"}


def test_get_relevant_documents(
    amazon_retriever: AmazonKnowledgeBasesRetriever, mock_client: MagicMock
) -> None:

@@ -6,7 +6,9 @@ import pytest
from langchain_community.tools.edenai import EdenAiTextModerationTool

tool = EdenAiTextModerationTool( # type: ignore[call-arg]
    providers=["openai"], language="en", edenai_api_key="fake_key"
    providers=["openai"],
    language="en",
    edenai_api_key="fake_key", # type: ignore[arg-type]
)


@@ -633,6 +633,28 @@ def test_similarity_score_threshold(index_details: dict, threshold: float) -> No
    assert len(search_result) == 0


@pytest.mark.requires("databricks", "databricks.vector_search")
def test_standard_params() -> None:
    index = mock_index(DIRECT_ACCESS_INDEX)
    vectorstore = default_databricks_vector_search(index)
    retriever = vectorstore.as_retriever()
    ls_params = retriever._get_ls_params()
    assert ls_params == {
        "ls_retriever_name": "vectorstore",
        "ls_vector_store_provider": "DatabricksVectorSearch",
        "ls_embedding_provider": "FakeEmbeddingsWithDimension",
    }

    index = mock_index(DELTA_SYNC_INDEX_MANAGED_EMBEDDINGS)
    vectorstore = default_databricks_vector_search(index)
    retriever = vectorstore.as_retriever()
    ls_params = retriever._get_ls_params()
    assert ls_params == {
        "ls_retriever_name": "vectorstore",
        "ls_vector_store_provider": "DatabricksVectorSearch",
    }


@pytest.mark.requires("databricks", "databricks.vector_search")
@pytest.mark.parametrize(
    "index_details", [DELTA_SYNC_INDEX_SELF_MANAGED_EMBEDDINGS, DIRECT_ACCESS_INDEX]

@@ -49,6 +49,15 @@ def test_faiss() -> None:
    output = docsearch.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo")]

    # Retriever standard params
    retriever = docsearch.as_retriever()
    ls_params = retriever._get_ls_params()
    assert ls_params == {
        "ls_retriever_name": "vectorstore",
        "ls_vector_store_provider": "FAISS",
        "ls_embedding_provider": "FakeEmbeddings",
    }


@pytest.mark.requires("faiss")
async def test_faiss_afrom_texts() -> None:

@@ -30,7 +30,8 @@ class LangChainPendingDeprecationWarning(PendingDeprecationWarning):
 # PUBLIC API


-T = TypeVar("T", bound=Union[Type, Callable[..., Any]])
+# Last Any should be FieldInfoV1 but this leads to circular imports
+T = TypeVar("T", bound=Union[Type, Callable[..., Any], Any])


 def _validate_deprecation_params(
@@ -133,7 +134,7 @@ def deprecated(
         _package: str = package,
     ) -> T:
         """Implementation of the decorator returned by `deprecated`."""
-        from pydantic.v1.fields import FieldInfo  # pydantic: ignore
+        from langchain_core.utils.pydantic import FieldInfoV1

         def emit_warning() -> None:
             """Emit the warning."""
@@ -208,9 +209,7 @@ def deprecated(
             )
             return cast(T, obj)

-        elif isinstance(obj, FieldInfo):
-            from langchain_core.pydantic_v1 import Field
-
+        elif isinstance(obj, FieldInfoV1):
             wrapped = None
             if not _obj_type:
                 _obj_type = "attribute"
@@ -219,58 +218,64 @@ def deprecated(
             old_doc = obj.description

             def finalize(wrapper: Callable[..., Any], new_doc: str) -> T:
-                return Field(
-                    default=obj.default,
-                    default_factory=obj.default_factory,
-                    description=new_doc,
-                    alias=obj.alias,
-                    exclude=obj.exclude,
+                return cast(
+                    T,
+                    FieldInfoV1(
+                        default=obj.default,
+                        default_factory=obj.default_factory,
+                        description=new_doc,
+                        alias=obj.alias,
+                        exclude=obj.exclude,
+                    ),
                 )

         elif isinstance(obj, property):
             if not _obj_type:
                 _obj_type = "attribute"
             wrapped = None
-            _name = _name or obj.fget.__qualname__
+            _name = _name or cast(Union[Type, Callable], obj.fget).__qualname__
             old_doc = obj.__doc__

             class _deprecated_property(property):
                 """A deprecated property."""

-                def __init__(self, fget=None, fset=None, fdel=None, doc=None):
+                def __init__(self, fget=None, fset=None, fdel=None, doc=None):  # type: ignore[no-untyped-def]
                     super().__init__(fget, fset, fdel, doc)
                     self.__orig_fget = fget
                     self.__orig_fset = fset
                     self.__orig_fdel = fdel

-                def __get__(self, instance, owner=None):
+                def __get__(self, instance, owner=None):  # type: ignore[no-untyped-def]
                     if instance is not None or owner is not None:
                         emit_warning()
                     return self.fget(instance)

-                def __set__(self, instance, value):
+                def __set__(self, instance, value):  # type: ignore[no-untyped-def]
                     if instance is not None:
                         emit_warning()
                     return self.fset(instance, value)

-                def __delete__(self, instance):
+                def __delete__(self, instance):  # type: ignore[no-untyped-def]
                     if instance is not None:
                         emit_warning()
                     return self.fdel(instance)

-                def __set_name__(self, owner, set_name):
+                def __set_name__(self, owner, set_name):  # type: ignore[no-untyped-def]
                     nonlocal _name
                     if _name == "<lambda>":
                         _name = set_name

-            def finalize(wrapper: Callable[..., Any], new_doc: str) -> Any:
+            def finalize(wrapper: Callable[..., Any], new_doc: str) -> T:
                 """Finalize the property."""
-                return _deprecated_property(
-                    fget=obj.fget, fset=obj.fset, fdel=obj.fdel, doc=new_doc
+                return cast(
+                    T,
+                    _deprecated_property(
+                        fget=obj.fget, fset=obj.fset, fdel=obj.fdel, doc=new_doc
+                    ),
                 )

         else:
-            _name = _name or obj.__qualname__
+            _name = _name or cast(Union[Type, Callable], obj).__qualname__
             if not _obj_type:
                 # edge case: when a function is within another function
                 # within a test, this will call it a "method" not a "function"
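The property branch rewritten above is what lets the deprecated decorator wrap a property: reads route through _deprecated_property.__get__, which calls emit_warning() before delegating to the original fget. A minimal usage sketch (the class, attribute, and version numbers are illustrative only):

import warnings

from langchain_core._api import deprecated


class Config:
    @deprecated(since="0.2.0", alternative="new_value", removal="0.3.0")
    @property
    def old_value(self) -> int:
        return 42


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    assert Config().old_value == 42  # the property still works...
assert caught  # ...but the access emitted a deprecation warning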