mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-03 03:59:42 +00:00
core[patch], langchain[patch]: ByteStore (#14312)
This commit is contained in:
@@ -9,7 +9,7 @@ SCRIPT_DIR="$(cd "$(dirname "$0")"; pwd)"
|
|||||||
cd "${SCRIPT_DIR}"
|
cd "${SCRIPT_DIR}"
|
||||||
|
|
||||||
mkdir -p ../_dist
|
mkdir -p ../_dist
|
||||||
rsync -ruv --exclude node_modules . ../_dist
|
rsync -ruv --exclude node_modules --exclude api_reference --exclude .venv --exclude .docusaurus . ../_dist
|
||||||
cd ../_dist
|
cd ../_dist
|
||||||
poetry run python scripts/model_feat_table.py
|
poetry run python scripts/model_feat_table.py
|
||||||
cp ../cookbook/README.md src/pages/cookbook.mdx
|
cp ../cookbook/README.md src/pages/cookbook.mdx
|
||||||
|
@@ -17,6 +17,24 @@ Install the Python SDK:
|
|||||||
pip install redis
|
pip install redis
|
||||||
```
|
```
|
||||||
|
|
||||||
|
To run Redis locally, you can use Docker:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run --name langchain-redis -d -p 6379:6379 redis redis-server --save 60 1 --loglevel warning
|
||||||
|
```
|
||||||
|
|
||||||
|
To stop the container:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker stop langchain-redis
|
||||||
|
```
|
||||||
|
|
||||||
|
And to start it again:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker start langchain-redis
|
||||||
|
```
|
||||||
|
|
||||||
## Wrappers
|
## Wrappers
|
||||||
|
|
||||||
All wrappers need a Redis URL connection string to connect to the database, and support either a standalone Redis server
|
All wrappers need a Redis URL connection string to connect to the database, and support either a standalone Redis server
|
||||||
|
100
docs/docs/integrations/stores/file_system.ipynb
Normal file
100
docs/docs/integrations/stores/file_system.ipynb
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "raw",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"---\n",
|
||||||
|
"sidebar_label: Local Filesystem\n",
|
||||||
|
"sidebar_position: 3\n",
|
||||||
|
"---"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# LocalFileStore\n",
|
||||||
|
"\n",
|
||||||
|
"The `LocalFileStore` is a persistent implementation of `ByteStore` that stores everything in a folder of your choosing."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"[b'v1', b'v2']\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from pathlib import Path\n",
|
||||||
|
"\n",
|
||||||
|
"from langchain.storage import LocalFileStore\n",
|
||||||
|
"\n",
|
||||||
|
"root_path = Path.cwd() / \"data\" # can also be a path set by a string\n",
|
||||||
|
"store = LocalFileStore(root_path)\n",
|
||||||
|
"\n",
|
||||||
|
"store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n",
|
||||||
|
"print(store.mget([\"k1\", \"k2\"]))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Now let's see which files exist in our `data` folder:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"k1 k2\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"!ls {root_path}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": ".venv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
73
docs/docs/integrations/stores/in_memory.ipynb
Normal file
73
docs/docs/integrations/stores/in_memory.ipynb
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "raw",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"---\n",
|
||||||
|
"sidebar_label: In Memory\n",
|
||||||
|
"sidebar_position: 2\n",
|
||||||
|
"---"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# InMemoryByteStore\n",
|
||||||
|
"\n",
|
||||||
|
"The `InMemoryByteStore` is a non-persistent implementation of `ByteStore` that stores everything in a Python dictionary."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"[b'v1', b'v2']\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from langchain.storage import InMemoryByteStore\n",
|
||||||
|
"\n",
|
||||||
|
"store = InMemoryByteStore()\n",
|
||||||
|
"\n",
|
||||||
|
"store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n",
|
||||||
|
"print(store.mget([\"k1\", \"k2\"]))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": ".venv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
29
docs/docs/integrations/stores/index.mdx
Normal file
29
docs/docs/integrations/stores/index.mdx
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
---
|
||||||
|
sidebar_position: 1
|
||||||
|
sidebar_class_name: hidden
|
||||||
|
---
|
||||||
|
|
||||||
|
# Stores
|
||||||
|
|
||||||
|
In many different applications, having some sort of key-value storage is helpful.
|
||||||
|
In this section, we will look at a few different ways to store key-value pairs
|
||||||
|
using implementations of the `ByteStore` interface.
|
||||||
|
|
||||||
|
## Features (natively supported)
|
||||||
|
|
||||||
|
All `ByteStore`s support the following functions, which are used for operating on
|
||||||
|
**m**ultiple key-value pairs at once:
|
||||||
|
|
||||||
|
- `mget(key: Sequence[str]) -> List[Optional[bytes]]`: get the contents of multiple keys, returning `None` if the key does not exist
|
||||||
|
- `mset(key_value_pairs: Sequence[Tuple[str, bytes]]) -> None`: set the contents of multiple keys
|
||||||
|
- `mdelete(key: Sequence[str]) -> None`: delete multiple keys
|
||||||
|
- `yield_keys(prefix: Optional[str] = None) -> Iterator[str]`: yield all keys in the store, optionally filtering by a prefix
|
||||||
|
|
||||||
|
## How to pick one
|
||||||
|
|
||||||
|
`ByteStore`s are designed to be interchangeable. By default, most dependent integrations
|
||||||
|
use the `InMemoryByteStore`, which is a simple in-memory key-value store.
|
||||||
|
|
||||||
|
However, if you start having other requirements, like massive scalability or persistence,
|
||||||
|
you can swap out the `ByteStore` implementation with one of the other ones documented
|
||||||
|
in this section.
|
83
docs/docs/integrations/stores/redis.ipynb
Normal file
83
docs/docs/integrations/stores/redis.ipynb
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "raw",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"---\n",
|
||||||
|
"sidebar_label: Redis\n",
|
||||||
|
"---"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# RedisStore\n",
|
||||||
|
"\n",
|
||||||
|
"The `RedisStore` is an implementation of `ByteStore` that stores everything in your Redis instance.\n",
|
||||||
|
"\n",
|
||||||
|
"To configure Redis, follow our [Redis guide](/docs/integrations/providers/redis)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"!pip install redis"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"[b'v1', b'v2']\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from langchain.storage import RedisStore\n",
|
||||||
|
"\n",
|
||||||
|
"store = RedisStore(redis_url=\"redis://localhost:6379\")\n",
|
||||||
|
"\n",
|
||||||
|
"store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n",
|
||||||
|
"print(store.mget([\"k1\", \"k2\"]))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": ".venv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
90
docs/docs/integrations/stores/upstash_redis.ipynb
Normal file
90
docs/docs/integrations/stores/upstash_redis.ipynb
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "raw",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"---\n",
|
||||||
|
"sidebar_label: Upstash Redis\n",
|
||||||
|
"---"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# UpstashRedisByteStore\n",
|
||||||
|
"\n",
|
||||||
|
"The `UpstashRedisByteStore` is an implementation of `ByteStore` that stores everything in your Upstash-hosted Redis instance.\n",
|
||||||
|
"\n",
|
||||||
|
"To use the base `RedisStore` instead, see [this guide](./redis).\n",
|
||||||
|
"\n",
|
||||||
|
"To configure Upstash Redis, follow our [Upstash guide](/docs/integrations/providers/upstash)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"!pip install upstash-redis"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"[b'v1', b'v2']\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from langchain.storage import UpstashRedisByteStore\n",
|
||||||
|
"from upstash_redis import Redis\n",
|
||||||
|
"\n",
|
||||||
|
"URL = \"<UPSTASH_REDIS_REST_URL>\"\n",
|
||||||
|
"TOKEN = \"<UPSTASH_REDIS_REST_TOKEN>\"\n",
|
||||||
|
"\n",
|
||||||
|
"redis_client = Redis(url=URL, token=TOKEN)\n",
|
||||||
|
"store = UpstashRedisByteStore(client=redis_client, ttl=None, namespace=\"test-ns\")\n",
|
||||||
|
"\n",
|
||||||
|
"store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n",
|
||||||
|
"print(store.mget([\"k1\", \"k2\"]))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": ".venv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
@@ -1,11 +1,21 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "raw",
|
||||||
|
"id": "8baf0f21",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"---\n",
|
||||||
|
"sidebar_label: Caching\n",
|
||||||
|
"---"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "bf4061ce",
|
"id": "bf4061ce",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Caching\n",
|
"# CacheBackedEmbeddings\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Embeddings can be stored or temporarily cached to avoid needing to recompute them.\n",
|
"Embeddings can be stored or temporarily cached to avoid needing to recompute them.\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -15,7 +25,7 @@
|
|||||||
"The main supported way to initialize a `CacheBackedEmbeddings` is `from_bytes_store`. This takes in the following parameters:\n",
|
"The main supported way to initialize a `CacheBackedEmbeddings` is `from_bytes_store`. This takes in the following parameters:\n",
|
||||||
"\n",
|
"\n",
|
||||||
"- underlying_embedder: The embedder to use for embedding.\n",
|
"- underlying_embedder: The embedder to use for embedding.\n",
|
||||||
"- document_embedding_cache: The cache to use for storing document embeddings.\n",
|
"- document_embedding_cache: Any [`ByteStore`](/docs/integrations/stores/) for caching document embeddings.\n",
|
||||||
"- namespace: (optional, defaults to `\"\"`) The namespace to use for document cache. This namespace is used to avoid collisions with other caches. For example, set it to the name of the embedding model used.\n",
|
"- namespace: (optional, defaults to `\"\"`) The namespace to use for document cache. This namespace is used to avoid collisions with other caches. For example, set it to the name of the embedding model used.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"**Attention**: Be sure to set the `namespace` parameter to avoid collisions of the same text embedded using different embeddings models."
|
"**Attention**: Be sure to set the `namespace` parameter to avoid collisions of the same text embedded using different embeddings models."
|
||||||
@@ -23,20 +33,14 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 1,
|
||||||
"id": "a463c3c2-749b-40d1-a433-84f68a1cd1c7",
|
"id": "a463c3c2-749b-40d1-a433-84f68a1cd1c7",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.embeddings import CacheBackedEmbeddings, OpenAIEmbeddings\n",
|
"from langchain.embeddings import CacheBackedEmbeddings"
|
||||||
"from langchain.storage import (\n",
|
|
||||||
" InMemoryStore,\n",
|
|
||||||
" LocalFileStore,\n",
|
|
||||||
" RedisStore,\n",
|
|
||||||
" UpstashRedisStore,\n",
|
|
||||||
")"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -44,7 +48,7 @@
|
|||||||
"id": "9ddf07dd-3e72-41de-99d4-78e9521e272f",
|
"id": "9ddf07dd-3e72-41de-99d4-78e9521e272f",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Using with a vector store\n",
|
"## Using with a Vector Store\n",
|
||||||
"\n",
|
"\n",
|
||||||
"First, let's see an example that uses the local file system for storing embeddings and uses FAISS vector store for retrieval."
|
"First, let's see an example that uses the local file system for storing embeddings and uses FAISS vector store for retrieval."
|
||||||
]
|
]
|
||||||
@@ -52,36 +56,32 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
|
"id": "50183825",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"!pip install openai faiss-cpu"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
"id": "9e4314d8-88ef-4f52-81ae-0be771168bb6",
|
"id": "9e4314d8-88ef-4f52-81ae-0be771168bb6",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from langchain.document_loaders import TextLoader\n",
|
"from langchain.document_loaders import TextLoader\n",
|
||||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||||
"from langchain.text_splitter import CharacterTextSplitter"
|
"from langchain.storage import LocalFileStore\n",
|
||||||
]
|
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||||
},
|
"from langchain.vectorstores import FAISS\n",
|
||||||
{
|
"\n",
|
||||||
"cell_type": "code",
|
"underlying_embeddings = OpenAIEmbeddings()\n",
|
||||||
"execution_count": null,
|
"\n",
|
||||||
"id": "3e751f26-9b5b-4c10-843a-d784b5ea8538",
|
"store = LocalFileStore(\"./cache/\")\n",
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"underlying_embeddings = OpenAIEmbeddings()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "30743664-38f5-425d-8216-772b64e7f348",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"fs = LocalFileStore(\"./cache/\")\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"cached_embedder = CacheBackedEmbeddings.from_bytes_store(\n",
|
"cached_embedder = CacheBackedEmbeddings.from_bytes_store(\n",
|
||||||
" underlying_embeddings, fs, namespace=underlying_embeddings.model\n",
|
" underlying_embeddings, store, namespace=underlying_embeddings.model\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -95,7 +95,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 4,
|
||||||
"id": "f9ad627f-ced2-4277-b336-2434f22f2c8a",
|
"id": "f9ad627f-ced2-4277-b336-2434f22f2c8a",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -105,13 +105,13 @@
|
|||||||
"[]"
|
"[]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 9,
|
"execution_count": 4,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"list(fs.yield_keys())"
|
"list(store.yield_keys())"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -124,12 +124,12 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 5,
|
||||||
"id": "cf958ac2-e60e-4668-b32c-8bb2d78b3c61",
|
"id": "cf958ac2-e60e-4668-b32c-8bb2d78b3c61",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"raw_documents = TextLoader(\"../state_of_the_union.txt\").load()\n",
|
"raw_documents = TextLoader(\"../../state_of_the_union.txt\").load()\n",
|
||||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||||
"documents = text_splitter.split_documents(raw_documents)"
|
"documents = text_splitter.split_documents(raw_documents)"
|
||||||
]
|
]
|
||||||
@@ -144,7 +144,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 6,
|
||||||
"id": "3a1d7bb8-3b72-4bb5-9013-cf7729caca61",
|
"id": "3a1d7bb8-3b72-4bb5-9013-cf7729caca61",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -152,8 +152,8 @@
|
|||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"CPU times: user 608 ms, sys: 58.9 ms, total: 667 ms\n",
|
"CPU times: user 218 ms, sys: 29.7 ms, total: 248 ms\n",
|
||||||
"Wall time: 1.3 s\n"
|
"Wall time: 1.02 s\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -172,7 +172,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 7,
|
||||||
"id": "714cb2e2-77ba-41a8-bb83-84e75342af2d",
|
"id": "714cb2e2-77ba-41a8-bb83-84e75342af2d",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -180,8 +180,8 @@
|
|||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"CPU times: user 33.6 ms, sys: 3.96 ms, total: 37.6 ms\n",
|
"CPU times: user 15.7 ms, sys: 2.22 ms, total: 18 ms\n",
|
||||||
"Wall time: 36.8 ms\n"
|
"Wall time: 17.2 ms\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -200,458 +200,55 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 8,
|
||||||
"id": "f2ca32dd-3712-4093-942b-4122f3dc8a8e",
|
"id": "f2ca32dd-3712-4093-942b-4122f3dc8a8e",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"['text-embedding-ada-002614d7cf6-46f1-52fa-9d3a-740c39e7a20e',\n",
|
"['text-embedding-ada-00217a6727d-8916-54eb-b196-ec9c9d6ca472',\n",
|
||||||
" 'text-embedding-ada-0020fc1ede2-407a-5e14-8f8f-5642214263f5',\n",
|
" 'text-embedding-ada-0025fc0d904-bd80-52da-95c9-441015bfb438',\n",
|
||||||
" 'text-embedding-ada-002e4ad20ef-dfaa-5916-9459-f90c6d8e8159',\n",
|
" 'text-embedding-ada-002e4ad20ef-dfaa-5916-9459-f90c6d8e8159',\n",
|
||||||
" 'text-embedding-ada-002a5ef11e4-0474-5725-8d80-81c91943b37f',\n",
|
" 'text-embedding-ada-002ed199159-c1cd-5597-9757-f80498e8f17b',\n",
|
||||||
" 'text-embedding-ada-00281426526-23fe-58be-9e84-6c7c72c8ca9a']"
|
" 'text-embedding-ada-0021297d37a-2bc1-5e19-bf13-6c950f075062']"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 13,
|
"execution_count": 8,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"list(fs.yield_keys())[:5]"
|
"list(store.yield_keys())[:5]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "564c9801-29f0-4452-aeac-527382e2c0e8",
|
"id": "c1a7fafd",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## In Memory\n",
|
"## Swapping the `ByteStore`\n",
|
||||||
"\n",
|
"\n",
|
||||||
"This section shows how to set up an in memory cache for embeddings. This type of cache is primarily \n",
|
"In order to use a different `ByteStore`, just use it when creating your `CacheBackedEmbeddings`. Below, we create an equivalent cached embeddings object, except using the non-persistent `InMemoryByteStore` instead:"
|
||||||
"useful for unit tests or prototyping. Do **not** use this cache if you need to actually store the embeddings."
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 9,
|
||||||
"id": "13bd1c5b-b7ba-4394-957c-7d5b5a841972",
|
"id": "336a0538",
|
||||||
"metadata": {
|
"metadata": {},
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"store = InMemoryStore()"
|
"from langchain.embeddings import CacheBackedEmbeddings\n",
|
||||||
]
|
"from langchain.storage import InMemoryByteStore\n",
|
||||||
},
|
"\n",
|
||||||
{
|
"store = InMemoryByteStore()\n",
|
||||||
"cell_type": "code",
|
"\n",
|
||||||
"execution_count": null,
|
"cached_embedder = CacheBackedEmbeddings.from_bytes_store(\n",
|
||||||
"id": "9d99885f-99e1-498c-904d-6db539ac9466",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"underlying_embeddings = OpenAIEmbeddings()\n",
|
|
||||||
"embedder = CacheBackedEmbeddings.from_bytes_store(\n",
|
|
||||||
" underlying_embeddings, store, namespace=underlying_embeddings.model\n",
|
" underlying_embeddings, store, namespace=underlying_embeddings.model\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "682eb5d4-0b7a-4dac-b8fb-3de4ca6e421c",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"CPU times: user 10.9 ms, sys: 916 µs, total: 11.8 ms\n",
|
|
||||||
"Wall time: 159 ms\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"embeddings = embedder.embed_documents([\"hello\", \"goodbye\"])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "95233026-147f-49d1-bd87-e1e8b88ebdbc",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The second time we try to embed the embedding time is only 2 ms because the embeddings are looked up in the cache."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "f819c3ff-a212-4d06-a5f7-5eb1435c1feb",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"CPU times: user 1.67 ms, sys: 342 µs, total: 2.01 ms\n",
|
|
||||||
"Wall time: 2.01 ms\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"embeddings_from_cache = embedder.embed_documents([\"hello\", \"goodbye\"])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "ec38fb72-90a9-4687-a483-c62c87d1f4dd",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"True"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 18,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"embeddings == embeddings_from_cache"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "f6cbe100-8587-4830-b207-fb8b524a9854",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## File system\n",
|
|
||||||
"\n",
|
|
||||||
"This section covers how to use a file system store."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "a0070271-0809-4528-97e0-2a88216846f3",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"fs = LocalFileStore(\"./test_cache/\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "0b20e9fe-f57f-4d7c-9f81-105c5f8726f4",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"embedder2 = CacheBackedEmbeddings.from_bytes_store(\n",
|
|
||||||
" underlying_embeddings, fs, namespace=underlying_embeddings.model\n",
|
|
||||||
")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "630515fd-bf5c-4d9c-a404-9705308f3a2c",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"CPU times: user 6.89 ms, sys: 4.89 ms, total: 11.8 ms\n",
|
|
||||||
"Wall time: 184 ms\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"embeddings = embedder2.embed_documents([\"hello\", \"goodbye\"])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "30e6bb87-42c9-4d08-88ac-0d22c9c449a1",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"CPU times: user 0 ns, sys: 3.24 ms, total: 3.24 ms\n",
|
|
||||||
"Wall time: 2.84 ms\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"embeddings = embedder2.embed_documents([\"hello\", \"goodbye\"])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "12ed5a45-8352-4e0f-8583-5537397f53c0",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Here are the embeddings that have been persisted to the directory `./test_cache`. \n",
|
|
||||||
"\n",
|
|
||||||
"Notice that the embedder takes a namespace parameter."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "658e2914-05e9-44a3-a8fe-3fe17ca84039",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"['text-embedding-ada-002e885db5b-c0bd-5fbc-88b1-4d1da6020aa5',\n",
|
|
||||||
" 'text-embedding-ada-0026ba52e44-59c9-5cc9-a084-284061b13c80']"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 23,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"list(fs.yield_keys())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "904c1d47",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Upstash Redis Store"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "d0f9f212",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from langchain.storage.upstash_redis import UpstashRedisStore"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "45bf62e4",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from upstash_redis import Redis\n",
|
|
||||||
"\n",
|
|
||||||
"URL = \"<UPSTASH_REDIS_REST_URL>\"\n",
|
|
||||||
"TOKEN = \"<UPSTASH_REDIS_REST_TOKEN>\"\n",
|
|
||||||
"\n",
|
|
||||||
"redis_client = Redis(url=URL, token=TOKEN)\n",
|
|
||||||
"store = UpstashRedisStore(client=redis_client, ttl=None, namespace=\"test-ns\")\n",
|
|
||||||
"\n",
|
|
||||||
"underlying_embeddings = OpenAIEmbeddings()\n",
|
|
||||||
"embedder = CacheBackedEmbeddings.from_bytes_store(\n",
|
|
||||||
" underlying_embeddings, store, namespace=underlying_embeddings.model\n",
|
|
||||||
")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "3eac3504",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"embeddings = embedder.embed_documents([\"welcome\", \"goodbye\"])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "085dcd30",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"embeddings = embedder.embed_documents([\"welcome\", \"goodbye\"])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "3570e83f",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"list(store.yield_keys())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "d7dc8e51",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"list(store.client.scan(0))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "cd5f5a96-6ffa-429d-aa82-00b3f6532871",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Redis Store\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "4879c134-141f-48a0-acfe-7d6f30253af0",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from langchain.storage import RedisStore"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "8b2bb9a0-6549-4487-8532-29ab4ab7336f",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# For cache isolation, you can use a separate DB\n",
|
|
||||||
"# Or an additional namespace\n",
|
|
||||||
"store = RedisStore(\n",
|
|
||||||
" redis_url=\"redis://localhost:6379\",\n",
|
|
||||||
" client_kwargs={\"db\": 2},\n",
|
|
||||||
" namespace=\"embedding_caches\",\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"underlying_embeddings = OpenAIEmbeddings()\n",
|
|
||||||
"embedder = CacheBackedEmbeddings.from_bytes_store(\n",
|
|
||||||
" underlying_embeddings, store, namespace=underlying_embeddings.model\n",
|
|
||||||
")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "eca3cb99-2bb3-49d5-81f9-1dee03da4b8c",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"CPU times: user 3.99 ms, sys: 0 ns, total: 3.99 ms\n",
|
|
||||||
"Wall time: 3.5 ms\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"embeddings = embedder.embed_documents([\"hello\", \"goodbye\"])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "317ba5d8-89f9-462c-b807-ad4ef26e518b",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"CPU times: user 2.47 ms, sys: 767 µs, total: 3.24 ms\n",
|
|
||||||
"Wall time: 2.75 ms\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"embeddings = embedder.embed_documents([\"hello\", \"goodbye\"])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "8a540317-5142-4491-9062-a097932b56e3",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"['text-embedding-ada-002e885db5b-c0bd-5fbc-88b1-4d1da6020aa5',\n",
|
|
||||||
" 'text-embedding-ada-0026ba52e44-59c9-5cc9-a084-284061b13c80']"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 16,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"list(store.yield_keys())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "cd9b0d4a-f816-4dce-9dde-cde1ad9a65fb",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[b'embedding_caches/text-embedding-ada-002e885db5b-c0bd-5fbc-88b1-4d1da6020aa5',\n",
|
|
||||||
" b'embedding_caches/text-embedding-ada-0026ba52e44-59c9-5cc9-a084-284061b13c80']"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 17,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"list(store.client.scan_iter())"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@@ -670,7 +267,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.11.3"
|
"version": "3.11.4"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
@@ -111,6 +111,7 @@ module.exports = {
|
|||||||
{ type: "category", label: "Callbacks", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/callbacks" }], link: {type: "generated-index", slug: "integrations/callbacks" }},
|
{ type: "category", label: "Callbacks", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/callbacks" }], link: {type: "generated-index", slug: "integrations/callbacks" }},
|
||||||
{ type: "category", label: "Chat loaders", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/chat_loaders" }], link: {type: "generated-index", slug: "integrations/chat_loaders" }},
|
{ type: "category", label: "Chat loaders", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/chat_loaders" }], link: {type: "generated-index", slug: "integrations/chat_loaders" }},
|
||||||
{ type: "category", label: "Adapters", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/adapters" }], link: {type: "generated-index", slug: "integrations/adapters" }},
|
{ type: "category", label: "Adapters", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/adapters" }], link: {type: "generated-index", slug: "integrations/adapters" }},
|
||||||
|
{ type: "category", label: "Stores", collapsed: true, items: [{type: "autogenerated", dirName: "integrations/stores" }], link: {type: "doc", id: "integrations/stores/index" }},
|
||||||
],
|
],
|
||||||
link: {
|
link: {
|
||||||
type: 'generated-index',
|
type: 'generated-index',
|
||||||
|
@@ -51,3 +51,6 @@ class BaseStore(Generic[K, V], ABC):
|
|||||||
This method is allowed to return an iterator over either K or str
|
This method is allowed to return an iterator over either K or str
|
||||||
depending on what makes more sense for the given store.
|
depending on what makes more sense for the given store.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
ByteStore = BaseStore[str, bytes]
|
||||||
|
@@ -15,7 +15,7 @@ from functools import partial
|
|||||||
from typing import Callable, List, Sequence, Union, cast
|
from typing import Callable, List, Sequence, Union, cast
|
||||||
|
|
||||||
from langchain_core.embeddings import Embeddings
|
from langchain_core.embeddings import Embeddings
|
||||||
from langchain_core.stores import BaseStore
|
from langchain_core.stores import BaseStore, ByteStore
|
||||||
|
|
||||||
from langchain.storage.encoder_backed import EncoderBackedStore
|
from langchain.storage.encoder_backed import EncoderBackedStore
|
||||||
|
|
||||||
@@ -151,7 +151,7 @@ class CacheBackedEmbeddings(Embeddings):
|
|||||||
def from_bytes_store(
|
def from_bytes_store(
|
||||||
cls,
|
cls,
|
||||||
underlying_embeddings: Embeddings,
|
underlying_embeddings: Embeddings,
|
||||||
document_embedding_cache: BaseStore[str, bytes],
|
document_embedding_cache: ByteStore,
|
||||||
*,
|
*,
|
||||||
namespace: str = "",
|
namespace: str = "",
|
||||||
) -> CacheBackedEmbeddings:
|
) -> CacheBackedEmbeddings:
|
||||||
|
@@ -3,7 +3,7 @@ from typing import List, Optional
|
|||||||
|
|
||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
from langchain_core.retrievers import BaseRetriever
|
from langchain_core.retrievers import BaseRetriever
|
||||||
from langchain_core.stores import BaseStore
|
from langchain_core.stores import BaseStore, ByteStore
|
||||||
from langchain_core.vectorstores import VectorStore
|
from langchain_core.vectorstores import VectorStore
|
||||||
|
|
||||||
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
|
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
|
||||||
@@ -38,7 +38,7 @@ class MultiVectorRetriever(BaseRetriever):
|
|||||||
*,
|
*,
|
||||||
vectorstore: VectorStore,
|
vectorstore: VectorStore,
|
||||||
docstore: Optional[BaseStore[str, Document]] = None,
|
docstore: Optional[BaseStore[str, Document]] = None,
|
||||||
base_store: Optional[BaseStore[str, bytes]] = None,
|
base_store: Optional[ByteStore] = None,
|
||||||
id_key: str = "doc_id",
|
id_key: str = "doc_id",
|
||||||
search_kwargs: Optional[dict] = None,
|
search_kwargs: Optional[dict] = None,
|
||||||
search_type: SearchType = SearchType.similarity,
|
search_type: SearchType = SearchType.similarity,
|
||||||
|
@@ -9,16 +9,18 @@ The primary goal of these storages is to support implementation of caching.
|
|||||||
from langchain.storage._lc_store import create_kv_docstore, create_lc_store
|
from langchain.storage._lc_store import create_kv_docstore, create_lc_store
|
||||||
from langchain.storage.encoder_backed import EncoderBackedStore
|
from langchain.storage.encoder_backed import EncoderBackedStore
|
||||||
from langchain.storage.file_system import LocalFileStore
|
from langchain.storage.file_system import LocalFileStore
|
||||||
from langchain.storage.in_memory import InMemoryStore
|
from langchain.storage.in_memory import InMemoryByteStore, InMemoryStore
|
||||||
from langchain.storage.redis import RedisStore
|
from langchain.storage.redis import RedisStore
|
||||||
from langchain.storage.upstash_redis import UpstashRedisStore
|
from langchain.storage.upstash_redis import UpstashRedisByteStore, UpstashRedisStore
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"EncoderBackedStore",
|
"EncoderBackedStore",
|
||||||
"InMemoryStore",
|
"InMemoryStore",
|
||||||
|
"InMemoryByteStore",
|
||||||
"LocalFileStore",
|
"LocalFileStore",
|
||||||
"RedisStore",
|
"RedisStore",
|
||||||
"create_lc_store",
|
"create_lc_store",
|
||||||
"create_kv_docstore",
|
"create_kv_docstore",
|
||||||
|
"UpstashRedisByteStore",
|
||||||
"UpstashRedisStore",
|
"UpstashRedisStore",
|
||||||
]
|
]
|
||||||
|
@@ -3,7 +3,7 @@ from typing import Callable, Optional
|
|||||||
|
|
||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
from langchain_core.load import Serializable, dumps, loads
|
from langchain_core.load import Serializable, dumps, loads
|
||||||
from langchain_core.stores import BaseStore
|
from langchain_core.stores import BaseStore, ByteStore
|
||||||
|
|
||||||
from langchain.storage.encoder_backed import EncoderBackedStore
|
from langchain.storage.encoder_backed import EncoderBackedStore
|
||||||
|
|
||||||
@@ -42,7 +42,7 @@ def _identity(x: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def create_lc_store(
|
def create_lc_store(
|
||||||
store: BaseStore[str, bytes],
|
store: ByteStore,
|
||||||
*,
|
*,
|
||||||
key_encoder: Optional[Callable[[str], str]] = None,
|
key_encoder: Optional[Callable[[str], str]] = None,
|
||||||
) -> BaseStore[str, Serializable]:
|
) -> BaseStore[str, Serializable]:
|
||||||
@@ -64,7 +64,7 @@ def create_lc_store(
|
|||||||
|
|
||||||
|
|
||||||
def create_kv_docstore(
|
def create_kv_docstore(
|
||||||
store: BaseStore[str, bytes],
|
store: ByteStore,
|
||||||
*,
|
*,
|
||||||
key_encoder: Optional[Callable[[str], str]] = None,
|
key_encoder: Optional[Callable[[str], str]] = None,
|
||||||
) -> BaseStore[str, Document]:
|
) -> BaseStore[str, Document]:
|
||||||
|
@@ -2,12 +2,12 @@ import re
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterator, List, Optional, Sequence, Tuple, Union
|
from typing import Iterator, List, Optional, Sequence, Tuple, Union
|
||||||
|
|
||||||
from langchain_core.stores import BaseStore
|
from langchain_core.stores import ByteStore
|
||||||
|
|
||||||
from langchain.storage.exceptions import InvalidKeyException
|
from langchain.storage.exceptions import InvalidKeyException
|
||||||
|
|
||||||
|
|
||||||
class LocalFileStore(BaseStore[str, bytes]):
|
class LocalFileStore(ByteStore):
|
||||||
"""BaseStore interface that works on the local file system.
|
"""BaseStore interface that works on the local file system.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
|
@@ -3,12 +3,24 @@
|
|||||||
This is a simple implementation of the BaseStore using a dictionary that is useful
|
This is a simple implementation of the BaseStore using a dictionary that is useful
|
||||||
primarily for unit testing purposes.
|
primarily for unit testing purposes.
|
||||||
"""
|
"""
|
||||||
from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple
|
from typing import (
|
||||||
|
Any,
|
||||||
|
Dict,
|
||||||
|
Generic,
|
||||||
|
Iterator,
|
||||||
|
List,
|
||||||
|
Optional,
|
||||||
|
Sequence,
|
||||||
|
Tuple,
|
||||||
|
TypeVar,
|
||||||
|
)
|
||||||
|
|
||||||
from langchain_core.stores import BaseStore
|
from langchain_core.stores import BaseStore
|
||||||
|
|
||||||
|
V = TypeVar("V")
|
||||||
|
|
||||||
class InMemoryStore(BaseStore[str, Any]):
|
|
||||||
|
class InMemoryBaseStore(BaseStore[str, V], Generic[V]):
|
||||||
"""In-memory implementation of the BaseStore using a dictionary.
|
"""In-memory implementation of the BaseStore using a dictionary.
|
||||||
|
|
||||||
Attributes:
|
Attributes:
|
||||||
@@ -34,9 +46,9 @@ class InMemoryStore(BaseStore[str, Any]):
|
|||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
"""Initialize an empty store."""
|
"""Initialize an empty store."""
|
||||||
self.store: Dict[str, Any] = {}
|
self.store: Dict[str, V] = {}
|
||||||
|
|
||||||
def mget(self, keys: Sequence[str]) -> List[Optional[Any]]:
|
def mget(self, keys: Sequence[str]) -> List[Optional[V]]:
|
||||||
"""Get the values associated with the given keys.
|
"""Get the values associated with the given keys.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -48,7 +60,7 @@ class InMemoryStore(BaseStore[str, Any]):
|
|||||||
"""
|
"""
|
||||||
return [self.store.get(key) for key in keys]
|
return [self.store.get(key) for key in keys]
|
||||||
|
|
||||||
def mset(self, key_value_pairs: Sequence[Tuple[str, Any]]) -> None:
|
def mset(self, key_value_pairs: Sequence[Tuple[str, V]]) -> None:
|
||||||
"""Set the values for the given keys.
|
"""Set the values for the given keys.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@@ -67,7 +79,8 @@ class InMemoryStore(BaseStore[str, Any]):
|
|||||||
keys (Sequence[str]): A sequence of keys to delete.
|
keys (Sequence[str]): A sequence of keys to delete.
|
||||||
"""
|
"""
|
||||||
for key in keys:
|
for key in keys:
|
||||||
self.store.pop(key, None)
|
if key in self.store:
|
||||||
|
del self.store[key]
|
||||||
|
|
||||||
def yield_keys(self, prefix: Optional[str] = None) -> Iterator[str]:
|
def yield_keys(self, prefix: Optional[str] = None) -> Iterator[str]:
|
||||||
"""Get an iterator over keys that match the given prefix.
|
"""Get an iterator over keys that match the given prefix.
|
||||||
@@ -84,3 +97,7 @@ class InMemoryStore(BaseStore[str, Any]):
|
|||||||
for key in self.store.keys():
|
for key in self.store.keys():
|
||||||
if key.startswith(prefix):
|
if key.startswith(prefix):
|
||||||
yield key
|
yield key
|
||||||
|
|
||||||
|
|
||||||
|
InMemoryStore = InMemoryBaseStore[Any]
|
||||||
|
InMemoryByteStore = InMemoryBaseStore[bytes]
|
||||||
|
@@ -1,11 +1,11 @@
|
|||||||
from typing import Any, Iterator, List, Optional, Sequence, Tuple, cast
|
from typing import Any, Iterator, List, Optional, Sequence, Tuple, cast
|
||||||
|
|
||||||
from langchain_core.stores import BaseStore
|
from langchain_core.stores import ByteStore
|
||||||
|
|
||||||
from langchain.utilities.redis import get_client
|
from langchain.utilities.redis import get_client
|
||||||
|
|
||||||
|
|
||||||
class RedisStore(BaseStore[str, bytes]):
|
class RedisStore(ByteStore):
|
||||||
"""BaseStore implementation using Redis as the underlying store.
|
"""BaseStore implementation using Redis as the underlying store.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
from typing import Any, Iterator, List, Optional, Sequence, Tuple, cast
|
from typing import Any, Iterator, List, Optional, Sequence, Tuple, cast
|
||||||
|
|
||||||
from langchain_core._api.deprecation import deprecated
|
from langchain_core._api.deprecation import deprecated
|
||||||
from langchain_core.stores import BaseStore
|
from langchain_core.stores import BaseStore, ByteStore
|
||||||
|
|
||||||
|
|
||||||
class _UpstashRedisStore(BaseStore[str, str]):
|
class _UpstashRedisStore(BaseStore[str, str]):
|
||||||
@@ -130,7 +130,7 @@ class UpstashRedisStore(_UpstashRedisStore):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class UpstashRedisByteStore(BaseStore[str, bytes]):
|
class UpstashRedisByteStore(ByteStore):
|
||||||
"""
|
"""
|
||||||
BaseStore implementation using Upstash Redis
|
BaseStore implementation using Upstash Redis
|
||||||
as the underlying store to store raw bytes.
|
as the underlying store to store raw bytes.
|
||||||
|
@@ -3,10 +3,12 @@ from langchain.storage import __all__
|
|||||||
EXPECTED_ALL = [
|
EXPECTED_ALL = [
|
||||||
"EncoderBackedStore",
|
"EncoderBackedStore",
|
||||||
"InMemoryStore",
|
"InMemoryStore",
|
||||||
|
"InMemoryByteStore",
|
||||||
"LocalFileStore",
|
"LocalFileStore",
|
||||||
"RedisStore",
|
"RedisStore",
|
||||||
"create_lc_store",
|
"create_lc_store",
|
||||||
"create_kv_docstore",
|
"create_kv_docstore",
|
||||||
|
"UpstashRedisByteStore",
|
||||||
"UpstashRedisStore",
|
"UpstashRedisStore",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user