mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-26 08:33:49 +00:00
doc: Add doc for CassandraByteStore (#22126)
Preview: https://langchain-git-fork-cbornet-doc-cassandrabytestore-langchain.vercel.app/v0.2/docs/integrations/stores/cassandra/
This commit is contained in:
parent
2edb512282
commit
c838de5027
228
docs/docs/integrations/stores/cassandra.ipynb
Normal file
228
docs/docs/integrations/stores/cassandra.ipynb
Normal file
@ -0,0 +1,228 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "raw",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"---\n",
|
||||||
|
"sidebar_label: Cassandra\n",
|
||||||
|
"---"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Cassandra\n",
|
||||||
|
"\n",
|
||||||
|
"[Cassandra](https://cassandra.apache.org/) is a NoSQL, row-oriented, highly scalable and highly available database.\n",
|
||||||
|
"\n",
|
||||||
|
"`CassandraByteStore` needs the `cassio` package to be installed:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"vscode": {
|
||||||
|
"languageId": "plaintext"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%pip install --upgrade --quiet cassio"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The Store takes the following parameters:\n",
|
||||||
|
"\n",
|
||||||
|
"* table: The table where to store the data.\n",
|
||||||
|
"* session: (Optional) The cassandra driver session. If not provided, the cassio resolved session will be used.\n",
|
||||||
|
"* keyspace: (Optional) The keyspace of the table. If not provided, the cassio resolved keyspace will be used.\n",
|
||||||
|
"* setup_mode: (Optional) The mode used to create the Cassandra table (SYNC, ASYNC or OFF). Defaults to SYNC."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## CassandraByteStore\n",
|
||||||
|
"\n",
|
||||||
|
"The `CassandraByteStore` is an implementation of `ByteStore` that stores the data in your Cassandra instance.\n",
|
||||||
|
"The store keys must be strings and will be mapped to the `row_id` column of the Cassandra table.\n",
|
||||||
|
"The store `bytes` values are mapped to the `body_blob` column of the Cassandra table."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain_community.storage import CassandraByteStore"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"### Init from a cassandra driver Session\n",
|
||||||
|
"\n",
|
||||||
|
"You need to create a `cassandra.cluster.Session` object, as described in the [Cassandra driver documentation](https://docs.datastax.com/en/developer/python-driver/latest/api/cassandra/cluster/#module-cassandra.cluster). The details vary (e.g. with network settings and authentication), but this might be something like:"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from cassandra.cluster import Cluster\n",
|
||||||
|
"\n",
|
||||||
|
"cluster = Cluster()\n",
|
||||||
|
"session = cluster.connect()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"You need to provide the name of an existing keyspace of the Cassandra instance:"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"CASSANDRA_KEYSPACE = input(\"CASSANDRA_KEYSPACE = \")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"Creating the store:"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"[b'v1', b'v2']\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"store = CassandraByteStore(\n",
|
||||||
|
" table=\"my_store\",\n",
|
||||||
|
" session=session,\n",
|
||||||
|
" keyspace=CASSANDRA_KEYSPACE,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n",
|
||||||
|
"print(store.mget([\"k1\", \"k2\"]))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"### Init from cassio\n",
|
||||||
|
"\n",
|
||||||
|
"It's also possible to use cassio to configure the session and keyspace."
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import cassio\n",
|
||||||
|
"\n",
|
||||||
|
"cassio.init(contact_points=\"127.0.0.1\", keyspace=CASSANDRA_KEYSPACE)\n",
|
||||||
|
"\n",
|
||||||
|
"store = CassandraByteStore(\n",
|
||||||
|
" table=\"my_store\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n",
|
||||||
|
"print(store.mget([\"k1\", \"k2\"]))"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"### Usage with CacheBackedEmbeddings\n",
|
||||||
|
"\n",
|
||||||
|
"You may use the `CassandraByteStore` in conjunction with a [`CacheBackedEmbeddings`](/docs/how_to/caching_embeddings) to cache the result of embeddings computations.\n"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.embeddings import CacheBackedEmbeddings\n",
|
||||||
|
"from langchain_openai import OpenAIEmbeddings\n",
|
||||||
|
"\n",
|
||||||
|
"cassio.init(contact_points=\"127.0.0.1\", keyspace=CASSANDRA_KEYSPACE)\n",
|
||||||
|
"\n",
|
||||||
|
"store = CassandraByteStore(\n",
|
||||||
|
" table=\"my_store\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"embeddings = CacheBackedEmbeddings.from_bytes_store(\n",
|
||||||
|
" underlying_embeddings=OpenAIEmbeddings(), document_embedding_cache=store\n",
|
||||||
|
")"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": ".venv",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
@ -22,6 +22,9 @@ if TYPE_CHECKING:
|
|||||||
AstraDBByteStore,
|
AstraDBByteStore,
|
||||||
AstraDBStore,
|
AstraDBStore,
|
||||||
)
|
)
|
||||||
|
from langchain_community.storage.cassandra import (
|
||||||
|
CassandraByteStore,
|
||||||
|
)
|
||||||
from langchain_community.storage.mongodb import (
|
from langchain_community.storage.mongodb import (
|
||||||
MongoDBStore,
|
MongoDBStore,
|
||||||
)
|
)
|
||||||
@ -36,6 +39,7 @@ if TYPE_CHECKING:
|
|||||||
__all__ = [
|
__all__ = [
|
||||||
"AstraDBByteStore",
|
"AstraDBByteStore",
|
||||||
"AstraDBStore",
|
"AstraDBStore",
|
||||||
|
"CassandraByteStore",
|
||||||
"MongoDBStore",
|
"MongoDBStore",
|
||||||
"RedisStore",
|
"RedisStore",
|
||||||
"UpstashRedisByteStore",
|
"UpstashRedisByteStore",
|
||||||
@ -45,6 +49,7 @@ __all__ = [
|
|||||||
_module_lookup = {
|
_module_lookup = {
|
||||||
"AstraDBByteStore": "langchain_community.storage.astradb",
|
"AstraDBByteStore": "langchain_community.storage.astradb",
|
||||||
"AstraDBStore": "langchain_community.storage.astradb",
|
"AstraDBStore": "langchain_community.storage.astradb",
|
||||||
|
"CassandraByteStore": "langchain_community.storage.cassandra",
|
||||||
"MongoDBStore": "langchain_community.storage.mongodb",
|
"MongoDBStore": "langchain_community.storage.mongodb",
|
||||||
"RedisStore": "langchain_community.storage.redis",
|
"RedisStore": "langchain_community.storage.redis",
|
||||||
"UpstashRedisByteStore": "langchain_community.storage.upstash_redis",
|
"UpstashRedisByteStore": "langchain_community.storage.upstash_redis",
|
||||||
|
@ -3,6 +3,7 @@ from langchain_community.storage import __all__, _module_lookup
|
|||||||
EXPECTED_ALL = [
|
EXPECTED_ALL = [
|
||||||
"AstraDBStore",
|
"AstraDBStore",
|
||||||
"AstraDBByteStore",
|
"AstraDBByteStore",
|
||||||
|
"CassandraByteStore",
|
||||||
"MongoDBStore",
|
"MongoDBStore",
|
||||||
"RedisStore",
|
"RedisStore",
|
||||||
"UpstashRedisByteStore",
|
"UpstashRedisByteStore",
|
||||||
|
Loading…
Reference in New Issue
Block a user