mirror of
https://github.com/hwchase17/langchain.git
synced 2025-04-28 11:55:21 +00:00
docs: Add openGauss vector store documentation (#30742)
Hey LangChain community! 👋 Excited to propose official documentation for our new openGauss integration that brings powerful vector capabilities to the stack! ### What's Inside 📦 1. **Full Integration Guide** Introducing [langchain-opengauss](https://pypi.org/project/langchain-opengauss/) on PyPI - your new toolkit for: 🔍 Native hybrid search (vectors + metadata) 🚀 Production-grade connection pooling 🧩 Automatic schema management 2. **Rigorous Testing Passed** ✅  - 100% non-async test coverage ps: Current implementation resides in my personal repository: https://github.com/mpb159753/langchain-opengauss, How can I transfer process to langchain-ai org?? *Keen to hear your thoughts and make this integration shine!* ✨ --------- Co-authored-by: Eugene Yurtsev <eugene@langchain.dev> Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
parent
913c896598
commit
bb2c2fd885
373
docs/docs/integrations/vectorstores/opengauss.ipynb
Normal file
373
docs/docs/integrations/vectorstores/opengauss.ipynb
Normal file
@ -0,0 +1,373 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "raw",
|
||||||
|
"id": "1957f5cb",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"---\n",
|
||||||
|
"sidebar_label: openGauss\n",
|
||||||
|
"---"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "ef1f0986",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# openGauss VectorStore\n",
|
||||||
|
"\n",
|
||||||
|
"This notebook covers how to get started with the openGauss VectorStore. [openGauss](https://opengauss.org/en/) is a high-performance relational database with native vector storage and retrieval capabilities. This integration enables ACID-compliant vector operations within LangChain applications, combining traditional SQL functionality with modern AI-driven similarity search.\n",
|
||||||
|
" vector store."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "36fdc060",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Setup\n",
|
||||||
|
"\n",
|
||||||
|
"### Launch openGauss Container"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"metadata": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"```bash\n",
|
||||||
|
"docker run --name opengauss \\\n",
|
||||||
|
" -d \\\n",
|
||||||
|
" -e GS_PASSWORD='MyStrongPass@123' \\\n",
|
||||||
|
" -p 8888:5432 \\\n",
|
||||||
|
" opengauss/opengauss-server:latest\n",
|
||||||
|
"```"
|
||||||
|
],
|
||||||
|
"id": "e006fdc593107ef5"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "a51b3f07b83b8a1d",
|
||||||
|
"metadata": {},
|
||||||
|
"source": "### Install langchain-opengauss"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "raw",
|
||||||
|
"id": "ad030f666e228cc8",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"```bash\n",
|
||||||
|
"pip install langchain-opengauss\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "4d14f2f5f8ab0df7",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"**System Requirements**:\n",
|
||||||
|
"- openGauss ≥ 7.0.0\n",
|
||||||
|
"- Python ≥ 3.8\n",
|
||||||
|
"- psycopg2-binary"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "9695dee7",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Credentials\n",
|
||||||
|
"\n",
|
||||||
|
"Using your openGauss Credentials"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "93df377e",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialization\n",
|
||||||
|
"\n",
|
||||||
|
"import EmbeddingTabs from \"@theme/EmbeddingTabs\";\n",
|
||||||
|
"\n",
|
||||||
|
"<EmbeddingTabs/>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "dc37144c-208d-4ab3-9f3a-0407a69fe052",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain_opengauss import OpenGauss, OpenGaussSettings\n",
|
||||||
|
"\n",
|
||||||
|
"# Configure with schema validation\n",
|
||||||
|
"config = OpenGaussSettings(\n",
|
||||||
|
" table_name=\"test_langchain\",\n",
|
||||||
|
" embedding_dimension=384,\n",
|
||||||
|
" index_type=\"HNSW\",\n",
|
||||||
|
" distance_strategy=\"COSINE\",\n",
|
||||||
|
")\n",
|
||||||
|
"vector_store = OpenGauss(embedding=embeddings, config=config)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "ac6071d4",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Manage vector store\n",
|
||||||
|
"\n",
|
||||||
|
"### Add items to vector store\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "17f5efc0",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain_core.documents import Document\n",
|
||||||
|
"\n",
|
||||||
|
"document_1 = Document(page_content=\"foo\", metadata={\"source\": \"https://example.com\"})\n",
|
||||||
|
"\n",
|
||||||
|
"document_2 = Document(page_content=\"bar\", metadata={\"source\": \"https://example.com\"})\n",
|
||||||
|
"\n",
|
||||||
|
"document_3 = Document(page_content=\"baz\", metadata={\"source\": \"https://example.com\"})\n",
|
||||||
|
"\n",
|
||||||
|
"documents = [document_1, document_2, document_3]\n",
|
||||||
|
"\n",
|
||||||
|
"vector_store.add_documents(documents=documents, ids=[\"1\", \"2\", \"3\"])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "c738c3e0",
|
||||||
|
"metadata": {},
|
||||||
|
"source": "### Update items in vector store\n"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "f0aa8b71",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"updated_document = Document(\n",
|
||||||
|
" page_content=\"qux\", metadata={\"source\": \"https://another-example.com\"}\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"# If the id is already exist, will update the document\n",
|
||||||
|
"vector_store.add_documents(document_id=\"1\", document=updated_document)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "dcf1b905",
|
||||||
|
"metadata": {},
|
||||||
|
"source": "### Delete items from vector store\n"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "ef61e188",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"vector_store.delete(ids=[\"3\"])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "c3620501",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Query vector store\n",
|
||||||
|
"\n",
|
||||||
|
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent.\n",
|
||||||
|
"\n",
|
||||||
|
"### Query directly\n",
|
||||||
|
"\n",
|
||||||
|
"Performing a simple similarity search can be done as follows:\n",
|
||||||
|
"\n",
|
||||||
|
"- TODO: Edit and then run code cell to generate output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "aa0a16fa",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"results = vector_store.similarity_search(\n",
|
||||||
|
" query=\"thud\", k=1, filter={\"source\": \"https://another-example.com\"}\n",
|
||||||
|
")\n",
|
||||||
|
"for doc in results:\n",
|
||||||
|
" print(f\"* {doc.page_content} [{doc.metadata}]\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "3ed9d733",
|
||||||
|
"metadata": {},
|
||||||
|
"source": "If you want to execute a similarity search and receive the corresponding scores you can run:\n"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "5efd2eaa",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"results = vector_store.similarity_search_with_score(\n",
|
||||||
|
" query=\"thud\", k=1, filter={\"source\": \"https://example.com\"}\n",
|
||||||
|
")\n",
|
||||||
|
"for doc, score in results:\n",
|
||||||
|
" print(f\"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "0c235cdc",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Query by turning into retriever\n",
|
||||||
|
"\n",
|
||||||
|
"You can also transform the vector store into a retriever for easier usage in your chains.\n",
|
||||||
|
"\n",
|
||||||
|
"- TODO: Edit and then run code cell to generate output"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "f3460093",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"retriever = vector_store.as_retriever(search_type=\"mmr\", search_kwargs={\"k\": 1})\n",
|
||||||
|
"retriever.invoke(\"thud\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "901c75dc",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Usage for retrieval-augmented generation\n",
|
||||||
|
"\n",
|
||||||
|
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
|
||||||
|
"\n",
|
||||||
|
"- [Tutorials](/docs/tutorials/)\n",
|
||||||
|
"- [How-to: Question and answer with RAG](https://python.langchain.com/docs/how_to/#qa-with-rag)\n",
|
||||||
|
"- [Retrieval conceptual docs](https://python.langchain.com/docs/concepts/retrieval/)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "069f1b5f",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Configuration\n",
|
||||||
|
"\n",
|
||||||
|
"### Connection Settings\n",
|
||||||
|
"| Parameter | Default | Description |\n",
|
||||||
|
"|---------------------|-------------------------|--------------------------------------------------------|\n",
|
||||||
|
"| `host` | localhost | Database server address |\n",
|
||||||
|
"| `port` | 8888 | Database connection port |\n",
|
||||||
|
"| `user` | gaussdb | Database username |\n",
|
||||||
|
"| `password` | - | Complex password string |\n",
|
||||||
|
"| `database` | postgres | Default database name |\n",
|
||||||
|
"| `min_connections` | 1 | Connection pool minimum size |\n",
|
||||||
|
"| `max_connections` | 5 | Connection pool maximum size |\n",
|
||||||
|
"| `table_name` | langchain_docs | Name of the table for storing vector data and metadata |\n",
|
||||||
|
"| `index_type` | IndexType.HNSW |Vector index algorithm type. Options: HNSW or IVFFLAT\\nDefault is HNSW.|\n",
|
||||||
|
"| `vector_type` | VectorType.vector |Type of vector representation to use. Default is Vector.|\n",
|
||||||
|
"| `distance_strategy` | DistanceStrategy.COSINE |Vector similarity metric to use for retrieval. Options: euclidean (L2 distance), cosine (angular distance, ideal for text embeddings), manhattan (L1 distance for sparse data), negative_inner_product (dot product for normalized vectors).\\n Default is cosine.|\n",
|
||||||
|
"|`embedding_dimension`| 1536 |Dimensionality of the vector embeddings.|\n",
|
||||||
|
"\n",
|
||||||
|
"### Supported Combinations\n",
|
||||||
|
"\n",
|
||||||
|
"| Vector Type | Dimensions | Index Types | Supported Distance Strategies |\n",
|
||||||
|
"|-------------|------------|--------------|---------------------------------------|\n",
|
||||||
|
"| vector | ≤2000 | HNSW/IVFFLAT | COSINE/EUCLIDEAN/MANHATTAN/INNER_PROD |\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "6a7b7b7c4f5a03e1",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Performance Optimization\n",
|
||||||
|
"\n",
|
||||||
|
"### Index Tuning Guidelines\n",
|
||||||
|
"**HNSW Parameters**:\n",
|
||||||
|
"- `m`: 16-100 (balance between recall and memory)\n",
|
||||||
|
"- `ef_construction`: 64-1000 (must be > 2*m)\n",
|
||||||
|
"\n",
|
||||||
|
"**IVFFLAT Recommendations**:\n",
|
||||||
|
"```python\n",
|
||||||
|
"import math\n",
|
||||||
|
"\n",
|
||||||
|
"lists = min(\n",
|
||||||
|
" int(math.sqrt(total_rows)) if total_rows > 1e6 else int(total_rows / 1000),\n",
|
||||||
|
" 2000, # openGauss maximum\n",
|
||||||
|
")\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"### Connection Pooling\n",
|
||||||
|
"```python\n",
|
||||||
|
"OpenGaussSettings(min_connections=3, max_connections=20)\n",
|
||||||
|
"```\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "6b581b499ffed641",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Limitations\n",
|
||||||
|
"- `bit` and `sparsevec` vector types currently in development\n",
|
||||||
|
"- Maximum vector dimensions: 2000 for `vector` type"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "8a27244f",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## API reference\n",
|
||||||
|
"\n",
|
||||||
|
"For detailed documentation of all __ModuleName__VectorStore features and configurations head to the API reference: https://python.langchain.com/api_reference/en/latest/vectorstores/opengauss.OpenGuass.html"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.12"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
@ -140,6 +140,17 @@ def get_vectorstore_table():
|
|||||||
"Local/Cloud": "Local",
|
"Local/Cloud": "Local",
|
||||||
"IDs in add Documents": True,
|
"IDs in add Documents": True,
|
||||||
},
|
},
|
||||||
|
"openGauss": {
|
||||||
|
"Delete by ID": True,
|
||||||
|
"Filtering": True,
|
||||||
|
"similarity_search_by_vector": True,
|
||||||
|
"similarity_search_with_score": True,
|
||||||
|
"asearch": False,
|
||||||
|
"Passes Standard Tests": True,
|
||||||
|
"Multi Tenancy": False,
|
||||||
|
"Local/Cloud": "Local",
|
||||||
|
"IDs in add Documents": True,
|
||||||
|
},
|
||||||
"QdrantVectorStore": {
|
"QdrantVectorStore": {
|
||||||
"Delete by ID": True,
|
"Delete by ID": True,
|
||||||
"Filtering": True,
|
"Filtering": True,
|
||||||
|
@ -1130,6 +1130,19 @@ const FEATURE_TABLES = {
|
|||||||
local: true,
|
local: true,
|
||||||
idsInAddDocuments: false,
|
idsInAddDocuments: false,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "openGauss",
|
||||||
|
link: "openGauss",
|
||||||
|
deleteById: true,
|
||||||
|
filtering: true,
|
||||||
|
searchByVector: true,
|
||||||
|
searchWithScore: true,
|
||||||
|
async: false,
|
||||||
|
passesStandardTests: true,
|
||||||
|
multiTenancy: false,
|
||||||
|
local: true,
|
||||||
|
idsInAddDocuments: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "PGVector",
|
name: "PGVector",
|
||||||
link: "pgvector",
|
link: "pgvector",
|
||||||
|
Loading…
Reference in New Issue
Block a user