mirror of https://github.com/hwchase17/langchain.git (synced 2025-06-21 06:14:37 +00:00)
community: added FalkorDB vector store support, i.e. implementation, tests, docs an… (#26245)
**Description:** Added support for FalkorDB Vector Store, including its implementation, unit tests, documentation, and an example notebook. The FalkorDB integration allows users to efficiently manage and query embeddings in a vector database, with relevance scoring and maximal marginal relevance search. The following components were implemented:

- Core implementation for the FalkorDBVector store.
- Unit tests ensuring proper functionality and edge case coverage.
- Example notebook demonstrating an end-to-end setup, search, and retrieval using FalkorDB.

**Twitter handle:** @tariyekorogha

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
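A minimal usage sketch of the API this PR describes. `from_texts`, `similarity_search`, and `similarity_search_with_score` mirror the notebook and tests below; `max_marginal_relevance_search` is the standard `VectorStore` method name for the MMR support the description mentions, not shown verbatim in this PR:

```python
from langchain_community.vectorstores.falkordb_vector import FalkorDBVector
from langchain_huggingface import HuggingFaceEmbeddings

# Build a store against a local FalkorDB (localhost:6379) and add a few texts.
store = FalkorDBVector.from_texts(
    texts=["foo", "bar", "baz"],
    embedding=HuggingFaceEmbeddings(),
    host="localhost",
    port=6379,
)

docs = store.similarity_search("foo", k=2)  # plain similarity search
scored = store.similarity_search_with_score("foo", k=2)  # with relevance scores
diverse = store.max_marginal_relevance_search("foo", k=2, fetch_k=3)  # MMR (assumed method name)
```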
This commit is contained in:
parent 12fced13f4
commit d262d41cc0

437 docs/docs/integrations/vectorstores/falkordbvector.ipynb (new file)
@@ -0,0 +1,437 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# FalkorDBVectorStore\n",
    "<a href=\"https://docs.falkordb.com/\" target=\"_blank\">FalkorDB</a> is an open-source graph database with integrated support for vector similarity search.\n",
    "\n",
    "It supports:\n",
    "- Approximate nearest neighbor search\n",
    "- Euclidean and cosine similarity\n",
    "- Hybrid search combining vector and keyword searches\n",
    "\n",
    "This notebook shows how to use the FalkorDB vector index (`FalkorDBVector`).\n",
    "\n",
    "See the <a href=\"https://docs.falkordb.com/\" target=\"_blank\">installation instructions</a>.\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pip install necessary packages\n",
    "%pip install --upgrade falkordb\n",
    "%pip install --upgrade tiktoken\n",
    "%pip install --upgrade langchain langchain_huggingface"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Credentials\n",
    "We want to use HuggingFace embeddings, so we have to get a HuggingFace API key:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import getpass\n",
    "import os\n",
    "\n",
    "if \"HUGGINGFACE_API_KEY\" not in os.environ:\n",
    "    os.environ[\"HUGGINGFACE_API_KEY\"] = getpass.getpass(\"HuggingFace API Key:\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you want automated tracing of your model calls, you can also set your LangSmith API key by uncommenting below:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
    "# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Initialization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.vectorstores.falkordb_vector import FalkorDBVector\n",
    "from langchain_core.documents import Document\n",
    "from langchain_huggingface import HuggingFaceEmbeddings"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can use FalkorDBVector locally with Docker. See the <a href=\"https://docs.falkordb.com/\" target=\"_blank\">installation instructions</a>:"
   ]
  },
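  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A sketch of a local setup; the image name and ports follow FalkorDB's docs\n",
    "# and may need adjusting for your environment:\n",
    "# !docker run -p 6379:6379 -it --rm falkordb/falkordb"
   ]
  },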
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "host = \"localhost\"\n",
    "port = 6379"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Or you can use FalkorDBVector with <a href=\"https://app.falkordb.cloud\">FalkorDB Cloud</a>:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# E.g.\n",
    "# host = \"r-6jissuruar.instance-zwb082gpf.hc-v8noonp0c.europe-west1.gcp.f2e0a955bb84.cloud\"\n",
    "# port = 62471\n",
    "# username = \"falkordb\"  # SET ON FALKORDB CLOUD\n",
    "# password = \"password\"  # SET ON FALKORDB CLOUD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "vector_store = FalkorDBVector(host=host, port=port, embedding=HuggingFaceEmbeddings())"
   ]
  },
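  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "FalkorDBVector also supports hybrid (vector + keyword) search. A minimal sketch, assuming the constructor accepts the same `search_type` parameter that `from_embeddings` takes in this integration's tests:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from langchain_community.vectorstores.falkordb_vector import SearchType\n",
    "#\n",
    "# hybrid_store = FalkorDBVector(\n",
    "#     host=host, port=port, embedding=HuggingFaceEmbeddings(), search_type=SearchType.HYBRID\n",
    "# )"
   ]
  },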
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Manage vector store"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Add items to vector store"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['1', '2', '3']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from langchain_core.documents import Document\n",
    "\n",
    "document_1 = Document(page_content=\"foo\", metadata={\"source\": \"https://example.com\"})\n",
    "\n",
    "document_2 = Document(page_content=\"bar\", metadata={\"source\": \"https://example.com\"})\n",
    "\n",
    "document_3 = Document(page_content=\"baz\", metadata={\"source\": \"https://example.com\"})\n",
    "\n",
    "documents = [document_1, document_2, document_3]\n",
    "\n",
    "vector_store.add_documents(documents=documents, ids=[\"1\", \"2\", \"3\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Update items in vector store"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "updated_document = Document(\n",
    "    page_content=\"qux\", metadata={\"source\": \"https://another-example.com\"}\n",
    ")\n",
    "\n",
    "vector_store.update_documents(document_id=\"1\", document=updated_document)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Delete items from vector store"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "vector_store.delete(ids=[\"3\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Query vector store\n",
    "\n",
    "Once your vector store has been created and the relevant documents have been added, you will most likely wish to query it while running your chain or agent."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Query directly\n",
    "\n",
    "Performing a simple similarity search can be done as follows:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "* qux [{'text': 'qux', 'id': '1', 'source': 'https://another-example.com'}]\n"
     ]
    }
   ],
   "source": [
    "results = vector_store.similarity_search(\n",
    "    query=\"thud\", k=1, filter={\"source\": \"https://another-example.com\"}\n",
    ")\n",
    "for doc in results:\n",
    "    print(f\"* {doc.page_content} [{doc.metadata}]\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you want to execute a similarity search and receive the corresponding scores, you can run:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "* [SIM=0.000001] bar [{'text': 'bar', 'id': '2', 'source': 'https://example.com'}]\n"
     ]
    }
   ],
   "source": [
    "results = vector_store.similarity_search_with_score(query=\"bar\")\n",
    "for doc, score in results:\n",
    "    print(f\"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]\")"
   ]
  },
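  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The integration also advertises maximal marginal relevance (MMR) search, which trades similarity off against diversity. A sketch using the method name from the standard `VectorStore` interface:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "results = vector_store.max_marginal_relevance_search(\"thud\", k=1, fetch_k=2)\n",
    "for doc in results:\n",
    "    print(f\"* {doc.page_content} [{doc.metadata}]\")"
   ]
  },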
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Query by turning into retriever\n",
    "You can also transform the vector store into a retriever for easier usage in your chains."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={'text': 'qux', 'id': '1', 'source': 'https://another-example.com'}, page_content='qux')]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "retriever = vector_store.as_retriever(search_type=\"mmr\", search_kwargs={\"k\": 1})\n",
    "retriever.invoke(\"thud\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Usage for retrieval-augmented generation\n",
    "For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
    "- <a href=\"https://python.langchain.com/v0.2/docs/tutorials/#working-with-external-knowledge\" target=\"_blank\">Tutorials: working with external knowledge</a>\n",
    "- <a href=\"https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag\" target=\"_blank\">How-to: Question and answer with RAG</a>\n",
    "- <a href=\"https://python.langchain.com/v0.2/docs/concepts/#retrieval\" target=\"_blank\">Retrieval conceptual docs</a>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## API reference\n",
    "For detailed documentation of all `FalkorDBVector` features and configurations, head to the API reference: https://python.langchain.com/api_reference/community/vectorstores/langchain_community.vectorstores.falkordb_vector.FalkorDBVector.html"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
1858 libs/community/langchain_community/vectorstores/falkordb_vector.py (new file)
File diff suppressed because it is too large.
@@ -0,0 +1,671 @@
"""
Integration tests for FalkorDB vector store functionality.

These tests validate the end-to-end process of constructing, indexing,
and searching vector embeddings in a FalkorDB instance. They include:
- Setting up the FalkorDB vector store with a local instance.
- Indexing documents with fake embeddings.
- Performing vector searches and validating results.

Note:
    These tests are conducted using a local FalkorDB instance but can also
    be run against a Cloud FalkorDB instance. Ensure that appropriate host
    and port configurations are set up before running the tests.
"""

import os
from math import isclose
from typing import Any, Dict, List

from dotenv import load_dotenv
from langchain_core.documents import Document

from langchain_community.vectorstores.falkordb_vector import (
    FalkorDBVector,
    SearchType,
    process_index_data,
)
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings

# Load environment variables from .env file
load_dotenv()

host = os.getenv("FALKORDB_HOST", "localhost")
port = int(os.getenv("FALKORDB_PORT", "6379"))

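# Dimensionality of the fake embeddings produced by FakeEmbeddingsWithOsDimension.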
OS_TOKEN_COUNT = 1535

texts = ["foo", "bar", "baz", "It is the end of the world. Take shelter!"]


def drop_vector_indexes(store: FalkorDBVector) -> None:
    """Clean up all vector indexes."""
    index_entity_labels: List[Any] = []
    index_entity_properties: List[Any] = []
    index_entity_types: List[Any] = []

    # get all indexes
    result = store._query(
        """
        CALL db.indexes()
        """
    )
    processed_result: List[Dict[str, Any]] = process_index_data(result)

    # collect every vector index's entity label, property, and type
    if isinstance(processed_result, list):
        for index in processed_result:
            if isinstance(index, dict):
                if index.get("index_type") == "VECTOR":
                    index_entity_labels.append(index["entity_label"])
                    index_entity_properties.append(index["entity_property"])
                    index_entity_types.append(index["entity_type"])

    # drop vector indexes
    for entity_label, entity_property, entity_type in zip(
        index_entity_labels, index_entity_properties, index_entity_types
    ):
        if entity_type == "NODE":
            store._database.drop_node_vector_index(
                label=entity_label,
                attribute=entity_property,
            )
        elif entity_type == "RELATIONSHIP":
            store._database.drop_edge_vector_index(
                label=entity_label,
                attribute=entity_property,
            )


class FakeEmbeddingsWithOsDimension(FakeEmbeddings):
    """Fake embeddings functionality for testing."""

    def embed_documents(self, embedding_texts: List[str]) -> List[List[float]]:
        """Return simple embeddings."""
        return [
            [float(1.0)] * (OS_TOKEN_COUNT - 1) + [float(i + 1)]
            for i in range(len(embedding_texts))
        ]

    def embed_query(self, text: str) -> List[float]:
        """Return simple embeddings."""
        return [float(1.0)] * (OS_TOKEN_COUNT - 1) + [float(texts.index(text) + 1)]
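
# Each fake embedding differs only in its final component, so every text's query
# vector is nearest to its own document vector; search results are deterministic.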


def test_falkordbvector() -> None:
    """Test end to end construction and search."""
    docsearch = FalkorDBVector.from_texts(
        texts=texts,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    output = docsearch.similarity_search("foo", k=1)
    assert type(output) is list
    assert type(output[0]) is Document
    assert output[0].page_content == "foo"

    drop_vector_indexes(docsearch)


def test_falkordbvector_embeddings() -> None:
    """Test end to end construction with embeddings and search."""
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    docsearch = FalkorDBVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    output = docsearch.similarity_search("foo", k=1)
    assert type(output) is list
    assert type(output[0]) is Document
    assert output[0].page_content == "foo"

    drop_vector_indexes(docsearch)


def test_falkordbvector_catch_wrong_node_label() -> None:
    """Test if node label is misspelled, but index name is correct."""
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    docsearch = FalkorDBVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    try:
        FalkorDBVector.from_existing_index(
            embedding=FakeEmbeddingsWithOsDimension(),
            host=host,
            port=port,
            node_label="test",
        )
    except Exception as e:
        assert type(e) is ValueError
        assert str(e) == (
            "The specified vector index node label "
            + "`test` does not exist. Make sure to"
            + " check if you spelled the node label correctly"
        )
    drop_vector_indexes(docsearch)


def test_falkordbvector_with_metadatas() -> None:
    """Test end to end construction and search."""
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = FalkorDBVector.from_texts(
        texts=texts,
        embedding=FakeEmbeddingsWithOsDimension(),
        metadatas=metadatas,
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    output = docsearch.similarity_search("foo", k=1)
    assert type(output) is list
    assert type(output[0]) is Document
    assert output[0].metadata.get("page") == "0"

    drop_vector_indexes(docsearch)


def test_falkordbvector_with_metadatas_with_scores() -> None:
    """Test end to end construction and search."""
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = FalkorDBVector.from_texts(
        texts=texts,
        embedding=FakeEmbeddingsWithOsDimension(),
        metadatas=metadatas,
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    output = [
        (doc, round(score, 1))
        for doc, score in docsearch.similarity_search_with_score("foo", k=1)
    ]
    assert output == [
        (
            Document(
                metadata={
                    "text": "foo",
                    "id": "acbd18db4cc2f85cedef654fccc4a4d8",
                    "page": "0",
                },
                page_content="foo",
            ),
            0.0,
        )
    ]
    drop_vector_indexes(docsearch)


def test_falkordb_relevance_score() -> None:
    """Test to make sure the relevance score is scaled to 0-2."""
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = FalkorDBVector.from_texts(
        texts=texts,
        embedding=FakeEmbeddingsWithOsDimension(),
        metadatas=metadatas,
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    output = docsearch.similarity_search_with_relevance_scores("foo", k=3)
    expected_output = [
        (
            Document(
                metadata={
                    "text": "foo",
                    "id": "acbd18db4cc2f85cedef654fccc4a4d8",
                    "page": "0",
                },
                page_content="foo",
            ),
            0.0,
        ),
        (
            Document(
                metadata={
                    "text": "bar",
                    "id": "37b51d194a7513e45b56f6524f2d51f2",
                    "page": "1",
                },
                page_content="bar",
            ),
            1.0,
        ),
        (
            Document(
                metadata={
                    "text": "baz",
                    "id": "73feffa4b7f6bb68e44cf984c85f6e88",
                    "page": "2",
                },
                page_content="baz",
            ),
            2.0,
        ),
    ]

    # Check if the length of the outputs matches
    assert len(output) == len(expected_output)

    # Check if each document and its relevance score is close to the expected value
    for (doc, score), (expected_doc, expected_score) in zip(output, expected_output):
        assert doc.page_content == expected_doc.page_content
        assert doc.metadata == expected_doc.metadata
        assert isclose(score, expected_score, rel_tol=1e-5)

    drop_vector_indexes(docsearch)


def test_falkordbvector_retriever_search_threshold() -> None:
    """Test using retriever for searching with threshold."""
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = FalkorDBVector.from_texts(
        texts=texts,
        embedding=FakeEmbeddingsWithOsDimension(),
        metadatas=metadatas,
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    retriever = docsearch.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"k": 1, "score_threshold": 0.9999},
    )
    output = retriever.invoke("foo")
    assert output == [
        Document(
            metadata={
                "text": "foo",
                "id": "acbd18db4cc2f85cedef654fccc4a4d8",
                "page": "0",
            },
            page_content="foo",
        )
    ]

    drop_vector_indexes(docsearch)


def test_custom_return_falkordbvector() -> None:
    """Test end to end construction and search."""
    docsearch = FalkorDBVector.from_texts(
        texts=["test"],
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
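        # retrieval_query replaces the RETURN clause of the generated Cypher and
        # must yield text, score, and metadata columns.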
        retrieval_query="RETURN 'foo' AS text, score, {test: 'test'} AS metadata",
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo", metadata={"test": "test"})]

    drop_vector_indexes(docsearch)


def test_falkordb_hybrid() -> None:
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    docsearch = FalkorDBVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
        search_type=SearchType.HYBRID,
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == [
        Document(
            metadata={"text": "foo", "id": "acbd18db4cc2f85cedef654fccc4a4d8"},
            page_content="foo",
        )
    ]

    drop_vector_indexes(docsearch)


def test_falkordb_hybrid_deduplicate() -> None:
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    docsearch = FalkorDBVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
        search_type=SearchType.HYBRID,
    )
    output = docsearch.similarity_search("foo", k=3)
    assert output == [
        Document(
            metadata={"text": "baz", "id": "73feffa4b7f6bb68e44cf984c85f6e88"},
            page_content="baz",
        ),
        Document(
            metadata={"text": "foo", "id": "acbd18db4cc2f85cedef654fccc4a4d8"},
            page_content="foo",
        ),
        Document(
            metadata={"text": "bar", "id": "37b51d194a7513e45b56f6524f2d51f2"},
            page_content="bar",
        ),
    ]

    drop_vector_indexes(docsearch)


def test_falkordb_hybrid_retrieval_query() -> None:
    """Test custom retrieval_query with hybrid search."""
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    docsearch = FalkorDBVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
        search_type=SearchType.HYBRID,
        retrieval_query="RETURN 'moo' AS text, score, {test: 'test'} AS metadata",
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == [Document(page_content="moo", metadata={"test": "test"})]
    drop_vector_indexes(docsearch)


def test_falkordbvector_missing_keyword() -> None:
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    node_label = "vector"
    docsearch = FalkorDBVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    try:
        FalkorDBVector.from_existing_index(
            embedding=FakeEmbeddingsWithOsDimension(),
            host=host,
            port=port,
            node_label=node_label,
            search_type=SearchType.HYBRID,
        )
    except Exception as e:
        assert str(e) == (
            "The specified vector index node label "
            + f"`{node_label}` does not exist. Make sure"
            + " to check if you spelled the node label correctly"
        )

    drop_vector_indexes(docsearch)


def test_falkordb_hybrid_from_existing() -> None:
    """Test hybrid search on a store loaded with from_existing_index."""
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    docsearch = FalkorDBVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
        search_type=SearchType.HYBRID,
    )
    existing = FalkorDBVector.from_existing_index(
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        node_label="Chunk",  # default node label
        search_type=SearchType.HYBRID,
    )
    output = existing.similarity_search("foo", k=1)
    assert output == [
        Document(
            metadata={"text": "foo", "id": "acbd18db4cc2f85cedef654fccc4a4d8"},
            page_content="foo",
        )
    ]

    drop_vector_indexes(existing)
    drop_vector_indexes(docsearch)


def test_falkordbvector_from_existing_graph() -> None:
    """Test from_existing_graph with a single property."""
    graph = FalkorDBVector.from_texts(
        texts=["test"],
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        node_label="Foo",
        embedding_node_property="vector",
        text_node_property="info",
        pre_delete_collection=True,
    )
    graph._query("MATCH (n) DELETE n")
    graph._query("CREATE (:Test {name:'Foo'}), (:Test {name:'Bar'})")
    assert graph.database_name, "Database name cannot be empty or None"
    existing = FalkorDBVector.from_existing_graph(
        embedding=FakeEmbeddingsWithOsDimension(),
        database=graph.database_name,
        host=host,
        port=port,
        node_label="Test",
        text_node_properties=["name"],
        embedding_node_property="embedding",
    )

    output = existing.similarity_search("foo", k=2)

    assert [output[0]] == [Document(page_content="\nname: Foo")]

    drop_vector_indexes(existing)


def test_falkordb_from_existing_graph_multiple_properties() -> None:
    """Test from_existing_graph with two properties."""
    graph = FalkorDBVector.from_texts(
        texts=["test"],
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        node_label="Foo",
        embedding_node_property="vector",
        text_node_property="info",
        pre_delete_collection=True,
    )
    graph._query("MATCH (n) DELETE n")
    graph._query("CREATE (:Test {name:'Foo', name2: 'Fooz'}), (:Test {name:'Bar'})")
    assert graph.database_name, "Database name cannot be empty or None"
    existing = FalkorDBVector.from_existing_graph(
        embedding=FakeEmbeddingsWithOsDimension(),
        database=graph.database_name,
        host=host,
        port=port,
        node_label="Test",
        text_node_properties=["name", "name2"],
        embedding_node_property="embedding",
    )

    output = existing.similarity_search("foo", k=2)
    assert [output[0]] == [Document(page_content="\nname: Foo\nname2: Fooz")]

    drop_vector_indexes(existing)
    drop_vector_indexes(graph)


def test_falkordbvector_special_character() -> None:
    """Test hybrid search on text that contains special characters."""
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    docsearch = FalkorDBVector.from_embeddings(
        text_embeddings=text_embedding_pairs,
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        pre_delete_collection=True,
        search_type=SearchType.HYBRID,
    )
    output = docsearch.similarity_search(
        "It is the end of the world. Take shelter!", k=1
    )

    assert output == [
        Document(
            metadata={
                "text": "It is the end of the world. Take shelter!",
                "id": "84768c9c477cbe05fbafbe7247990051",
            },
            page_content="It is the end of the world. Take shelter!",
        )
    ]
    drop_vector_indexes(docsearch)


def test_falkordb_from_existing_graph_multiple_properties_hybrid() -> None:
    """Test from_existing_graph with two properties and hybrid search."""
    graph = FalkorDBVector.from_texts(
        texts=["test"],
        embedding=FakeEmbeddingsWithOsDimension(),
        host=host,
        port=port,
        node_label="Foo",
        embedding_node_property="vector",
        text_node_property="info",
        pre_delete_collection=True,
    )

    graph._query("MATCH (n) DELETE n")
    graph._query("CREATE (:Test {name:'Foo', name2: 'Fooz'}), (:Test {name:'Bar'})")
    assert graph.database_name, "Database name cannot be empty or None"
    existing = FalkorDBVector.from_existing_graph(
        embedding=FakeEmbeddingsWithOsDimension(),
        database=graph.database_name,
        host=host,
        port=port,
        node_label="Test",
        text_node_properties=["name", "name2"],
        embedding_node_property="embedding",
        search_type=SearchType.HYBRID,
    )

    output = existing.similarity_search("foo", k=2)

    assert [output[0]] == [Document(page_content="\nname: Foo\nname2: Fooz")]

    drop_vector_indexes(existing)


def test_index_fetching() -> None:
    """Test correct index creation and fetching."""
    text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
    text_embedding_pairs = list(zip(texts, text_embeddings))
    embeddings = FakeEmbeddingsWithOsDimension()

    def create_store(node_label: str, text_properties: List[str]) -> FalkorDBVector:
        return FalkorDBVector.from_embeddings(
            text_embeddings=text_embedding_pairs,
            embedding=FakeEmbeddingsWithOsDimension(),
            node_label=node_label,
            host=host,
            port=port,
            pre_delete_collection=True,
        )

    def fetch_store(node_label: str) -> FalkorDBVector:
        store = FalkorDBVector.from_existing_index(
            embedding=embeddings,
            host=host,
            port=port,
            node_label=node_label,
        )
        return store

    index_0_str = "label0"
    create_store(index_0_str, ["text"])

    # create index 1
    index_1_str = "label1"
    create_store("label1", ["text"])

    index_1_store = fetch_store(index_1_str)
    assert index_1_store.node_label == index_1_str

    index_0_store = fetch_store(index_0_str)
    assert index_0_store.node_label == index_0_str

    drop_vector_indexes(index_1_store)
    drop_vector_indexes(index_0_store)


def test_retrieval_params() -> None:
    """Test if we use parameters in retrieval query."""
    docsearch = FalkorDBVector.from_texts(
        texts=texts,
        embedding=FakeEmbeddings(),
        pre_delete_collection=True,
        retrieval_query="""
            RETURN $test as text, score, {test: $test1} AS metadata
        """,
    )
    output = docsearch.similarity_search(
        "Foo", k=2, params={"test": "test", "test1": "test1"}
    )
    assert output == [
        Document(page_content="test", metadata={"test": "test1"}),
        Document(page_content="test", metadata={"test": "test1"}),
    ]
    drop_vector_indexes(docsearch)


def test_falkordb_relationship_index() -> None:
    """Test end to end construction and search."""
    embeddings = FakeEmbeddingsWithOsDimension()
    docsearch = FalkorDBVector.from_texts(
        texts=texts,
        embedding=embeddings,
        host=host,
        port=port,
        pre_delete_collection=True,
    )
    # Ingest data
    docsearch._query(
        (
            "MERGE (p1:Person)"
            "MERGE (p2:Person)"
            "MERGE (p3:Person)"
            "MERGE (p4:Person)"
            "MERGE (p1)-[:REL {text: 'foo', embedding: vecf32($e1)}]->(p2)"
            "MERGE (p3)-[:REL {text: 'far', embedding: vecf32($e2)}]->(p4)"
        ),
        params={
            "e1": embeddings.embed_query("foo"),
            "e2": embeddings.embed_query("bar"),
        },
    )
    # Create relationship index
    docsearch.create_new_index_on_relationship(
        relation_type="REL",
        embedding_node_property="embedding",
        embedding_dimension=OS_TOKEN_COUNT,
    )
    relationship_index = FalkorDBVector.from_existing_relationship_index(
        embeddings, relation_type="REL"
    )
    output = relationship_index.similarity_search("foo", k=1)
    assert output == [Document(metadata={"text": "foo"}, page_content="foo")]

    drop_vector_indexes(docsearch)
    drop_vector_indexes(relationship_index)
@@ -0,0 +1,24 @@
"""Test utility functions in falkordb_vector.py."""

from langchain_community.vectorstores.falkordb_vector import (
    dict_to_yaml_str,
)


def test_converting_to_yaml() -> None:
    example_dict = {
        "name": "John Doe",
        "age": 30,
        "skills": ["Python", "Data Analysis", "Machine Learning"],
        "location": {"city": "Ljubljana", "country": "Slovenia"},
    }

    yaml_str = dict_to_yaml_str(example_dict)

    expected_output = (
        "name: John Doe\nage: 30\nskills:\n- Python\n- "
        "Data Analysis\n- Machine Learning\nlocation:\n  city: Ljubljana\n"
        "  country: Slovenia\n"
    )

    assert yaml_str == expected_output