community: added FalkorDB vector store support, i.e. implementation, tests, docs an… (#26245)

**Description:** Added support for FalkorDB Vector Store, including its
implementation, unit tests, documentation, and an example notebook. The
FalkorDB integration allows users to efficiently manage and query
embeddings in a vector database, with relevance scoring and maximal
marginal relevance search. The following components were implemented:

- Core implementation for FalkorDBVector store.
- Unit tests ensuring proper functionality and edge case coverage.
- Example notebook demonstrating an end-to-end setup, search, and
retrieval using FalkorDB.
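
A minimal usage sketch of the new API (hedged: only names that appear in this PR's notebook and tests are used; `embeddings` stands in for any LangChain `Embeddings` implementation):

```python
from langchain_community.vectorstores.falkordb_vector import FalkorDBVector

# Build a store from raw texts against a local FalkorDB instance
store = FalkorDBVector.from_texts(
    texts=["foo", "bar", "baz"],
    embedding=embeddings,
    host="localhost",
    port=6379,
)

# Similarity search with relevance scores
for doc, score in store.similarity_search_with_score("foo", k=1):
    print(doc.page_content, score)
```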

**Twitter handle:** @tariyekorogha

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Commit d262d41cc0 by Tari Yekorogha, 2024-12-16 20:37:55 +01:00, committed by GitHub (parent 12fced13f4)
4 changed files with 2990 additions and 0 deletions


@@ -0,0 +1,437 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# FalkorDBVectorStore\n",
"<a href=\"https://docs.falkordb.com/\" target=\"_blank\">FalkorDB</a> is an open-source graph database with integrated support for vector similarity search\n",
"\n",
"it supports:\n",
"- approximate nearest neighbor search\n",
"- Euclidean similarity & Cosine Similarity\n",
"- Hybrid search combining vector and keyword searches\n",
"\n",
"This notebook shows how to use the FalkorDB vector index (`FalkorDB`)\n",
"\n",
"See the <a href=\"https://docs.falkordb.com/\" target=\"_blank\">installation instruction</a>\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: falkordb in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (1.0.10)Note: you may need to restart the kernel to use updated packages.\n",
"\n",
"Requirement already satisfied: redis<6.0.0,>=5.0.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from falkordb) (5.2.0)\n",
"Requirement already satisfied: async-timeout>=4.0.3 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from redis<6.0.0,>=5.0.1->falkordb) (4.0.3)\n",
"Requirement already satisfied: tiktoken in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (0.8.0)\n",
"Requirement already satisfied: regex>=2022.1.18 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from tiktoken) (2024.11.6)\n",
"Requirement already satisfied: requests>=2.26.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from tiktoken) (2.32.3)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests>=2.26.0->tiktoken) (3.4.0)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests>=2.26.0->tiktoken) (3.10)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests>=2.26.0->tiktoken) (1.26.20)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests>=2.26.0->tiktoken) (2024.8.30)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: langchain in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (0.3.9)Note: you may need to restart the kernel to use updated packages.\n",
"\n",
"Requirement already satisfied: langchain_huggingface in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (0.1.2)\n",
"Requirement already satisfied: PyYAML>=5.3 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (6.0.2)\n",
"Requirement already satisfied: SQLAlchemy<3,>=1.4 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (2.0.36)\n",
"Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (3.11.8)\n",
"Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (4.0.3)\n",
"Requirement already satisfied: langchain-core<0.4.0,>=0.3.21 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (0.3.21)\n",
"Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (0.3.2)\n",
"Requirement already satisfied: langsmith<0.2.0,>=0.1.17 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (0.1.147)\n",
"Requirement already satisfied: numpy<2,>=1.22.4 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (1.26.4)\n",
"Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (2.9.2)\n",
"Requirement already satisfied: requests<3,>=2 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (2.32.3)\n",
"Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain) (9.0.0)\n",
"Requirement already satisfied: huggingface-hub>=0.23.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain_huggingface) (0.26.3)\n",
"Requirement already satisfied: sentence-transformers>=2.6.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain_huggingface) (3.3.1)\n",
"Requirement already satisfied: tokenizers>=0.19.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain_huggingface) (0.20.3)\n",
"Requirement already satisfied: transformers>=4.39.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain_huggingface) (4.46.3)\n",
"Requirement already satisfied: aiohappyeyeballs>=2.3.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.4.4)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n",
"Requirement already satisfied: attrs>=17.3.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (24.2.0)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.5.0)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.1.0)\n",
"Requirement already satisfied: propcache>=0.2.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (0.2.0)\n",
"Requirement already satisfied: yarl<2.0,>=1.17.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.18.0)\n",
"Requirement already satisfied: filelock in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (3.16.1)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (2024.10.0)\n",
"Requirement already satisfied: packaging>=20.9 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (24.2)\n",
"Requirement already satisfied: tqdm>=4.42.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (4.67.1)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (4.12.2)\n",
"Requirement already satisfied: jsonpatch<2.0,>=1.33 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langchain-core<0.4.0,>=0.3.21->langchain) (1.33)\n",
"Requirement already satisfied: httpx<1,>=0.23.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langsmith<0.2.0,>=0.1.17->langchain) (0.27.2)\n",
"Requirement already satisfied: orjson<4.0.0,>=3.9.14 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langsmith<0.2.0,>=0.1.17->langchain) (3.10.12)\n",
"Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from langsmith<0.2.0,>=0.1.17->langchain) (1.0.0)\n",
"Requirement already satisfied: annotated-types>=0.6.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.23.4 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.23.4)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests<3,>=2->langchain) (3.4.0)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests<3,>=2->langchain) (3.10)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests<3,>=2->langchain) (1.26.20)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from requests<3,>=2->langchain) (2024.8.30)\n",
"Requirement already satisfied: torch>=1.11.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from sentence-transformers>=2.6.0->langchain_huggingface) (2.5.1)\n",
"Requirement already satisfied: scikit-learn in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from sentence-transformers>=2.6.0->langchain_huggingface) (1.5.2)\n",
"Requirement already satisfied: scipy in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from sentence-transformers>=2.6.0->langchain_huggingface) (1.13.1)\n",
"Requirement already satisfied: Pillow in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from sentence-transformers>=2.6.0->langchain_huggingface) (11.0.0)\n",
"Requirement already satisfied: greenlet!=0.4.17 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from SQLAlchemy<3,>=1.4->langchain) (3.1.1)\n",
"Requirement already satisfied: regex!=2019.12.17 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from transformers>=4.39.0->langchain_huggingface) (2024.11.6)\n",
"Requirement already satisfied: safetensors>=0.4.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from transformers>=4.39.0->langchain_huggingface) (0.4.5)\n",
"Requirement already satisfied: anyio in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (4.6.2.post1)\n",
"Requirement already satisfied: httpcore==1.* in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (1.0.7)\n",
"Requirement already satisfied: sniffio in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (1.3.1)\n",
"Requirement already satisfied: h11<0.15,>=0.13 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (0.14.0)\n",
"Requirement already satisfied: jsonpointer>=1.9 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.21->langchain) (3.0.0)\n",
"Requirement already satisfied: networkx in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (3.2.1)\n",
"Requirement already satisfied: jinja2 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (3.1.4)\n",
"Requirement already satisfied: sympy==1.13.1 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (1.13.1)\n",
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from sympy==1.13.1->torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (1.3.0)\n",
"Requirement already satisfied: colorama in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from tqdm>=4.42.1->huggingface-hub>=0.23.0->langchain_huggingface) (0.4.6)\n",
"Requirement already satisfied: joblib>=1.2.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from scikit-learn->sentence-transformers>=2.6.0->langchain_huggingface) (1.4.2)\n",
"Requirement already satisfied: threadpoolctl>=3.1.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from scikit-learn->sentence-transformers>=2.6.0->langchain_huggingface) (3.5.0)\n",
"Requirement already satisfied: exceptiongroup>=1.0.2 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from anyio->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.17->langchain) (1.2.2)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\dell\\desktop\\langchain\\.venv\\lib\\site-packages (from jinja2->torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (3.0.2)\n"
]
}
],
"source": [
"# Pip install necessary package\n",
"%pip install --upgrade falkordb\n",
"%pip install --upgrade tiktoken\n",
"%pip install --upgrade langchain langchain_huggingface"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Credentials\n",
"We want to use `HuggingFace` so we have to get the HuggingFace API Key"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"if \"HUGGINGFACE_API_KEY\" not in os.environ:\n",
" os.environ[\"HUGGINGFACE_API_KEY\"] = getpass.getpass(\"HuggingFace API Key:\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to get automated tracing of your model calls you can also set your LangSmith API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialization"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.vectorstores.falkordb_vector import FalkorDBVector\n",
"from langchain_core.documents import Document\n",
"from langchain_huggingface import HuggingFaceEmbeddings"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can use FalkorDBVector locally with docker. See <a href=\"https://docs.falkordb.com/\" target=\"_blank\">installation instruction</a>"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"host = \"localhost\"\n",
"port = 6379"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Or you can use FalkorDBVector with <a href=\"https://app.falkordb.cloud\">FalkorDB Cloud</a>"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# E.g\n",
"# host = \"r-6jissuruar.instance-zwb082gpf.hc-v8noonp0c.europe-west1.gcp.f2e0a955bb84.cloud\"\n",
"# port = 62471\n",
"# username = \"falkordb\" # SET ON FALKORDB CLOUD\n",
"# password = \"password\" # SET ON FALKORDB CLOUD"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"vector_store = FalkorDBVector(host=host, port=port, embedding=HuggingFaceEmbeddings())"
]
},
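{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can also enable hybrid search, which combines vector similarity with keyword search. A minimal sketch (assuming `from_texts` accepts the same `search_type` option as the `from_embeddings` constructor used in this integration's tests):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hybrid mode combines vector similarity with keyword search\n",
"from langchain_community.vectorstores.falkordb_vector import SearchType\n",
"\n",
"hybrid_store = FalkorDBVector.from_texts(\n",
"    texts=[\"foo\", \"bar\"],\n",
"    embedding=HuggingFaceEmbeddings(),\n",
"    host=host,\n",
"    port=port,\n",
"    search_type=SearchType.HYBRID,\n",
")"
]
},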
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Manage vector store"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Add items to vector store"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['1', '2', '3']"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.documents import Document\n",
"\n",
"document_1 = Document(page_content=\"foo\", metadata={\"source\": \"https://example.com\"})\n",
"\n",
"document_2 = Document(page_content=\"bar\", metadata={\"source\": \"https://example.com\"})\n",
"\n",
"document_3 = Document(page_content=\"baz\", metadata={\"source\": \"https://example.com\"})\n",
"\n",
"documents = [document_1, document_2, document_3]\n",
"\n",
"vector_store.add_documents(documents=documents, ids=[\"1\", \"2\", \"3\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Update items in vector store"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"updated_document = Document(\n",
" page_content=\"qux\", metadata={\"source\": \"https://another-example.com\"}\n",
")\n",
"\n",
"vector_store.update_documents(document_id=\"1\", document=updated_document)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Delete items from vector store"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"vector_store.delete(ids=[\"3\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Query vector store\n",
"\n",
"Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Query directly\n",
"\n",
"Performing a simple similarity search can be done as follows:"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* qux [{'text': 'qux', 'id': '1', 'source': 'https://another-example.com'}]\n"
]
}
],
"source": [
"results = vector_store.similarity_search(\n",
" query=\"thud\", k=1, filter={\"source\": \"https://another-example.com\"}\n",
")\n",
"for doc in results:\n",
" print(f\"* {doc.page_content} [{doc.metadata}]\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you want to execute a similarity search and receive the corresponding scores you can run:"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"* [SIM=0.000001] bar [{'text': 'bar', 'id': '2', 'source': 'https://example.com'}]\n"
]
}
],
"source": [
"results = vector_store.similarity_search_with_score(query=\"bar\")\n",
"for doc, score in results:\n",
" print(f\"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Query by turning into retriever\n",
"You can also transform the vector store into a retriever for easier usage in your chains."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(metadata={'text': 'qux', 'id': '1', 'source': 'https://another-example.com'}, page_content='qux')]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever = vector_store.as_retriever(search_type=\"mmr\", search_kwargs={\"k\": 1})\n",
"retriever.invoke(\"thud\")"
]
},
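{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Maximal marginal relevance search\n",
"The integration also supports maximal marginal relevance (MMR) search. A minimal sketch, assuming the standard `VectorStore` `max_marginal_relevance_search` method:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# MMR balances similarity to the query against diversity among results\n",
"results = vector_store.max_marginal_relevance_search(\"thud\", k=1, fetch_k=2)\n",
"for doc in results:\n",
"    print(f\"* {doc.page_content} [{doc.metadata}]\")"
]
},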
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Usage for retrieval-augmented generation\n",
"For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n",
"- <a href=\"https://python.langchain.com/v0.2/docs/tutorials/#working-with-external-knowledge\" target=\"_blank\">Tutorials: working with external knowledge</a>\n",
"- <a href=\"https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag\" target=\"_blank\">How-to: Question and answer with RAG</a>\n",
"- <a href=\"Retrieval conceptual docs\" target=\"_blank\">Retrieval conceptual docs</a>\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## API reference\n",
"For detailed documentation of all `FalkorDBVector` features and configurations head to the API reference: https://python.langchain.com/api_reference/community/vectorstores/langchain_community.vectorstores.falkordb_vector.FalkorDBVector.html"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because it is too large.


@@ -0,0 +1,671 @@
"""
Integration tests for FalkorDB vector store functionality.
These tests validate the end-to-end process of constructing, indexing,
and searching vector embeddings in a FalkorDB instance. They include:
- Setting up the FalkorDB vector store with a local instance.
- Indexing documents with fake embeddings.
- Performing vector searches and validating results.
Note:
These tests are conducted using a local FalkorDB instance but can also
be run against a Cloud FalkorDB instance. Ensure that appropriate host
and port configurations are set up before running the tests.
"""
import os
from math import isclose
from typing import Any, Dict, List

import pytest
from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain_community.vectorstores.falkordb_vector import (
FalkorDBVector,
SearchType,
process_index_data,
)
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
# Load environment variables from .env file
load_dotenv()
host = os.getenv("FALKORDB_HOST", "localhost")
port = int(os.getenv("FALKORDB_PORT", 6379))
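# A local FalkorDB instance can be started with Docker, e.g.:
#   docker run -p 6379:6379 -it --rm falkordb/falkordb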
OS_TOKEN_COUNT = 1535
texts = ["foo", "bar", "baz", "It is the end of the world. Take shelter!"]
def drop_vector_indexes(store: FalkorDBVector) -> None:
"""Cleanup all vector indexes"""
index_entity_labels: List[Any] = []
index_entity_properties: List[Any] = []
index_entity_types: List[Any] = []
# get all indexes
result = store._query(
"""
CALL db.indexes()
"""
)
processed_result: List[Dict[str, Any]] = process_index_data(result)
    # get each vector index's entity label, entity property, and entity type
if isinstance(processed_result, list):
for index in processed_result:
if isinstance(index, dict):
if index.get("index_type") == "VECTOR":
index_entity_labels.append(index["entity_label"])
index_entity_properties.append(index["entity_property"])
index_entity_types.append(index["entity_type"])
    # drop vector indexes
for entity_label, entity_property, entity_type in zip(
index_entity_labels, index_entity_properties, index_entity_types
):
if entity_type == "NODE":
store._database.drop_node_vector_index(
label=entity_label,
attribute=entity_property,
)
elif entity_type == "RELATIONSHIP":
store._database.drop_edge_vector_index(
label=entity_label,
attribute=entity_property,
)
class FakeEmbeddingsWithOsDimension(FakeEmbeddings):
"""Fake embeddings functionality for testing."""
def embed_documents(self, embedding_texts: List[str]) -> List[List[float]]:
"""Return simple embeddings."""
return [
[float(1.0)] * (OS_TOKEN_COUNT - 1) + [float(i + 1)]
for i in range(len(embedding_texts))
]
def embed_query(self, text: str) -> List[float]:
"""Return simple embeddings."""
return [float(1.0)] * (OS_TOKEN_COUNT - 1) + [float(texts.index(text) + 1)]
def test_falkordbvector() -> None:
"""Test end to end construction and search."""
docsearch = FalkorDBVector.from_texts(
texts=texts,
embedding=FakeEmbeddingsWithOsDimension(),
host=host,
port=port,
pre_delete_collection=True,
)
output = docsearch.similarity_search("foo", k=1)
assert type(output) is list
assert type(output[0]) is Document
assert output[0].page_content == "foo"
drop_vector_indexes(docsearch)
def test_falkordbvector_embeddings() -> None:
"""Test end to end construction with embeddings and search."""
text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
text_embedding_pairs = list(zip(texts, text_embeddings))
docsearch = FalkorDBVector.from_embeddings(
text_embeddings=text_embedding_pairs,
embedding=FakeEmbeddingsWithOsDimension(),
host=host,
port=port,
pre_delete_collection=True,
)
output = docsearch.similarity_search("foo", k=1)
assert type(output) is list
assert type(output[0]) is Document
assert output[0].page_content == "foo"
drop_vector_indexes(docsearch)
def test_falkordbvector_catch_wrong_node_label() -> None:
"""Test if node label is misspelled, but index name is correct."""
text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
text_embedding_pairs = list(zip(texts, text_embeddings))
docsearch = FalkorDBVector.from_embeddings(
text_embeddings=text_embedding_pairs,
embedding=FakeEmbeddingsWithOsDimension(),
host=host,
port=port,
pre_delete_collection=True,
)
    with pytest.raises(ValueError) as excinfo:
        FalkorDBVector.from_existing_index(
            embedding=FakeEmbeddingsWithOsDimension(),
            host=host,
            port=port,
            node_label="test",
        )
    assert str(excinfo.value) == (
        "The specified vector index node label "
        "`test` does not exist. Make sure to"
        " check if you spelled the node label correctly"
    )
drop_vector_indexes(docsearch)
def test_falkordbvector_with_metadatas() -> None:
"""Test end to end construction and search."""
metadatas = [{"page": str(i)} for i in range(len(texts))]
docsearch = FalkorDBVector.from_texts(
texts=texts,
embedding=FakeEmbeddingsWithOsDimension(),
metadatas=metadatas,
host=host,
port=port,
pre_delete_collection=True,
)
output = docsearch.similarity_search("foo", k=1)
assert type(output) is list
assert type(output[0]) is Document
assert output[0].metadata.get("page") == "0"
drop_vector_indexes(docsearch)
def test_falkordbvector_with_metadatas_with_scores() -> None:
"""Test end to end construction and search."""
metadatas = [{"page": str(i)} for i in range(len(texts))]
docsearch = FalkorDBVector.from_texts(
texts=texts,
embedding=FakeEmbeddingsWithOsDimension(),
metadatas=metadatas,
host=host,
port=port,
pre_delete_collection=True,
)
output = [
(doc, round(score, 1))
for doc, score in docsearch.similarity_search_with_score("foo", k=1)
]
assert output == [
(
Document(
metadata={
"text": "foo",
"id": "acbd18db4cc2f85cedef654fccc4a4d8",
"page": "0",
},
page_content="foo",
),
0.0,
)
]
drop_vector_indexes(docsearch)
def test_falkordb_relevance_score() -> None:
"""Test to make sure the relevance score is scaled to 0-2."""
metadatas = [{"page": str(i)} for i in range(len(texts))]
docsearch = FalkorDBVector.from_texts(
texts=texts,
embedding=FakeEmbeddingsWithOsDimension(),
metadatas=metadatas,
host=host,
port=port,
pre_delete_collection=True,
)
output = docsearch.similarity_search_with_relevance_scores("foo", k=3)
expected_output = [
(
Document(
metadata={
"text": "foo",
"id": "acbd18db4cc2f85cedef654fccc4a4d8",
"page": "0",
},
page_content="foo",
),
0.0,
),
(
Document(
metadata={
"text": "bar",
"id": "37b51d194a7513e45b56f6524f2d51f2",
"page": "1",
},
page_content="bar",
),
1.0,
),
(
Document(
metadata={
"text": "baz",
"id": "73feffa4b7f6bb68e44cf984c85f6e88",
"page": "2",
},
page_content="baz",
),
2.0,
),
]
# Check if the length of the outputs matches
assert len(output) == len(expected_output)
# Check if each document and its relevance score is close to the expected value
for (doc, score), (expected_doc, expected_score) in zip(output, expected_output):
assert doc.page_content == expected_doc.page_content
assert doc.metadata == expected_doc.metadata
assert isclose(score, expected_score, rel_tol=1e-5)
drop_vector_indexes(docsearch)
def test_falkordbvector_retriever_search_threshold() -> None:
"""Test using retriever for searching with threshold."""
metadatas = [{"page": str(i)} for i in range(len(texts))]
docsearch = FalkorDBVector.from_texts(
texts=texts,
embedding=FakeEmbeddingsWithOsDimension(),
metadatas=metadatas,
host=host,
port=port,
pre_delete_collection=True,
)
retriever = docsearch.as_retriever(
search_type="similarity_score_threshold",
search_kwargs={"k": 1, "score_threshold": 0.9999},
)
output = retriever.invoke("foo")
assert output == [
Document(
metadata={
"text": "foo",
"id": "acbd18db4cc2f85cedef654fccc4a4d8",
"page": "0",
},
page_content="foo",
)
]
drop_vector_indexes(docsearch)
def test_custom_return_falkordbvector() -> None:
"""Test end to end construction and search."""
docsearch = FalkorDBVector.from_texts(
texts=["test"],
embedding=FakeEmbeddingsWithOsDimension(),
host=host,
port=port,
pre_delete_collection=True,
retrieval_query="RETURN 'foo' AS text, score, {test: 'test'} AS metadata",
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo", metadata={"test": "test"})]
drop_vector_indexes(docsearch)
def test_falkordb_hybrid() -> None:
text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
text_embedding_pairs = list(zip(texts, text_embeddings))
docsearch = FalkorDBVector.from_embeddings(
text_embeddings=text_embedding_pairs,
embedding=FakeEmbeddingsWithOsDimension(),
host=host,
port=port,
pre_delete_collection=True,
search_type=SearchType.HYBRID,
)
output = docsearch.similarity_search("foo", k=1)
assert output == [
Document(
metadata={"text": "foo", "id": "acbd18db4cc2f85cedef654fccc4a4d8"},
page_content="foo",
)
]
drop_vector_indexes(docsearch)
def test_falkordb_hybrid_deduplicate() -> None:
text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
text_embedding_pairs = list(zip(texts, text_embeddings))
docsearch = FalkorDBVector.from_embeddings(
text_embeddings=text_embedding_pairs,
embedding=FakeEmbeddingsWithOsDimension(),
host=host,
port=port,
pre_delete_collection=True,
search_type=SearchType.HYBRID,
)
output = docsearch.similarity_search("foo", k=3)
assert output == [
Document(
metadata={"text": "baz", "id": "73feffa4b7f6bb68e44cf984c85f6e88"},
page_content="baz",
),
Document(
metadata={"text": "foo", "id": "acbd18db4cc2f85cedef654fccc4a4d8"},
page_content="foo",
),
Document(
metadata={"text": "bar", "id": "37b51d194a7513e45b56f6524f2d51f2"},
page_content="bar",
),
]
drop_vector_indexes(docsearch)
def test_falkordb_hybrid_retrieval_query() -> None:
"""Test custom retrieval_query with hybrid search."""
text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
text_embedding_pairs = list(zip(texts, text_embeddings))
docsearch = FalkorDBVector.from_embeddings(
text_embeddings=text_embedding_pairs,
embedding=FakeEmbeddingsWithOsDimension(),
host=host,
port=port,
pre_delete_collection=True,
search_type=SearchType.HYBRID,
retrieval_query="RETURN 'moo' AS text, score, {test: 'test'} AS metadata",
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="moo", metadata={"test": "test"})]
drop_vector_indexes(docsearch)
def test_falkordbvector_missing_keyword() -> None:
text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
text_embedding_pairs = list(zip(texts, text_embeddings))
node_label = "vector"
docsearch = FalkorDBVector.from_embeddings(
text_embeddings=text_embedding_pairs,
embedding=FakeEmbeddingsWithOsDimension(),
host=host,
port=port,
pre_delete_collection=True,
)
    with pytest.raises(Exception) as excinfo:
        FalkorDBVector.from_existing_index(
            embedding=FakeEmbeddingsWithOsDimension(),
            host=host,
            port=port,
            node_label=node_label,
            search_type=SearchType.HYBRID,
        )
    assert str(excinfo.value) == (
        "The specified vector index node label "
        f"`{node_label}` does not exist. Make sure"
        " to check if you spelled the node label correctly"
    )
drop_vector_indexes(docsearch)
def test_falkordb_hybrid_from_existing() -> None:
"""Test hybrid search with missing keyword_index_search."""
text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
text_embedding_pairs = list(zip(texts, text_embeddings))
docsearch = FalkorDBVector.from_embeddings(
text_embeddings=text_embedding_pairs,
embedding=FakeEmbeddingsWithOsDimension(),
host=host,
port=port,
pre_delete_collection=True,
search_type=SearchType.HYBRID,
)
existing = FalkorDBVector.from_existing_index(
embedding=FakeEmbeddingsWithOsDimension(),
host=host,
port=port,
node_label="Chunk", # default node label
search_type=SearchType.HYBRID,
)
output = existing.similarity_search("foo", k=1)
assert output == [
Document(
metadata={"text": "foo", "id": "acbd18db4cc2f85cedef654fccc4a4d8"},
page_content="foo",
)
]
drop_vector_indexes(existing)
drop_vector_indexes(docsearch)
def test_falkordbvector_from_existing_graph() -> None:
"""Test from_existing_graph with a single property"""
graph = FalkorDBVector.from_texts(
texts=["test"],
embedding=FakeEmbeddingsWithOsDimension(),
host=host,
port=port,
node_label="Foo",
embedding_node_property="vector",
text_node_property="info",
pre_delete_collection=True,
)
graph._query("MATCH (n) DELETE n")
graph._query("CREATE (:Test {name:'Foo'}), (:Test {name:'Bar'})")
assert graph.database_name, "Database name cannot be empty or None"
existing = FalkorDBVector.from_existing_graph(
embedding=FakeEmbeddingsWithOsDimension(),
database=graph.database_name,
host=host,
port=port,
node_label="Test",
text_node_properties=["name"],
embedding_node_property="embedding",
)
output = existing.similarity_search("foo", k=2)
assert [output[0]] == [Document(page_content="\nname: Foo")]
drop_vector_indexes(existing)
def test_falkordb_from_existing_graph_multiple_properties() -> None:
    """Test from_existing_graph with two properties."""
graph = FalkorDBVector.from_texts(
texts=["test"],
embedding=FakeEmbeddingsWithOsDimension(),
host=host,
port=port,
node_label="Foo",
embedding_node_property="vector",
text_node_property="info",
pre_delete_collection=True,
)
graph._query("MATCH (n) DELETE n")
graph._query("CREATE (:Test {name:'Foo', name2: 'Fooz'}), (:Test {name:'Bar'})")
assert graph.database_name, "Database name cannot be empty or None"
existing = FalkorDBVector.from_existing_graph(
embedding=FakeEmbeddingsWithOsDimension(),
database=graph.database_name,
host=host,
port=port,
node_label="Test",
text_node_properties=["name", "name2"],
embedding_node_property="embedding",
)
output = existing.similarity_search("foo", k=2)
assert [output[0]] == [Document(page_content="\nname: Foo\nname2: Fooz")]
drop_vector_indexes(existing)
drop_vector_indexes(graph)
def test_falkordbvector_special_character() -> None:
"""Test removing lucene."""
text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
text_embedding_pairs = list(zip(texts, text_embeddings))
docsearch = FalkorDBVector.from_embeddings(
text_embeddings=text_embedding_pairs,
embedding=FakeEmbeddingsWithOsDimension(),
host=host,
port=port,
pre_delete_collection=True,
search_type=SearchType.HYBRID,
)
output = docsearch.similarity_search(
"It is the end of the world. Take shelter!", k=1
)
assert output == [
Document(
metadata={
"text": "It is the end of the world. Take shelter!",
"id": "84768c9c477cbe05fbafbe7247990051",
},
page_content="It is the end of the world. Take shelter!",
)
]
drop_vector_indexes(docsearch)
def test_falkordb_from_existing_graph_multiple_properties_hybrid() -> None:
    """Test from_existing_graph with two properties and hybrid search."""
graph = FalkorDBVector.from_texts(
texts=["test"],
embedding=FakeEmbeddingsWithOsDimension(),
host=host,
port=port,
node_label="Foo",
embedding_node_property="vector",
text_node_property="info",
pre_delete_collection=True,
)
graph._query("MATCH (n) DELETE n")
graph._query("CREATE (:Test {name:'Foo', name2: 'Fooz'}), (:Test {name:'Bar'})")
assert graph.database_name, "Database name cannot be empty or None"
existing = FalkorDBVector.from_existing_graph(
embedding=FakeEmbeddingsWithOsDimension(),
database=graph.database_name,
host=host,
port=port,
node_label="Test",
text_node_properties=["name", "name2"],
embedding_node_property="embedding",
search_type=SearchType.HYBRID,
)
output = existing.similarity_search("foo", k=2)
assert [output[0]] == [Document(page_content="\nname: Foo\nname2: Fooz")]
drop_vector_indexes(existing)
def test_index_fetching() -> None:
"""testing correct index creation and fetching"""
text_embeddings = FakeEmbeddingsWithOsDimension().embed_documents(texts)
text_embedding_pairs = list(zip(texts, text_embeddings))
embeddings = FakeEmbeddingsWithOsDimension()
def create_store(node_label: str, text_properties: List[str]) -> FalkorDBVector:
return FalkorDBVector.from_embeddings(
text_embeddings=text_embedding_pairs,
embedding=FakeEmbeddingsWithOsDimension(),
node_label=node_label,
host=host,
port=port,
pre_delete_collection=True,
)
def fetch_store(node_label: str) -> FalkorDBVector:
store = FalkorDBVector.from_existing_index(
embedding=embeddings,
host=host,
port=port,
node_label=node_label,
)
return store
index_0_str = "label0"
create_store(index_0_str, ["text"])
# create index 1
index_1_str = "label1"
create_store("label1", ["text"])
index_1_store = fetch_store(index_1_str)
assert index_1_store.node_label == index_1_str
index_0_store = fetch_store(index_0_str)
assert index_0_store.node_label == index_0_str
drop_vector_indexes(index_1_store)
drop_vector_indexes(index_0_store)
def test_retrieval_params() -> None:
"""Test if we use parameters in retrieval query"""
docsearch = FalkorDBVector.from_texts(
texts=texts,
embedding=FakeEmbeddings(),
pre_delete_collection=True,
retrieval_query="""
RETURN $test as text, score, {test: $test1} AS metadata
""",
)
output = docsearch.similarity_search(
"Foo", k=2, params={"test": "test", "test1": "test1"}
)
assert output == [
Document(page_content="test", metadata={"test": "test1"}),
Document(page_content="test", metadata={"test": "test1"}),
]
drop_vector_indexes(docsearch)
def test_falkordb_relationship_index() -> None:
"""Test end to end construction and search."""
embeddings = FakeEmbeddingsWithOsDimension()
docsearch = FalkorDBVector.from_texts(
texts=texts,
embedding=embeddings,
host=host,
port=port,
pre_delete_collection=True,
)
# Ingest data
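    # vecf32() is FalkorDB's Cypher function for building a float32 vector
    # from a list parameter, so the embeddings are stored as native vectors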
docsearch._query(
(
"MERGE (p1:Person)"
"MERGE (p2:Person)"
"MERGE (p3:Person)"
"MERGE (p4:Person)"
"MERGE (p1)-[:REL {text: 'foo', embedding: vecf32($e1)}]->(p2)"
"MERGE (p3)-[:REL {text: 'far', embedding: vecf32($e2)}]->(p4)"
),
params={
"e1": embeddings.embed_query("foo"),
"e2": embeddings.embed_query("bar"),
},
)
# Create relationship index
docsearch.create_new_index_on_relationship(
relation_type="REL",
embedding_node_property="embedding",
embedding_dimension=OS_TOKEN_COUNT,
)
relationship_index = FalkorDBVector.from_existing_relationship_index(
embeddings, relation_type="REL"
)
output = relationship_index.similarity_search("foo", k=1)
assert output == [Document(metadata={"text": "foo"}, page_content="foo")]
drop_vector_indexes(docsearch)
drop_vector_indexes(relationship_index)


@@ -0,0 +1,24 @@
"""Test utils function in falkordb_vector.py"""
from langchain_community.vectorstores.falkordb_vector import (
dict_to_yaml_str,
)
def test_converting_to_yaml() -> None:
example_dict = {
"name": "John Doe",
"age": 30,
"skills": ["Python", "Data Analysis", "Machine Learning"],
"location": {"city": "Ljubljana", "country": "Slovenia"},
}
yaml_str = dict_to_yaml_str(example_dict)
expected_output = (
"name: John Doe\nage: 30\nskills:\n- Python\n- "
"Data Analysis\n- Machine Learning\nlocation:\n city: Ljubljana\n"
" country: Slovenia\n"
)
assert yaml_str == expected_output