community[minor]: Add relyt vector database (#20316)

Co-authored-by: kaka <kaka@zbyte-inc.cloud>
Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: jingsi <jingsi@leadincloud.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
This commit is contained in:
Jingpan Xiong
2024-04-26 03:49:29 +08:00
committed by GitHub
parent f386f71bb3
commit 1202017c56
7 changed files with 859 additions and 0 deletions

View File

@@ -0,0 +1,167 @@
"""Test Relyt functionality."""
import os
from typing import List
from langchain_core.documents import Document
from langchain_community.vectorstores.relyt import Relyt
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
CONNECTION_STRING = Relyt.connection_string_from_db_params(
driver=os.environ.get("PG_DRIVER", "psycopg2cffi"),
host=os.environ.get("PG_HOST", "localhost"),
port=int(os.environ.get("PG_PORT", "5432")),
database=os.environ.get("PG_DATABASE", "postgres"),
user=os.environ.get("PG_USER", "postgres"),
password=os.environ.get("PG_PASSWORD", "postgres"),
)
ADA_TOKEN_COUNT = 1536
class FakeEmbeddingsWithAdaDimension(FakeEmbeddings):
"""Fake embeddings functionality for testing."""
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Return simple embeddings."""
return [
[float(1.0)] * (ADA_TOKEN_COUNT - 1) + [float(i)] for i in range(len(texts))
]
def embed_query(self, text: str) -> List[float]:
"""Return simple embeddings."""
return [float(1.0)] * (ADA_TOKEN_COUNT - 1) + [float(0.0)]
def test_relyt() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
docsearch = Relyt.from_texts(
texts=texts,
collection_name="test_collection",
embedding=FakeEmbeddingsWithAdaDimension(),
connection_string=CONNECTION_STRING,
pre_delete_collection=True,
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
def test_relyt_with_engine_args() -> None:
engine_args = {"pool_recycle": 3600, "pool_size": 50}
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
docsearch = Relyt.from_texts(
texts=texts,
collection_name="test_collection",
embedding=FakeEmbeddingsWithAdaDimension(),
connection_string=CONNECTION_STRING,
pre_delete_collection=True,
engine_args=engine_args,
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
def test_relyt_with_metadatas() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
metadatas = [{"page": str(i)} for i in range(len(texts))]
docsearch = Relyt.from_texts(
texts=texts,
collection_name="test_collection",
embedding=FakeEmbeddingsWithAdaDimension(),
metadatas=metadatas,
connection_string=CONNECTION_STRING,
pre_delete_collection=True,
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo", metadata={"page": "0"})]
def test_relyt_with_metadatas_with_scores() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
metadatas = [{"page": str(i)} for i in range(len(texts))]
docsearch = Relyt.from_texts(
texts=texts,
collection_name="test_collection",
embedding=FakeEmbeddingsWithAdaDimension(),
metadatas=metadatas,
connection_string=CONNECTION_STRING,
pre_delete_collection=True,
)
output = docsearch.similarity_search_with_score("foo", k=1)
assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
def test_relyt_with_filter_match() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
metadatas = [{"page": str(i)} for i in range(len(texts))]
docsearch = Relyt.from_texts(
texts=texts,
collection_name="test_collection_filter",
embedding=FakeEmbeddingsWithAdaDimension(),
metadatas=metadatas,
connection_string=CONNECTION_STRING,
pre_delete_collection=True,
)
output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "0"})
assert output == [(Document(page_content="foo", metadata={"page": "0"}), 0.0)]
def test_relyt_with_filter_distant_match() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
metadatas = [{"page": str(i)} for i in range(len(texts))]
docsearch = Relyt.from_texts(
texts=texts,
collection_name="test_collection_filter",
embedding=FakeEmbeddingsWithAdaDimension(),
metadatas=metadatas,
connection_string=CONNECTION_STRING,
pre_delete_collection=True,
)
output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "2"})
print(output) # noqa: T201
assert output == [(Document(page_content="baz", metadata={"page": "2"}), 4.0)]
def test_relyt_with_filter_no_match() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
metadatas = [{"page": str(i)} for i in range(len(texts))]
docsearch = Relyt.from_texts(
texts=texts,
collection_name="test_collection_filter",
embedding=FakeEmbeddingsWithAdaDimension(),
metadatas=metadatas,
connection_string=CONNECTION_STRING,
pre_delete_collection=True,
)
output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "5"})
assert output == []
def test_relyt_delete() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]
ids = ["fooid", "barid", "bazid"]
metadatas = [{"page": str(i)} for i in range(len(texts))]
docsearch = Relyt.from_texts(
texts=texts,
collection_name="test_collection_delete",
embedding=FakeEmbeddingsWithAdaDimension(),
metadatas=metadatas,
connection_string=CONNECTION_STRING,
ids=ids,
pre_delete_collection=True,
)
output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "2"})
print(output) # noqa: T201
assert output == [(Document(page_content="baz", metadata={"page": "2"}), 4.0)]
docsearch.delete(ids=ids)
output = docsearch.similarity_search_with_score("foo", k=1, filter={"page": "2"})
assert output == []

View File

@@ -66,6 +66,7 @@ EXPECTED_ALL = [
"Pinecone",
"Qdrant",
"Redis",
"Relyt",
"Rockset",
"SKLearnVectorStore",
"SQLiteVSS",

View File

@@ -76,6 +76,7 @@ def test_compatible_vectorstore_documentation() -> None:
"Pinecone",
"Qdrant",
"Redis",
"Relyt",
"Rockset",
"ScaNN",
"SemaDB",

View File

@@ -61,6 +61,7 @@ _EXPECTED = [
"Pinecone",
"Qdrant",
"Redis",
"Relyt",
"Rockset",
"SKLearnVectorStore",
"ScaNN",