mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-16 15:04:13 +00:00
Added SingleStoreDB Vector Store (#5619)
- Added `SingleStoreDB` vector store, which is a wrapper over the SingleStore DB database, that can be used as a vector storage and has an efficient similarity search. - Added integration tests for the vector store - Added jupyter notebook with the example @dev2049 --------- Co-authored-by: Volodymyr Tkachuk <vtkachuk-ua@singlestore.com> Co-authored-by: Dev 2049 <dev.dev2049@gmail.com> Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
142
tests/integration_tests/vectorstores/test_singlestoredb.py
Normal file
142
tests/integration_tests/vectorstores/test_singlestoredb.py
Normal file
@@ -0,0 +1,142 @@
|
||||
"""Test SingleStoreDB functionality."""
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.vectorstores.singlestoredb import SingleStoreDB
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
|
||||
|
||||
TEST_SINGLESTOREDB_URL = "root:pass@localhost:3306/db"
|
||||
TEST_SINGLE_RESULT = [Document(page_content="foo")]
|
||||
TEST_SINGLE_WITH_METADATA_RESULT = [Document(page_content="foo", metadata={"a": "b"})]
|
||||
TEST_RESULT = [Document(page_content="foo"), Document(page_content="foo")]
|
||||
|
||||
try:
|
||||
import singlestoredb as s2
|
||||
|
||||
singlestoredb_installed = True
|
||||
except ImportError:
|
||||
singlestoredb_installed = False
|
||||
|
||||
|
||||
def drop(table_name: str) -> None:
|
||||
with s2.connect(TEST_SINGLESTOREDB_URL) as conn:
|
||||
conn.autocommit(True)
|
||||
with conn.cursor() as cursor:
|
||||
cursor.execute(f"DROP TABLE IF EXISTS {table_name};")
|
||||
|
||||
|
||||
class NormilizedFakeEmbeddings(FakeEmbeddings):
|
||||
"""Fake embeddings with normalization. For testing purposes."""
|
||||
|
||||
def normalize(self, vector: List[float]) -> List[float]:
|
||||
"""Normalize vector."""
|
||||
return [float(v / np.linalg.norm(vector)) for v in vector]
|
||||
|
||||
def embed_documents(self, texts: List[str]) -> List[List[float]]:
|
||||
return [self.normalize(v) for v in super().embed_documents(texts)]
|
||||
|
||||
def embed_query(self, text: str) -> List[float]:
|
||||
return self.normalize(super().embed_query(text))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def texts() -> List[str]:
|
||||
return ["foo", "bar", "baz"]
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
|
||||
def test_singlestoredb(texts: List[str]) -> None:
|
||||
"""Test end to end construction and search."""
|
||||
table_name = "test_singlestoredb"
|
||||
drop(table_name)
|
||||
docsearch = SingleStoreDB.from_texts(
|
||||
texts,
|
||||
NormilizedFakeEmbeddings(),
|
||||
table_name=table_name,
|
||||
host=TEST_SINGLESTOREDB_URL,
|
||||
)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == TEST_SINGLE_RESULT
|
||||
drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
|
||||
def test_singlestoredb_new_vector(texts: List[str]) -> None:
|
||||
"""Test adding a new document"""
|
||||
table_name = "test_singlestoredb_new_vector"
|
||||
drop(table_name)
|
||||
docsearch = SingleStoreDB.from_texts(
|
||||
texts,
|
||||
NormilizedFakeEmbeddings(),
|
||||
table_name=table_name,
|
||||
host=TEST_SINGLESTOREDB_URL,
|
||||
)
|
||||
docsearch.add_texts(["foo"])
|
||||
output = docsearch.similarity_search("foo", k=2)
|
||||
assert output == TEST_RESULT
|
||||
drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
|
||||
def test_singlestoredb_from_existing(texts: List[str]) -> None:
|
||||
"""Test adding a new document"""
|
||||
table_name = "test_singlestoredb_from_existing"
|
||||
drop(table_name)
|
||||
SingleStoreDB.from_texts(
|
||||
texts,
|
||||
NormilizedFakeEmbeddings(),
|
||||
table_name=table_name,
|
||||
host=TEST_SINGLESTOREDB_URL,
|
||||
)
|
||||
# Test creating from an existing
|
||||
docsearch2 = SingleStoreDB(
|
||||
NormilizedFakeEmbeddings(),
|
||||
table_name="test_singlestoredb_from_existing",
|
||||
host=TEST_SINGLESTOREDB_URL,
|
||||
)
|
||||
output = docsearch2.similarity_search("foo", k=1)
|
||||
assert output == TEST_SINGLE_RESULT
|
||||
drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
|
||||
def test_singlestoredb_from_documents(texts: List[str]) -> None:
|
||||
"""Test from_documents constructor."""
|
||||
table_name = "test_singlestoredb_from_documents"
|
||||
drop(table_name)
|
||||
docs = [Document(page_content=t, metadata={"a": "b"}) for t in texts]
|
||||
docsearch = SingleStoreDB.from_documents(
|
||||
docs,
|
||||
NormilizedFakeEmbeddings(),
|
||||
table_name=table_name,
|
||||
host=TEST_SINGLESTOREDB_URL,
|
||||
)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == TEST_SINGLE_WITH_METADATA_RESULT
|
||||
drop(table_name)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not singlestoredb_installed, reason="singlestoredb not installed")
|
||||
def test_singlestoredb_add_texts_to_existing(texts: List[str]) -> None:
|
||||
"""Test adding a new document"""
|
||||
table_name = "test_singlestoredb_add_texts_to_existing"
|
||||
drop(table_name)
|
||||
# Test creating from an existing
|
||||
SingleStoreDB.from_texts(
|
||||
texts,
|
||||
NormilizedFakeEmbeddings(),
|
||||
table_name=table_name,
|
||||
host=TEST_SINGLESTOREDB_URL,
|
||||
)
|
||||
docsearch = SingleStoreDB(
|
||||
NormilizedFakeEmbeddings(),
|
||||
table_name=table_name,
|
||||
host=TEST_SINGLESTOREDB_URL,
|
||||
)
|
||||
docsearch.add_texts(["foo"])
|
||||
output = docsearch.similarity_search("foo", k=2)
|
||||
assert output == TEST_RESULT
|
||||
drop(table_name)
|
Reference in New Issue
Block a user