community[patch]: mmr search for Rockset vectorstore integration (#16908)

- **Description:** Adding support for mmr search in the Rockset
vectorstore integration.
  - **Issue:** N/A
  - **Dependencies:** N/A
  - **Twitter handle:** `@_morgan_adams_`

---------

Co-authored-by: Rockset API Bot <admin@rockset.io>
Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
This commit is contained in:
morgana
2024-03-29 11:45:22 -07:00
committed by GitHub
parent f51e6a35ba
commit 074ad5095f
2 changed files with 110 additions and 6 deletions

View File

@@ -96,16 +96,18 @@ class TestRockset:
client, embeddings, COLLECTION_NAME, TEXT_KEY, EMBEDDING_KEY, WORKSPACE
)
def test_rockset_insert_and_search(self) -> None:
"""Test end to end vector search in Rockset"""
texts = ["foo", "bar", "baz"]
metadatas = [{"metadata_index": i} for i in range(len(texts))]
ids = self.rockset_vectorstore.add_texts(
ids = cls.rockset_vectorstore.add_texts(
texts=texts,
metadatas=metadatas,
)
assert len(ids) == len(texts)
def test_rockset_search(self) -> None:
"""Test end-to-end vector search in Rockset"""
# Test that `foo` is closest to `foo`
output = self.rockset_vectorstore.similarity_search(
query="foo", distance_func=Rockset.DistanceFunction.COSINE_SIM, k=1
@@ -121,6 +123,26 @@ class TestRockset:
)
assert output == [Document(page_content="bar", metadata={"metadata_index": 1})]
def test_rockset_mmr_search(self) -> None:
"""Test end-to-end mmr search in Rockset"""
output = self.rockset_vectorstore.max_marginal_relevance_search(
query="foo",
distance_func=Rockset.DistanceFunction.COSINE_SIM,
fetch_k=1,
k=1,
)
assert output == [Document(page_content="foo", metadata={"metadata_index": 0})]
# Find closest vector to `foo` which is not `foo`
output = self.rockset_vectorstore.max_marginal_relevance_search(
query="foo",
distance_func=Rockset.DistanceFunction.COSINE_SIM,
fetch_k=3,
k=1,
where_str="metadata_index != 0",
)
assert output == [Document(page_content="bar", metadata={"metadata_index": 1})]
def test_add_documents_and_delete(self) -> None:
""" "add_documents" and "delete" are requirements to support use
with RecordManager"""
@@ -184,5 +206,21 @@ FROM {WORKSPACE}.{COLLECTION_NAME}
WHERE age >= 10
ORDER BY dist DESC
LIMIT 4
"""
assert q_str == expected
def test_build_query_sql_with_select_embeddings(self) -> None:
vector = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]
q_str = self.rockset_vectorstore._build_query_sql(
vector, Rockset.DistanceFunction.COSINE_SIM, 4, "age >= 10", False
)
vector_str = ",".join(map(str, vector))
expected = f"""\
SELECT *, \
COSINE_SIM({EMBEDDING_KEY}, [{vector_str}]) as dist
FROM {WORKSPACE}.{COLLECTION_NAME}
WHERE age >= 10
ORDER BY dist DESC
LIMIT 4
"""
assert q_str == expected