From d26555c68277267f995475dcc36358e92ed1d765 Mon Sep 17 00:00:00 2001
From: ZhangShenao <15201440436@163.com>
Date: Fri, 6 Dec 2024 00:58:32 +0800
Subject: [PATCH] [VectorStore] Improvement: Improve chroma vector store (#28524)

- Complete unit test
- Fix spelling error
---
 .../chroma/langchain_chroma/vectorstores.py      |  2 +-
 .../chroma/tests/unit_tests/test_vectorstores.py | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/libs/partners/chroma/langchain_chroma/vectorstores.py b/libs/partners/chroma/langchain_chroma/vectorstores.py
index 9967ac6d294..afb9191c60a 100644
--- a/libs/partners/chroma/langchain_chroma/vectorstores.py
+++ b/libs/partners/chroma/langchain_chroma/vectorstores.py
@@ -758,7 +758,7 @@ class Chroma(VectorStore):
 
         The most similar documents will have the lowest relevance score. Default
         relevance score function is euclidean distance. Distance metric must be
-        provided in `collection_metadata` during initizalition of Chroma object.
+        provided in `collection_metadata` during initialization of Chroma object.
         Example: collection_metadata={"hnsw:space": "cosine"}. Available distance
         metrics are: 'cosine', 'l2' and 'ip'.
 
diff --git a/libs/partners/chroma/tests/unit_tests/test_vectorstores.py b/libs/partners/chroma/tests/unit_tests/test_vectorstores.py
index 84d8637879e..66ac3b06224 100644
--- a/libs/partners/chroma/tests/unit_tests/test_vectorstores.py
+++ b/libs/partners/chroma/tests/unit_tests/test_vectorstores.py
@@ -13,3 +13,18 @@ def test_initialization() -> None:
         texts=texts,
         embedding=FakeEmbeddings(size=10),
     )
+
+
+def test_similarity_search() -> None:
+    """Test similarity search by Chroma."""
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": str(i)} for i in range(len(texts))]
+    docsearch = Chroma.from_texts(
+        collection_name="test_collection",
+        texts=texts,
+        embedding=FakeEmbeddings(size=10),
+        metadatas=metadatas,
+    )
+    output = docsearch.similarity_search("foo", k=1)
+    docsearch.delete_collection()
+    assert len(output) == 1