diff --git a/libs/community/langchain_community/retrievers/qdrant_sparse_vector_retriever.py b/libs/community/langchain_community/retrievers/qdrant_sparse_vector_retriever.py
index 1b0c8292bd4..daf8917173d 100644
--- a/libs/community/langchain_community/retrievers/qdrant_sparse_vector_retriever.py
+++ b/libs/community/langchain_community/retrievers/qdrant_sparse_vector_retriever.py
@@ -120,7 +120,10 @@ class QdrantSparseVectorRetriever(BaseRetriever):
         )
         return [
             Qdrant._document_from_scored_point(
-                point, self.content_payload_key, self.metadata_payload_key
+                point,
+                self.collection_name,
+                self.content_payload_key,
+                self.metadata_payload_key,
             )
             for point in results
         ]
diff --git a/libs/community/langchain_community/vectorstores/qdrant.py b/libs/community/langchain_community/vectorstores/qdrant.py
index d64c5e3b802..47d32cae762 100644
--- a/libs/community/langchain_community/vectorstores/qdrant.py
+++ b/libs/community/langchain_community/vectorstores/qdrant.py
@@ -620,7 +620,10 @@ class Qdrant(VectorStore):
         return [
             (
                 self._document_from_scored_point(
-                    result, self.content_payload_key, self.metadata_payload_key
+                    result,
+                    self.collection_name,
+                    self.content_payload_key,
+                    self.metadata_payload_key,
                 ),
                 result.score,
             )
@@ -713,7 +716,10 @@ class Qdrant(VectorStore):
         return [
             (
                 self._document_from_scored_point(
-                    result, self.content_payload_key, self.metadata_payload_key
+                    result,
+                    self.collection_name,
+                    self.content_payload_key,
+                    self.metadata_payload_key,
                 ),
                 result.score,
             )
@@ -1051,7 +1057,10 @@ class Qdrant(VectorStore):
         return [
             (
                 self._document_from_scored_point(
-                    results[i], self.content_payload_key, self.metadata_payload_key
+                    results[i],
+                    self.collection_name,
+                    self.content_payload_key,
+                    self.metadata_payload_key,
                 ),
                 results[i].score,
             )
@@ -1123,7 +1132,10 @@ class Qdrant(VectorStore):
         return [
             (
                 self._document_from_scored_point(
-                    results[i], self.content_payload_key, self.metadata_payload_key
+                    results[i],
+                    self.collection_name,
+                    self.content_payload_key,
+                    self.metadata_payload_key,
                 ),
                 results[i].score,
             )
@@ -1938,12 +1950,13 @@ class Qdrant(VectorStore):
     def _document_from_scored_point(
         cls,
         scored_point: Any,
+        collection_name: str,
         content_payload_key: str,
         metadata_payload_key: str,
     ) -> Document:
         metadata = scored_point.payload.get(metadata_payload_key) or {}
         metadata["_id"] = scored_point.id
-        metadata["_collection_name"] = scored_point.collection_name
+        metadata["_collection_name"] = collection_name
         return Document(
             page_content=scored_point.payload.get(content_payload_key),
             metadata=metadata,
diff --git a/libs/community/tests/integration_tests/vectorstores/qdrant/async_api/test_from_texts.py b/libs/community/tests/integration_tests/vectorstores/qdrant/async_api/test_from_texts.py
index dc9f3c9270e..1ee99d40bdd 100644
--- a/libs/community/tests/integration_tests/vectorstores/qdrant/async_api/test_from_texts.py
+++ b/libs/community/tests/integration_tests/vectorstores/qdrant/async_api/test_from_texts.py
@@ -12,7 +12,10 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
 from tests.integration_tests.vectorstores.qdrant.async_api.fixtures import (
     qdrant_locations,
 )
-from tests.integration_tests.vectorstores.qdrant.common import qdrant_is_not_running
+from tests.integration_tests.vectorstores.qdrant.common import (
+    assert_documents_equals,
+    qdrant_is_not_running,
+)
 
 
 @pytest.mark.parametrize("qdrant_location", qdrant_locations())
@@ -245,4 +248,6 @@ async def test_qdrant_from_texts_stores_metadatas(
         location=qdrant_location,
     )
     output = await docsearch.asimilarity_search("foo", k=1)
-    assert output == [Document(page_content="foo", metadata={"page": 0})]
+    assert_documents_equals(
+        output, [Document(page_content="foo", metadata={"page": 0})]
+    )
diff --git a/libs/community/tests/integration_tests/vectorstores/qdrant/async_api/test_max_marginal_relevance.py b/libs/community/tests/integration_tests/vectorstores/qdrant/async_api/test_max_marginal_relevance.py
index 59d40c479fb..472a66bb0c7 100644
--- a/libs/community/tests/integration_tests/vectorstores/qdrant/async_api/test_max_marginal_relevance.py
+++ b/libs/community/tests/integration_tests/vectorstores/qdrant/async_api/test_max_marginal_relevance.py
@@ -10,6 +10,7 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
 from tests.integration_tests.vectorstores.qdrant.async_api.fixtures import (
     qdrant_locations,
 )
+from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
 
 
 @pytest.mark.parametrize("batch_size", [1, 64])
@@ -41,7 +42,10 @@ async def test_qdrant_max_marginal_relevance_search(
     output = await docsearch.amax_marginal_relevance_search(
         "foo", k=2, fetch_k=3, lambda_mult=0.0
     )
-    assert output == [
-        Document(page_content="foo", metadata={"page": 0}),
-        Document(page_content="baz", metadata={"page": 2}),
-    ]
+    assert_documents_equals(
+        output,
+        [
+            Document(page_content="foo", metadata={"page": 0}),
+            Document(page_content="baz", metadata={"page": 2}),
+        ],
+    )
diff --git a/libs/community/tests/integration_tests/vectorstores/qdrant/async_api/test_similarity_search.py b/libs/community/tests/integration_tests/vectorstores/qdrant/async_api/test_similarity_search.py
index 0d6db192c69..5ae98ad35aa 100644
--- a/libs/community/tests/integration_tests/vectorstores/qdrant/async_api/test_similarity_search.py
+++ b/libs/community/tests/integration_tests/vectorstores/qdrant/async_api/test_similarity_search.py
@@ -11,6 +11,7 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
 from tests.integration_tests.vectorstores.qdrant.async_api.fixtures import (
     qdrant_locations,
 )
+from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
 
 
 @pytest.mark.parametrize("batch_size", [1, 64])
@@ -37,7 +38,7 @@ async def test_qdrant_similarity_search(
         location=qdrant_location,
     )
     output = await docsearch.asimilarity_search("foo", k=1)
-    assert output == [Document(page_content="foo")]
+    assert_documents_equals(output, [Document(page_content="foo")])
 
 
 @pytest.mark.parametrize("batch_size", [1, 64])
@@ -65,7 +66,7 @@ async def test_qdrant_similarity_search_by_vector(
     )
     embeddings = ConsistentFakeEmbeddings().embed_query("foo")
     output = await docsearch.asimilarity_search_by_vector(embeddings, k=1)
-    assert output == [Document(page_content="foo")]
+    assert_documents_equals(output, [Document(page_content="foo")])
 
 
 @pytest.mark.parametrize("batch_size", [1, 64])
@@ -95,7 +96,7 @@ async def test_qdrant_similarity_search_with_score_by_vector(
     output = await docsearch.asimilarity_search_with_score_by_vector(embeddings, k=1)
     assert len(output) == 1
     document, score = output[0]
-    assert document == Document(page_content="foo")
+    assert_documents_equals([document], [Document(page_content="foo")])
     assert score >= 0
 
 
@@ -123,12 +124,15 @@ async def test_qdrant_similarity_search_filters(
     output = await docsearch.asimilarity_search(
         "foo", k=1, filter={"page": 1, "metadata": {"page": 2, "pages": [3]}}
     )
-    assert output == [
-        Document(
-            page_content="bar",
-            metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
-        )
-    ]
+    assert_documents_equals(
+        output,
+        [
+            Document(
+                page_content="bar",
+                metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
+            )
+        ],
+    )
 
 
 @pytest.mark.parametrize("vector_name", [None, "my-vector"])
@@ -262,12 +266,15 @@ async def test_qdrant_similarity_search_filters_with_qdrant_filters(
         ]
     )
     output = await docsearch.asimilarity_search("foo", k=1, filter=qdrant_filter)
-    assert output == [
-        Document(
-            page_content="bar",
-            metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
-        )
-    ]
+    assert_documents_equals(
+        output,
+        [
+            Document(
+                page_content="bar",
+                metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
+            )
+        ],
+    )
 
 
 @pytest.mark.parametrize("batch_size", [1, 64])
diff --git a/libs/community/tests/integration_tests/vectorstores/qdrant/common.py b/libs/community/tests/integration_tests/vectorstores/qdrant/common.py
index 065dddd0a6b..ec5d14ef318 100644
--- a/libs/community/tests/integration_tests/vectorstores/qdrant/common.py
+++ b/libs/community/tests/integration_tests/vectorstores/qdrant/common.py
@@ -1,3 +1,8 @@
+from typing import List
+
+from langchain_core.documents import Document
+
+
 def qdrant_is_not_running() -> bool:
     """Check if Qdrant is not running."""
     import requests
@@ -8,3 +13,18 @@ def qdrant_is_not_running() -> bool:
         return response_json.get("title") != "qdrant - vector search engine"
     except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
         return True
+
+
+def assert_documents_equals(actual: List[Document], expected: List[Document]):
+    assert len(actual) == len(expected)
+
+    for actual_doc, expected_doc in zip(actual, expected):
+        assert actual_doc.page_content == expected_doc.page_content
+
+        assert "_id" in actual_doc.metadata
+        assert "_collection_name" in actual_doc.metadata
+
+        actual_doc.metadata.pop("_id")
+        actual_doc.metadata.pop("_collection_name")
+
+        assert actual_doc.metadata == expected_doc.metadata
diff --git a/libs/community/tests/integration_tests/vectorstores/qdrant/test_add_texts.py b/libs/community/tests/integration_tests/vectorstores/qdrant/test_add_texts.py
index 819216c2807..d3d73b3c6d5 100644
--- a/libs/community/tests/integration_tests/vectorstores/qdrant/test_add_texts.py
+++ b/libs/community/tests/integration_tests/vectorstores/qdrant/test_add_texts.py
@@ -8,6 +8,7 @@ from langchain_community.vectorstores import Qdrant
 from tests.integration_tests.vectorstores.fake_embeddings import (
     ConsistentFakeEmbeddings,
 )
+from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
 
 
 @pytest.mark.parametrize("batch_size", [1, 64])
@@ -33,7 +34,7 @@ def test_qdrant_add_documents_extends_existing_collection(
     # ConsistentFakeEmbeddings return the same query embedding as the first document
    # embedding computed in `embedding.embed_documents`. Thus, "foo" embedding is the
     # same as "foobar" embedding
-    assert output == [Document(page_content="foobar")]
+    assert_documents_equals(output, [Document(page_content="foobar")])
 
 
 @pytest.mark.parametrize("batch_size", [1, 64])
diff --git a/libs/community/tests/integration_tests/vectorstores/qdrant/test_from_texts.py b/libs/community/tests/integration_tests/vectorstores/qdrant/test_from_texts.py
index e6b97fa703a..5b00ea9f98f 100644
--- a/libs/community/tests/integration_tests/vectorstores/qdrant/test_from_texts.py
+++ b/libs/community/tests/integration_tests/vectorstores/qdrant/test_from_texts.py
@@ -10,7 +10,10 @@ from langchain_community.vectorstores.qdrant import QdrantException
 from tests.integration_tests.vectorstores.fake_embeddings import (
     ConsistentFakeEmbeddings,
 )
-from tests.integration_tests.vectorstores.qdrant.common import qdrant_is_not_running
+from tests.integration_tests.vectorstores.qdrant.common import (
+    assert_documents_equals,
+    qdrant_is_not_running,
+)
 
 
 def test_qdrant_from_texts_stores_duplicated_texts() -> None:
@@ -257,7 +260,9 @@ def test_qdrant_from_texts_stores_metadatas(
         batch_size=batch_size,
     )
     output = docsearch.similarity_search("foo", k=1)
-    assert output == [Document(page_content="foo", metadata={"page": 0})]
+    assert_documents_equals(
+        output, [Document(page_content="foo", metadata={"page": 0})]
+    )
 
 
 @pytest.mark.skipif(qdrant_is_not_running(), reason="Qdrant is not running")
diff --git a/libs/community/tests/integration_tests/vectorstores/qdrant/test_max_marginal_relevance.py b/libs/community/tests/integration_tests/vectorstores/qdrant/test_max_marginal_relevance.py
index 536003e8481..05c74311833 100644
--- a/libs/community/tests/integration_tests/vectorstores/qdrant/test_max_marginal_relevance.py
+++ b/libs/community/tests/integration_tests/vectorstores/qdrant/test_max_marginal_relevance.py
@@ -7,6 +7,7 @@ from langchain_community.vectorstores import Qdrant
 from tests.integration_tests.vectorstores.fake_embeddings import (
     ConsistentFakeEmbeddings,
 )
+from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
 
 
 @pytest.mark.parametrize("batch_size", [1, 64])
@@ -49,14 +50,18 @@ def test_qdrant_max_marginal_relevance_search(
     output = docsearch.max_marginal_relevance_search(
         "foo", k=2, fetch_k=3, lambda_mult=0.0
     )
-    assert output == [
-        Document(page_content="foo", metadata={"page": 0}),
-        Document(page_content="baz", metadata={"page": 2}),
-    ]
+    assert_documents_equals(
+        output,
+        [
+            Document(page_content="foo", metadata={"page": 0}),
+            Document(page_content="baz", metadata={"page": 2}),
+        ],
+    )
 
     output = docsearch.max_marginal_relevance_search(
         "foo", k=2, fetch_k=3, lambda_mult=0.0, filter=filter
     )
-    assert output == [
-        Document(page_content="baz", metadata={"page": 2}),
-    ]
+    assert_documents_equals(
+        output,
+        [Document(page_content="baz", metadata={"page": 2})],
+    )
diff --git a/libs/community/tests/integration_tests/vectorstores/qdrant/test_similarity_search.py b/libs/community/tests/integration_tests/vectorstores/qdrant/test_similarity_search.py
index 2df03ada797..5b51be93d4e 100644
--- a/libs/community/tests/integration_tests/vectorstores/qdrant/test_similarity_search.py
+++ b/libs/community/tests/integration_tests/vectorstores/qdrant/test_similarity_search.py
@@ -8,6 +8,7 @@ from langchain_community.vectorstores import Qdrant
 from tests.integration_tests.vectorstores.fake_embeddings import (
     ConsistentFakeEmbeddings,
 )
+from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
 
 
 @pytest.mark.parametrize("batch_size", [1, 64])
@@ -32,7 +33,7 @@ def test_qdrant_similarity_search(
         vector_name=vector_name,
     )
     output = docsearch.similarity_search("foo", k=1)
-    assert output == [Document(page_content="foo")]
+    assert_documents_equals(actual=output, expected=[Document(page_content="foo")])
 
 
 @pytest.mark.parametrize("batch_size", [1, 64])
@@ -58,7 +59,7 @@ def test_qdrant_similarity_search_by_vector(
     )
     embeddings = ConsistentFakeEmbeddings().embed_query("foo")
     output = docsearch.similarity_search_by_vector(embeddings, k=1)
-    assert output == [Document(page_content="foo")]
+    assert_documents_equals(output, [Document(page_content="foo")])
 
 
 @pytest.mark.parametrize("batch_size", [1, 64])
@@ -86,7 +87,7 @@ def test_qdrant_similarity_search_with_score_by_vector(
     output = docsearch.similarity_search_with_score_by_vector(embeddings, k=1)
     assert len(output) == 1
     document, score = output[0]
-    assert document == Document(page_content="foo")
+    assert_documents_equals(actual=[document], expected=[Document(page_content="foo")])
     assert score >= 0
 
 
@@ -113,12 +114,16 @@ def test_qdrant_similarity_search_filters(
     output = docsearch.similarity_search(
         "foo", k=1, filter={"page": 1, "metadata": {"page": 2, "pages": [3]}}
     )
-    assert output == [
-        Document(
-            page_content="bar",
-            metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
-        )
-    ]
+
+    assert_documents_equals(
+        actual=output,
+        expected=[
+            Document(
+                page_content="bar",
+                metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
+            )
+        ],
+    )
 
 
 @pytest.mark.parametrize("vector_name", [None, "my-vector"])
@@ -240,12 +245,15 @@ def test_qdrant_similarity_search_filters_with_qdrant_filters(
         ]
     )
     output = docsearch.similarity_search("foo", k=1, filter=qdrant_filter)
-    assert output == [
-        Document(
-            page_content="bar",
-            metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
-        )
-    ]
+    assert_documents_equals(
+        actual=output,
+        expected=[
+            Document(
+                page_content="bar",
+                metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
+            )
+        ],
+    )
 
 
 @pytest.mark.parametrize("batch_size", [1, 64])
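
Illustration only, not part of the patch: after this change every Document returned by a Qdrant search carries the injected "_id" and "_collection_name" metadata keys, which is why the tests compare results through the new assert_documents_equals helper instead of plain equality. A minimal sketch, assuming an already-populated Qdrant store named `docsearch` whose documents were stored with {"page": 0} metadata, as in the tests above:

    from langchain_core.documents import Document

    from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals

    output = docsearch.similarity_search("foo", k=1)
    # A plain `output == [...]` comparison would now fail because of the injected
    # "_id"/"_collection_name" keys; the helper pops them before comparing metadata.
    assert_documents_equals(output, [Document(page_content="foo", metadata={"page": 0})])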