community[patch]: Correct the call to collection_name in qdrant (#16920)

## Description

In #16608, the call to `collection_name` was wrong.
This PR fixes it.
Sorry for the inconvenience!

## Issue

https://github.com/langchain-ai/langchain/issues/16962

## Dependencies

N/A



<!-- Thank you for contributing to LangChain!

Please title your PR "<package>: <description>", where <package> is
whichever of langchain, community, core, experimental, etc. is being
modified.

Replace this entire comment with:
  - **Description:** a description of the change, 
  - **Issue:** the issue # it fixes if applicable,
  - **Dependencies:** any dependencies required for this change,
- **Twitter handle:** we announce bigger features on Twitter. If your PR
gets announced, and you'd like a mention, we'll gladly shout you out!

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` from the root
of the package you've modified to check this locally.

See contribution guidelines for more information on how to write/run
tests, lint, etc: https://python.langchain.com/docs/contributing/

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
2. an example notebook showing its use. It lives in
`docs/docs/integrations` directory.

If no one reviews your PR within a few days, please @-mention one of
@baskaryan, @eyurtsev, @hwchase17.
 -->

---------

Co-authored-by: Kumar Shivendu <kshivendu1@gmail.com>
Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
Killinsun - Ryota Takeuchi 2024-02-05 03:45:35 +09:00 committed by GitHub
parent 849051102a
commit bcfce146d8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 123 additions and 52 deletions

View File

@ -120,7 +120,10 @@ class QdrantSparseVectorRetriever(BaseRetriever):
)
return [
Qdrant._document_from_scored_point(
point, self.content_payload_key, self.metadata_payload_key
point,
self.collection_name,
self.content_payload_key,
self.metadata_payload_key,
)
for point in results
]

View File

@ -620,7 +620,10 @@ class Qdrant(VectorStore):
return [
(
self._document_from_scored_point(
result, self.content_payload_key, self.metadata_payload_key
result,
self.collection_name,
self.content_payload_key,
self.metadata_payload_key,
),
result.score,
)
@ -713,7 +716,10 @@ class Qdrant(VectorStore):
return [
(
self._document_from_scored_point(
result, self.content_payload_key, self.metadata_payload_key
result,
self.collection_name,
self.content_payload_key,
self.metadata_payload_key,
),
result.score,
)
@ -1051,7 +1057,10 @@ class Qdrant(VectorStore):
return [
(
self._document_from_scored_point(
results[i], self.content_payload_key, self.metadata_payload_key
results[i],
self.collection_name,
self.content_payload_key,
self.metadata_payload_key,
),
results[i].score,
)
@ -1123,7 +1132,10 @@ class Qdrant(VectorStore):
return [
(
self._document_from_scored_point(
results[i], self.content_payload_key, self.metadata_payload_key
results[i],
self.collection_name,
self.content_payload_key,
self.metadata_payload_key,
),
results[i].score,
)
@ -1938,12 +1950,13 @@ class Qdrant(VectorStore):
def _document_from_scored_point(
cls,
scored_point: Any,
collection_name: str,
content_payload_key: str,
metadata_payload_key: str,
) -> Document:
metadata = scored_point.payload.get(metadata_payload_key) or {}
metadata["_id"] = scored_point.id
metadata["_collection_name"] = scored_point.collection_name
metadata["_collection_name"] = collection_name
return Document(
page_content=scored_point.payload.get(content_payload_key),
metadata=metadata,

View File

@ -12,7 +12,10 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
from tests.integration_tests.vectorstores.qdrant.async_api.fixtures import (
qdrant_locations,
)
from tests.integration_tests.vectorstores.qdrant.common import qdrant_is_not_running
from tests.integration_tests.vectorstores.qdrant.common import (
assert_documents_equals,
qdrant_is_not_running,
)
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
@ -245,4 +248,6 @@ async def test_qdrant_from_texts_stores_metadatas(
location=qdrant_location,
)
output = await docsearch.asimilarity_search("foo", k=1)
assert output == [Document(page_content="foo", metadata={"page": 0})]
assert_documents_equals(
output, [Document(page_content="foo", metadata={"page": 0})]
)

View File

@ -10,6 +10,7 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
from tests.integration_tests.vectorstores.qdrant.async_api.fixtures import (
qdrant_locations,
)
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
@pytest.mark.parametrize("batch_size", [1, 64])
@ -41,7 +42,10 @@ async def test_qdrant_max_marginal_relevance_search(
output = await docsearch.amax_marginal_relevance_search(
"foo", k=2, fetch_k=3, lambda_mult=0.0
)
assert output == [
Document(page_content="foo", metadata={"page": 0}),
Document(page_content="baz", metadata={"page": 2}),
]
assert_documents_equals(
output,
[
Document(page_content="foo", metadata={"page": 0}),
Document(page_content="baz", metadata={"page": 2}),
],
)

View File

@ -11,6 +11,7 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
from tests.integration_tests.vectorstores.qdrant.async_api.fixtures import (
qdrant_locations,
)
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
@pytest.mark.parametrize("batch_size", [1, 64])
@ -37,7 +38,7 @@ async def test_qdrant_similarity_search(
location=qdrant_location,
)
output = await docsearch.asimilarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
assert_documents_equals(output, [Document(page_content="foo")])
@pytest.mark.parametrize("batch_size", [1, 64])
@ -65,7 +66,7 @@ async def test_qdrant_similarity_search_by_vector(
)
embeddings = ConsistentFakeEmbeddings().embed_query("foo")
output = await docsearch.asimilarity_search_by_vector(embeddings, k=1)
assert output == [Document(page_content="foo")]
assert_documents_equals(output, [Document(page_content="foo")])
@pytest.mark.parametrize("batch_size", [1, 64])
@ -95,7 +96,7 @@ async def test_qdrant_similarity_search_with_score_by_vector(
output = await docsearch.asimilarity_search_with_score_by_vector(embeddings, k=1)
assert len(output) == 1
document, score = output[0]
assert document == Document(page_content="foo")
assert_documents_equals([document], [Document(page_content="foo")])
assert score >= 0
@ -123,12 +124,15 @@ async def test_qdrant_similarity_search_filters(
output = await docsearch.asimilarity_search(
"foo", k=1, filter={"page": 1, "metadata": {"page": 2, "pages": [3]}}
)
assert output == [
Document(
page_content="bar",
metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
)
]
assert_documents_equals(
output,
[
Document(
page_content="bar",
metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
)
],
)
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@ -262,12 +266,15 @@ async def test_qdrant_similarity_search_filters_with_qdrant_filters(
]
)
output = await docsearch.asimilarity_search("foo", k=1, filter=qdrant_filter)
assert output == [
Document(
page_content="bar",
metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
)
]
assert_documents_equals(
output,
[
Document(
page_content="bar",
metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
)
],
)
@pytest.mark.parametrize("batch_size", [1, 64])

View File

@ -1,3 +1,8 @@
from typing import List
from langchain_core.documents import Document
def qdrant_is_not_running() -> bool:
"""Check if Qdrant is not running."""
import requests
@ -8,3 +13,18 @@ def qdrant_is_not_running() -> bool:
return response_json.get("title") != "qdrant - vector search engine"
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
return True
def assert_documents_equals(actual: List[Document], expected: List[Document]) -> None:
    """Assert that ``actual`` matches ``expected``, ignoring Qdrant-injected metadata.

    Qdrant stamps every returned document with ``_id`` and ``_collection_name``
    metadata keys; this helper checks those keys are present, then compares the
    remaining metadata and page content against the expected documents.

    Args:
        actual: Documents returned by the vector store under test.
        expected: Documents the test expects, without the injected keys.

    Raises:
        AssertionError: If lengths, page contents, injected keys, or the
            remaining metadata differ.
    """
    assert len(actual) == len(expected)

    for actual_doc, expected_doc in zip(actual, expected):
        assert actual_doc.page_content == expected_doc.page_content

        assert "_id" in actual_doc.metadata
        assert "_collection_name" in actual_doc.metadata

        # Compare a filtered copy rather than pop()-ing in place: popping
        # mutates the caller's documents and makes a second call on the same
        # documents raise KeyError.
        remaining_metadata = {
            key: value
            for key, value in actual_doc.metadata.items()
            if key not in ("_id", "_collection_name")
        }
        assert remaining_metadata == expected_doc.metadata

View File

@ -8,6 +8,7 @@ from langchain_community.vectorstores import Qdrant
from tests.integration_tests.vectorstores.fake_embeddings import (
ConsistentFakeEmbeddings,
)
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
@pytest.mark.parametrize("batch_size", [1, 64])
@ -33,7 +34,7 @@ def test_qdrant_add_documents_extends_existing_collection(
# ConsistentFakeEmbeddings return the same query embedding as the first document
# embedding computed in `embedding.embed_documents`. Thus, "foo" embedding is the
# same as "foobar" embedding
assert output == [Document(page_content="foobar")]
assert_documents_equals(output, [Document(page_content="foobar")])
@pytest.mark.parametrize("batch_size", [1, 64])

View File

@ -10,7 +10,10 @@ from langchain_community.vectorstores.qdrant import QdrantException
from tests.integration_tests.vectorstores.fake_embeddings import (
ConsistentFakeEmbeddings,
)
from tests.integration_tests.vectorstores.qdrant.common import qdrant_is_not_running
from tests.integration_tests.vectorstores.qdrant.common import (
assert_documents_equals,
qdrant_is_not_running,
)
def test_qdrant_from_texts_stores_duplicated_texts() -> None:
@ -257,7 +260,9 @@ def test_qdrant_from_texts_stores_metadatas(
batch_size=batch_size,
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo", metadata={"page": 0})]
assert_documents_equals(
output, [Document(page_content="foo", metadata={"page": 0})]
)
@pytest.mark.skipif(qdrant_is_not_running(), reason="Qdrant is not running")

View File

@ -7,6 +7,7 @@ from langchain_community.vectorstores import Qdrant
from tests.integration_tests.vectorstores.fake_embeddings import (
ConsistentFakeEmbeddings,
)
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
@pytest.mark.parametrize("batch_size", [1, 64])
@ -49,14 +50,18 @@ def test_qdrant_max_marginal_relevance_search(
output = docsearch.max_marginal_relevance_search(
"foo", k=2, fetch_k=3, lambda_mult=0.0
)
assert output == [
Document(page_content="foo", metadata={"page": 0}),
Document(page_content="baz", metadata={"page": 2}),
]
assert_documents_equals(
output,
[
Document(page_content="foo", metadata={"page": 0}),
Document(page_content="baz", metadata={"page": 2}),
],
)
output = docsearch.max_marginal_relevance_search(
"foo", k=2, fetch_k=3, lambda_mult=0.0, filter=filter
)
assert output == [
Document(page_content="baz", metadata={"page": 2}),
]
assert_documents_equals(
output,
[Document(page_content="baz", metadata={"page": 2})],
)

View File

@ -8,6 +8,7 @@ from langchain_community.vectorstores import Qdrant
from tests.integration_tests.vectorstores.fake_embeddings import (
ConsistentFakeEmbeddings,
)
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
@pytest.mark.parametrize("batch_size", [1, 64])
@ -32,7 +33,7 @@ def test_qdrant_similarity_search(
vector_name=vector_name,
)
output = docsearch.similarity_search("foo", k=1)
assert output == [Document(page_content="foo")]
assert_documents_equals(actual=output, expected=[Document(page_content="foo")])
@pytest.mark.parametrize("batch_size", [1, 64])
@ -58,7 +59,7 @@ def test_qdrant_similarity_search_by_vector(
)
embeddings = ConsistentFakeEmbeddings().embed_query("foo")
output = docsearch.similarity_search_by_vector(embeddings, k=1)
assert output == [Document(page_content="foo")]
assert_documents_equals(output, [Document(page_content="foo")])
@pytest.mark.parametrize("batch_size", [1, 64])
@ -86,7 +87,7 @@ def test_qdrant_similarity_search_with_score_by_vector(
output = docsearch.similarity_search_with_score_by_vector(embeddings, k=1)
assert len(output) == 1
document, score = output[0]
assert document == Document(page_content="foo")
assert_documents_equals(actual=[document], expected=[Document(page_content="foo")])
assert score >= 0
@ -113,12 +114,16 @@ def test_qdrant_similarity_search_filters(
output = docsearch.similarity_search(
"foo", k=1, filter={"page": 1, "metadata": {"page": 2, "pages": [3]}}
)
assert output == [
Document(
page_content="bar",
metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
)
]
assert_documents_equals(
actual=output,
expected=[
Document(
page_content="bar",
metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
)
],
)
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
@ -240,12 +245,15 @@ def test_qdrant_similarity_search_filters_with_qdrant_filters(
]
)
output = docsearch.similarity_search("foo", k=1, filter=qdrant_filter)
assert output == [
Document(
page_content="bar",
metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
)
]
assert_documents_equals(
actual=output,
expected=[
Document(
page_content="bar",
metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
)
],
)
@pytest.mark.parametrize("batch_size", [1, 64])