mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-04 20:28:10 +00:00
community[patch]: Correct the calling to collection_name in qdrant (#16920)
## Description In #16608, `collection_name` was accessed incorrectly: it was read from the scored point (`scored_point.collection_name`). This fix passes the collection name to `_document_from_scored_point` explicitly instead. Sorry for the inconvenience! ## Issue https://github.com/langchain-ai/langchain/issues/16962 ## Dependencies N/A <!-- Thank you for contributing to LangChain! Please title your PR "<package>: <description>", where <package> is whichever of langchain, community, core, experimental, etc. is being modified. Replace this entire comment with: - **Description:** a description of the change, - **Issue:** the issue # it fixes if applicable, - **Dependencies:** any dependencies required for this change, - **Twitter handle:** we announce bigger features on Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out! Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` from the root of the package you've modified to check this locally. See contribution guidelines for more information on how to write/run tests, lint, etc: https://python.langchain.com/docs/contributing/ If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. --> --------- Co-authored-by: Kumar Shivendu <kshivendu1@gmail.com> Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
parent
849051102a
commit
bcfce146d8
@ -120,7 +120,10 @@ class QdrantSparseVectorRetriever(BaseRetriever):
|
||||
)
|
||||
return [
|
||||
Qdrant._document_from_scored_point(
|
||||
point, self.content_payload_key, self.metadata_payload_key
|
||||
point,
|
||||
self.collection_name,
|
||||
self.content_payload_key,
|
||||
self.metadata_payload_key,
|
||||
)
|
||||
for point in results
|
||||
]
|
||||
|
@ -620,7 +620,10 @@ class Qdrant(VectorStore):
|
||||
return [
|
||||
(
|
||||
self._document_from_scored_point(
|
||||
result, self.content_payload_key, self.metadata_payload_key
|
||||
result,
|
||||
self.collection_name,
|
||||
self.content_payload_key,
|
||||
self.metadata_payload_key,
|
||||
),
|
||||
result.score,
|
||||
)
|
||||
@ -713,7 +716,10 @@ class Qdrant(VectorStore):
|
||||
return [
|
||||
(
|
||||
self._document_from_scored_point(
|
||||
result, self.content_payload_key, self.metadata_payload_key
|
||||
result,
|
||||
self.collection_name,
|
||||
self.content_payload_key,
|
||||
self.metadata_payload_key,
|
||||
),
|
||||
result.score,
|
||||
)
|
||||
@ -1051,7 +1057,10 @@ class Qdrant(VectorStore):
|
||||
return [
|
||||
(
|
||||
self._document_from_scored_point(
|
||||
results[i], self.content_payload_key, self.metadata_payload_key
|
||||
results[i],
|
||||
self.collection_name,
|
||||
self.content_payload_key,
|
||||
self.metadata_payload_key,
|
||||
),
|
||||
results[i].score,
|
||||
)
|
||||
@ -1123,7 +1132,10 @@ class Qdrant(VectorStore):
|
||||
return [
|
||||
(
|
||||
self._document_from_scored_point(
|
||||
results[i], self.content_payload_key, self.metadata_payload_key
|
||||
results[i],
|
||||
self.collection_name,
|
||||
self.content_payload_key,
|
||||
self.metadata_payload_key,
|
||||
),
|
||||
results[i].score,
|
||||
)
|
||||
@ -1938,12 +1950,13 @@ class Qdrant(VectorStore):
|
||||
def _document_from_scored_point(
|
||||
cls,
|
||||
scored_point: Any,
|
||||
collection_name: str,
|
||||
content_payload_key: str,
|
||||
metadata_payload_key: str,
|
||||
) -> Document:
|
||||
metadata = scored_point.payload.get(metadata_payload_key) or {}
|
||||
metadata["_id"] = scored_point.id
|
||||
metadata["_collection_name"] = scored_point.collection_name
|
||||
metadata["_collection_name"] = collection_name
|
||||
return Document(
|
||||
page_content=scored_point.payload.get(content_payload_key),
|
||||
metadata=metadata,
|
||||
|
@ -12,7 +12,10 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
from tests.integration_tests.vectorstores.qdrant.async_api.fixtures import (
|
||||
qdrant_locations,
|
||||
)
|
||||
from tests.integration_tests.vectorstores.qdrant.common import qdrant_is_not_running
|
||||
from tests.integration_tests.vectorstores.qdrant.common import (
|
||||
assert_documents_equals,
|
||||
qdrant_is_not_running,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
|
||||
@ -245,4 +248,6 @@ async def test_qdrant_from_texts_stores_metadatas(
|
||||
location=qdrant_location,
|
||||
)
|
||||
output = await docsearch.asimilarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo", metadata={"page": 0})]
|
||||
assert_documents_equals(
|
||||
output, [Document(page_content="foo", metadata={"page": 0})]
|
||||
)
|
||||
|
@ -10,6 +10,7 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
from tests.integration_tests.vectorstores.qdrant.async_api.fixtures import (
|
||||
qdrant_locations,
|
||||
)
|
||||
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||
@ -41,7 +42,10 @@ async def test_qdrant_max_marginal_relevance_search(
|
||||
output = await docsearch.amax_marginal_relevance_search(
|
||||
"foo", k=2, fetch_k=3, lambda_mult=0.0
|
||||
)
|
||||
assert output == [
|
||||
Document(page_content="foo", metadata={"page": 0}),
|
||||
Document(page_content="baz", metadata={"page": 2}),
|
||||
]
|
||||
assert_documents_equals(
|
||||
output,
|
||||
[
|
||||
Document(page_content="foo", metadata={"page": 0}),
|
||||
Document(page_content="baz", metadata={"page": 2}),
|
||||
],
|
||||
)
|
||||
|
@ -11,6 +11,7 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
from tests.integration_tests.vectorstores.qdrant.async_api.fixtures import (
|
||||
qdrant_locations,
|
||||
)
|
||||
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||
@ -37,7 +38,7 @@ async def test_qdrant_similarity_search(
|
||||
location=qdrant_location,
|
||||
)
|
||||
output = await docsearch.asimilarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
assert_documents_equals(output, [Document(page_content="foo")])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||
@ -65,7 +66,7 @@ async def test_qdrant_similarity_search_by_vector(
|
||||
)
|
||||
embeddings = ConsistentFakeEmbeddings().embed_query("foo")
|
||||
output = await docsearch.asimilarity_search_by_vector(embeddings, k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
assert_documents_equals(output, [Document(page_content="foo")])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||
@ -95,7 +96,7 @@ async def test_qdrant_similarity_search_with_score_by_vector(
|
||||
output = await docsearch.asimilarity_search_with_score_by_vector(embeddings, k=1)
|
||||
assert len(output) == 1
|
||||
document, score = output[0]
|
||||
assert document == Document(page_content="foo")
|
||||
assert_documents_equals([document], [Document(page_content="foo")])
|
||||
assert score >= 0
|
||||
|
||||
|
||||
@ -123,12 +124,15 @@ async def test_qdrant_similarity_search_filters(
|
||||
output = await docsearch.asimilarity_search(
|
||||
"foo", k=1, filter={"page": 1, "metadata": {"page": 2, "pages": [3]}}
|
||||
)
|
||||
assert output == [
|
||||
Document(
|
||||
page_content="bar",
|
||||
metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
|
||||
)
|
||||
]
|
||||
assert_documents_equals(
|
||||
output,
|
||||
[
|
||||
Document(
|
||||
page_content="bar",
|
||||
metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
|
||||
@ -262,12 +266,15 @@ async def test_qdrant_similarity_search_filters_with_qdrant_filters(
|
||||
]
|
||||
)
|
||||
output = await docsearch.asimilarity_search("foo", k=1, filter=qdrant_filter)
|
||||
assert output == [
|
||||
Document(
|
||||
page_content="bar",
|
||||
metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
|
||||
)
|
||||
]
|
||||
assert_documents_equals(
|
||||
output,
|
||||
[
|
||||
Document(
|
||||
page_content="bar",
|
||||
metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||
|
@ -1,3 +1,8 @@
|
||||
from typing import List
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
|
||||
def qdrant_is_not_running() -> bool:
|
||||
"""Check if Qdrant is not running."""
|
||||
import requests
|
||||
@ -8,3 +13,18 @@ def qdrant_is_not_running() -> bool:
|
||||
return response_json.get("title") != "qdrant - vector search engine"
|
||||
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
|
||||
return True
|
||||
|
||||
|
||||
def assert_documents_equals(
    actual: "List[Document]", expected: "List[Document]"
) -> None:
    """Assert that two document lists match, ignoring Qdrant bookkeeping keys.

    Every document in ``actual`` must carry the ``_id`` and
    ``_collection_name`` metadata entries. Those two entries are excluded
    before the remaining metadata is compared with the expected metadata.

    Args:
        actual: Documents returned by the code under test.
        expected: Documents holding the expected ``page_content`` and
            metadata, without the ``_id`` / ``_collection_name`` entries.

    Raises:
        AssertionError: If the lists differ in length, a ``page_content``
            differs, a bookkeeping key is missing from an actual document,
            or the remaining metadata differs.
    """
    internal_keys = ("_id", "_collection_name")

    assert len(actual) == len(expected)

    for actual_doc, expected_doc in zip(actual, expected):
        assert actual_doc.page_content == expected_doc.page_content

        for key in internal_keys:
            assert key in actual_doc.metadata

        # Compare a filtered copy rather than popping the keys, so the
        # caller's documents are left untouched and the helper can be
        # called again on the same output.
        user_metadata = {
            key: value
            for key, value in actual_doc.metadata.items()
            if key not in internal_keys
        }
        assert user_metadata == expected_doc.metadata
|
||||
|
@ -8,6 +8,7 @@ from langchain_community.vectorstores import Qdrant
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
ConsistentFakeEmbeddings,
|
||||
)
|
||||
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||
@ -33,7 +34,7 @@ def test_qdrant_add_documents_extends_existing_collection(
|
||||
# ConsistentFakeEmbeddings return the same query embedding as the first document
|
||||
# embedding computed in `embedding.embed_documents`. Thus, "foo" embedding is the
|
||||
# same as "foobar" embedding
|
||||
assert output == [Document(page_content="foobar")]
|
||||
assert_documents_equals(output, [Document(page_content="foobar")])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||
|
@ -10,7 +10,10 @@ from langchain_community.vectorstores.qdrant import QdrantException
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
ConsistentFakeEmbeddings,
|
||||
)
|
||||
from tests.integration_tests.vectorstores.qdrant.common import qdrant_is_not_running
|
||||
from tests.integration_tests.vectorstores.qdrant.common import (
|
||||
assert_documents_equals,
|
||||
qdrant_is_not_running,
|
||||
)
|
||||
|
||||
|
||||
def test_qdrant_from_texts_stores_duplicated_texts() -> None:
|
||||
@ -257,7 +260,9 @@ def test_qdrant_from_texts_stores_metadatas(
|
||||
batch_size=batch_size,
|
||||
)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo", metadata={"page": 0})]
|
||||
assert_documents_equals(
|
||||
output, [Document(page_content="foo", metadata={"page": 0})]
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.skipif(qdrant_is_not_running(), reason="Qdrant is not running")
|
||||
|
@ -7,6 +7,7 @@ from langchain_community.vectorstores import Qdrant
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
ConsistentFakeEmbeddings,
|
||||
)
|
||||
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||
@ -49,14 +50,18 @@ def test_qdrant_max_marginal_relevance_search(
|
||||
output = docsearch.max_marginal_relevance_search(
|
||||
"foo", k=2, fetch_k=3, lambda_mult=0.0
|
||||
)
|
||||
assert output == [
|
||||
Document(page_content="foo", metadata={"page": 0}),
|
||||
Document(page_content="baz", metadata={"page": 2}),
|
||||
]
|
||||
assert_documents_equals(
|
||||
output,
|
||||
[
|
||||
Document(page_content="foo", metadata={"page": 0}),
|
||||
Document(page_content="baz", metadata={"page": 2}),
|
||||
],
|
||||
)
|
||||
|
||||
output = docsearch.max_marginal_relevance_search(
|
||||
"foo", k=2, fetch_k=3, lambda_mult=0.0, filter=filter
|
||||
)
|
||||
assert output == [
|
||||
Document(page_content="baz", metadata={"page": 2}),
|
||||
]
|
||||
assert_documents_equals(
|
||||
output,
|
||||
[Document(page_content="baz", metadata={"page": 2})],
|
||||
)
|
||||
|
@ -8,6 +8,7 @@ from langchain_community.vectorstores import Qdrant
|
||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||
ConsistentFakeEmbeddings,
|
||||
)
|
||||
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||
@ -32,7 +33,7 @@ def test_qdrant_similarity_search(
|
||||
vector_name=vector_name,
|
||||
)
|
||||
output = docsearch.similarity_search("foo", k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
assert_documents_equals(actual=output, expected=[Document(page_content="foo")])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||
@ -58,7 +59,7 @@ def test_qdrant_similarity_search_by_vector(
|
||||
)
|
||||
embeddings = ConsistentFakeEmbeddings().embed_query("foo")
|
||||
output = docsearch.similarity_search_by_vector(embeddings, k=1)
|
||||
assert output == [Document(page_content="foo")]
|
||||
assert_documents_equals(output, [Document(page_content="foo")])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||
@ -86,7 +87,7 @@ def test_qdrant_similarity_search_with_score_by_vector(
|
||||
output = docsearch.similarity_search_with_score_by_vector(embeddings, k=1)
|
||||
assert len(output) == 1
|
||||
document, score = output[0]
|
||||
assert document == Document(page_content="foo")
|
||||
assert_documents_equals(actual=[document], expected=[Document(page_content="foo")])
|
||||
assert score >= 0
|
||||
|
||||
|
||||
@ -113,12 +114,16 @@ def test_qdrant_similarity_search_filters(
|
||||
output = docsearch.similarity_search(
|
||||
"foo", k=1, filter={"page": 1, "metadata": {"page": 2, "pages": [3]}}
|
||||
)
|
||||
assert output == [
|
||||
Document(
|
||||
page_content="bar",
|
||||
metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
|
||||
)
|
||||
]
|
||||
|
||||
assert_documents_equals(
|
||||
actual=output,
|
||||
expected=[
|
||||
Document(
|
||||
page_content="bar",
|
||||
metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
|
||||
@ -240,12 +245,15 @@ def test_qdrant_similarity_search_filters_with_qdrant_filters(
|
||||
]
|
||||
)
|
||||
output = docsearch.similarity_search("foo", k=1, filter=qdrant_filter)
|
||||
assert output == [
|
||||
Document(
|
||||
page_content="bar",
|
||||
metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
|
||||
)
|
||||
]
|
||||
assert_documents_equals(
|
||||
actual=output,
|
||||
expected=[
|
||||
Document(
|
||||
page_content="bar",
|
||||
metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||
|
Loading…
Reference in New Issue
Block a user