mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-05 20:58:25 +00:00
community[patch]: Correct how collection_name is obtained in qdrant (#16920)
## Description In #16608, `collection_name` was read from the wrong place: it was taken from `scored_point.collection_name` instead of from the vector store's own `collection_name`. This change passes `collection_name` to `_document_from_scored_point` explicitly. Sorry for the inconvenience! ## Issue https://github.com/langchain-ai/langchain/issues/16962 ## Dependencies N/A <!-- Thank you for contributing to LangChain! Please title your PR "<package>: <description>", where <package> is whichever of langchain, community, core, experimental, etc. is being modified. Replace this entire comment with: - **Description:** a description of the change, - **Issue:** the issue # it fixes if applicable, - **Dependencies:** any dependencies required for this change, - **Twitter handle:** we announce bigger features on Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out! Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` from the root of the package you've modified to check this locally. See contribution guidelines for more information on how to write/run tests, lint, etc: https://python.langchain.com/docs/contributing/ If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in the `docs/docs/integrations` directory. If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. --> --------- Co-authored-by: Kumar Shivendu <kshivendu1@gmail.com> Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
parent
849051102a
commit
bcfce146d8
@ -120,7 +120,10 @@ class QdrantSparseVectorRetriever(BaseRetriever):
|
|||||||
)
|
)
|
||||||
return [
|
return [
|
||||||
Qdrant._document_from_scored_point(
|
Qdrant._document_from_scored_point(
|
||||||
point, self.content_payload_key, self.metadata_payload_key
|
point,
|
||||||
|
self.collection_name,
|
||||||
|
self.content_payload_key,
|
||||||
|
self.metadata_payload_key,
|
||||||
)
|
)
|
||||||
for point in results
|
for point in results
|
||||||
]
|
]
|
||||||
|
@ -620,7 +620,10 @@ class Qdrant(VectorStore):
|
|||||||
return [
|
return [
|
||||||
(
|
(
|
||||||
self._document_from_scored_point(
|
self._document_from_scored_point(
|
||||||
result, self.content_payload_key, self.metadata_payload_key
|
result,
|
||||||
|
self.collection_name,
|
||||||
|
self.content_payload_key,
|
||||||
|
self.metadata_payload_key,
|
||||||
),
|
),
|
||||||
result.score,
|
result.score,
|
||||||
)
|
)
|
||||||
@ -713,7 +716,10 @@ class Qdrant(VectorStore):
|
|||||||
return [
|
return [
|
||||||
(
|
(
|
||||||
self._document_from_scored_point(
|
self._document_from_scored_point(
|
||||||
result, self.content_payload_key, self.metadata_payload_key
|
result,
|
||||||
|
self.collection_name,
|
||||||
|
self.content_payload_key,
|
||||||
|
self.metadata_payload_key,
|
||||||
),
|
),
|
||||||
result.score,
|
result.score,
|
||||||
)
|
)
|
||||||
@ -1051,7 +1057,10 @@ class Qdrant(VectorStore):
|
|||||||
return [
|
return [
|
||||||
(
|
(
|
||||||
self._document_from_scored_point(
|
self._document_from_scored_point(
|
||||||
results[i], self.content_payload_key, self.metadata_payload_key
|
results[i],
|
||||||
|
self.collection_name,
|
||||||
|
self.content_payload_key,
|
||||||
|
self.metadata_payload_key,
|
||||||
),
|
),
|
||||||
results[i].score,
|
results[i].score,
|
||||||
)
|
)
|
||||||
@ -1123,7 +1132,10 @@ class Qdrant(VectorStore):
|
|||||||
return [
|
return [
|
||||||
(
|
(
|
||||||
self._document_from_scored_point(
|
self._document_from_scored_point(
|
||||||
results[i], self.content_payload_key, self.metadata_payload_key
|
results[i],
|
||||||
|
self.collection_name,
|
||||||
|
self.content_payload_key,
|
||||||
|
self.metadata_payload_key,
|
||||||
),
|
),
|
||||||
results[i].score,
|
results[i].score,
|
||||||
)
|
)
|
||||||
@ -1938,12 +1950,13 @@ class Qdrant(VectorStore):
|
|||||||
def _document_from_scored_point(
|
def _document_from_scored_point(
|
||||||
cls,
|
cls,
|
||||||
scored_point: Any,
|
scored_point: Any,
|
||||||
|
collection_name: str,
|
||||||
content_payload_key: str,
|
content_payload_key: str,
|
||||||
metadata_payload_key: str,
|
metadata_payload_key: str,
|
||||||
) -> Document:
|
) -> Document:
|
||||||
metadata = scored_point.payload.get(metadata_payload_key) or {}
|
metadata = scored_point.payload.get(metadata_payload_key) or {}
|
||||||
metadata["_id"] = scored_point.id
|
metadata["_id"] = scored_point.id
|
||||||
metadata["_collection_name"] = scored_point.collection_name
|
metadata["_collection_name"] = collection_name
|
||||||
return Document(
|
return Document(
|
||||||
page_content=scored_point.payload.get(content_payload_key),
|
page_content=scored_point.payload.get(content_payload_key),
|
||||||
metadata=metadata,
|
metadata=metadata,
|
||||||
|
@ -12,7 +12,10 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
|
|||||||
from tests.integration_tests.vectorstores.qdrant.async_api.fixtures import (
|
from tests.integration_tests.vectorstores.qdrant.async_api.fixtures import (
|
||||||
qdrant_locations,
|
qdrant_locations,
|
||||||
)
|
)
|
||||||
from tests.integration_tests.vectorstores.qdrant.common import qdrant_is_not_running
|
from tests.integration_tests.vectorstores.qdrant.common import (
|
||||||
|
assert_documents_equals,
|
||||||
|
qdrant_is_not_running,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
|
@pytest.mark.parametrize("qdrant_location", qdrant_locations())
|
||||||
@ -245,4 +248,6 @@ async def test_qdrant_from_texts_stores_metadatas(
|
|||||||
location=qdrant_location,
|
location=qdrant_location,
|
||||||
)
|
)
|
||||||
output = await docsearch.asimilarity_search("foo", k=1)
|
output = await docsearch.asimilarity_search("foo", k=1)
|
||||||
assert output == [Document(page_content="foo", metadata={"page": 0})]
|
assert_documents_equals(
|
||||||
|
output, [Document(page_content="foo", metadata={"page": 0})]
|
||||||
|
)
|
||||||
|
@ -10,6 +10,7 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
|
|||||||
from tests.integration_tests.vectorstores.qdrant.async_api.fixtures import (
|
from tests.integration_tests.vectorstores.qdrant.async_api.fixtures import (
|
||||||
qdrant_locations,
|
qdrant_locations,
|
||||||
)
|
)
|
||||||
|
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||||
@ -41,7 +42,10 @@ async def test_qdrant_max_marginal_relevance_search(
|
|||||||
output = await docsearch.amax_marginal_relevance_search(
|
output = await docsearch.amax_marginal_relevance_search(
|
||||||
"foo", k=2, fetch_k=3, lambda_mult=0.0
|
"foo", k=2, fetch_k=3, lambda_mult=0.0
|
||||||
)
|
)
|
||||||
assert output == [
|
assert_documents_equals(
|
||||||
|
output,
|
||||||
|
[
|
||||||
Document(page_content="foo", metadata={"page": 0}),
|
Document(page_content="foo", metadata={"page": 0}),
|
||||||
Document(page_content="baz", metadata={"page": 2}),
|
Document(page_content="baz", metadata={"page": 2}),
|
||||||
]
|
],
|
||||||
|
)
|
||||||
|
@ -11,6 +11,7 @@ from tests.integration_tests.vectorstores.fake_embeddings import (
|
|||||||
from tests.integration_tests.vectorstores.qdrant.async_api.fixtures import (
|
from tests.integration_tests.vectorstores.qdrant.async_api.fixtures import (
|
||||||
qdrant_locations,
|
qdrant_locations,
|
||||||
)
|
)
|
||||||
|
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||||
@ -37,7 +38,7 @@ async def test_qdrant_similarity_search(
|
|||||||
location=qdrant_location,
|
location=qdrant_location,
|
||||||
)
|
)
|
||||||
output = await docsearch.asimilarity_search("foo", k=1)
|
output = await docsearch.asimilarity_search("foo", k=1)
|
||||||
assert output == [Document(page_content="foo")]
|
assert_documents_equals(output, [Document(page_content="foo")])
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||||
@ -65,7 +66,7 @@ async def test_qdrant_similarity_search_by_vector(
|
|||||||
)
|
)
|
||||||
embeddings = ConsistentFakeEmbeddings().embed_query("foo")
|
embeddings = ConsistentFakeEmbeddings().embed_query("foo")
|
||||||
output = await docsearch.asimilarity_search_by_vector(embeddings, k=1)
|
output = await docsearch.asimilarity_search_by_vector(embeddings, k=1)
|
||||||
assert output == [Document(page_content="foo")]
|
assert_documents_equals(output, [Document(page_content="foo")])
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||||
@ -95,7 +96,7 @@ async def test_qdrant_similarity_search_with_score_by_vector(
|
|||||||
output = await docsearch.asimilarity_search_with_score_by_vector(embeddings, k=1)
|
output = await docsearch.asimilarity_search_with_score_by_vector(embeddings, k=1)
|
||||||
assert len(output) == 1
|
assert len(output) == 1
|
||||||
document, score = output[0]
|
document, score = output[0]
|
||||||
assert document == Document(page_content="foo")
|
assert_documents_equals([document], [Document(page_content="foo")])
|
||||||
assert score >= 0
|
assert score >= 0
|
||||||
|
|
||||||
|
|
||||||
@ -123,12 +124,15 @@ async def test_qdrant_similarity_search_filters(
|
|||||||
output = await docsearch.asimilarity_search(
|
output = await docsearch.asimilarity_search(
|
||||||
"foo", k=1, filter={"page": 1, "metadata": {"page": 2, "pages": [3]}}
|
"foo", k=1, filter={"page": 1, "metadata": {"page": 2, "pages": [3]}}
|
||||||
)
|
)
|
||||||
assert output == [
|
assert_documents_equals(
|
||||||
|
output,
|
||||||
|
[
|
||||||
Document(
|
Document(
|
||||||
page_content="bar",
|
page_content="bar",
|
||||||
metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
|
metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
|
||||||
)
|
)
|
||||||
]
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
|
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
|
||||||
@ -262,12 +266,15 @@ async def test_qdrant_similarity_search_filters_with_qdrant_filters(
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
output = await docsearch.asimilarity_search("foo", k=1, filter=qdrant_filter)
|
output = await docsearch.asimilarity_search("foo", k=1, filter=qdrant_filter)
|
||||||
assert output == [
|
assert_documents_equals(
|
||||||
|
output,
|
||||||
|
[
|
||||||
Document(
|
Document(
|
||||||
page_content="bar",
|
page_content="bar",
|
||||||
metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
|
metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
|
||||||
)
|
)
|
||||||
]
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||||
|
@ -1,3 +1,8 @@
|
|||||||
|
from typing import List
|
||||||
|
|
||||||
|
from langchain_core.documents import Document
|
||||||
|
|
||||||
|
|
||||||
def qdrant_is_not_running() -> bool:
|
def qdrant_is_not_running() -> bool:
|
||||||
"""Check if Qdrant is not running."""
|
"""Check if Qdrant is not running."""
|
||||||
import requests
|
import requests
|
||||||
@ -8,3 +13,18 @@ def qdrant_is_not_running() -> bool:
|
|||||||
return response_json.get("title") != "qdrant - vector search engine"
|
return response_json.get("title") != "qdrant - vector search engine"
|
||||||
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
|
except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def assert_documents_equals(actual: List[Document], expected: List[Document]):
|
||||||
|
assert len(actual) == len(expected)
|
||||||
|
|
||||||
|
for actual_doc, expected_doc in zip(actual, expected):
|
||||||
|
assert actual_doc.page_content == expected_doc.page_content
|
||||||
|
|
||||||
|
assert "_id" in actual_doc.metadata
|
||||||
|
assert "_collection_name" in actual_doc.metadata
|
||||||
|
|
||||||
|
actual_doc.metadata.pop("_id")
|
||||||
|
actual_doc.metadata.pop("_collection_name")
|
||||||
|
|
||||||
|
assert actual_doc.metadata == expected_doc.metadata
|
||||||
|
@ -8,6 +8,7 @@ from langchain_community.vectorstores import Qdrant
|
|||||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||||
ConsistentFakeEmbeddings,
|
ConsistentFakeEmbeddings,
|
||||||
)
|
)
|
||||||
|
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||||
@ -33,7 +34,7 @@ def test_qdrant_add_documents_extends_existing_collection(
|
|||||||
# ConsistentFakeEmbeddings return the same query embedding as the first document
|
# ConsistentFakeEmbeddings return the same query embedding as the first document
|
||||||
# embedding computed in `embedding.embed_documents`. Thus, "foo" embedding is the
|
# embedding computed in `embedding.embed_documents`. Thus, "foo" embedding is the
|
||||||
# same as "foobar" embedding
|
# same as "foobar" embedding
|
||||||
assert output == [Document(page_content="foobar")]
|
assert_documents_equals(output, [Document(page_content="foobar")])
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||||
|
@ -10,7 +10,10 @@ from langchain_community.vectorstores.qdrant import QdrantException
|
|||||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||||
ConsistentFakeEmbeddings,
|
ConsistentFakeEmbeddings,
|
||||||
)
|
)
|
||||||
from tests.integration_tests.vectorstores.qdrant.common import qdrant_is_not_running
|
from tests.integration_tests.vectorstores.qdrant.common import (
|
||||||
|
assert_documents_equals,
|
||||||
|
qdrant_is_not_running,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_qdrant_from_texts_stores_duplicated_texts() -> None:
|
def test_qdrant_from_texts_stores_duplicated_texts() -> None:
|
||||||
@ -257,7 +260,9 @@ def test_qdrant_from_texts_stores_metadatas(
|
|||||||
batch_size=batch_size,
|
batch_size=batch_size,
|
||||||
)
|
)
|
||||||
output = docsearch.similarity_search("foo", k=1)
|
output = docsearch.similarity_search("foo", k=1)
|
||||||
assert output == [Document(page_content="foo", metadata={"page": 0})]
|
assert_documents_equals(
|
||||||
|
output, [Document(page_content="foo", metadata={"page": 0})]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(qdrant_is_not_running(), reason="Qdrant is not running")
|
@pytest.mark.skipif(qdrant_is_not_running(), reason="Qdrant is not running")
|
||||||
|
@ -7,6 +7,7 @@ from langchain_community.vectorstores import Qdrant
|
|||||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||||
ConsistentFakeEmbeddings,
|
ConsistentFakeEmbeddings,
|
||||||
)
|
)
|
||||||
|
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||||
@ -49,14 +50,18 @@ def test_qdrant_max_marginal_relevance_search(
|
|||||||
output = docsearch.max_marginal_relevance_search(
|
output = docsearch.max_marginal_relevance_search(
|
||||||
"foo", k=2, fetch_k=3, lambda_mult=0.0
|
"foo", k=2, fetch_k=3, lambda_mult=0.0
|
||||||
)
|
)
|
||||||
assert output == [
|
assert_documents_equals(
|
||||||
|
output,
|
||||||
|
[
|
||||||
Document(page_content="foo", metadata={"page": 0}),
|
Document(page_content="foo", metadata={"page": 0}),
|
||||||
Document(page_content="baz", metadata={"page": 2}),
|
Document(page_content="baz", metadata={"page": 2}),
|
||||||
]
|
],
|
||||||
|
)
|
||||||
|
|
||||||
output = docsearch.max_marginal_relevance_search(
|
output = docsearch.max_marginal_relevance_search(
|
||||||
"foo", k=2, fetch_k=3, lambda_mult=0.0, filter=filter
|
"foo", k=2, fetch_k=3, lambda_mult=0.0, filter=filter
|
||||||
)
|
)
|
||||||
assert output == [
|
assert_documents_equals(
|
||||||
Document(page_content="baz", metadata={"page": 2}),
|
output,
|
||||||
]
|
[Document(page_content="baz", metadata={"page": 2})],
|
||||||
|
)
|
||||||
|
@ -8,6 +8,7 @@ from langchain_community.vectorstores import Qdrant
|
|||||||
from tests.integration_tests.vectorstores.fake_embeddings import (
|
from tests.integration_tests.vectorstores.fake_embeddings import (
|
||||||
ConsistentFakeEmbeddings,
|
ConsistentFakeEmbeddings,
|
||||||
)
|
)
|
||||||
|
from tests.integration_tests.vectorstores.qdrant.common import assert_documents_equals
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||||
@ -32,7 +33,7 @@ def test_qdrant_similarity_search(
|
|||||||
vector_name=vector_name,
|
vector_name=vector_name,
|
||||||
)
|
)
|
||||||
output = docsearch.similarity_search("foo", k=1)
|
output = docsearch.similarity_search("foo", k=1)
|
||||||
assert output == [Document(page_content="foo")]
|
assert_documents_equals(actual=output, expected=[Document(page_content="foo")])
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||||
@ -58,7 +59,7 @@ def test_qdrant_similarity_search_by_vector(
|
|||||||
)
|
)
|
||||||
embeddings = ConsistentFakeEmbeddings().embed_query("foo")
|
embeddings = ConsistentFakeEmbeddings().embed_query("foo")
|
||||||
output = docsearch.similarity_search_by_vector(embeddings, k=1)
|
output = docsearch.similarity_search_by_vector(embeddings, k=1)
|
||||||
assert output == [Document(page_content="foo")]
|
assert_documents_equals(output, [Document(page_content="foo")])
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||||
@ -86,7 +87,7 @@ def test_qdrant_similarity_search_with_score_by_vector(
|
|||||||
output = docsearch.similarity_search_with_score_by_vector(embeddings, k=1)
|
output = docsearch.similarity_search_with_score_by_vector(embeddings, k=1)
|
||||||
assert len(output) == 1
|
assert len(output) == 1
|
||||||
document, score = output[0]
|
document, score = output[0]
|
||||||
assert document == Document(page_content="foo")
|
assert_documents_equals(actual=[document], expected=[Document(page_content="foo")])
|
||||||
assert score >= 0
|
assert score >= 0
|
||||||
|
|
||||||
|
|
||||||
@ -113,12 +114,16 @@ def test_qdrant_similarity_search_filters(
|
|||||||
output = docsearch.similarity_search(
|
output = docsearch.similarity_search(
|
||||||
"foo", k=1, filter={"page": 1, "metadata": {"page": 2, "pages": [3]}}
|
"foo", k=1, filter={"page": 1, "metadata": {"page": 2, "pages": [3]}}
|
||||||
)
|
)
|
||||||
assert output == [
|
|
||||||
|
assert_documents_equals(
|
||||||
|
actual=output,
|
||||||
|
expected=[
|
||||||
Document(
|
Document(
|
||||||
page_content="bar",
|
page_content="bar",
|
||||||
metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
|
metadata={"page": 1, "metadata": {"page": 2, "pages": [3, -1]}},
|
||||||
)
|
)
|
||||||
]
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
|
@pytest.mark.parametrize("vector_name", [None, "my-vector"])
|
||||||
@ -240,12 +245,15 @@ def test_qdrant_similarity_search_filters_with_qdrant_filters(
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
output = docsearch.similarity_search("foo", k=1, filter=qdrant_filter)
|
output = docsearch.similarity_search("foo", k=1, filter=qdrant_filter)
|
||||||
assert output == [
|
assert_documents_equals(
|
||||||
|
actual=output,
|
||||||
|
expected=[
|
||||||
Document(
|
Document(
|
||||||
page_content="bar",
|
page_content="bar",
|
||||||
metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
|
metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
|
||||||
)
|
)
|
||||||
]
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||||
|
Loading…
Reference in New Issue
Block a user