mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-04 12:18:24 +00:00
Feature: Qdrant filters support (#5446)
# Support Qdrant filters

Qdrant has an [extensive filtering system](https://qdrant.tech/documentation/concepts/filtering/) with rich type support. This PR makes it possible to use these filters in LangChain by passing an additional `filter` parameter to both the `similarity_search_with_score` and `similarity_search` methods.

## Who can review?

@dev2049 @hwchase17

---------

Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
This commit is contained in:
parent
f72bb966f8
commit
8bcaca435a
@ -399,6 +399,31 @@
|
|||||||
"print(f\"\\nScore: {score}\")"
|
"print(f\"\\nScore: {score}\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"### Metadata filtering\n",
|
||||||
|
"\n",
|
||||||
|
"Qdrant has an [extensive filtering system](https://qdrant.tech/documentation/concepts/filtering/) with rich type support. You can use these filters in LangChain by passing an additional `filter` parameter to both the `similarity_search_with_score` and `similarity_search` methods."
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"source": [
|
||||||
|
"```python\n",
|
||||||
|
"from qdrant_client.http import models as rest\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||||
|
"found_docs = qdrant.similarity_search_with_score(query, filter=rest.Filter(...))\n",
|
||||||
|
"```"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "c58c30bf",
|
"id": "c58c30bf",
|
||||||
|
@ -27,10 +27,11 @@ from langchain.vectorstores import VectorStore
|
|||||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
|
from qdrant_client.conversions import common_types
|
||||||
from qdrant_client.http import models as rest
|
from qdrant_client.http import models as rest
|
||||||
|
|
||||||
|
DictFilter = Dict[str, Union[str, int, bool, dict, list]]
|
||||||
MetadataFilter = Dict[str, Union[str, int, bool, dict, list]]
|
MetadataFilter = Union[DictFilter, common_types.Filter]
|
||||||
|
|
||||||
|
|
||||||
class Qdrant(VectorStore):
|
class Qdrant(VectorStore):
|
||||||
@ -234,10 +235,21 @@ class Qdrant(VectorStore):
|
|||||||
List of Documents most similar to the query and score for each.
|
List of Documents most similar to the query and score for each.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
if filter is not None and isinstance(filter, dict):
|
||||||
|
warnings.warn(
|
||||||
|
"Using dict as a `filter` is deprecated. Please use qdrant-client "
|
||||||
|
"filters directly: "
|
||||||
|
"https://qdrant.tech/documentation/concepts/filtering/",
|
||||||
|
DeprecationWarning,
|
||||||
|
)
|
||||||
|
qdrant_filter = self._qdrant_filter_from_dict(filter)
|
||||||
|
else:
|
||||||
|
qdrant_filter = filter
|
||||||
|
|
||||||
results = self.client.search(
|
results = self.client.search(
|
||||||
collection_name=self.collection_name,
|
collection_name=self.collection_name,
|
||||||
query_vector=self._embed_query(query),
|
query_vector=self._embed_query(query),
|
||||||
query_filter=self._qdrant_filter_from_dict(filter),
|
query_filter=qdrant_filter,
|
||||||
with_payload=True,
|
with_payload=True,
|
||||||
limit=k,
|
limit=k,
|
||||||
)
|
)
|
||||||
@ -519,7 +531,7 @@ class Qdrant(VectorStore):
|
|||||||
return out
|
return out
|
||||||
|
|
||||||
def _qdrant_filter_from_dict(
|
def _qdrant_filter_from_dict(
|
||||||
self, filter: Optional[MetadataFilter]
|
self, filter: Optional[DictFilter]
|
||||||
) -> Optional[rest.Filter]:
|
) -> Optional[rest.Filter]:
|
||||||
from qdrant_client.http import models as rest
|
from qdrant_client.http import models as rest
|
||||||
|
|
||||||
|
@ -306,7 +306,7 @@ extended_testing = [
|
|||||||
"html2text",
|
"html2text",
|
||||||
"py-trello",
|
"py-trello",
|
||||||
"scikit-learn",
|
"scikit-learn",
|
||||||
"pyspark",
|
"pyspark"
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.ruff]
|
[tool.ruff]
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
from typing import Callable, Optional
|
from typing import Callable, Optional
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from qdrant_client.http import models as rest
|
||||||
|
|
||||||
from langchain.docstore.document import Document
|
from langchain.docstore.document import Document
|
||||||
from langchain.embeddings.base import Embeddings
|
from langchain.embeddings.base import Embeddings
|
||||||
@ -129,6 +130,45 @@ def test_qdrant_similarity_search_filters(batch_size: int) -> None:
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_qdrant_similarity_search_filters_with_qdrant_filters() -> None:
|
||||||
|
"""Test end to end construction and search."""
|
||||||
|
texts = ["foo", "bar", "baz"]
|
||||||
|
metadatas = [
|
||||||
|
{"page": i, "details": {"page": i + 1, "pages": [i + 2, -1]}}
|
||||||
|
for i in range(len(texts))
|
||||||
|
]
|
||||||
|
docsearch = Qdrant.from_texts(
|
||||||
|
texts,
|
||||||
|
ConsistentFakeEmbeddings(),
|
||||||
|
metadatas=metadatas,
|
||||||
|
location=":memory:",
|
||||||
|
)
|
||||||
|
|
||||||
|
qdrant_filter = rest.Filter(
|
||||||
|
must=[
|
||||||
|
rest.FieldCondition(
|
||||||
|
key="metadata.page",
|
||||||
|
match=rest.MatchValue(value=1),
|
||||||
|
),
|
||||||
|
rest.FieldCondition(
|
||||||
|
key="metadata.details.page",
|
||||||
|
match=rest.MatchValue(value=2),
|
||||||
|
),
|
||||||
|
rest.FieldCondition(
|
||||||
|
key="metadata.details.pages",
|
||||||
|
match=rest.MatchAny(any=[3]),
|
||||||
|
),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
output = docsearch.similarity_search("foo", k=1, filter=qdrant_filter)
|
||||||
|
assert output == [
|
||||||
|
Document(
|
||||||
|
page_content="bar",
|
||||||
|
metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
["content_payload_key", "metadata_payload_key"],
|
["content_payload_key", "metadata_payload_key"],
|
||||||
|
Loading…
Reference in New Issue
Block a user