mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-03 03:38:06 +00:00
Feature: Qdrant filters support (#5446)
# Support Qdrant filters

Qdrant has an [extensive filtering system](https://qdrant.tech/documentation/concepts/filtering/) with rich type support. This PR makes it possible to use the filters in LangChain by passing an additional parameter to both the `similarity_search_with_score` and `similarity_search` methods.

## Who can review?

@dev2049 @hwchase17

---------

Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
This commit is contained in:
parent
f72bb966f8
commit
8bcaca435a
@ -399,6 +399,31 @@
|
||||
"print(f\"\\nScore: {score}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"### Metadata filtering\n",
|
||||
"\n",
|
||||
"Qdrant has an [extensive filtering system](https://qdrant.tech/documentation/concepts/filtering/) with rich type support. It is also possible to use the filters in Langchain, by passing an additional param to both the `similarity_search_with_score` and `similarity_search` methods."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"```python\n",
|
||||
"from qdrant_client.http import models as rest\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"found_docs = qdrant.similarity_search_with_score(query, filter=rest.Filter(...))\n",
|
||||
"```"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c58c30bf",
|
||||
|
@ -27,10 +27,11 @@ from langchain.vectorstores import VectorStore
|
||||
from langchain.vectorstores.utils import maximal_marginal_relevance
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from qdrant_client.conversions import common_types
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
|
||||
MetadataFilter = Dict[str, Union[str, int, bool, dict, list]]
|
||||
DictFilter = Dict[str, Union[str, int, bool, dict, list]]
|
||||
MetadataFilter = Union[DictFilter, common_types.Filter]
|
||||
|
||||
|
||||
class Qdrant(VectorStore):
|
||||
@ -234,10 +235,21 @@ class Qdrant(VectorStore):
|
||||
List of Documents most similar to the query and score for each.
|
||||
"""
|
||||
|
||||
if filter is not None and isinstance(filter, dict):
|
||||
warnings.warn(
|
||||
"Using dict as a `filter` is deprecated. Please use qdrant-client "
|
||||
"filters directly: "
|
||||
"https://qdrant.tech/documentation/concepts/filtering/",
|
||||
DeprecationWarning,
|
||||
)
|
||||
qdrant_filter = self._qdrant_filter_from_dict(filter)
|
||||
else:
|
||||
qdrant_filter = filter
|
||||
|
||||
results = self.client.search(
|
||||
collection_name=self.collection_name,
|
||||
query_vector=self._embed_query(query),
|
||||
query_filter=self._qdrant_filter_from_dict(filter),
|
||||
query_filter=qdrant_filter,
|
||||
with_payload=True,
|
||||
limit=k,
|
||||
)
|
||||
@ -519,7 +531,7 @@ class Qdrant(VectorStore):
|
||||
return out
|
||||
|
||||
def _qdrant_filter_from_dict(
|
||||
self, filter: Optional[MetadataFilter]
|
||||
self, filter: Optional[DictFilter]
|
||||
) -> Optional[rest.Filter]:
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
|
@ -306,7 +306,7 @@ extended_testing = [
|
||||
"html2text",
|
||||
"py-trello",
|
||||
"scikit-learn",
|
||||
"pyspark",
|
||||
"pyspark"
|
||||
]
|
||||
|
||||
[tool.ruff]
|
||||
|
@ -2,6 +2,7 @@
|
||||
from typing import Callable, Optional
|
||||
|
||||
import pytest
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.embeddings.base import Embeddings
|
||||
@ -129,6 +130,45 @@ def test_qdrant_similarity_search_filters(batch_size: int) -> None:
|
||||
]
|
||||
|
||||
|
||||
def test_qdrant_similarity_search_filters_with_qdrant_filters() -> None:
    """Check that a native ``rest.Filter`` can be passed to ``similarity_search``.

    Three texts are indexed in an in-memory collection, each with nested
    metadata derived from its index. The filter lists three field conditions
    under ``must``; given the metadata built below, only the entry at index 1
    ("bar") has values equal to all three.
    """
    texts = ["foo", "bar", "baz"]
    metadatas = [
        {"page": i, "details": {"page": i + 1, "pages": [i + 2, -1]}}
        for i, _ in enumerate(texts)
    ]
    docsearch = Qdrant.from_texts(
        texts,
        ConsistentFakeEmbeddings(),
        metadatas=metadatas,
        location=":memory:",
    )

    # Conditions address payload fields through the "metadata." key prefix,
    # including nested keys and a list-valued field.
    page_condition = rest.FieldCondition(
        key="metadata.page",
        match=rest.MatchValue(value=1),
    )
    nested_page_condition = rest.FieldCondition(
        key="metadata.details.page",
        match=rest.MatchValue(value=2),
    )
    nested_pages_condition = rest.FieldCondition(
        key="metadata.details.pages",
        match=rest.MatchAny(any=[3]),
    )
    qdrant_filter = rest.Filter(
        must=[page_condition, nested_page_condition, nested_pages_condition]
    )

    output = docsearch.similarity_search("foo", k=1, filter=qdrant_filter)

    expected_document = Document(
        page_content="bar",
        metadata={"page": 1, "details": {"page": 2, "pages": [3, -1]}},
    )
    assert output == [expected_document]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("batch_size", [1, 64])
|
||||
@pytest.mark.parametrize(
|
||||
["content_payload_key", "metadata_payload_key"],
|
||||
|
Loading…
Reference in New Issue
Block a user