mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-14 11:33:00 +00:00
[Community]: Added Metadata filter support for DocumentDB Vector Store (#22777)
- **Description:** As pointed out in issue #22770, DocumentDB `similarity_search` did not support filtering by metadata; this PR adds that support through a new `filter` parameter. It also fixes a minor documentation error. - **Issue:** #22770 --------- Co-authored-by: Erick Friis <erickfriis@gmail.com> Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
parent
912751e268
commit
36cad5d25c
@ -175,6 +175,10 @@ class DocumentDBVectorSearch(VectorStore):
|
|||||||
The maximum number of supported dimensions is 2000
|
The maximum number of supported dimensions is 2000
|
||||||
|
|
||||||
similarity: Similarity algorithm to use with the HNSW index.
|
similarity: Similarity algorithm to use with the HNSW index.
|
||||||
|
Possible options are:
|
||||||
|
- DocumentDBSimilarityType.COS (cosine distance),
|
||||||
|
- DocumentDBSimilarityType.EUC (Euclidean distance), and
|
||||||
|
- DocumentDBSimilarityType.DOT (dot product).
|
||||||
|
|
||||||
m: Specifies the max number of connections for an HNSW index.
|
m: Specifies the max number of connections for an HNSW index.
|
||||||
Large impact on memory consumption.
|
Large impact on memory consumption.
|
||||||
@ -183,10 +187,6 @@ class DocumentDBVectorSearch(VectorStore):
|
|||||||
for constructing the graph for HNSW index. Higher values lead
|
for constructing the graph for HNSW index. Higher values lead
|
||||||
to more accurate results but slower indexing speed.
|
to more accurate results but slower indexing speed.
|
||||||
|
|
||||||
Possible options are:
|
|
||||||
- DocumentDBSimilarityType.COS (cosine distance),
|
|
||||||
- DocumentDBSimilarityType.EUC (Euclidean distance), and
|
|
||||||
- DocumentDBSimilarityType.DOT (dot product).
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
An object describing the created index
|
An object describing the created index
|
||||||
@ -309,7 +309,11 @@ class DocumentDBVectorSearch(VectorStore):
|
|||||||
self._collection.delete_one({"_id": ObjectId(document_id)})
|
self._collection.delete_one({"_id": ObjectId(document_id)})
|
||||||
|
|
||||||
def _similarity_search_without_score(
|
def _similarity_search_without_score(
|
||||||
self, embeddings: List[float], k: int = 4, ef_search: int = 40
|
self,
|
||||||
|
embeddings: List[float],
|
||||||
|
k: int = 4,
|
||||||
|
ef_search: int = 40,
|
||||||
|
filter: Optional[Dict[str, Any]] = None,
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Returns a list of documents.
|
"""Returns a list of documents.
|
||||||
|
|
||||||
@ -319,12 +323,13 @@ class DocumentDBVectorSearch(VectorStore):
|
|||||||
ef_search: Specifies the size of the dynamic candidate list
|
ef_search: Specifies the size of the dynamic candidate list
|
||||||
that HNSW index uses during search. A higher value of
|
that HNSW index uses during search. A higher value of
|
||||||
efSearch provides better recall at cost of speed.
|
efSearch provides better recall at cost of speed.
|
||||||
|
filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
|
||||||
Returns:
|
Returns:
|
||||||
A list of documents closest to the query vector
|
A list of documents closest to the query vector
|
||||||
"""
|
"""
|
||||||
pipeline: List[dict[str, Any]] = [
|
pipeline: List[dict[str, Any]] = [
|
||||||
{
|
{
|
||||||
|
"$match": filter,
|
||||||
"$search": {
|
"$search": {
|
||||||
"vectorSearch": {
|
"vectorSearch": {
|
||||||
"vector": embeddings,
|
"vector": embeddings,
|
||||||
@ -333,7 +338,7 @@ class DocumentDBVectorSearch(VectorStore):
|
|||||||
"k": k,
|
"k": k,
|
||||||
"efSearch": ef_search,
|
"efSearch": ef_search,
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -352,10 +357,12 @@ class DocumentDBVectorSearch(VectorStore):
|
|||||||
query: str,
|
query: str,
|
||||||
k: int = 4,
|
k: int = 4,
|
||||||
ef_search: int = 40,
|
ef_search: int = 40,
|
||||||
|
*,
|
||||||
|
filter: Optional[Dict[str, Any]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
embeddings = self._embedding.embed_query(query)
|
embeddings = self._embedding.embed_query(query)
|
||||||
docs = self._similarity_search_without_score(
|
docs = self._similarity_search_without_score(
|
||||||
embeddings=embeddings, k=k, ef_search=ef_search
|
embeddings=embeddings, k=k, ef_search=ef_search, filter=filter
|
||||||
)
|
)
|
||||||
return [doc for doc in docs]
|
return [doc for doc in docs]
|
||||||
|
Loading…
Reference in New Issue
Block a user