mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-11 18:16:12 +00:00
[Community]: Added Metadata filter support for DocumentDB Vector Store (#22777)
- **Description:** As pointed out in issue #22770, DocumentDB `similarity_search` did not support filtering by metadata; this PR adds that support through a new `filter` parameter. It also fixes a minor documentation error. - **Issue:** #22770 --------- Co-authored-by: Erick Friis <erickfriis@gmail.com> Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
parent
912751e268
commit
36cad5d25c
@ -175,6 +175,10 @@ class DocumentDBVectorSearch(VectorStore):
|
||||
The maximum number of supported dimensions is 2000
|
||||
|
||||
similarity: Similarity algorithm to use with the HNSW index.
|
||||
Possible options are:
|
||||
- DocumentDBSimilarityType.COS (cosine distance),
|
||||
- DocumentDBSimilarityType.EUC (Euclidean distance), and
|
||||
- DocumentDBSimilarityType.DOT (dot product).
|
||||
|
||||
m: Specifies the max number of connections for an HNSW index.
|
||||
Large impact on memory consumption.
|
||||
@ -183,10 +187,6 @@ class DocumentDBVectorSearch(VectorStore):
|
||||
for constructing the graph for HNSW index. Higher values lead
|
||||
to more accurate results but slower indexing speed.
|
||||
|
||||
Possible options are:
|
||||
- DocumentDBSimilarityType.COS (cosine distance),
|
||||
- DocumentDBSimilarityType.EUC (Euclidean distance), and
|
||||
- DocumentDBSimilarityType.DOT (dot product).
|
||||
|
||||
Returns:
|
||||
An object describing the created index
|
||||
@ -309,7 +309,11 @@ class DocumentDBVectorSearch(VectorStore):
|
||||
self._collection.delete_one({"_id": ObjectId(document_id)})
|
||||
|
||||
def _similarity_search_without_score(
|
||||
self, embeddings: List[float], k: int = 4, ef_search: int = 40
|
||||
self,
|
||||
embeddings: List[float],
|
||||
k: int = 4,
|
||||
ef_search: int = 40,
|
||||
filter: Optional[Dict[str, Any]] = None,
|
||||
) -> List[Document]:
|
||||
"""Returns a list of documents.
|
||||
|
||||
@ -319,12 +323,13 @@ class DocumentDBVectorSearch(VectorStore):
|
||||
ef_search: Specifies the size of the dynamic candidate list
|
||||
that HNSW index uses during search. A higher value of
|
||||
efSearch provides better recall at cost of speed.
|
||||
|
||||
filter (Optional[Dict[str, Any]]): Filter by metadata. Defaults to None.
|
||||
Returns:
|
||||
A list of documents closest to the query vector
|
||||
"""
|
||||
pipeline: List[dict[str, Any]] = [
|
||||
{
|
||||
"$match": filter,
|
||||
"$search": {
|
||||
"vectorSearch": {
|
||||
"vector": embeddings,
|
||||
@ -333,7 +338,7 @@ class DocumentDBVectorSearch(VectorStore):
|
||||
"k": k,
|
||||
"efSearch": ef_search,
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
@ -352,10 +357,12 @@ class DocumentDBVectorSearch(VectorStore):
|
||||
query: str,
|
||||
k: int = 4,
|
||||
ef_search: int = 40,
|
||||
*,
|
||||
filter: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[Document]:
|
||||
embeddings = self._embedding.embed_query(query)
|
||||
docs = self._similarity_search_without_score(
|
||||
embeddings=embeddings, k=k, ef_search=ef_search
|
||||
embeddings=embeddings, k=k, ef_search=ef_search, filter=filter
|
||||
)
|
||||
return [doc for doc in docs]
|
||||
|
Loading…
Reference in New Issue
Block a user