community[patch]: Include scores in MongoDB Atlas QA chain results (#14666)

Adds the ability to return similarity scores when using
`RetrievalQA.from_chain_type` with `MongoDBAtlasVectorSearch`. Requires
that `return_source_documents=True` is set and that the retriever is
created with `search_kwargs={"additional": ["similarity_score"]}`.

Example use:

```python
vector_search = MongoDBAtlasVectorSearch.from_documents(...)

qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=vector_search.as_retriever(search_kwargs={"additional": ["similarity_score"]}),
    return_source_documents=True
)

...

docs = qa({"query": "..."})

docs["source_documents"][0].metadata["score"] # the similarity score is stored under the "score" metadata key
```

I've tested this feature locally, using a MongoDB Atlas Cluster with a
vector search index.
This commit is contained in:
Noah Stapp 2024-01-23 18:18:28 -08:00 committed by GitHub
parent 90f5a1c40e
commit e135e5257c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -209,6 +209,7 @@ class MongoDBAtlasVectorSearch(VectorStore):
for res in cursor: for res in cursor:
text = res.pop(self._text_key) text = res.pop(self._text_key)
score = res.pop("score") score = res.pop("score")
del res["embedding"]
docs.append((Document(page_content=text, metadata=res), score)) docs.append((Document(page_content=text, metadata=res), score))
return docs return docs
@ -221,11 +222,8 @@ class MongoDBAtlasVectorSearch(VectorStore):
) -> List[Tuple[Document, float]]: ) -> List[Tuple[Document, float]]:
"""Return MongoDB documents most similar to the given query and their scores. """Return MongoDB documents most similar to the given query and their scores.
Uses the $vectorSearch stage Uses the vectorSearch operator available in MongoDB Atlas Search.
performs aNN search on a vector in the specified field. For more: https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/
Index the field as "vector" using Atlas Vector Search "vectorSearch" index type
For more info : https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/
Args: Args:
query: Text to look up documents similar to. query: Text to look up documents similar to.
@ -233,7 +231,7 @@ class MongoDBAtlasVectorSearch(VectorStore):
pre_filter: (Optional) dictionary of argument(s) to prefilter document pre_filter: (Optional) dictionary of argument(s) to prefilter document
fields on. fields on.
post_filter_pipeline: (Optional) Pipeline of MongoDB aggregation stages post_filter_pipeline: (Optional) Pipeline of MongoDB aggregation stages
following the vector Search. following the vectorSearch stage.
Returns: Returns:
List of documents most similar to the query and their scores. List of documents most similar to the query and their scores.
@ -257,11 +255,8 @@ class MongoDBAtlasVectorSearch(VectorStore):
) -> List[Document]: ) -> List[Document]:
"""Return MongoDB documents most similar to the given query. """Return MongoDB documents most similar to the given query.
Uses the $vectorSearch stage Uses the vectorSearch operator available in MongoDB Atlas Search.
performs aNN search on a vector in the specified field. For more: https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/
Index the field as "vector" using Atlas Vector Search "vectorSearch" index type
For more info : https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/
Args: Args:
query: Text to look up documents similar to. query: Text to look up documents similar to.
@ -269,17 +264,22 @@ class MongoDBAtlasVectorSearch(VectorStore):
pre_filter: (Optional) dictionary of argument(s) to prefilter document pre_filter: (Optional) dictionary of argument(s) to prefilter document
fields on. fields on.
post_filter_pipeline: (Optional) Pipeline of MongoDB aggregation stages post_filter_pipeline: (Optional) Pipeline of MongoDB aggregation stages
following the vector search. following the vectorSearch stage.
Returns: Returns:
List of documents most similar to the query and their scores. List of documents most similar to the query and their scores.
""" """
additional = kwargs.get("additional")
docs_and_scores = self.similarity_search_with_score( docs_and_scores = self.similarity_search_with_score(
query, query,
k=k, k=k,
pre_filter=pre_filter, pre_filter=pre_filter,
post_filter_pipeline=post_filter_pipeline, post_filter_pipeline=post_filter_pipeline,
) )
if additional and "similarity_score" in additional:
for doc, score in docs_and_scores:
doc.metadata["score"] = score
return [doc for doc, _ in docs_and_scores] return [doc for doc, _ in docs_and_scores]
def max_marginal_relevance_search( def max_marginal_relevance_search(
@ -309,7 +309,7 @@ class MongoDBAtlasVectorSearch(VectorStore):
pre_filter: (Optional) dictionary of argument(s) to prefilter on document pre_filter: (Optional) dictionary of argument(s) to prefilter on document
fields. fields.
post_filter_pipeline: (Optional) pipeline of MongoDB aggregation stages post_filter_pipeline: (Optional) pipeline of MongoDB aggregation stages
following the vector search. following the vectorSearch stage.
Returns: Returns:
List of documents selected by maximal marginal relevance. List of documents selected by maximal marginal relevance.
""" """