From c4d2a53f180c4eb6b8eb3cc8b56e0b037f84e67f Mon Sep 17 00:00:00 2001 From: Diverrez morgan <97022189+morgandiverrez@users.noreply.github.com> Date: Fri, 26 Jul 2024 15:34:39 +0200 Subject: [PATCH] community: creation score_threshold in flashrank_rerank.py (#24016) Description: add an optional relevance score threshold so that only sufficiently relevant documents are returned; it complements top_n. Also add an optional prefix_metadata prefix for the id and relevance_score metadata keys. Discussion: add relevance score threshold in flashrank_rerank document compressors #24013 Dependencies: no dependencies --------- Co-authored-by: Benjamin BERNARD Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Chester Curme --- .../document_compressors/flashrank_rerank.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/libs/community/langchain_community/document_compressors/flashrank_rerank.py b/libs/community/langchain_community/document_compressors/flashrank_rerank.py index fd66bee659d..64ea9a94191 100644 --- a/libs/community/langchain_community/document_compressors/flashrank_rerank.py +++ b/libs/community/langchain_community/document_compressors/flashrank_rerank.py @@ -26,8 +26,12 @@ class FlashrankRerank(BaseDocumentCompressor): """Flashrank client to use for compressing documents""" top_n: int = 3 """Number of documents to return.""" + score_threshold: float = 0.0 + """Minimum relevance threshold to return.""" model: Optional[str] = None """Model to use for reranking.""" + prefix_metadata: str = "" + """Prefix for flashrank_rerank metadata keys""" class Config: """Configuration for this pydantic object.""" @@ -69,11 +73,14 @@ class FlashrankRerank(BaseDocumentCompressor): final_results = [] for r in rerank_response: - metadata = r["meta"] - metadata["relevance_score"] = r["score"] - doc = Document( - page_content=r["text"], - metadata=metadata, - ) - final_results.append(doc) + if r["score"] >= self.score_threshold: + doc = Document( + page_content=r["text"], + metadata={ + self.prefix_metadata + "id": 
r["id"], + self.prefix_metadata + "relevance_score": r["score"], + **r["meta"], + }, + ) + final_results.append(doc) return final_results