From 282362382ced1130e3b08f911b31d4f2431b6a5f Mon Sep 17 00:00:00 2001 From: Lance Martin <122662504+rlancemartin@users.noreply.github.com> Date: Tue, 12 Dec 2023 15:16:49 -0800 Subject: [PATCH] Minor update to ensemble retriever to handle a mix of Documents or str (#14552) --- libs/langchain/langchain/retrievers/ensemble.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/libs/langchain/langchain/retrievers/ensemble.py b/libs/langchain/langchain/retrievers/ensemble.py index de34d0c0b0f..7784775fe24 100644 --- a/libs/langchain/langchain/retrievers/ensemble.py +++ b/libs/langchain/langchain/retrievers/ensemble.py @@ -103,6 +103,13 @@ class EnsembleRetriever(BaseRetriever): for i, retriever in enumerate(self.retrievers) ] + # Enforce that retrieved docs are Documents for each list in retriever_docs + for i in range(len(retriever_docs)): + retriever_docs[i] = [ + Document(page_content=doc) if not isinstance(doc, Document) else doc + for doc in retriever_docs[i] + ] + # apply rank fusion fused_documents = self.weighted_reciprocal_rank(retriever_docs) @@ -130,6 +137,13 @@ class EnsembleRetriever(BaseRetriever): for i, retriever in enumerate(self.retrievers) ] + # Enforce that retrieved docs are Documents for each list in retriever_docs + for i in range(len(retriever_docs)): + retriever_docs[i] = [ + Document(page_content=doc) if not isinstance(doc, Document) else doc + for doc in retriever_docs[i] + ] + # apply rank fusion fused_documents = self.weighted_reciprocal_rank(retriever_docs)