From 0c6a3fdd6bc8082ec09db63f65a8d3d0d1173e43 Mon Sep 17 00:00:00 2001 From: Sheng Han Lim Date: Mon, 22 Jul 2024 02:23:19 +0800 Subject: [PATCH] langchain: Update ContextualCompressionRetriever base_retriever type to RetrieverLike (#24192) **Description:** When a retriever created with `configurable_fields` is passed as the base retriever, `ContextualCompressionRetriever` validation fails with the following error: ``` ValidationError: 1 validation error for ContextualCompressionRetriever base_retriever Can't instantiate abstract class BaseRetriever with abstract method _get_relevant_documents (type=type_error) ``` Example code: ```python esearch_retriever = VertexAISearchRetriever( project_id=GCP_PROJECT_ID, location_id="global", data_store_id=SEARCH_ENGINE_ID, ).configurable_fields( filter=ConfigurableField(id="vertex_search_filter", name="Vertex Search Filter") ) # rerank documents with Vertex AI Rank API reranker = VertexAIRank( project_id=GCP_PROJECT_ID, location_id=GCP_REGION, ranking_config="default_ranking_config", ) retriever_with_reranker = ContextualCompressionRetriever( base_compressor=reranker, base_retriever=esearch_retriever ) ``` The issue appears to stem from `ContextualCompressionRetriever` requiring its base retriever to be a strict `BaseRetriever` subclass; it does not account for cases where retrievers need to be chained or have configurable fields defined. 
https://github.com/langchain-ai/langchain/blob/0a1e475a30ff66186125d57f9b01669a9783b3ed/libs/langchain/langchain/retrievers/contextual_compression.py#L15-L22 This PR proposes that the base_retriever type be set to `RetrieverLike`, similar to how `EnsembleRetriever` validates its list of retrievers: https://github.com/langchain-ai/langchain/blob/0a1e475a30ff66186125d57f9b01669a9783b3ed/libs/langchain/langchain/retrievers/ensemble.py#L58-L75 --- libs/langchain/langchain/retrievers/contextual_compression.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/langchain/langchain/retrievers/contextual_compression.py b/libs/langchain/langchain/retrievers/contextual_compression.py index d41ea489ded..9f5c09e3d1c 100644 --- a/libs/langchain/langchain/retrievers/contextual_compression.py +++ b/libs/langchain/langchain/retrievers/contextual_compression.py @@ -5,7 +5,7 @@ from langchain_core.callbacks import ( CallbackManagerForRetrieverRun, ) from langchain_core.documents import Document -from langchain_core.retrievers import BaseRetriever +from langchain_core.retrievers import BaseRetriever, RetrieverLike from langchain.retrievers.document_compressors.base import ( BaseDocumentCompressor, @@ -18,7 +18,7 @@ class ContextualCompressionRetriever(BaseRetriever): base_compressor: BaseDocumentCompressor """Compressor for compressing retrieved documents.""" - base_retriever: BaseRetriever + base_retriever: RetrieverLike """Base Retriever to use for getting relevant documents.""" class Config: