From d7c607ca006f90f9db54d59e5cc97ba6475e3491 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Mon, 26 Feb 2024 17:20:50 -0800 Subject: [PATCH] core[minor]: move document compressor base (#17910) --- .../core/langchain_core/documents/__init__.py | 3 +- .../langchain_core/documents/compressor.py | 33 +++++++++++++++++++ .../unit_tests/documents/test_imports.py | 2 +- .../retrievers/document_compressors/base.py | 33 +++---------------- 4 files changed, 41 insertions(+), 30 deletions(-) create mode 100644 libs/core/langchain_core/documents/compressor.py diff --git a/libs/core/langchain_core/documents/__init__.py b/libs/core/langchain_core/documents/__init__.py index 53a559e4705..11943477c7d 100644 --- a/libs/core/langchain_core/documents/__init__.py +++ b/libs/core/langchain_core/documents/__init__.py @@ -3,6 +3,7 @@ and their transformations. """ from langchain_core.documents.base import Document +from langchain_core.documents.compressor import BaseDocumentCompressor from langchain_core.documents.transformers import BaseDocumentTransformer -__all__ = ["Document", "BaseDocumentTransformer"] +__all__ = ["Document", "BaseDocumentTransformer", "BaseDocumentCompressor"] diff --git a/libs/core/langchain_core/documents/compressor.py b/libs/core/langchain_core/documents/compressor.py new file mode 100644 index 00000000000..228caf660a6 --- /dev/null +++ b/libs/core/langchain_core/documents/compressor.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Optional, Sequence + +from langchain_core.callbacks import Callbacks +from langchain_core.documents import Document +from langchain_core.pydantic_v1 import BaseModel +from langchain_core.runnables import run_in_executor + + +class BaseDocumentCompressor(BaseModel, ABC): + """Base class for document compressors.""" + + @abstractmethod + def compress_documents( + self, + documents: Sequence[Document], + query: str, + callbacks: Optional[Callbacks] = None, + ) -> Sequence[Document]: + """Compress retrieved documents given the query context.""" + + async def acompress_documents( + self, + documents: Sequence[Document], + query: str, + callbacks: Optional[Callbacks] = None, + ) -> Sequence[Document]: + """Compress retrieved documents given the query context.""" + return await run_in_executor( + None, self.compress_documents, documents, query, callbacks + ) diff --git a/libs/core/tests/unit_tests/documents/test_imports.py b/libs/core/tests/unit_tests/documents/test_imports.py index d239af62bb4..f3d64a3e7d0 100644 --- a/libs/core/tests/unit_tests/documents/test_imports.py +++ b/libs/core/tests/unit_tests/documents/test_imports.py @@ -1,6 +1,6 @@ from langchain_core.documents import __all__ -EXPECTED_ALL = ["Document", "BaseDocumentTransformer"] +EXPECTED_ALL = ["Document", "BaseDocumentTransformer", "BaseDocumentCompressor"] def test_all_imports() -> None: diff --git a/libs/langchain/langchain/retrievers/document_compressors/base.py b/libs/langchain/langchain/retrievers/document_compressors/base.py index 0acb81e9e26..ae8efdf6563 100644 --- a/libs/langchain/langchain/retrievers/document_compressors/base.py +++ b/libs/langchain/langchain/retrievers/document_compressors/base.py @@ -1,38 +1,15 @@ -from abc import ABC, abstractmethod from inspect import signature from typing import List, Optional, Sequence, Union -from langchain_core.documents import BaseDocumentTransformer, Document -from langchain_core.pydantic_v1 import BaseModel -from langchain_core.runnables.config import run_in_executor +from langchain_core.documents import ( + BaseDocumentCompressor, + BaseDocumentTransformer, + Document, +) from langchain.callbacks.manager import Callbacks -class BaseDocumentCompressor(BaseModel, ABC): - """Base class for document compressors.""" - - @abstractmethod - def compress_documents( - self, - documents: Sequence[Document], - query: str, - callbacks: Optional[Callbacks] = None, - ) -> Sequence[Document]: - """Compress retrieved documents given the query context.""" - - async def acompress_documents( - self, - documents: Sequence[Document], - query: str, - callbacks: Optional[Callbacks] = None, - ) -> Sequence[Document]: - """Compress retrieved documents given the query context.""" - return await run_in_executor( - None, self.compress_documents, documents, query, callbacks - ) - - class DocumentCompressorPipeline(BaseDocumentCompressor): """Document compressor that uses a pipeline of Transformers."""