diff --git a/libs/community/langchain_community/document_loaders/base.py b/libs/community/langchain_community/document_loaders/base.py
index 91ad2a17306..8af5e48fe21 100644
--- a/libs/community/langchain_community/document_loaders/base.py
+++ b/libs/community/langchain_community/document_loaders/base.py
@@ -19,22 +19,22 @@ class BaseLoader(ABC):
     Implementations should implement the lazy-loading method using generators
     to avoid loading all Documents into memory at once.
 
-    The `load` method will remain as is for backwards compatibility, but its
-    implementation should be just `list(self.lazy_load())`.
+    `load` is provided just for user convenience and should not be overridden.
     """
 
-    # Sub-classes should implement this method
-    # as return list(self.lazy_load()).
-    # This method returns a List which is materialized in memory.
-    @abstractmethod
+    # Sub-classes should not implement this method directly. Instead, they
+    # should implement the lazy load method.
     def load(self) -> List[Document]:
         """Load data into Document objects."""
+        return list(self.lazy_load())
 
     def load_and_split(
         self, text_splitter: Optional[TextSplitter] = None
     ) -> List[Document]:
         """Load Documents and split into chunks. Chunks are returned as Documents.
 
+        Do not override this method. It should be considered to be deprecated!
+
         Args:
             text_splitter: TextSplitter instance to use for splitting documents.
               Defaults to RecursiveCharacterTextSplitter.
diff --git a/libs/community/tests/unit_tests/document_loaders/test_base.py b/libs/community/tests/unit_tests/document_loaders/test_base.py
index e966cf193b6..b9663a4dda8 100644
--- a/libs/community/tests/unit_tests/document_loaders/test_base.py
+++ b/libs/community/tests/unit_tests/document_loaders/test_base.py
@@ -1,5 +1,5 @@
 """Test Base Schema of documents."""
-from typing import Iterator, List
+from typing import Iterator
 
 from langchain_core.documents import Document
 
@@ -31,9 +31,6 @@ def test_base_blob_parser() -> None:
 
 async def test_default_aload() -> None:
     class FakeLoader(BaseLoader):
-        def load(self) -> List[Document]:
-            return list(self.lazy_load())
-
         def lazy_load(self) -> Iterator[Document]:
             yield from [
                 Document(page_content="foo"),