community[patch]: BaseLoader load method should just delegate to lazy_load (#18289)

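load() should simply delegate to lazy_load() by returning list(self.lazy_load()); subclasses should implement lazy_load() rather than override load().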
2025-07-05 20:58:25 +00:00 · 2024-02-29 21:45:28 -05:00 · 2024-02-29 21:45:28 -05:00 · 51b661cfe8
commit 51b661cfe8
parent 5efb5c099f
2 changed files with 7 additions and 10 deletions
--- a/libs/community/langchain_community/document_loaders/base.py
+++ b/libs/community/langchain_community/document_loaders/base.py
@@ -19,22 +19,22 @@ class BaseLoader(ABC):
    Implementations should implement the lazy-loading method using generators
    to avoid loading all Documents into memory at once.

-    The `load` method will remain as is for backwards compatibility, but its
-    implementation should be just `list(self.lazy_load())`.
+    `load` is provided just for user convenience and should not be overridden.
    """

-    # Sub-classes should implement this method
-    # as return list(self.lazy_load()).
-    # This method returns a List which is materialized in memory.
-    @abstractmethod
+    # Sub-classes should not implement this method directly. Instead, they
+    # should implement the lazy load method.
    def load(self) -> List[Document]:
        """Load data into Document objects."""
+        return list(self.lazy_load())

    def load_and_split(
        self, text_splitter: Optional[TextSplitter] = None
    ) -> List[Document]:
        """Load Documents and split into chunks. Chunks are returned as Documents.

+        Do not override this method. It should be considered to be deprecated!
+
        Args:
            text_splitter: TextSplitter instance to use for splitting documents.
              Defaults to RecursiveCharacterTextSplitter.
--- a/libs/community/tests/unit_tests/document_loaders/test_base.py
+++ b/libs/community/tests/unit_tests/document_loaders/test_base.py
@@ -1,5 +1,5 @@
 """Test Base Schema of documents."""
-from typing import Iterator, List
+from typing import Iterator

 from langchain_core.documents import Document

@@ -31,9 +31,6 @@ def test_base_blob_parser() -> None:

 async def test_default_aload() -> None:
    class FakeLoader(BaseLoader):
-        def load(self) -> List[Document]:
-            return list(self.lazy_load())
-
        def lazy_load(self) -> Iterator[Document]:
            yield from [
                Document(page_content="foo"),