From 4a7d73b39da4d2df0a7f1edc92633b7f71232b84 Mon Sep 17 00:00:00 2001
From: Christophe Bornet
Date: Thu, 7 Mar 2024 17:52:19 +0100
Subject: [PATCH] community: If load() has been overridden, use it in default lazy_load() (#18690)

---
 .../langchain_core/document_loaders/base.py   |  2 ++
 .../unit_tests/document_loaders/__init__.py   |  0
 .../unit_tests/document_loaders/test_base.py  | 31 ++++++++++++++++---
 3 files changed, 29 insertions(+), 4 deletions(-)
 create mode 100644 libs/core/tests/unit_tests/document_loaders/__init__.py
 rename libs/{community => core}/tests/unit_tests/document_loaders/test_base.py (61%)

diff --git a/libs/core/langchain_core/document_loaders/base.py b/libs/core/langchain_core/document_loaders/base.py
index 187123a3e39..a66f099f773 100644
--- a/libs/core/langchain_core/document_loaders/base.py
+++ b/libs/core/langchain_core/document_loaders/base.py
@@ -63,6 +63,8 @@ class BaseLoader(ABC):
     # implemented in all the existing subclasses.
     def lazy_load(self) -> Iterator[Document]:
         """A lazy loader for Documents."""
+        if type(self).load != BaseLoader.load:
+            return iter(self.load())
         raise NotImplementedError(
             f"{self.__class__.__name__} does not implement lazy_load()"
         )
diff --git a/libs/core/tests/unit_tests/document_loaders/__init__.py b/libs/core/tests/unit_tests/document_loaders/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/libs/community/tests/unit_tests/document_loaders/test_base.py b/libs/core/tests/unit_tests/document_loaders/test_base.py
similarity index 61%
rename from libs/community/tests/unit_tests/document_loaders/test_base.py
rename to libs/core/tests/unit_tests/document_loaders/test_base.py
index b9663a4dda8..b122401cabf 100644
--- a/libs/community/tests/unit_tests/document_loaders/test_base.py
+++ b/libs/core/tests/unit_tests/document_loaders/test_base.py
@@ -1,11 +1,12 @@
 """Test Base Schema of documents."""
-from typing import Iterator
+from typing import Iterator, List
 
+import pytest
+
+from langchain_core.document_loaders.base import BaseBlobParser, BaseLoader
+from langchain_core.document_loaders.blob_loaders import Blob
 from langchain_core.documents import Document
 
-from langchain_community.document_loaders.base import BaseBlobParser, BaseLoader
-from langchain_community.document_loaders.blob_loaders import Blob
-
 
 def test_base_blob_parser() -> None:
     """Verify that the eager method is hooked up to the lazy method by default."""
@@ -29,6 +30,28 @@ def test_base_blob_parser() -> None:
     assert docs[0].page_content == "foo"
 
 
+def test_default_lazy_load() -> None:
+    class FakeLoader(BaseLoader):
+        def load(self) -> List[Document]:
+            return [
+                Document(page_content="foo"),
+                Document(page_content="bar"),
+            ]
+
+    loader = FakeLoader()
+    docs = list(loader.lazy_load())
+    assert docs == [Document(page_content="foo"), Document(page_content="bar")]
+
+
+def test_lazy_load_not_implemented() -> None:
+    class FakeLoader(BaseLoader):
+        pass
+
+    loader = FakeLoader()
+    with pytest.raises(NotImplementedError):
+        loader.lazy_load()
+
+
 async def test_default_aload() -> None:
     class FakeLoader(BaseLoader):
         def lazy_load(self) -> Iterator[Document]: