community: If load() has been overridden, use it in default lazy_load() (#18690)

This commit is contained in:
Christophe Bornet 2024-03-07 17:52:19 +01:00 committed by GitHub
parent 6cd7607816
commit 4a7d73b39d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 29 additions and 4 deletions

View File

@ -63,6 +63,8 @@ class BaseLoader(ABC):
# implemented in all the existing subclasses.
def lazy_load(self) -> Iterator[Document]:
    """Return an iterator over this loader's Documents.

    Default implementation: when the concrete class provides its own
    ``load()`` (i.e. it differs from ``BaseLoader.load``), the result of
    ``load()`` is wrapped in an iterator.

    Raises:
        NotImplementedError: if the concrete class has not overridden
            ``load()``; raised when this method is called, not when the
            result is iterated.
    """
    load_is_overridden = type(self).load != BaseLoader.load
    if not load_is_overridden:
        raise NotImplementedError(
            f"{self.__class__.__name__} does not implement lazy_load()"
        )
    # Fall back on the subclass's eager load() and expose it as an iterator.
    return iter(self.load())

View File

@ -1,11 +1,12 @@
"""Test Base Schema of documents."""
from typing import Iterator
from typing import Iterator, List
import pytest
from langchain_core.document_loaders.base import BaseBlobParser, BaseLoader
from langchain_core.document_loaders.blob_loaders import Blob
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseBlobParser, BaseLoader
from langchain_community.document_loaders.blob_loaders import Blob
def test_base_blob_parser() -> None:
"""Verify that the eager method is hooked up to the lazy method by default."""
@ -29,6 +30,28 @@ def test_base_blob_parser() -> None:
assert docs[0].page_content == "foo"
def test_default_lazy_load() -> None:
    """Check that lazy_load() yields what an overridden load() returns."""
    expected_contents = ["foo", "bar"]

    class FakeLoader(BaseLoader):
        def load(self) -> List[Document]:
            return [Document(page_content=text) for text in expected_contents]

    fake_loader = FakeLoader()
    lazily_loaded = list(fake_loader.lazy_load())
    assert lazily_loaded == [
        Document(page_content="foo"),
        Document(page_content="bar"),
    ]
def test_lazy_load_not_implemented() -> None:
    """Check that lazy_load() raises when neither load() nor lazy_load() is overridden."""

    class FakeLoader(BaseLoader):
        pass

    bare_loader = FakeLoader()
    # The call itself must raise; no iteration of the result is needed.
    with pytest.raises(NotImplementedError):
        bare_loader.lazy_load()
async def test_default_aload() -> None:
class FakeLoader(BaseLoader):
def lazy_load(self) -> Iterator[Document]: