community[patch]: BaseLoader load method should just delegate to lazy_load (#18289)

load() should just reference lazy_load()
This commit is contained in:
Eugene Yurtsev 2024-02-29 21:45:28 -05:00 committed by GitHub
parent 5efb5c099f
commit 51b661cfe8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 7 additions and 10 deletions

View File

@ -19,22 +19,22 @@ class BaseLoader(ABC):
Implementations should implement the lazy-loading method using generators
to avoid loading all Documents into memory at once.
The `load` method will remain as is for backwards compatibility, but its
implementation should be just `list(self.lazy_load())`.
`load` is provided just for user convenience and should not be overridden.
"""
# Sub-classes should implement this method
# as return list(self.lazy_load()).
# This method returns a List which is materialized in memory.
@abstractmethod
# Sub-classes should not implement this method directly. Instead, they
# should implement the lazy load method.
def load(self) -> List[Document]:
"""Load data into Document objects.

Convenience wrapper that consumes ``self.lazy_load()`` and collects
everything it yields into a list, so the full result is materialized
in memory at once. Sub-classes should implement ``lazy_load`` rather
than overriding this method.

Returns:
    A list containing every Document produced by ``lazy_load()``.
"""
return list(self.lazy_load())
def load_and_split(
self, text_splitter: Optional[TextSplitter] = None
) -> List[Document]:
"""Load Documents and split into chunks. Chunks are returned as Documents.
Do not override this method. It should be considered to be deprecated!
Args:
text_splitter: TextSplitter instance to use for splitting documents.
Defaults to RecursiveCharacterTextSplitter.

View File

@ -1,5 +1,5 @@
"""Test Base Schema of documents."""
from typing import Iterator, List
from typing import Iterator
from langchain_core.documents import Document
@ -31,9 +31,6 @@ def test_base_blob_parser() -> None:
async def test_default_aload() -> None:
class FakeLoader(BaseLoader):
def load(self) -> List[Document]:
return list(self.lazy_load())
def lazy_load(self) -> Iterator[Document]:
yield from [
Document(page_content="foo"),