community[patch]: BaseLoader load method should just delegate to lazy_load (#18289)

`load()` should simply delegate to `lazy_load()`, i.e. return `list(self.lazy_load())`.
This commit is contained in:
Eugene Yurtsev 2024-02-29 21:45:28 -05:00 committed by GitHub
parent 5efb5c099f
commit 51b661cfe8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 7 additions and 10 deletions

View File

@@ -19,22 +19,22 @@ class BaseLoader(ABC):
Implementations should implement the lazy-loading method using generators Implementations should implement the lazy-loading method using generators
to avoid loading all Documents into memory at once. to avoid loading all Documents into memory at once.
The `load` method will remain as is for backwards compatibility, but its `load` is provided just for user convenience and should not be overridden.
implementation should be just `list(self.lazy_load())`.
""" """
# Sub-classes should implement this method # Sub-classes should not implement this method directly. Instead, they
# as return list(self.lazy_load()). # should implement the lazy load method.
# This method returns a List which is materialized in memory.
@abstractmethod
def load(self) -> List[Document]: def load(self) -> List[Document]:
"""Load data into Document objects.""" """Load data into Document objects."""
return list(self.lazy_load())
def load_and_split( def load_and_split(
self, text_splitter: Optional[TextSplitter] = None self, text_splitter: Optional[TextSplitter] = None
) -> List[Document]: ) -> List[Document]:
"""Load Documents and split into chunks. Chunks are returned as Documents. """Load Documents and split into chunks. Chunks are returned as Documents.
Do not override this method. It should be considered to be deprecated!
Args: Args:
text_splitter: TextSplitter instance to use for splitting documents. text_splitter: TextSplitter instance to use for splitting documents.
Defaults to RecursiveCharacterTextSplitter. Defaults to RecursiveCharacterTextSplitter.

View File

@@ -1,5 +1,5 @@
"""Test Base Schema of documents.""" """Test Base Schema of documents."""
from typing import Iterator, List from typing import Iterator
from langchain_core.documents import Document from langchain_core.documents import Document
@@ -31,9 +31,6 @@ def test_base_blob_parser() -> None:
async def test_default_aload() -> None: async def test_default_aload() -> None:
class FakeLoader(BaseLoader): class FakeLoader(BaseLoader):
def load(self) -> List[Document]:
return list(self.lazy_load())
def lazy_load(self) -> Iterator[Document]: def lazy_load(self) -> Iterator[Document]:
yield from [ yield from [
Document(page_content="foo"), Document(page_content="foo"),