mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-05 20:58:25 +00:00
community[patch]: BaseLoader load method should just delegate to lazy_load (#18289)
load() should simply delegate to lazy_load() and return its results as a list.
This commit is contained in:
parent
5efb5c099f
commit
51b661cfe8
@ -19,22 +19,22 @@ class BaseLoader(ABC):
|
||||
Implementations should implement the lazy-loading method using generators
|
||||
to avoid loading all Documents into memory at once.
|
||||
|
||||
The `load` method will remain as is for backwards compatibility, but its
|
||||
implementation should be just `list(self.lazy_load())`.
|
||||
`load` is provided just for user convenience and should not be overridden.
|
||||
"""
|
||||
|
||||
# Sub-classes should implement this method
|
||||
# as return list(self.lazy_load()).
|
||||
# This method returns a List which is materialized in memory.
|
||||
@abstractmethod
|
||||
# Sub-classes should not implement this method directly. Instead, they
|
||||
# should implement the `lazy_load` method.
|
||||
def load(self) -> List[Document]:
|
||||
"""Load data into Document objects."""
|
||||
return list(self.lazy_load())
|
||||
|
||||
def load_and_split(
|
||||
self, text_splitter: Optional[TextSplitter] = None
|
||||
) -> List[Document]:
|
||||
"""Load Documents and split into chunks. Chunks are returned as Documents.
|
||||
|
||||
Do not override this method. It should be considered deprecated!
|
||||
|
||||
Args:
|
||||
text_splitter: TextSplitter instance to use for splitting documents.
|
||||
Defaults to RecursiveCharacterTextSplitter.
|
||||
|
@ -1,5 +1,5 @@
|
||||
"""Test Base Schema of documents."""
|
||||
from typing import Iterator, List
|
||||
from typing import Iterator
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
@ -31,9 +31,6 @@ def test_base_blob_parser() -> None:
|
||||
|
||||
async def test_default_aload() -> None:
|
||||
class FakeLoader(BaseLoader):
|
||||
def load(self) -> List[Document]:
|
||||
return list(self.lazy_load())
|
||||
|
||||
def lazy_load(self) -> Iterator[Document]:
|
||||
yield from [
|
||||
Document(page_content="foo"),
|
||||
|
Loading…
Reference in New Issue
Block a user