From a6b5d45e310234a595f5884b77a0bf63d332813a Mon Sep 17 00:00:00 2001 From: Christophe Bornet Date: Wed, 6 Mar 2024 02:29:52 +0100 Subject: [PATCH] community[patch]: Implement lazy_load() for EverNoteLoader (#18538) Covered by `test_evernote_loader.py` --- .../document_loaders/evernote.py | 43 +++++++++---------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/libs/community/langchain_community/document_loaders/evernote.py b/libs/community/langchain_community/document_loaders/evernote.py index 836f393d784..88a3efa8b1d 100644 --- a/libs/community/langchain_community/document_loaders/evernote.py +++ b/libs/community/langchain_community/document_loaders/evernote.py @@ -40,33 +40,32 @@ class EverNoteLoader(BaseLoader): self.file_path = file_path self.load_single_document = load_single_document - def load(self) -> List[Document]: - """Load documents from EverNote export file.""" - documents = [ - Document( - page_content=note["content"], - metadata={ - **{ - key: value - for key, value in note.items() - if key not in ["content", "content-raw", "resource"] + def _lazy_load(self) -> Iterator[Document]: + for note in self._parse_note_xml(self.file_path): + if note.get("content") is not None: + yield Document( + page_content=note["content"], + metadata={ + **{ + key: value + for key, value in note.items() + if key not in ["content", "content-raw", "resource"] + }, + **{"source": self.file_path}, }, - **{"source": self.file_path}, - }, - ) - for note in self._parse_note_xml(self.file_path) - if note.get("content") is not None - ] + ) + def lazy_load(self) -> Iterator[Document]: + """Load documents from EverNote export file.""" if not self.load_single_document: - return documents - - return [ - Document( - page_content="".join([document.page_content for document in documents]), + yield from self._lazy_load() + else: + yield Document( + page_content="".join( + [document.page_content for document in self._lazy_load()] + ), metadata={"source": self.file_path}, ) - ] @staticmethod def _parse_content(content: str) -> str: