From 2d96803ddda9d5d431328a85d06a732af9ab3b12 Mon Sep 17 00:00:00 2001
From: Christophe Bornet
Date: Wed, 6 Mar 2024 15:15:57 +0100
Subject: [PATCH] community[minor]: Implement lazy_load() for OutlookMessageLoader (#18668)

Integration test: `tests/integration_tests/document_loaders/test_email.py`
---
 .../document_loaders/email.py | 25 ++++++++-----------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/email.py b/libs/community/langchain_community/document_loaders/email.py
index 277fdcc640c..66b460e95a2 100644
--- a/libs/community/langchain_community/document_loaders/email.py
+++ b/libs/community/langchain_community/document_loaders/email.py
@@ -1,5 +1,5 @@
 import os
-from typing import Any, List
+from typing import Any, Iterator, List
 
 from langchain_core.documents import Document
 
@@ -99,19 +99,16 @@ class OutlookMessageLoader(BaseLoader):
                 "`pip install extract_msg`"
             )
 
-    def load(self) -> List[Document]:
-        """Load data into document objects."""
+    def lazy_load(self) -> Iterator[Document]:
         import extract_msg
 
         msg = extract_msg.Message(self.file_path)
-        return [
-            Document(
-                page_content=msg.body,
-                metadata={
-                    "source": self.file_path,
-                    "subject": msg.subject,
-                    "sender": msg.sender,
-                    "date": msg.date,
-                },
-            )
-        ]
+        yield Document(
+            page_content=msg.body,
+            metadata={
+                "source": self.file_path,
+                "subject": msg.subject,
+                "sender": msg.sender,
+                "date": msg.date,
+            },
+        )