community[minor]: Implement lazy_load() for OutlookMessageLoader (#18668)

Integration test:
`tests/integration_tests/document_loaders/test_email.py`
This commit is contained in:
Christophe Bornet 2024-03-06 15:15:57 +01:00 committed by GitHub
parent ae167fb5b2
commit 2d96803ddd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,5 +1,5 @@
import os import os
from typing import Any, List from typing import Any, Iterator, List
from langchain_core.documents import Document from langchain_core.documents import Document
@@ -99,19 +99,16 @@ class OutlookMessageLoader(BaseLoader):
"`pip install extract_msg`" "`pip install extract_msg`"
) )
def load(self) -> List[Document]: def lazy_load(self) -> Iterator[Document]:
"""Load data into document objects."""
import extract_msg import extract_msg
msg = extract_msg.Message(self.file_path) msg = extract_msg.Message(self.file_path)
return [ yield Document(
Document( page_content=msg.body,
page_content=msg.body, metadata={
metadata={ "source": self.file_path,
"source": self.file_path, "subject": msg.subject,
"subject": msg.subject, "sender": msg.sender,
"sender": msg.sender, "date": msg.date,
"date": msg.date, },
}, )
)
]