From 6cd7607816f676de62225040a70a3c89f9b6088c Mon Sep 17 00:00:00 2001
From: Christophe Bornet <cbornet@hotmail.com>
Date: Thu, 7 Mar 2024 17:50:18 +0100
Subject: [PATCH] community[patch]: Implement lazy_load() for MHTMLLoader
 (#18648)

This change is covered by the existing unit tests in `tests/unit_tests/document_loaders/test_mhtml.py`.
---
 .../langchain_community/document_loaders/mhtml.py    | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/mhtml.py b/libs/community/langchain_community/document_loaders/mhtml.py
index 75cace9124f..8652ed9e147 100644
--- a/libs/community/langchain_community/document_loaders/mhtml.py
+++ b/libs/community/langchain_community/document_loaders/mhtml.py
@@ -1,6 +1,6 @@
 import email
 import logging
-from typing import Dict, List, Union
+from typing import Dict, Iterator, Union
 
 from langchain_core.documents import Document
 
@@ -44,11 +44,11 @@ class MHTMLLoader(BaseLoader):
         self.bs_kwargs = bs_kwargs
         self.get_text_separator = get_text_separator
 
-    def load(self) -> List[Document]:
-        from bs4 import BeautifulSoup
-
+    def lazy_load(self) -> Iterator[Document]:
         """Load MHTML document into document objects."""
 
+        from bs4 import BeautifulSoup
+
         with open(self.file_path, "r", encoding=self.open_encoding) as f:
             message = email.message_from_string(f.read())
             parts = message.get_payload()
@@ -72,5 +72,5 @@ class MHTMLLoader(BaseLoader):
                         "source": self.file_path,
                         "title": title,
                     }
-                    return [Document(page_content=text, metadata=metadata)]
-        return []
+                    yield Document(page_content=text, metadata=metadata)
+                    return