diff --git a/libs/community/langchain_community/document_loaders/html_bs.py b/libs/community/langchain_community/document_loaders/html_bs.py index 3f5b0149b44..09b7489ddaa 100644 --- a/libs/community/langchain_community/document_loaders/html_bs.py +++ b/libs/community/langchain_community/document_loaders/html_bs.py @@ -1,5 +1,5 @@ import logging -from typing import Dict, List, Union +from typing import Dict, Iterator, Union from langchain_core.documents import Document @@ -42,7 +42,7 @@ class BSHTMLLoader(BaseLoader): self.bs_kwargs = bs_kwargs self.get_text_separator = get_text_separator - def load(self) -> List[Document]: + def lazy_load(self) -> Iterator[Document]: """Load HTML document into document objects.""" from bs4 import BeautifulSoup @@ -60,4 +60,4 @@ class BSHTMLLoader(BaseLoader): "source": self.file_path, "title": title, } - return [Document(page_content=text, metadata=metadata)] + yield Document(page_content=text, metadata=metadata)