diff --git a/libs/text-splitters/langchain_text_splitters/html.py b/libs/text-splitters/langchain_text_splitters/html.py index 212a9abaa7b..3613937d999 100644 --- a/libs/text-splitters/langchain_text_splitters/html.py +++ b/libs/text-splitters/langchain_text_splitters/html.py @@ -696,7 +696,7 @@ class HTMLSemanticPreservingSplitter(BaseDocumentTransformer): placeholder_count: int, ) -> Tuple[List[Document], Dict[str, str], List[str], Dict[str, str], int]: for elem in element: - if elem.name.lower() in ["html", "body", "div"]: + if elem.name.lower() in ["html", "body", "div", "main"]: children = elem.find_all(recursive=False) ( documents,