docs: fix broken appearance of langchain_community/document_loaders/recursive_url_loader API Reference (#29305)

# PR message ## Description Fixed the broken appearance of the RecursiveUrlLoader API Reference. ### Before <p align="center"> <img width="750" alt="image" src="https://github.com/user-attachments/assets/f39df65d-b788-411d-88af-8bfa2607c00b" /> <img width="750" alt="image" src="https://github.com/user-attachments/assets/b8a92b70-4548-4b4a-965f-026faeebd0ec" /> </p> ### After <p align="center"> <img width="750" alt="image" src="https://github.com/user-attachments/assets/8ea28146-de45-42e2-b346-3004ec4dfc55" /> <img width="750" alt="image" src="https://github.com/user-attachments/assets/914c6966-4055-45d3-baeb-2d97eab06fe7" /> </p> ## Issue: N/A ## Dependencies None ## Twitter handle N/A # Add tests and docs Not applicable; this change only affects documentation. # Lint and test Ran make format, make lint, and make test to ensure no issues.
2025-09-25 21:37:20 +00:00 · 2025-01-21 00:56:59 +09:00
parent 6c52378992
commit 5d64597490
1 changed files with 24 additions and 23 deletions
--- a/libs/community/langchain_community/document_loaders/recursive_url_loader.py
+++ b/libs/community/langchain_community/document_loaders/recursive_url_loader.py
@@ -53,7 +53,8 @@ def _metadata_extractor(
 class RecursiveUrlLoader(BaseLoader):
    """Recursively load all child links from a root URL.

-    **Security Note**: This loader is a crawler that will start crawling
+    **Security Note**:
+        This loader is a crawler that will start crawling
        at a given URL and then expand to crawl child links recursively.

        Web crawlers should generally NOT be deployed with network access
@@ -162,7 +163,7 @@ class RecursiveUrlLoader(BaseLoader):

            def bs4_extractor(html: str) -> str:
                soup = BeautifulSoup(html, "lxml")
-                    return re.sub(r"\n\n+", "\n\n", soup.text).strip()
+                return re.sub(r"\\n\\n+", "\\n\\n", soup.text).strip()

            loader = RecursiveUrlLoader(
                "https://docs.python.org/3.9/",