From 5d64597490cc58d2f3a84b4f6bbb71b49abcd168 Mon Sep 17 00:00:00 2001 From: Sangyun_LEE Date: Tue, 21 Jan 2025 00:56:59 +0900 Subject: [PATCH] docs: fix broken Appearance of langchain_community/document_loaders/recursive_url_loader API Reference (#29305) # PR message ## Description Fixed a broken appearance of the RecursiveUrlLoader API Reference. ### Before

image image

### After

image image

## Issue: N/A ## Dependencies None ## Twitter handle N/A # Add tests and docs Not applicable; this change only affects documentation. # Lint and test Ran make format, make lint, and make test to ensure no issues. --- .../document_loaders/recursive_url_loader.py | 47 ++++++++++--------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/libs/community/langchain_community/document_loaders/recursive_url_loader.py b/libs/community/langchain_community/document_loaders/recursive_url_loader.py index ebf119bafc2..9c63a71ac31 100644 --- a/libs/community/langchain_community/document_loaders/recursive_url_loader.py +++ b/libs/community/langchain_community/document_loaders/recursive_url_loader.py @@ -53,7 +53,8 @@ def _metadata_extractor( class RecursiveUrlLoader(BaseLoader): """Recursively load all child links from a root URL. - **Security Note**: This loader is a crawler that will start crawling + **Security Note**: + This loader is a crawler that will start crawling at a given URL and then expand to crawl child links recursively. Web crawlers should generally NOT be deployed with network access @@ -154,36 +155,36 @@ class RecursiveUrlLoader(BaseLoader): content. To parse this HTML into a more human/LLM-friendly format you can pass in a custom ``extractor`` method: - .. code-block:: python + .. 
code-block:: python - # This example uses `beautifulsoup4` and `lxml` - import re - from bs4 import BeautifulSoup + # This example uses `beautifulsoup4` and `lxml` + import re + from bs4 import BeautifulSoup - def bs4_extractor(html: str) -> str: - soup = BeautifulSoup(html, "lxml") - return re.sub(r"\n\n+", "\n\n", soup.text).strip() + def bs4_extractor(html: str) -> str: + soup = BeautifulSoup(html, "lxml") + return re.sub(r"\\n\\n+", "\\n\\n", soup.text).strip() - loader = RecursiveUrlLoader( - "https://docs.python.org/3.9/", - extractor=bs4_extractor, - ) - print(loader.load()[0].page_content[:200]) + loader = RecursiveUrlLoader( + "https://docs.python.org/3.9/", + extractor=bs4_extractor, + ) + print(loader.load()[0].page_content[:200]) - .. code-block:: python + .. code-block:: python - 3.9.19 Documentation + 3.9.19 Documentation - Download - Download these documents - Docs by version + Download + Download these documents + Docs by version - Python 3.13 (in development) - Python 3.12 (stable) - Python 3.11 (security-fixes) - Python 3.10 (security-fixes) - Python 3.9 (securit + Python 3.13 (in development) + Python 3.12 (stable) + Python 3.11 (security-fixes) + Python 3.10 (security-fixes) + Python 3.9 (securit Metadata extraction: Similarly to content extraction, you can specify a metadata extraction function