diff --git a/langchain/document_loaders/recursive_url_loader.py b/langchain/document_loaders/recursive_url_loader.py index 8ff452bf6dc..7107f3a7345 100644 --- a/langchain/document_loaders/recursive_url_loader.py +++ b/langchain/document_loaders/recursive_url_loader.py @@ -2,7 +2,6 @@ from typing import Iterator, List, Optional, Set from urllib.parse import urlparse import requests -from bs4 import BeautifulSoup from langchain.docstore.document import Document from langchain.document_loaders.base import BaseLoader @@ -21,6 +20,13 @@ class RecusiveUrlLoader(BaseLoader): ) -> Set[str]: """Recursively get all child links starting with the path of the input URL.""" + try: + from bs4 import BeautifulSoup + except ImportError: + raise ImportError( + "The BeautifulSoup package is required for the RecusiveUrlLoader." + ) + # Construct the base and parent URLs parsed_url = urlparse(url) base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"