From dd36adc0f4cd7dcb6dd00ec57ce6bad9d2ef671f Mon Sep 17 00:00:00 2001
From: Lance Martin <122662504+rlancemartin@users.noreply.github.com>
Date: Sat, 24 Jun 2023 13:54:10 -0700
Subject: [PATCH] Make bs4 a local import in recursive_url_loader.py (#6693)

Resolve https://github.com/hwchase17/langchain/issues/6679
---
 langchain/document_loaders/recursive_url_loader.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/langchain/document_loaders/recursive_url_loader.py b/langchain/document_loaders/recursive_url_loader.py
index 8ff452bf6dc..7107f3a7345 100644
--- a/langchain/document_loaders/recursive_url_loader.py
+++ b/langchain/document_loaders/recursive_url_loader.py
@@ -2,7 +2,6 @@ from typing import Iterator, List, Optional, Set
 from urllib.parse import urlparse
 
 import requests
-from bs4 import BeautifulSoup
 
 from langchain.docstore.document import Document
 from langchain.document_loaders.base import BaseLoader
@@ -21,6 +20,13 @@ class RecusiveUrlLoader(BaseLoader):
     ) -> Set[str]:
         """Recursively get all child links starting with the path of the input URL."""
 
+        try:
+            from bs4 import BeautifulSoup
+        except ImportError:
+            raise ImportError(
+                "The BeautifulSoup package is required for the RecusiveUrlLoader."
+            )
+
         # Construct the base and parent URLs
         parsed_url = urlparse(url)
         base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"