Make bs4 a local import in recursive_url_loader.py (#6693)

Resolve https://github.com/hwchase17/langchain/issues/6679
This commit is contained in:
Lance Martin 2023-06-24 13:54:10 -07:00 committed by GitHub
parent ef4c7b54ef
commit dd36adc0f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -2,7 +2,6 @@ from typing import Iterator, List, Optional, Set
from urllib.parse import urlparse
import requests
from bs4 import BeautifulSoup
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
@@ -21,6 +20,13 @@ class RecusiveUrlLoader(BaseLoader):
) -> Set[str]:
"""Recursively get all child links starting with the path of the input URL."""
try:
from bs4 import BeautifulSoup
except ImportError:
raise ImportError(
"The BeautifulSoup package is required for the RecusiveUrlLoader."
)
# Construct the base and parent URLs
parsed_url = urlparse(url)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"