Make bs4 a local import in recursive_url_loader.py (#6693)

Resolve https://github.com/hwchase17/langchain/issues/6679
This commit is contained in:
Lance Martin 2023-06-24 13:54:10 -07:00 committed by GitHub
parent ef4c7b54ef
commit dd36adc0f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -2,7 +2,6 @@ from typing import Iterator, List, Optional, Set
from urllib.parse import urlparse from urllib.parse import urlparse
import requests import requests
from bs4 import BeautifulSoup
from langchain.docstore.document import Document from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader from langchain.document_loaders.base import BaseLoader
@@ -21,6 +20,13 @@ class RecusiveUrlLoader(BaseLoader):
) -> Set[str]: ) -> Set[str]:
"""Recursively get all child links starting with the path of the input URL.""" """Recursively get all child links starting with the path of the input URL."""
try:
from bs4 import BeautifulSoup
except ImportError:
raise ImportError(
"The BeautifulSoup package is required for the RecusiveUrlLoader."
)
# Construct the base and parent URLs # Construct the base and parent URLs
parsed_url = urlparse(url) parsed_url = urlparse(url)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}" base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"