mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-25 16:13:25 +00:00
Make bs4 a local import in recursive_url_loader.py (#6693)
Resolve https://github.com/hwchase17/langchain/issues/6679
This commit is contained in:
parent
ef4c7b54ef
commit
dd36adc0f4
@@ -2,7 +2,6 @@ from typing import Iterator, List, Optional, Set
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
@@ -21,6 +20,13 @@ class RecusiveUrlLoader(BaseLoader):
|
||||
) -> Set[str]:
|
||||
"""Recursively get all child links starting with the path of the input URL."""
|
||||
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"The BeautifulSoup package is required for the RecusiveUrlLoader."
|
||||
)
|
||||
|
||||
# Construct the base and parent URLs
|
||||
parsed_url = urlparse(url)
|
||||
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
|
||||
|
Loading…
Reference in New Issue
Block a user