mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-27 00:48:45 +00:00
Make bs4 a local import in recursive_url_loader.py (#6693)
Resolve https://github.com/hwchase17/langchain/issues/6679
This commit is contained in:
parent
ef4c7b54ef
commit
dd36adc0f4
@ -2,7 +2,6 @@ from typing import Iterator, List, Optional, Set
|
|||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
|
|
||||||
from langchain.docstore.document import Document
|
from langchain.docstore.document import Document
|
||||||
from langchain.document_loaders.base import BaseLoader
|
from langchain.document_loaders.base import BaseLoader
|
||||||
@ -21,6 +20,13 @@ class RecusiveUrlLoader(BaseLoader):
|
|||||||
) -> Set[str]:
|
) -> Set[str]:
|
||||||
"""Recursively get all child links starting with the path of the input URL."""
|
"""Recursively get all child links starting with the path of the input URL."""
|
||||||
|
|
||||||
|
try:
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
except ImportError:
|
||||||
|
raise ImportError(
|
||||||
|
"The BeautifulSoup package is required for the RecusiveUrlLoader."
|
||||||
|
)
|
||||||
|
|
||||||
# Construct the base and parent URLs
|
# Construct the base and parent URLs
|
||||||
parsed_url = urlparse(url)
|
parsed_url = urlparse(url)
|
||||||
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
|
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
|
||||||
|
Loading…
Reference in New Issue
Block a user