mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-19 01:21:50 +00:00
feat: added tqdm progress bar to UnstructuredURLLoader (#6600)
- Description: Adds a simple progress bar with tqdm when using UnstructuredURLLoader. Exposes new parameter `show_progress_bar`. Very simple PR. - Issue: N/A - Dependencies: N/A - Tag maintainer: @rlancemartin @eyurtsev --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
This commit is contained in:
parent
afc292e58d
commit
b32cc01c9f
@ -16,6 +16,7 @@ class UnstructuredURLLoader(BaseLoader):
|
||||
urls: List[str],
|
||||
continue_on_failure: bool = True,
|
||||
mode: str = "single",
|
||||
show_progress_bar: bool = False,
|
||||
**unstructured_kwargs: Any,
|
||||
):
|
||||
"""Initialize with file path."""
|
||||
@ -51,6 +52,7 @@ class UnstructuredURLLoader(BaseLoader):
|
||||
self.continue_on_failure = continue_on_failure
|
||||
self.headers = headers
|
||||
self.unstructured_kwargs = unstructured_kwargs
|
||||
self.show_progress_bar = show_progress_bar
|
||||
|
||||
def _validate_mode(self, mode: str) -> None:
|
||||
_valid_modes = {"single", "elements"}
|
||||
@ -83,7 +85,21 @@ class UnstructuredURLLoader(BaseLoader):
|
||||
from unstructured.partition.html import partition_html
|
||||
|
||||
docs: List[Document] = list()
|
||||
for url in self.urls:
|
||||
if self.show_progress_bar:
|
||||
try:
|
||||
from tqdm import tqdm
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"Package tqdm must be installed if show_progress_bar=True. "
|
||||
"Please install with 'pip install tqdm' or set "
|
||||
"show_progress_bar=False."
|
||||
) from e
|
||||
|
||||
urls = tqdm(self.urls)
|
||||
else:
|
||||
urls = self.urls
|
||||
|
||||
for url in urls:
|
||||
try:
|
||||
if self.__is_non_html_available():
|
||||
if self.__is_headers_available_for_non_html():
|
||||
|
Loading…
Reference in New Issue
Block a user