mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-20 01:49:51 +00:00
feat: added tqdm progress bar to UnstructuredURLLoader (#6600)
- Description: Adds a simple progress bar with tqdm when using UnstructuredURLLoader. Exposes new parameter `show_progress_bar`. Very simple PR. - Issue: N/A - Dependencies: N/A - Tag maintainer: @rlancemartin @eyurtsev --------- Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
This commit is contained in:
parent
afc292e58d
commit
b32cc01c9f
@ -16,6 +16,7 @@ class UnstructuredURLLoader(BaseLoader):
|
|||||||
urls: List[str],
|
urls: List[str],
|
||||||
continue_on_failure: bool = True,
|
continue_on_failure: bool = True,
|
||||||
mode: str = "single",
|
mode: str = "single",
|
||||||
|
show_progress_bar: bool = False,
|
||||||
**unstructured_kwargs: Any,
|
**unstructured_kwargs: Any,
|
||||||
):
|
):
|
||||||
"""Initialize with file path."""
|
"""Initialize with file path."""
|
||||||
@ -51,6 +52,7 @@ class UnstructuredURLLoader(BaseLoader):
|
|||||||
self.continue_on_failure = continue_on_failure
|
self.continue_on_failure = continue_on_failure
|
||||||
self.headers = headers
|
self.headers = headers
|
||||||
self.unstructured_kwargs = unstructured_kwargs
|
self.unstructured_kwargs = unstructured_kwargs
|
||||||
|
self.show_progress_bar = show_progress_bar
|
||||||
|
|
||||||
def _validate_mode(self, mode: str) -> None:
|
def _validate_mode(self, mode: str) -> None:
|
||||||
_valid_modes = {"single", "elements"}
|
_valid_modes = {"single", "elements"}
|
||||||
@ -83,7 +85,21 @@ class UnstructuredURLLoader(BaseLoader):
|
|||||||
from unstructured.partition.html import partition_html
|
from unstructured.partition.html import partition_html
|
||||||
|
|
||||||
docs: List[Document] = list()
|
docs: List[Document] = list()
|
||||||
for url in self.urls:
|
if self.show_progress_bar:
|
||||||
|
try:
|
||||||
|
from tqdm import tqdm
|
||||||
|
except ImportError as e:
|
||||||
|
raise ImportError(
|
||||||
|
"Package tqdm must be installed if show_progress_bar=True. "
|
||||||
|
"Please install with 'pip install tqdm' or set "
|
||||||
|
"show_progress_bar=False."
|
||||||
|
) from e
|
||||||
|
|
||||||
|
urls = tqdm(self.urls)
|
||||||
|
else:
|
||||||
|
urls = self.urls
|
||||||
|
|
||||||
|
for url in urls:
|
||||||
try:
|
try:
|
||||||
if self.__is_non_html_available():
|
if self.__is_non_html_available():
|
||||||
if self.__is_headers_available_for_non_html():
|
if self.__is_headers_available_for_non_html():
|
||||||
|
Loading…
Reference in New Issue
Block a user