community: add flag to toggle progress bar (#24463)

- **Description:** Add a `show_progress` flag to control whether a progress bar is shown when loading pages.
- **Issue:** n/a
- **Dependencies:** n/a
- **Twitter handle:** n/a

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
clement.l
2024-07-20 21:18:02 +08:00
committed by GitHub
parent 6b08a33fa4
commit d98b830e4b
3 changed files with 23 additions and 6 deletions

View File

@@ -58,6 +58,8 @@ class WebBaseLoader(BaseLoader):
bs_get_text_kwargs: Optional[Dict[str, Any]] = None,
bs_kwargs: Optional[Dict[str, Any]] = None,
session: Any = None,
*,
show_progress: bool = True,
) -> None:
"""Initialize loader.
@@ -69,6 +71,7 @@ class WebBaseLoader(BaseLoader):
raise_for_status: Raise an exception if http status code denotes an error.
bs_get_text_kwargs: kwargs for beatifulsoup4 get_text
bs_kwargs: kwargs for beatifulsoup4 web page parsing
show_progress: Show progress bar when loading pages.
"""
# web_path kept for backwards-compatibility.
if web_path and web_paths:
@@ -91,6 +94,7 @@ class WebBaseLoader(BaseLoader):
self.default_parser = default_parser
self.requests_kwargs = requests_kwargs or {}
self.raise_for_status = raise_for_status
self.show_progress = show_progress
self.bs_get_text_kwargs = bs_get_text_kwargs or {}
self.bs_kwargs = bs_kwargs or {}
if session:
@@ -177,11 +181,14 @@ class WebBaseLoader(BaseLoader):
task = asyncio.ensure_future(self._fetch_with_rate_limit(url, semaphore))
tasks.append(task)
try:
from tqdm.asyncio import tqdm_asyncio
if self.show_progress:
from tqdm.asyncio import tqdm_asyncio
return await tqdm_asyncio.gather(
*tasks, desc="Fetching pages", ascii=True, mininterval=1
)
return await tqdm_asyncio.gather(
*tasks, desc="Fetching pages", ascii=True, mininterval=1
)
else:
return await asyncio.gather(*tasks)
except ImportError:
warnings.warn("For better logging of progress, `pip install tqdm`")
return await asyncio.gather(*tasks)