mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-25 16:13:25 +00:00
community: add flag to toggle progress bar (#24463)
- **Description:** Add a flag that determines whether to show a progress bar
- **Issue:** n/a
- **Dependencies:** n/a
- **Twitter handle:** n/a

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
parent
6b08a33fa4
commit
d98b830e4b
@ -41,6 +41,7 @@ class BlackboardLoader(WebBaseLoader):
|
|||||||
basic_auth: Optional[Tuple[str, str]] = None,
|
basic_auth: Optional[Tuple[str, str]] = None,
|
||||||
cookies: Optional[dict] = None,
|
cookies: Optional[dict] = None,
|
||||||
continue_on_failure: bool = False,
|
continue_on_failure: bool = False,
|
||||||
|
show_progress: bool = True,
|
||||||
):
|
):
|
||||||
"""Initialize with blackboard course url.
|
"""Initialize with blackboard course url.
|
||||||
|
|
||||||
@ -56,12 +57,15 @@ class BlackboardLoader(WebBaseLoader):
|
|||||||
occurs loading a url, emitting a warning instead of raising an
|
occurs loading a url, emitting a warning instead of raising an
|
||||||
exception. Setting this to True makes the loader more robust, but also
|
exception. Setting this to True makes the loader more robust, but also
|
||||||
may result in missing data. Default: False
|
may result in missing data. Default: False
|
||||||
|
show_progress: whether to show a progress bar while loading. Default: True
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
ValueError: If blackboard course url is invalid.
|
ValueError: If blackboard course url is invalid.
|
||||||
"""
|
"""
|
||||||
super().__init__(
|
super().__init__(
|
||||||
web_paths=(blackboard_course_url), continue_on_failure=continue_on_failure
|
web_paths=(blackboard_course_url),
|
||||||
|
continue_on_failure=continue_on_failure,
|
||||||
|
show_progress=show_progress,
|
||||||
)
|
)
|
||||||
# Get base url
|
# Get base url
|
||||||
try:
|
try:
|
||||||
|
@ -20,6 +20,7 @@ class GitbookLoader(WebBaseLoader):
|
|||||||
base_url: Optional[str] = None,
|
base_url: Optional[str] = None,
|
||||||
content_selector: str = "main",
|
content_selector: str = "main",
|
||||||
continue_on_failure: bool = False,
|
continue_on_failure: bool = False,
|
||||||
|
show_progress: bool = True,
|
||||||
):
|
):
|
||||||
"""Initialize with web page and whether to load all paths.
|
"""Initialize with web page and whether to load all paths.
|
||||||
|
|
||||||
@ -36,6 +37,7 @@ class GitbookLoader(WebBaseLoader):
|
|||||||
occurs loading a url, emitting a warning instead of raising an
|
occurs loading a url, emitting a warning instead of raising an
|
||||||
exception. Setting this to True makes the loader more robust, but also
|
exception. Setting this to True makes the loader more robust, but also
|
||||||
may result in missing data. Default: False
|
may result in missing data. Default: False
|
||||||
|
show_progress: whether to show a progress bar while loading. Default: True
|
||||||
"""
|
"""
|
||||||
self.base_url = base_url or web_page
|
self.base_url = base_url or web_page
|
||||||
if self.base_url.endswith("/"):
|
if self.base_url.endswith("/"):
|
||||||
@ -43,7 +45,11 @@ class GitbookLoader(WebBaseLoader):
|
|||||||
if load_all_paths:
|
if load_all_paths:
|
||||||
# set web_path to the sitemap if we want to crawl all paths
|
# set web_path to the sitemap if we want to crawl all paths
|
||||||
web_page = f"{self.base_url}/sitemap.xml"
|
web_page = f"{self.base_url}/sitemap.xml"
|
||||||
super().__init__(web_paths=(web_page,), continue_on_failure=continue_on_failure)
|
super().__init__(
|
||||||
|
web_paths=(web_page,),
|
||||||
|
continue_on_failure=continue_on_failure,
|
||||||
|
show_progress=show_progress,
|
||||||
|
)
|
||||||
self.load_all_paths = load_all_paths
|
self.load_all_paths = load_all_paths
|
||||||
self.content_selector = content_selector
|
self.content_selector = content_selector
|
||||||
|
|
||||||
|
@ -58,6 +58,8 @@ class WebBaseLoader(BaseLoader):
|
|||||||
bs_get_text_kwargs: Optional[Dict[str, Any]] = None,
|
bs_get_text_kwargs: Optional[Dict[str, Any]] = None,
|
||||||
bs_kwargs: Optional[Dict[str, Any]] = None,
|
bs_kwargs: Optional[Dict[str, Any]] = None,
|
||||||
session: Any = None,
|
session: Any = None,
|
||||||
|
*,
|
||||||
|
show_progress: bool = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize loader.
|
"""Initialize loader.
|
||||||
|
|
||||||
@ -69,6 +71,7 @@ class WebBaseLoader(BaseLoader):
|
|||||||
raise_for_status: Raise an exception if http status code denotes an error.
|
raise_for_status: Raise an exception if http status code denotes an error.
|
||||||
bs_get_text_kwargs: kwargs for beautifulsoup4 get_text
|
bs_get_text_kwargs: kwargs for beautifulsoup4 get_text
|
||||||
bs_kwargs: kwargs for beautifulsoup4 web page parsing
|
bs_kwargs: kwargs for beautifulsoup4 web page parsing
|
||||||
|
show_progress: Show progress bar when loading pages.
|
||||||
"""
|
"""
|
||||||
# web_path kept for backwards-compatibility.
|
# web_path kept for backwards-compatibility.
|
||||||
if web_path and web_paths:
|
if web_path and web_paths:
|
||||||
@ -91,6 +94,7 @@ class WebBaseLoader(BaseLoader):
|
|||||||
self.default_parser = default_parser
|
self.default_parser = default_parser
|
||||||
self.requests_kwargs = requests_kwargs or {}
|
self.requests_kwargs = requests_kwargs or {}
|
||||||
self.raise_for_status = raise_for_status
|
self.raise_for_status = raise_for_status
|
||||||
|
self.show_progress = show_progress
|
||||||
self.bs_get_text_kwargs = bs_get_text_kwargs or {}
|
self.bs_get_text_kwargs = bs_get_text_kwargs or {}
|
||||||
self.bs_kwargs = bs_kwargs or {}
|
self.bs_kwargs = bs_kwargs or {}
|
||||||
if session:
|
if session:
|
||||||
@ -177,11 +181,14 @@ class WebBaseLoader(BaseLoader):
|
|||||||
task = asyncio.ensure_future(self._fetch_with_rate_limit(url, semaphore))
|
task = asyncio.ensure_future(self._fetch_with_rate_limit(url, semaphore))
|
||||||
tasks.append(task)
|
tasks.append(task)
|
||||||
try:
|
try:
|
||||||
|
if self.show_progress:
|
||||||
from tqdm.asyncio import tqdm_asyncio
|
from tqdm.asyncio import tqdm_asyncio
|
||||||
|
|
||||||
return await tqdm_asyncio.gather(
|
return await tqdm_asyncio.gather(
|
||||||
*tasks, desc="Fetching pages", ascii=True, mininterval=1
|
*tasks, desc="Fetching pages", ascii=True, mininterval=1
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
return await asyncio.gather(*tasks)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
warnings.warn("For better logging of progress, `pip install tqdm`")
|
warnings.warn("For better logging of progress, `pip install tqdm`")
|
||||||
return await asyncio.gather(*tasks)
|
return await asyncio.gather(*tasks)
|
||||||
|
Loading…
Reference in New Issue
Block a user