mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-26 16:43:35 +00:00
community: add trust_env at web_base_loader (#28514)
- **Description:** This PR addresses an issue similar to the one described in https://github.com/langchain-ai/langchain/pull/19499: the WebBaseLoader used in open-webui fails to pick up the proxy configuration. It adds a `trust_env` option to WebBaseLoader so that proxy settings from the environment (for example the http(s)_proxy variables) can be honored. <!--If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.--> --------- Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
parent
be738aa7de
commit
0b1359801e
@ -71,6 +71,7 @@ class WebBaseLoader(BaseLoader):
|
||||
# bs_kwargs = None,
|
||||
# session = None,
|
||||
# show_progress = True,
|
||||
# trust_env = False,
|
||||
)
|
||||
|
||||
Lazy load:
|
||||
@ -128,6 +129,7 @@ class WebBaseLoader(BaseLoader):
|
||||
session: Any = None,
|
||||
*,
|
||||
show_progress: bool = True,
|
||||
trust_env: bool = False,
|
||||
) -> None:
|
||||
"""Initialize loader.
|
||||
|
||||
@ -140,6 +142,8 @@ class WebBaseLoader(BaseLoader):
|
||||
bs_get_text_kwargs: kwargs for beautifulsoup4 get_text
|
||||
bs_kwargs: kwargs for beautifulsoup4 web page parsing
|
||||
show_progress: Show progress bar when loading pages.
|
||||
trust_env: set to True if using proxy to make web requests, for example
|
||||
using http(s)_proxy environment variables. Defaults to False.
|
||||
"""
|
||||
# web_path kept for backwards-compatibility.
|
||||
if web_path and web_paths:
|
||||
@ -189,6 +193,7 @@ class WebBaseLoader(BaseLoader):
|
||||
self.continue_on_failure = continue_on_failure
|
||||
self.autoset_encoding = autoset_encoding
|
||||
self.encoding = encoding
|
||||
self.trust_env = trust_env
|
||||
|
||||
@property
|
||||
def web_path(self) -> str:
|
||||
@ -199,7 +204,7 @@ class WebBaseLoader(BaseLoader):
|
||||
async def _fetch(
|
||||
self, url: str, retries: int = 3, cooldown: int = 2, backoff: float = 1.5
|
||||
) -> str:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with aiohttp.ClientSession(trust_env=self.trust_env) as session:
|
||||
for i in range(retries):
|
||||
try:
|
||||
kwargs: Dict = dict(
|
||||
|
Loading…
Reference in New Issue
Block a user