mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-27 00:48:45 +00:00
community: add trust_env at web_base_loader (#28514)
- **Description:** This PR addresses an issue similar to the one described in https://github.com/langchain-ai/langchain/pull/19499. Specifically, the `WebBaseLoader` used in open-webui fails to pick up the proxy configuration from the environment. This PR resolves that by adding a `trust_env` option that is passed through to the underlying aiohttp session. <!--If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.--> --------- Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
parent
be738aa7de
commit
0b1359801e
@ -71,6 +71,7 @@ class WebBaseLoader(BaseLoader):
|
|||||||
# bs_kwargs = None,
|
# bs_kwargs = None,
|
||||||
# session = None,
|
# session = None,
|
||||||
# show_progress = True,
|
# show_progress = True,
|
||||||
|
# trust_env = False,
|
||||||
)
|
)
|
||||||
|
|
||||||
Lazy load:
|
Lazy load:
|
||||||
@ -128,6 +129,7 @@ class WebBaseLoader(BaseLoader):
|
|||||||
session: Any = None,
|
session: Any = None,
|
||||||
*,
|
*,
|
||||||
show_progress: bool = True,
|
show_progress: bool = True,
|
||||||
|
trust_env: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize loader.
|
"""Initialize loader.
|
||||||
|
|
||||||
@ -140,6 +142,8 @@ class WebBaseLoader(BaseLoader):
|
|||||||
bs_get_text_kwargs: kwargs for beautifulsoup4 get_text
|
bs_get_text_kwargs: kwargs for beautifulsoup4 get_text
|
||||||
bs_kwargs: kwargs for beautifulsoup4 web page parsing
|
bs_kwargs: kwargs for beautifulsoup4 web page parsing
|
||||||
show_progress: Show progress bar when loading pages.
|
show_progress: Show progress bar when loading pages.
|
||||||
|
trust_env: set to True if using proxy to make web requests, for example
|
||||||
|
using http(s)_proxy environment variables. Defaults to False.
|
||||||
"""
|
"""
|
||||||
# web_path kept for backwards-compatibility.
|
# web_path kept for backwards-compatibility.
|
||||||
if web_path and web_paths:
|
if web_path and web_paths:
|
||||||
@ -189,6 +193,7 @@ class WebBaseLoader(BaseLoader):
|
|||||||
self.continue_on_failure = continue_on_failure
|
self.continue_on_failure = continue_on_failure
|
||||||
self.autoset_encoding = autoset_encoding
|
self.autoset_encoding = autoset_encoding
|
||||||
self.encoding = encoding
|
self.encoding = encoding
|
||||||
|
self.trust_env = trust_env
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def web_path(self) -> str:
|
def web_path(self) -> str:
|
||||||
@ -199,7 +204,7 @@ class WebBaseLoader(BaseLoader):
|
|||||||
async def _fetch(
|
async def _fetch(
|
||||||
self, url: str, retries: int = 3, cooldown: int = 2, backoff: float = 1.5
|
self, url: str, retries: int = 3, cooldown: int = 2, backoff: float = 1.5
|
||||||
) -> str:
|
) -> str:
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession(trust_env=self.trust_env) as session:
|
||||||
for i in range(retries):
|
for i in range(retries):
|
||||||
try:
|
try:
|
||||||
kwargs: Dict = dict(
|
kwargs: Dict = dict(
|
||||||
|
Loading…
Reference in New Issue
Block a user