From 0b1359801e7a9d56969f336d4fef5f79251d661d Mon Sep 17 00:00:00 2001 From: Dong Shin Date: Wed, 18 Dec 2024 11:18:16 +0900 Subject: [PATCH] community: add trust_env at web_base_loader (#28514) - **Description:** This addresses an issue similar to the one mentioned in https://github.com/langchain-ai/langchain/pull/19499. Specifically, the `WebBaseLoader` used in open-webui does not pick up the proxy configuration from the environment. This PR adds a `trust_env` option (default `False`) that is passed to `aiohttp.ClientSession`, so the loader can honor the `http(s)_proxy` environment variables when it is enabled. --------- Co-authored-by: Chester Curme --- .../langchain_community/document_loaders/web_base.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libs/community/langchain_community/document_loaders/web_base.py b/libs/community/langchain_community/document_loaders/web_base.py index 26bc5504d58..94d46c55ea7 100644 --- a/libs/community/langchain_community/document_loaders/web_base.py +++ b/libs/community/langchain_community/document_loaders/web_base.py @@ -71,6 +71,7 @@ class WebBaseLoader(BaseLoader): # bs_kwargs = None, # session = None, # show_progress = True, + # trust_env = False, ) Lazy load: @@ -128,6 +129,7 @@ class WebBaseLoader(BaseLoader): session: Any = None, *, show_progress: bool = True, + trust_env: bool = False, ) -> None: """Initialize loader. @@ -140,6 +142,8 @@ class WebBaseLoader(BaseLoader): bs_get_text_kwargs: kwargs for beatifulsoup4 get_text bs_kwargs: kwargs for beatifulsoup4 web page parsing show_progress: Show progress bar when loading pages. + trust_env: set to True if using proxy to make web requests, for example + using http(s)_proxy environment variables. Defaults to False. """ # web_path kept for backwards-compatibility. 
if web_path and web_paths: @@ -189,6 +193,7 @@ class WebBaseLoader(BaseLoader): self.continue_on_failure = continue_on_failure self.autoset_encoding = autoset_encoding self.encoding = encoding + self.trust_env = trust_env @property def web_path(self) -> str: @@ -199,7 +204,7 @@ class WebBaseLoader(BaseLoader): async def _fetch( self, url: str, retries: int = 3, cooldown: int = 2, backoff: float = 1.5 ) -> str: - async with aiohttp.ClientSession() as session: + async with aiohttp.ClientSession(trust_env=self.trust_env) as session: for i in range(retries): try: kwargs: Dict = dict(