From 637c61cffbd279dc2431f9e224cfccec9c81f6cd Mon Sep 17 00:00:00 2001 From: Arjun Aravindan Date: Mon, 8 May 2023 11:05:55 -0400 Subject: [PATCH] Add support for passing binary_location to the SeleniumURLLoader when creating Chrome or Firefox web drivers (#4305) This commit adds support for passing binary_location to the SeleniumURLLoader when creating Chrome or Firefox web drivers. This allows users to specify the browser binary location, which is required when deploying to services such as Heroku. This change also includes updated documentation and type hints to reflect the new binary_location parameter and its usage. fixes #4304 --- langchain/document_loaders/url_selenium.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/langchain/document_loaders/url_selenium.py b/langchain/document_loaders/url_selenium.py index 2aed3dce0b4..9a315ccc740 100644 --- a/langchain/document_loaders/url_selenium.py +++ b/langchain/document_loaders/url_selenium.py @@ -20,6 +20,7 @@ class SeleniumURLLoader(BaseLoader): urls (List[str]): List of URLs to load. continue_on_failure (bool): If True, continue loading other URLs on failure. browser (str): The browser to use, either 'chrome' or 'firefox'. + binary_location (Optional[str]): The location of the browser binary. executable_path (Optional[str]): The path to the browser executable. headless (bool): If True, the browser will run in headless mode. arguments [List[str]]: List of arguments to pass to the browser. 
@@ -30,6 +31,7 @@ class SeleniumURLLoader(BaseLoader): urls: List[str], continue_on_failure: bool = True, browser: Literal["chrome", "firefox"] = "chrome", + binary_location: Optional[str] = None, executable_path: Optional[str] = None, headless: bool = True, arguments: List[str] = [], @@ -54,6 +56,7 @@ class SeleniumURLLoader(BaseLoader): self.urls = urls self.continue_on_failure = continue_on_failure self.browser = browser + self.binary_location = binary_location self.executable_path = executable_path self.headless = headless self.arguments = arguments @@ -79,6 +82,8 @@ class SeleniumURLLoader(BaseLoader): if self.headless: chrome_options.add_argument("--headless") chrome_options.add_argument("--no-sandbox") + if self.binary_location is not None: + chrome_options.binary_location = self.binary_location if self.executable_path is None: return Chrome(options=chrome_options) return Chrome(executable_path=self.executable_path, options=chrome_options) @@ -93,6 +98,8 @@ class SeleniumURLLoader(BaseLoader): if self.headless: firefox_options.add_argument("--headless") + if self.binary_location is not None: + firefox_options.binary_location = self.binary_location if self.executable_path is None: return Firefox(options=firefox_options) return Firefox(