mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-07 13:40:46 +00:00
feat: Allow users to pass additional arguments to the WebDriver (#4121)
This commit adds support for passing additional arguments to the `SeleniumURLLoader` when creating Chrome or Firefox web drivers. Previously, only a few arguments such as `headless` could be passed in. With this change, users can pass any additional arguments they need as a list of strings using the `arguments` parameter. The `arguments` parameter allows users to configure the driver with any options that are available for that particular browser. For example, users can now pass custom `user_agent` strings or `proxy` settings using this parameter. This change also includes updated documentation and type hints to reflect the new `arguments` parameter and its usage. Fixes #4120.
This commit is contained in:
parent
2a3c5f8353
commit
19e28d8784
@ -22,6 +22,7 @@ class SeleniumURLLoader(BaseLoader):
|
||||
browser (str): The browser to use, either 'chrome' or 'firefox'.
|
||||
executable_path (Optional[str]): The path to the browser executable.
|
||||
headless (bool): If True, the browser will run in headless mode.
|
||||
arguments (List[str]): List of arguments to pass to the browser.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@ -31,6 +32,7 @@ class SeleniumURLLoader(BaseLoader):
|
||||
browser: Literal["chrome", "firefox"] = "chrome",
|
||||
executable_path: Optional[str] = None,
|
||||
headless: bool = True,
|
||||
arguments: List[str] = [],
|
||||
):
|
||||
"""Load a list of URLs using Selenium and unstructured."""
|
||||
try:
|
||||
@ -54,6 +56,7 @@ class SeleniumURLLoader(BaseLoader):
|
||||
self.browser = browser
|
||||
self.executable_path = executable_path
|
||||
self.headless = headless
|
||||
self.arguments = arguments
|
||||
|
||||
def _get_driver(self) -> Union["Chrome", "Firefox"]:
|
||||
"""Create and return a WebDriver instance based on the specified browser.
|
||||
@ -69,6 +72,10 @@ class SeleniumURLLoader(BaseLoader):
|
||||
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
||||
|
||||
chrome_options = ChromeOptions()
|
||||
|
||||
for arg in self.arguments:
|
||||
chrome_options.add_argument(arg)
|
||||
|
||||
if self.headless:
|
||||
chrome_options.add_argument("--headless")
|
||||
chrome_options.add_argument("--no-sandbox")
|
||||
@ -80,6 +87,10 @@ class SeleniumURLLoader(BaseLoader):
|
||||
from selenium.webdriver.firefox.options import Options as FirefoxOptions
|
||||
|
||||
firefox_options = FirefoxOptions()
|
||||
|
||||
for arg in self.arguments:
|
||||
firefox_options.add_argument(arg)
|
||||
|
||||
if self.headless:
|
||||
firefox_options.add_argument("--headless")
|
||||
if self.executable_path is None:
|
||||
|
Loading…
Reference in New Issue
Block a user