mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-07 21:50:25 +00:00
feat: Allow users to pass additional arguments to the WebDriver (#4121)
This commit adds support for passing additional arguments to the web driver that `SeleniumURLLoader` creates for Chrome or Firefox. Previously, only a few built-in options such as `headless` could be set. With this change, users can pass any additional command-line arguments they need as a list of strings using the `arguments` parameter, which allows the driver to be configured with any option available for that particular browser. For example, users can now pass custom `user_agent` strings or `proxy` settings using this parameter. This change also includes updated documentation and type hints to reflect the new `arguments` parameter and its usage. Fixes #4120.
This commit is contained in:
parent
2a3c5f8353
commit
19e28d8784
@ -22,6 +22,7 @@ class SeleniumURLLoader(BaseLoader):
|
|||||||
browser (str): The browser to use, either 'chrome' or 'firefox'.
|
browser (str): The browser to use, either 'chrome' or 'firefox'.
|
||||||
executable_path (Optional[str]): The path to the browser executable.
|
executable_path (Optional[str]): The path to the browser executable.
|
||||||
headless (bool): If True, the browser will run in headless mode.
|
headless (bool): If True, the browser will run in headless mode.
|
||||||
|
arguments [List[str]]: List of arguments to pass to the browser.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@ -31,6 +32,7 @@ class SeleniumURLLoader(BaseLoader):
|
|||||||
browser: Literal["chrome", "firefox"] = "chrome",
|
browser: Literal["chrome", "firefox"] = "chrome",
|
||||||
executable_path: Optional[str] = None,
|
executable_path: Optional[str] = None,
|
||||||
headless: bool = True,
|
headless: bool = True,
|
||||||
|
arguments: List[str] = [],
|
||||||
):
|
):
|
||||||
"""Load a list of URLs using Selenium and unstructured."""
|
"""Load a list of URLs using Selenium and unstructured."""
|
||||||
try:
|
try:
|
||||||
@ -54,6 +56,7 @@ class SeleniumURLLoader(BaseLoader):
|
|||||||
self.browser = browser
|
self.browser = browser
|
||||||
self.executable_path = executable_path
|
self.executable_path = executable_path
|
||||||
self.headless = headless
|
self.headless = headless
|
||||||
|
self.arguments = arguments
|
||||||
|
|
||||||
def _get_driver(self) -> Union["Chrome", "Firefox"]:
|
def _get_driver(self) -> Union["Chrome", "Firefox"]:
|
||||||
"""Create and return a WebDriver instance based on the specified browser.
|
"""Create and return a WebDriver instance based on the specified browser.
|
||||||
@ -69,6 +72,10 @@ class SeleniumURLLoader(BaseLoader):
|
|||||||
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
||||||
|
|
||||||
chrome_options = ChromeOptions()
|
chrome_options = ChromeOptions()
|
||||||
|
|
||||||
|
for arg in self.arguments:
|
||||||
|
chrome_options.add_argument(arg)
|
||||||
|
|
||||||
if self.headless:
|
if self.headless:
|
||||||
chrome_options.add_argument("--headless")
|
chrome_options.add_argument("--headless")
|
||||||
chrome_options.add_argument("--no-sandbox")
|
chrome_options.add_argument("--no-sandbox")
|
||||||
@ -80,6 +87,10 @@ class SeleniumURLLoader(BaseLoader):
|
|||||||
from selenium.webdriver.firefox.options import Options as FirefoxOptions
|
from selenium.webdriver.firefox.options import Options as FirefoxOptions
|
||||||
|
|
||||||
firefox_options = FirefoxOptions()
|
firefox_options = FirefoxOptions()
|
||||||
|
|
||||||
|
for arg in self.arguments:
|
||||||
|
firefox_options.add_argument(arg)
|
||||||
|
|
||||||
if self.headless:
|
if self.headless:
|
||||||
firefox_options.add_argument("--headless")
|
firefox_options.add_argument("--headless")
|
||||||
if self.executable_path is None:
|
if self.executable_path is None:
|
||||||
|
Loading…
Reference in New Issue
Block a user