From 11c9ed3362f5f117d889bfc140cd83ee157f3283 Mon Sep 17 00:00:00 2001
From: Marco Perini
Date: Thu, 18 Apr 2024 01:00:28 +0200
Subject: [PATCH] community[patch]: exposing headless flag parameter to AsyncChromiumLoader class (#20424)

- **Description:** added the headless parameter as an optional argument to the langchain_community.document_loaders AsyncChromiumLoader class
- **Dependencies:** None
- **Twitter handle:** @perinim_98

If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, hwchase17.

---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Bagatur
---
 .../langchain_community/document_loaders/chromium.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/chromium.py b/libs/community/langchain_community/document_loaders/chromium.py
index 668466a7ff5..8c71e895965 100644
--- a/libs/community/langchain_community/document_loaders/chromium.py
+++ b/libs/community/langchain_community/document_loaders/chromium.py
@@ -16,17 +16,21 @@ class AsyncChromiumLoader(BaseLoader):
     def __init__(
         self,
         urls: List[str],
+        *,
+        headless: bool = True,
     ):
         """
         Initialize the loader with a list of URL paths.
 
         Args:
-            urls (List[str]): A list of URLs to scrape content from.
+            urls: A list of URLs to scrape content from.
+            headless: Whether to run browser in headless mode.
 
         Raises:
             ImportError: If the required 'playwright' package is not installed.
         """
         self.urls = urls
+        self.headless = headless
 
         try:
             import playwright  # noqa: F401
@@ -52,7 +56,7 @@ class AsyncChromiumLoader(BaseLoader):
         logger.info("Starting scraping...")
         results = ""
         async with async_playwright() as p:
-            browser = await p.chromium.launch(headless=True)
+            browser = await p.chromium.launch(headless=self.headless)
             try:
                 page = await browser.new_page()
                 await page.goto(url)