community[patch]: expose the headless flag as a parameter of the AsyncChromiumLoader class (#20424)

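- **Description:** Added `headless` as an optional keyword-only argument
(default `True`) to the `AsyncChromiumLoader` class in
`langchain_community.document_loaders`. The value is passed to
`p.chromium.launch()`, which was previously hard-coded to `headless=True`.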
  - **Dependencies:** None
  - **Twitter handle:** @perinim_98

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, hwchase17.

---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
Marco Perini 2024-04-18 01:00:28 +02:00 committed by GitHub
parent 54e9271504
commit 11c9ed3362
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -16,17 +16,21 @@ class AsyncChromiumLoader(BaseLoader):
def __init__(
self,
urls: List[str],
*,
headless: bool = True,
):
"""
Initialize the loader with a list of URL paths.
Args:
urls (List[str]): A list of URLs to scrape content from.
urls: A list of URLs to scrape content from.
headless: Whether to run browser in headless mode.
Raises:
ImportError: If the required 'playwright' package is not installed.
"""
self.urls = urls
self.headless = headless
try:
import playwright # noqa: F401
@ -52,7 +56,7 @@ class AsyncChromiumLoader(BaseLoader):
logger.info("Starting scraping...")
results = ""
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
browser = await p.chromium.launch(headless=self.headless)
try:
page = await browser.new_page()
await page.goto(url)