mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-02 01:23:07 +00:00
community[patch]: exposing headless flag parameter to AsyncChromiumLoader class (#20424)
- **Description:** Added `headless` as an optional keyword-only argument (default `True`) to the `AsyncChromiumLoader` class in `langchain_community.document_loaders`, so callers can choose whether the Chromium browser is launched in headless mode.
- **Dependencies:** None
- **Twitter handle:** @perinim_98

If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, hwchase17.

---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
54e9271504
commit
11c9ed3362
@ -16,17 +16,21 @@ class AsyncChromiumLoader(BaseLoader):
|
||||
def __init__(
|
||||
self,
|
||||
urls: List[str],
|
||||
*,
|
||||
headless: bool = True,
|
||||
):
|
||||
"""
|
||||
Initialize the loader with a list of URL paths.
|
||||
|
||||
Args:
|
||||
urls (List[str]): A list of URLs to scrape content from.
|
||||
urls: A list of URLs to scrape content from.
|
||||
headless: Whether to run browser in headless mode.
|
||||
|
||||
Raises:
|
||||
ImportError: If the required 'playwright' package is not installed.
|
||||
"""
|
||||
self.urls = urls
|
||||
self.headless = headless
|
||||
|
||||
try:
|
||||
import playwright # noqa: F401
|
||||
@ -52,7 +56,7 @@ class AsyncChromiumLoader(BaseLoader):
|
||||
logger.info("Starting scraping...")
|
||||
results = ""
|
||||
async with async_playwright() as p:
|
||||
browser = await p.chromium.launch(headless=True)
|
||||
browser = await p.chromium.launch(headless=self.headless)
|
||||
try:
|
||||
page = await browser.new_page()
|
||||
await page.goto(url)
|
||||
|
Loading…
Reference in New Issue
Block a user