mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-04 02:33:05 +00:00
community[patch]: exposing headless flag parameter to AsyncChromiumLoader class (#20424)
- **Description:** Added `headless` as an optional keyword-only argument (default `True`) to the `AsyncChromiumLoader` class in `langchain_community.document_loaders`. The value is stored on the instance and passed to `p.chromium.launch(...)` in place of the previously hard-coded `headless=True`. - **Dependencies:** None - **Twitter handle:** @perinim_98 If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, hwchase17. --------- Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
54e9271504
commit
11c9ed3362
@ -16,17 +16,21 @@ class AsyncChromiumLoader(BaseLoader):
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
urls: List[str],
|
urls: List[str],
|
||||||
|
*,
|
||||||
|
headless: bool = True,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initialize the loader with a list of URL paths.
|
Initialize the loader with a list of URL paths.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
urls (List[str]): A list of URLs to scrape content from.
|
urls: A list of URLs to scrape content from.
|
||||||
|
headless: Whether to run browser in headless mode.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
ImportError: If the required 'playwright' package is not installed.
|
ImportError: If the required 'playwright' package is not installed.
|
||||||
"""
|
"""
|
||||||
self.urls = urls
|
self.urls = urls
|
||||||
|
self.headless = headless
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import playwright # noqa: F401
|
import playwright # noqa: F401
|
||||||
@ -52,7 +56,7 @@ class AsyncChromiumLoader(BaseLoader):
|
|||||||
logger.info("Starting scraping...")
|
logger.info("Starting scraping...")
|
||||||
results = ""
|
results = ""
|
||||||
async with async_playwright() as p:
|
async with async_playwright() as p:
|
||||||
browser = await p.chromium.launch(headless=True)
|
browser = await p.chromium.launch(headless=self.headless)
|
||||||
try:
|
try:
|
||||||
page = await browser.new_page()
|
page = await browser.new_page()
|
||||||
await page.goto(url)
|
await page.goto(url)
|
||||||
|
Loading…
Reference in New Issue
Block a user