mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-25 13:07:58 +00:00
Added matching async load func to PlaywrightURLLoader (#5938)
Fixes # (issue) The existing PlaywrightURLLoader load() function uses a synchronous browser, which is not compatible with Jupyter. This PR adds a sister function, aload(), which can be run inside a notebook. --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
"""Tests for the Playwright URL loader"""
|
||||
import pytest
|
||||
|
||||
from langchain.document_loaders import PlaywrightURLLoader
|
||||
|
||||
@@ -19,3 +20,22 @@ def test_playwright_url_loader() -> None:
|
||||
)
|
||||
docs = loader.load()
|
||||
assert len(docs) > 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_playwright_async_url_loader() -> None:
    """Check that ``PlaywrightURLLoader.aload()`` returns at least one document.

    The loader is built for four public URLs with the ``header`` and
    ``footer`` selectors removed, ``continue_on_failure`` disabled and
    ``headless`` enabled; the coroutine is then awaited and the result
    must be non-empty.
    """
    # NOTE(review): these are live sites, so this presumably needs network
    # access and an installed Playwright browser -- confirm for CI.
    playwright_loader = PlaywrightURLLoader(
        urls=[
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
            "https://goo.gl/maps/NDSHwePEyaHMFGwh8",
            "https://techmeme.com",
            "https://techcrunch.com",
        ],
        remove_selectors=["header", "footer"],
        continue_on_failure=False,
        headless=True,
    )

    # Exercise the async entry point rather than the synchronous load().
    documents = await playwright_loader.aload()

    assert len(documents) > 0
|
||||
|
Reference in New Issue
Block a user