community: PlaywrightURLLoader should wait for page load event before attempting to extract data (#30043)

## Description

`PlaywrightURLLoader` now waits for the page's `load` event (via
`page.wait_for_load_state("load")`) in both the sync and async loading
paths before attempting to extract data.
This commit is contained in:
Daniel Rauber 2025-02-28 14:45:51 +01:00 committed by GitHub
parent 46908ee3da
commit 186cd7f1a1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -177,6 +177,8 @@ class PlaywrightURLLoader(BaseLoader):
if response is None:
raise ValueError(f"page.goto() returned None for url {url}")
page.wait_for_load_state("load")
text = self.evaluator.evaluate(page, browser, response)
metadata = {"source": url}
yield Document(page_content=text, metadata=metadata)
@@ -216,6 +218,8 @@ class PlaywrightURLLoader(BaseLoader):
if response is None:
raise ValueError(f"page.goto() returned None for url {url}")
await page.wait_for_load_state("load")
text = await self.evaluator.evaluate_async(page, browser, response)
metadata = {"source": url}
yield Document(page_content=text, metadata=metadata)