mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-20 13:54:48 +00:00
community: PlaywrightURLLoader should wait for page load event before attempting to extract data (#30043)
## Description

The `PlaywrightURLLoader` should wait for the page to finish loading (the `load` event) before attempting to extract data from it.
This commit is contained in:
parent
46908ee3da
commit
186cd7f1a1
@@ -177,6 +177,8 @@ class PlaywrightURLLoader(BaseLoader):
|
||||
if response is None:
|
||||
raise ValueError(f"page.goto() returned None for url {url}")
|
||||
|
||||
page.wait_for_load_state("load")
|
||||
|
||||
text = self.evaluator.evaluate(page, browser, response)
|
||||
metadata = {"source": url}
|
||||
yield Document(page_content=text, metadata=metadata)
|
||||
@@ -216,6 +218,8 @@ class PlaywrightURLLoader(BaseLoader):
|
||||
if response is None:
|
||||
raise ValueError(f"page.goto() returned None for url {url}")
|
||||
|
||||
await page.wait_for_load_state("load")
|
||||
|
||||
text = await self.evaluator.evaluate_async(page, browser, response)
|
||||
metadata = {"source": url}
|
||||
yield Document(page_content=text, metadata=metadata)
|
||||
|
Loading…
Reference in New Issue
Block a user