mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-22 19:09:57 +00:00
community: PlaywrightURLLoader should wait for page load event before attempting to extract data (#30043)
## Description

The PlaywrightURLLoader should wait for the page's `load` event before attempting to extract data.
This commit is contained in:
@@ -177,6 +177,8 @@ class PlaywrightURLLoader(BaseLoader):
|
|||||||
if response is None:
|
if response is None:
|
||||||
raise ValueError(f"page.goto() returned None for url {url}")
|
raise ValueError(f"page.goto() returned None for url {url}")
|
||||||
|
|
||||||
|
page.wait_for_load_state("load")
|
||||||
|
|
||||||
text = self.evaluator.evaluate(page, browser, response)
|
text = self.evaluator.evaluate(page, browser, response)
|
||||||
metadata = {"source": url}
|
metadata = {"source": url}
|
||||||
yield Document(page_content=text, metadata=metadata)
|
yield Document(page_content=text, metadata=metadata)
|
||||||
@@ -216,6 +218,8 @@ class PlaywrightURLLoader(BaseLoader):
|
|||||||
if response is None:
|
if response is None:
|
||||||
raise ValueError(f"page.goto() returned None for url {url}")
|
raise ValueError(f"page.goto() returned None for url {url}")
|
||||||
|
|
||||||
|
await page.wait_for_load_state("load")
|
||||||
|
|
||||||
text = await self.evaluator.evaluate_async(page, browser, response)
|
text = await self.evaluator.evaluate_async(page, browser, response)
|
||||||
metadata = {"source": url}
|
metadata = {"source": url}
|
||||||
yield Document(page_content=text, metadata=metadata)
|
yield Document(page_content=text, metadata=metadata)
|
||||||
|
Reference in New Issue
Block a user