mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-20 22:03:52 +00:00
community: PlaywrightURLLoader should wait for page load event before attempting to extract data (#30043)
## Description

The PlaywrightURLLoader should wait for a page to be loaded before attempting to extract data.
This commit is contained in:
parent
46908ee3da
commit
186cd7f1a1
@ -177,6 +177,8 @@ class PlaywrightURLLoader(BaseLoader):
|
|||||||
if response is None:
|
if response is None:
|
||||||
raise ValueError(f"page.goto() returned None for url {url}")
|
raise ValueError(f"page.goto() returned None for url {url}")
|
||||||
|
|
||||||
|
page.wait_for_load_state("load")
|
||||||
|
|
||||||
text = self.evaluator.evaluate(page, browser, response)
|
text = self.evaluator.evaluate(page, browser, response)
|
||||||
metadata = {"source": url}
|
metadata = {"source": url}
|
||||||
yield Document(page_content=text, metadata=metadata)
|
yield Document(page_content=text, metadata=metadata)
|
||||||
@ -216,6 +218,8 @@ class PlaywrightURLLoader(BaseLoader):
|
|||||||
if response is None:
|
if response is None:
|
||||||
raise ValueError(f"page.goto() returned None for url {url}")
|
raise ValueError(f"page.goto() returned None for url {url}")
|
||||||
|
|
||||||
|
await page.wait_for_load_state("load")
|
||||||
|
|
||||||
text = await self.evaluator.evaluate_async(page, browser, response)
|
text = await self.evaluator.evaluate_async(page, browser, response)
|
||||||
metadata = {"source": url}
|
metadata = {"source": url}
|
||||||
yield Document(page_content=text, metadata=metadata)
|
yield Document(page_content=text, metadata=metadata)
|
||||||
|
Loading…
Reference in New Issue
Block a user