From 186cd7f1a1b771bc9999cb2bd5bcc7d72066ae64 Mon Sep 17 00:00:00 2001 From: Daniel Rauber Date: Fri, 28 Feb 2025 14:45:51 +0100 Subject: [PATCH] community: PlaywrightURLLoader should wait for page load event before attempting to extract data (#30043) ## Description The PlaywrightURLLoader should wait for a page to be loaded before attempting to extract data. --- .../langchain_community/document_loaders/url_playwright.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libs/community/langchain_community/document_loaders/url_playwright.py b/libs/community/langchain_community/document_loaders/url_playwright.py index 3f350615401..8e06ce4be0e 100644 --- a/libs/community/langchain_community/document_loaders/url_playwright.py +++ b/libs/community/langchain_community/document_loaders/url_playwright.py @@ -177,6 +177,8 @@ class PlaywrightURLLoader(BaseLoader): if response is None: raise ValueError(f"page.goto() returned None for url {url}") + page.wait_for_load_state("load") + text = self.evaluator.evaluate(page, browser, response) metadata = {"source": url} yield Document(page_content=text, metadata=metadata) @@ -216,6 +218,8 @@ class PlaywrightURLLoader(BaseLoader): if response is None: raise ValueError(f"page.goto() returned None for url {url}") + await page.wait_for_load_state("load") + text = await self.evaluator.evaluate_async(page, browser, response) metadata = {"source": url} yield Document(page_content=text, metadata=metadata)