From 7ecdac5240785e311bf8a394fbb956cd2d139390 Mon Sep 17 00:00:00 2001 From: Nicky Parseghian Date: Wed, 23 Apr 2025 17:18:42 -0500 Subject: [PATCH] community: Strip URLs from sitemap. (#30830) Fixes #30829 - **Description:** Strips surrounding whitespace from the text of each sitemap entry field (`loc`, `lastmod`, `changefreq`, `priority`) when building the element. - **Issue:** Fixes #30829 --- libs/community/langchain_community/document_loaders/sitemap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/community/langchain_community/document_loaders/sitemap.py b/libs/community/langchain_community/document_loaders/sitemap.py index 757a3b182c1..50ecddac601 100644 --- a/libs/community/langchain_community/document_loaders/sitemap.py +++ b/libs/community/langchain_community/document_loaders/sitemap.py @@ -185,7 +185,7 @@ class SitemapLoader(WebBaseLoader): els.append( { - tag: prop.text + tag: prop.text.strip() for tag in ["loc", "lastmod", "changefreq", "priority"] if (prop := url.find(tag)) }