Harrison/sitemap local (#4704)

Co-authored-by: Lukas Bauer <lukas.bauer@mayflower.de>
This commit is contained in:
Harrison Chase
2023-05-14 22:04:38 -07:00
committed by GitHub
parent 12b4ee1fc7
commit b6e3ac17c4
5 changed files with 127 additions and 4 deletions

View File

@@ -1,3 +1,4 @@
from pathlib import Path
from typing import Any
import pytest
@@ -122,3 +123,12 @@ def test_sitemap_metadata_default() -> None:
assert len(documents) > 1
assert "source" in documents[0].metadata
assert "loc" in documents[0].metadata
def test_local_sitemap() -> None:
    """Check that SitemapLoader can load a sitemap given as a local file path.

    The sitemap path is resolved relative to this test module
    (two directories up, then ``examples/sitemap.xml``). The test passes
    when more than one document is returned and the first document's
    content contains the "🦜🔗" marker.
    """
    base_dir = Path(__file__).parent.parent
    sitemap_file = base_dir / "examples/sitemap.xml"
    docs = SitemapLoader(str(sitemap_file)).load()
    assert len(docs) > 1
    first_doc = docs[0]
    assert "🦜🔗" in first_doc.page_content

View File

@@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Sample sitemap in the sitemaps.org 0.9 schema.
Lists three python.langchain.com URLs; each entry carries a loc,
lastmod, changefreq and priority element.
-->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:xhtml="http://www.w3.org/1999/xhtml">
<url>
<loc>https://python.langchain.com/en/stable/</loc>
<lastmod>2023-05-04T16:15:31.377584+00:00</lastmod>
<changefreq>weekly</changefreq>
<priority>1</priority>
</url>
<url>
<loc>https://python.langchain.com/en/latest/</loc>
<lastmod>2023-05-05T07:52:19.633878+00:00</lastmod>
<changefreq>daily</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://python.langchain.com/en/harrison-docs-refactor-3-24/</loc>
<lastmod>2023-03-27T02:32:55.132916+00:00</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
</urlset>