mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-04 14:13:29 +00:00
Fix: Sitemap Document Loader Tests and Documentation (#11866)
**Description:** While working on the Docusaurus site loader #9138, I noticed some outdated docs and tests for the Sitemap Loader. **Issue:** This is tangentially related to #6691 in reference to doc links. I plan on digging into a few of these issues when I next find time.
This commit is contained in:
parent
8bb8c56f74
commit
e669f9d731
File diff suppressed because one or more lines are too long
@ -11,7 +11,7 @@ def test_sitemap() -> None:
|
||||
loader = SitemapLoader("https://langchain.readthedocs.io/sitemap.xml")
|
||||
documents = loader.load()
|
||||
assert len(documents) > 1
|
||||
assert "🦜🔗" in documents[0].page_content
|
||||
assert "LangChain Python API" in documents[0].page_content
|
||||
|
||||
|
||||
def test_sitemap_block() -> None:
|
||||
@ -21,7 +21,7 @@ def test_sitemap_block() -> None:
|
||||
)
|
||||
documents = loader.load()
|
||||
assert len(documents) == 1
|
||||
assert "🦜🔗" in documents[0].page_content
|
||||
assert "LangChain Python API" in documents[0].page_content
|
||||
|
||||
|
||||
def test_sitemap_block_only_one() -> None:
|
||||
@ -31,7 +31,7 @@ def test_sitemap_block_only_one() -> None:
|
||||
)
|
||||
documents = loader.load()
|
||||
assert len(documents) > 1
|
||||
assert "🦜🔗" in documents[0].page_content
|
||||
assert "LangChain Python API" in documents[0].page_content
|
||||
|
||||
|
||||
def test_sitemap_block_blocknum_default() -> None:
|
||||
@ -41,7 +41,7 @@ def test_sitemap_block_blocknum_default() -> None:
|
||||
)
|
||||
documents = loader.load()
|
||||
assert len(documents) > 1
|
||||
assert "🦜🔗" in documents[0].page_content
|
||||
assert "LangChain Python API" in documents[0].page_content
|
||||
|
||||
|
||||
def test_sitemap_block_size_to_small() -> None:
|
||||
@ -76,11 +76,11 @@ def test_filter_sitemap() -> None:
|
||||
"""Test sitemap loader."""
|
||||
loader = SitemapLoader(
|
||||
"https://langchain.readthedocs.io/sitemap.xml",
|
||||
filter_urls=["https://python.langchain.com/en/stable/"],
|
||||
filter_urls=["https://api.python.langchain.com/en/stable/"],
|
||||
)
|
||||
documents = loader.load()
|
||||
assert len(documents) == 1
|
||||
assert "🦜🔗" in documents[0].page_content
|
||||
assert "LangChain Python API" in documents[0].page_content
|
||||
|
||||
|
||||
def test_sitemap_metadata() -> None:
|
||||
@ -128,7 +128,7 @@ def test_sitemap_metadata_default() -> None:
|
||||
def test_local_sitemap() -> None:
|
||||
"""Test sitemap loader."""
|
||||
file_path = Path(__file__).parent.parent / "examples/sitemap.xml"
|
||||
loader = SitemapLoader(str(file_path))
|
||||
loader = SitemapLoader(str(file_path), is_local=True)
|
||||
documents = loader.load()
|
||||
assert len(documents) > 1
|
||||
assert "🦜🔗" in documents[0].page_content
|
||||
assert "🦜️🔗" in documents[0].page_content
|
||||
|
@ -1,35 +1,35 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
|
||||
xmlns:xhtml="http://www.w3.org/1999/xhtml">
|
||||
|
||||
xmlns:xhtml="http://www.w3.org/1999/xhtml">
|
||||
|
||||
<url>
|
||||
<loc>https://python.langchain.com/en/stable/</loc>
|
||||
|
||||
|
||||
|
||||
|
||||
<lastmod>2023-05-04T16:15:31.377584+00:00</lastmod>
|
||||
|
||||
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>1</priority>
|
||||
</url>
|
||||
|
||||
|
||||
<url>
|
||||
<loc>https://python.langchain.com/en/latest/</loc>
|
||||
|
||||
|
||||
|
||||
|
||||
<lastmod>2023-05-05T07:52:19.633878+00:00</lastmod>
|
||||
|
||||
|
||||
<changefreq>daily</changefreq>
|
||||
<priority>0.9</priority>
|
||||
</url>
|
||||
|
||||
|
||||
<url>
|
||||
<loc>https://python.langchain.com/en/harrison-docs-refactor-3-24/</loc>
|
||||
|
||||
|
||||
|
||||
|
||||
<lastmod>2023-03-27T02:32:55.132916+00:00</lastmod>
|
||||
|
||||
|
||||
<changefreq>monthly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
|
||||
</urlset>
|
||||
|
||||
</urlset>
|
Loading…
Reference in New Issue
Block a user