mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-25 16:13:25 +00:00
docs: Updated docs for sitemap loader to use correct URL (#17395)
- **Description:** Updated URL for sitemap loader from "https://langchain.readthedocs.io/sitemap.xml" to "https://api.python.langchain.com/sitemap.xml" - **Issue:** Fixes #17236
This commit is contained in:
parent
bd0ad6637a
commit
e1bc623f8f
@ -13,27 +13,16 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Requirement already satisfied: nest_asyncio in /Users/tasp/Code/projects/langchain/.venv/lib/python3.10/site-packages (1.5.6)\n",
|
|
||||||
"\n",
|
|
||||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\n",
|
|
||||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"%pip install --upgrade --quiet nest_asyncio"
|
"%pip install --upgrade --quiet nest_asyncio"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 13,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -54,11 +43,11 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 21,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"sitemap_loader = SitemapLoader(web_path=\"https://langchain.readthedocs.io/sitemap.xml\")\n",
|
"sitemap_loader = SitemapLoader(web_path=\"https://api.python.langchain.com/sitemap.xml\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"docs = sitemap_loader.load()"
|
"docs = sitemap_loader.load()"
|
||||||
]
|
]
|
||||||
@ -90,7 +79,7 @@
|
|||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"Document(page_content='\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nLangChain Python API Reference Documentation.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nYou will be automatically redirected to the new location of this page.\\n\\n', metadata={'source': 'https://api.python.langchain.com/en/stable/', 'loc': 'https://api.python.langchain.com/en/stable/', 'lastmod': '2023-10-13T18:13:26.966937+00:00', 'changefreq': 'weekly', 'priority': '1'})"
|
"Document(page_content='\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nLangChain Python API Reference Documentation.\\n\\n\\nYou will be automatically redirected to the new location of this page.\\n\\n', metadata={'source': 'https://api.python.langchain.com/en/stable/', 'loc': 'https://api.python.langchain.com/en/stable/', 'lastmod': '2024-02-09T01:10:49.422114+00:00', 'changefreq': 'weekly', 'priority': '1'})"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 6,
|
"execution_count": 6,
|
||||||
@ -113,20 +102,12 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 27,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Fetching pages: 100%|##########| 1/1 [00:00<00:00, 16.39it/s]\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"loader = SitemapLoader(\n",
|
"loader = SitemapLoader(\n",
|
||||||
" web_path=\"https://langchain.readthedocs.io/sitemap.xml\",\n",
|
" web_path=\" https://api.python.langchain.com/sitemap.xml\",\n",
|
||||||
" filter_urls=[\"https://api.python.langchain.com/en/latest\"],\n",
|
" filter_urls=[\"https://api.python.langchain.com/en/latest\"],\n",
|
||||||
")\n",
|
")\n",
|
||||||
"documents = loader.load()"
|
"documents = loader.load()"
|
||||||
@ -134,7 +115,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 28,
|
"execution_count": 8,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"scrolled": true
|
"scrolled": true
|
||||||
},
|
},
|
||||||
@ -142,10 +123,10 @@
|
|||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"Document(page_content='\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nLangChain Python API Reference Documentation.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nYou will be automatically redirected to the new location of this page.\\n\\n', metadata={'source': 'https://api.python.langchain.com/en/latest/', 'loc': 'https://api.python.langchain.com/en/latest/', 'lastmod': '2023-10-13T18:09:58.478681+00:00', 'changefreq': 'daily', 'priority': '0.9'})"
|
"Document(page_content='\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nLangChain Python API Reference Documentation.\\n\\n\\nYou will be automatically redirected to the new location of this page.\\n\\n', metadata={'source': 'https://api.python.langchain.com/en/latest/', 'loc': 'https://api.python.langchain.com/en/latest/', 'lastmod': '2024-02-12T05:26:10.971077+00:00', 'changefreq': 'daily', 'priority': '0.9'})"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 28,
|
"execution_count": 8,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -183,7 +164,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 30,
|
"execution_count": 10,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -211,12 +192,12 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 31,
|
"execution_count": 11,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"loader = SitemapLoader(\n",
|
"loader = SitemapLoader(\n",
|
||||||
" \"https://langchain.readthedocs.io/sitemap.xml\",\n",
|
" \"https://api.python.langchain.com/sitemap.xml\",\n",
|
||||||
" filter_urls=[\"https://api.python.langchain.com/en/latest/\"],\n",
|
" filter_urls=[\"https://api.python.langchain.com/en/latest/\"],\n",
|
||||||
" parsing_function=remove_nav_and_header_elements,\n",
|
" parsing_function=remove_nav_and_header_elements,\n",
|
||||||
")"
|
")"
|
||||||
@ -233,17 +214,9 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 32,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Fetching pages: 100%|##########| 3/3 [00:00<00:00, 12.46it/s]\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"sitemap_loader = SitemapLoader(web_path=\"example_data/sitemap.xml\", is_local=True)\n",
|
"sitemap_loader = SitemapLoader(web_path=\"example_data/sitemap.xml\", is_local=True)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
Loading…
Reference in New Issue
Block a user