mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-18 02:33:19 +00:00
fix RecursiveUrlLoader (#8582)
Description: The recursive URL loader does not fully crawl all URLs under the base URL. Maintainer: @baskaryan
This commit is contained in:
parent
f81e613086
commit
b786335dd1
@@ -92,9 +92,7 @@ class RecursiveUrlLoader(BaseLoader):
|
|||||||
yield from loaded_link
|
yield from loaded_link
|
||||||
else:
|
else:
|
||||||
yield loaded_link
|
yield loaded_link
|
||||||
# If the link is a directory (w/ children) then visit it
|
yield from self.get_child_links_recursive(link, visited)
|
||||||
if link.endswith("/"):
|
|
||||||
yield from self.get_child_links_recursive(link, visited)
|
|
||||||
|
|
||||||
return visited
|
return visited
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user