mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-17 07:26:16 +00:00
fix RecursiveUrlLoader (#8582)
Description: The recursive URL loader does not fully crawl all URLs under the base URL. Maintainer: @baskaryan
This commit is contained in:
@@ -92,9 +92,7 @@ class RecursiveUrlLoader(BaseLoader):
|
|||||||
yield from loaded_link
|
yield from loaded_link
|
||||||
else:
|
else:
|
||||||
yield loaded_link
|
yield loaded_link
|
||||||
# If the link is a directory (w/ children) then visit it
|
yield from self.get_child_links_recursive(link, visited)
|
||||||
if link.endswith("/"):
|
|
||||||
yield from self.get_child_links_recursive(link, visited)
|
|
||||||
|
|
||||||
return visited
|
return visited
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user