mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-16 09:48:04 +00:00
fix RecursiveUrlLoader (#8582)
Description: The recursive URL loader does not fully crawl all URLs under the base URL. Maintainer: @baskaryan
This commit is contained in:
parent
f81e613086
commit
b786335dd1
@@ -92,9 +92,7 @@ class RecursiveUrlLoader(BaseLoader):
|
||||
yield from loaded_link
|
||||
else:
|
||||
yield loaded_link
|
||||
# If the link is a directory (w/ children) then visit it
|
||||
if link.endswith("/"):
|
||||
yield from self.get_child_links_recursive(link, visited)
|
||||
yield from self.get_child_links_recursive(link, visited)
|
||||
|
||||
return visited
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user