mirror of
https://github.com/hwchase17/langchain.git
synced 2026-01-30 22:00:29 +00:00
wip
This commit is contained in:
@@ -199,7 +199,8 @@ class RecursiveUrlLoader(BaseLoader):
|
||||
if depth + 1 < self.max_depth:
|
||||
for link in self._extract_sub_links(text, url):
|
||||
if link not in visited:
|
||||
yield from self._lazy_load_recursive(link, visited, depth=depth + 1)
|
||||
for doc in self._lazy_load_recursive(link, visited, depth=depth + 1):
|
||||
yield doc
|
||||
if link not in visited:
|
||||
raise ValueError
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ def test_async_recursive_url_loader_deterministic() -> None:
|
||||
|
||||
|
||||
def test_sync_recursive_url_loader() -> None:
|
||||
url = "https://docs.python.org/3.9/"
|
||||
url = "https://python.langchain.com/"
|
||||
loader = RecursiveUrlLoader(
|
||||
url,
|
||||
extractor=lambda _: "placeholder",
|
||||
|
||||
@@ -121,4 +121,4 @@ def extract_sub_links(
|
||||
continue
|
||||
|
||||
results.append(path)
|
||||
return results
|
||||
return sorted(results)
|
||||
|
||||
Reference in New Issue
Block a user