mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-13 21:47:12 +00:00
docs, cli[patch]: document loaders doc template (#22862)
From: https://github.com/langchain-ai/langchain/pull/22290 --------- Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
This commit is contained in:
@@ -110,14 +110,17 @@ class RecursiveUrlLoader(BaseLoader):
|
||||
# ...
|
||||
)
|
||||
|
||||
Load:
|
||||
Use ``.load()`` to synchronously load into memory all Documents, with one
|
||||
Document per visited URL. Starting from the initial URL, we recurse through
|
||||
all linked URLs up to the specified max_depth.
|
||||
|
||||
Lazy load:
|
||||
.. code-block:: python
|
||||
|
||||
docs = loader.load()
|
||||
docs = []
|
||||
docs_lazy = loader.lazy_load()
|
||||
|
||||
# async variant:
|
||||
# docs_lazy = await loader.alazy_load()
|
||||
|
||||
for doc in docs_lazy:
|
||||
docs.append(doc)
|
||||
print(docs[0].page_content[:100])
|
||||
print(docs[0].metadata)
|
||||
|
||||
@@ -146,29 +149,6 @@ class RecursiveUrlLoader(BaseLoader):
|
||||
<meta charset="utf-8" /><
|
||||
{'source': 'https://docs.python.org/3.9/', 'content_type': 'text/html', 'title': '3.9.19 Documentation', 'language': None}
|
||||
|
||||
Lazy load:
|
||||
.. code-block:: python
|
||||
|
||||
docs = []
|
||||
docs_lazy = loader.lazy_load()
|
||||
|
||||
# async variant:
|
||||
# docs_lazy = await loader.alazy_load()
|
||||
|
||||
for doc in docs_lazy:
|
||||
docs.append(doc)
|
||||
print(docs[0].page_content[:100])
|
||||
print(docs[0].metadata)
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
<!DOCTYPE html>
|
||||
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta charset="utf-8" /><
|
||||
{'source': 'https://docs.python.org/3.9/', 'content_type': 'text/html', 'title': '3.9.19 Documentation', 'language': None}
|
||||
|
||||
Content parsing / extraction:
|
||||
By default the loader sets the raw HTML from each link as the Document page
|
||||
content. To parse this HTML into a more human/LLM-friendly format you can pass
|
||||
|
Reference in New Issue
Block a user