mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-04 20:46:45 +00:00
[docs]: doc loader changes (#25417)
This commit is contained in:
@@ -13,19 +13,60 @@ class UnstructuredMarkdownLoader(UnstructuredFileLoader):
|
||||
You can pass in additional unstructured kwargs after mode to apply
|
||||
different unstructured settings.
|
||||
|
||||
Examples
|
||||
--------
|
||||
from langchain_community.document_loaders import UnstructuredMarkdownLoader
|
||||
Setup:
|
||||
Install ``langchain-community``.
|
||||
|
||||
loader = UnstructuredMarkdownLoader(
|
||||
"example.md", mode="elements", strategy="fast",
|
||||
)
|
||||
docs = loader.load()
|
||||
.. code-block:: bash
|
||||
|
||||
pip install -U langchain-community
|
||||
|
||||
Instantiate:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_community.document_loaders import UnstructuredMarkdownLoader
|
||||
|
||||
loader = UnstructuredMarkdownLoader(
|
||||
"./example_data/example.md",
|
||||
mode="elements",
|
||||
strategy="fast",
|
||||
)
|
||||
|
||||
Lazy load:
|
||||
.. code-block:: python
|
||||
|
||||
docs = []
|
||||
docs_lazy = loader.lazy_load()
|
||||
|
||||
# async variant:
|
||||
# docs_lazy = loader.alazy_load()  # returns an async iterator: consume with `async for`, not `await`
|
||||
|
||||
for doc in docs_lazy:
|
||||
docs.append(doc)
|
||||
print(docs[0].page_content[:100])
|
||||
print(docs[0].metadata)
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
Sample Markdown Document
|
||||
{'source': './example_data/example.md', 'category_depth': 0, 'last_modified': '2024-08-14T15:04:18', 'languages': ['eng'], 'filetype': 'text/markdown', 'file_directory': './example_data', 'filename': 'example.md', 'category': 'Title', 'element_id': '3d0b313864598e704aa26c728ecb61e5'}
|
||||
|
||||
|
||||
Async load:
|
||||
.. code-block:: python
|
||||
|
||||
docs = await loader.aload()
|
||||
print(docs[0].page_content[:100])
|
||||
print(docs[0].metadata)
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
Sample Markdown Document
|
||||
{'source': './example_data/example.md', 'category_depth': 0, 'last_modified': '2024-08-14T15:04:18', 'languages': ['eng'], 'filetype': 'text/markdown', 'file_directory': './example_data', 'filename': 'example.md', 'category': 'Title', 'element_id': '3d0b313864598e704aa26c728ecb61e5'}
|
||||
|
||||
References
|
||||
----------
|
||||
https://unstructured-io.github.io/unstructured/core/partition.html#partition-md
|
||||
"""
|
||||
""" # noqa: E501
|
||||
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.__version__ import __version__ as __unstructured_version__
|
||||
|
Reference in New Issue
Block a user