mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-17 23:41:46 +00:00
[docs]: standardize doc loader doc strings (#25325)
This commit is contained in:
@@ -24,29 +24,9 @@ _DEFAULT_URL = "https://api.unstructuredapp.io/general/v0/general"
|
||||
class UnstructuredLoader(BaseLoader):
|
||||
"""Unstructured document loader interface.
|
||||
|
||||
Partition and load files using either the `unstructured-client` sdk and the
|
||||
Unstructured API or locally using the `unstructured` library.
|
||||
|
||||
API:
|
||||
This package is configured to work with the Unstructured API by default.
|
||||
To use the Unstructured API, set
|
||||
`partition_via_api=True` and define `api_key`. If you are running the unstructured
|
||||
API locally, you can change the API URL by defining `url` when you initialize the
|
||||
loader. The hosted Unstructured API requires an API key. See the links below to
|
||||
learn more about our API offerings and get an API key.
|
||||
|
||||
Local:
|
||||
To partition files locally, you must have the `unstructured` package installed.
|
||||
You can install it with `pip install unstructured`.
|
||||
By default the file loader uses the Unstructured `partition` function and will
|
||||
automatically detect the file type.
|
||||
|
||||
In addition to document specific partition parameters, Unstructured has a rich set
|
||||
of "chunking" parameters for post-processing elements into more useful text segments
|
||||
for use cases such as Retrieval Augmented Generation (RAG). You can pass additional
|
||||
Unstructured kwargs to the loader to configure different unstructured settings.
|
||||
|
||||
Setup:
|
||||
Install ``langchain-unstructured`` and set environment variable ``UNSTRUCTURED_API_KEY``.
|
||||
|
||||
.. code-block:: bash
|
||||
pip install -U langchain-unstructured
|
||||
export UNSTRUCTURED_API_KEY="your-api-key"
|
||||
@@ -63,20 +43,46 @@ class UnstructuredLoader(BaseLoader):
|
||||
strategy="fast",
|
||||
)
|
||||
|
||||
Load:
|
||||
Lazy load:
|
||||
.. code-block:: python
|
||||
docs = loader.load()
|
||||
|
||||
docs = []
|
||||
docs_lazy = loader.lazy_load()
|
||||
|
||||
# async variant:
|
||||
# docs_lazy = await loader.alazy_load()
|
||||
|
||||
for doc in docs_lazy:
|
||||
docs.append(doc)
|
||||
print(docs[0].page_content[:100])
|
||||
print(docs[0].metadata)
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
1 2 0 2
|
||||
{'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}
|
||||
|
||||
|
||||
Async load:
|
||||
.. code-block:: python
|
||||
|
||||
docs = await loader.aload()
|
||||
print(docs[0].page_content[:100])
|
||||
print(docs[0].metadata)
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
1 2 0 2
|
||||
{'source': './example_data/layout-parser-paper.pdf', 'coordinates': {'points': ((16.34, 213.36), (16.34, 253.36), (36.34, 253.36), (36.34, 213.36)), 'system': 'PixelSpace', 'layout_width': 612, 'layout_height': 792}, 'file_directory': './example_data', 'filename': 'layout-parser-paper.pdf', 'languages': ['eng'], 'last_modified': '2024-07-25T21:28:58', 'page_number': 1, 'filetype': 'application/pdf', 'category': 'UncategorizedText', 'element_id': 'd3ce55f220dfb75891b4394a18bcb973'}
|
||||
|
||||
|
||||
References
|
||||
----------
|
||||
https://docs.unstructured.io/api-reference/api-services/sdk
|
||||
https://docs.unstructured.io/api-reference/api-services/overview
|
||||
https://docs.unstructured.io/open-source/core-functionality/partitioning
|
||||
https://docs.unstructured.io/open-source/core-functionality/chunking
|
||||
"""
|
||||
""" # noqa: E501
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
Reference in New Issue
Block a user