mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-22 23:00:00 +00:00
docs: update docstrings for base unstructured loaders (#7222)
### Summary Updates the docstrings for the unstructured base loaders so more useful information appears on the integrations page. If these look good, will add similar docstrings to the other loaders. ### Reviewers - @rlancemartin - @eyurtsev - @hwchase17
This commit is contained in:
parent
265f05b10e
commit
d65b1951bd
@ -115,7 +115,30 @@ class UnstructuredBaseLoader(BaseLoader, ABC):
|
|||||||
|
|
||||||
|
|
||||||
class UnstructuredFileLoader(UnstructuredBaseLoader):
|
class UnstructuredFileLoader(UnstructuredBaseLoader):
|
||||||
"""Loader that uses unstructured to load files."""
|
"""UnstructuredFileLoader uses unstructured to load files. The file loader uses the
|
||||||
|
unstructured partition function and will automatically detect the file
|
||||||
|
type. You can run the loader in one of two modes: "single" and "elements".
|
||||||
|
If you use "single" mode, the document will be returned as a single
|
||||||
|
langchain Document object. If you use "elements" mode, the unstructured
|
||||||
|
library will split the document into elements such as Title and NarrativeText.
|
||||||
|
You can pass in additional unstructured kwargs after mode to apply
|
||||||
|
different unstructured settings.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
```python
|
||||||
|
from langchain.document_loaders import UnstructuredFileLoader
|
||||||
|
|
||||||
|
loader = UnstructuredFileLoader(
|
||||||
|
"example.pdf", mode="elements", strategy="fast",
|
||||||
|
)
|
||||||
|
docs = loader.load()
|
||||||
|
```
|
||||||
|
|
||||||
|
References
|
||||||
|
----------
|
||||||
|
https://unstructured-io.github.io/unstructured/bricks.html#partition
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@ -173,7 +196,37 @@ def get_elements_from_api(
|
|||||||
|
|
||||||
|
|
||||||
class UnstructuredAPIFileLoader(UnstructuredFileLoader):
|
class UnstructuredAPIFileLoader(UnstructuredFileLoader):
|
||||||
"""Loader that uses the unstructured web API to load files."""
|
"""UnstructuredAPIFileLoader uses the Unstructured API to load files.
|
||||||
|
By default, the loader makes a call to the hosted Unstructured API.
|
||||||
|
If you are running the unstructured API locally, you can change the
|
||||||
|
API URL by passing in the url parameter when you initialize the loader.
|
||||||
|
The hosted Unstructured API requires an API key. See
|
||||||
|
https://www.unstructured.io/api-key/ if you need to generate a key.
|
||||||
|
|
||||||
|
You can run the loader in one of two modes: "single" and "elements".
|
||||||
|
If you use "single" mode, the document will be returned as a single
|
||||||
|
langchain Document object. If you use "elements" mode, the unstructured
|
||||||
|
library will split the document into elements such as Title and NarrativeText.
|
||||||
|
You can pass in additional unstructured kwargs after mode to apply
|
||||||
|
different unstructured settings.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
```python
|
||||||
|
from langchain.document_loaders import UnstructuredAPIFileLoader
|
||||||
|
|
||||||
|
loader = UnstructuredAPIFileLoader(
|
||||||
|
"example.pdf", mode="elements", strategy="fast", api_key="MY_API_KEY",
|
||||||
|
)
|
||||||
|
docs = loader.load()
|
||||||
|
```
|
||||||
|
|
||||||
|
References
|
||||||
|
----------
|
||||||
|
https://unstructured-io.github.io/unstructured/bricks.html#partition
|
||||||
|
https://www.unstructured.io/api-key/
|
||||||
|
https://github.com/Unstructured-IO/unstructured-api
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@ -208,7 +261,32 @@ class UnstructuredAPIFileLoader(UnstructuredFileLoader):
|
|||||||
|
|
||||||
|
|
||||||
class UnstructuredFileIOLoader(UnstructuredBaseLoader):
|
class UnstructuredFileIOLoader(UnstructuredBaseLoader):
|
||||||
"""Loader that uses unstructured to load file IO objects."""
|
"""UnstructuredFileIOLoader uses unstructured to load files. The file loader
|
||||||
|
uses the unstructured partition function and will automatically detect the file
|
||||||
|
type. You can run the loader in one of two modes: "single" and "elements".
|
||||||
|
If you use "single" mode, the document will be returned as a single
|
||||||
|
langchain Document object. If you use "elements" mode, the unstructured
|
||||||
|
library will split the document into elements such as Title and NarrativeText.
|
||||||
|
You can pass in additional unstructured kwargs after mode to apply
|
||||||
|
different unstructured settings.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
```python
|
||||||
|
from langchain.document_loaders import UnstructuredFileIOLoader
|
||||||
|
|
||||||
|
with open("example.pdf", "rb") as f:
|
||||||
|
loader = UnstructuredFileIOLoader(
|
||||||
|
f, mode="elements", strategy="fast",
|
||||||
|
)
|
||||||
|
docs = loader.load()
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
References
|
||||||
|
----------
|
||||||
|
https://unstructured-io.github.io/unstructured/bricks.html#partition
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
@ -230,7 +308,38 @@ class UnstructuredFileIOLoader(UnstructuredBaseLoader):
|
|||||||
|
|
||||||
|
|
||||||
class UnstructuredAPIFileIOLoader(UnstructuredFileIOLoader):
|
class UnstructuredAPIFileIOLoader(UnstructuredFileIOLoader):
|
||||||
"""Loader that uses the unstructured web API to load file IO objects."""
|
"""UnstructuredAPIFileIOLoader uses the Unstructured API to load files.
|
||||||
|
By default, the loader makes a call to the hosted Unstructured API.
|
||||||
|
If you are running the unstructured API locally, you can change the
|
||||||
|
API URL by passing in the url parameter when you initialize the loader.
|
||||||
|
The hosted Unstructured API requires an API key. See
|
||||||
|
https://www.unstructured.io/api-key/ if you need to generate a key.
|
||||||
|
|
||||||
|
You can run the loader in one of two modes: "single" and "elements".
|
||||||
|
If you use "single" mode, the document will be returned as a single
|
||||||
|
langchain Document object. If you use "elements" mode, the unstructured
|
||||||
|
library will split the document into elements such as Title and NarrativeText.
|
||||||
|
You can pass in additional unstructured kwargs after mode to apply
|
||||||
|
different unstructured settings.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
```python
|
||||||
|
from langchain.document_loaders import UnstructuredAPIFileIOLoader
|
||||||
|
|
||||||
|
with open("example.pdf", "rb") as f:
|
||||||
|
loader = UnstructuredAPIFileIOLoader(
|
||||||
|
f, mode="elements", strategy="fast", api_key="MY_API_KEY",
|
||||||
|
)
|
||||||
|
docs = loader.load()
|
||||||
|
```
|
||||||
|
|
||||||
|
References
|
||||||
|
----------
|
||||||
|
https://unstructured-io.github.io/unstructured/bricks.html#partition
|
||||||
|
https://www.unstructured.io/api-key/
|
||||||
|
https://github.com/Unstructured-IO/unstructured-api
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
Loading…
Reference in New Issue
Block a user