mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-25 13:07:58 +00:00
feat: add loader for open office odt files (#4405)
# ODF File Loader

Adds a data loader for handling Open Office ODT files. Requires `unstructured>=0.6.3`.

### Testing

The following should work using the `fake.odt` example doc from the [`unstructured` repo](https://github.com/Unstructured-IO/unstructured).

```python
from langchain.document_loaders import UnstructuredODTLoader

loader = UnstructuredODTLoader(file_path="fake.odt", mode="elements")
loader.load()

loader = UnstructuredODTLoader(file_path="fake.odt", mode="single")
loader.load()
```
This commit is contained in:
12
tests/integration_tests/document_loaders/test_odt.py
Normal file
12
tests/integration_tests/document_loaders/test_odt.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from pathlib import Path
|
||||
|
||||
from langchain.document_loaders import UnstructuredODTLoader
|
||||
|
||||
|
||||
def test_unstructured_odt_loader() -> None:
    """Check that the ODT loader yields exactly one document for the sample file."""
    # The fixture is resolved relative to this test module: two directories
    # up, then into the shared "examples" folder.
    tests_root = Path(__file__).parent.parent
    odt_fixture = tests_root / "examples/fake.odt"

    # The loader is given the path as a plain string.
    documents = UnstructuredODTLoader(str(odt_fixture)).load()

    assert len(documents) == 1
|
BIN
tests/integration_tests/examples/fake.odt
Normal file
BIN
tests/integration_tests/examples/fake.odt
Normal file
Binary file not shown.
Reference in New Issue
Block a user