mirror of
https://github.com/hwchase17/langchain.git
synced 2026-01-29 21:30:18 +00:00
Harrison/unstructured page number (#6464)
Co-authored-by: Reza Sanaie <reza@sanaie.ca>
This commit is contained in:
@@ -11,7 +11,25 @@ from langchain.document_loaders import (
|
||||
)
|
||||
|
||||
|
||||
def test_unstructured_pdf_loader() -> None:
|
||||
def test_unstructured_pdf_loader_elements_mode() -> None:
    """Load ``examples/hello.pdf`` in "elements" mode and expect two documents."""
    # The fixture lives two directory levels above this test module.
    base_dir = Path(__file__).parent.parent
    pdf_path = base_dir / "examples/hello.pdf"

    documents = UnstructuredPDFLoader(str(pdf_path), mode="elements").load()

    assert len(documents) == 2
|
||||
|
||||
|
||||
def test_unstructured_pdf_loader_paged_mode() -> None:
    """Load ``examples/layout-parser-paper.pdf`` in "paged" mode and expect 16 documents."""
    # The fixture lives two directory levels above this test module.
    base_dir = Path(__file__).parent.parent
    pdf_path = base_dir / "examples/layout-parser-paper.pdf"

    documents = UnstructuredPDFLoader(str(pdf_path), mode="paged").load()

    assert len(documents) == 16
|
||||
|
||||
|
||||
def test_unstructured_pdf_loader_default_mode() -> None:
|
||||
"""Test unstructured loader."""
|
||||
file_path = Path(__file__).parent.parent / "examples/hello.pdf"
|
||||
loader = UnstructuredPDFLoader(str(file_path))
|
||||
|
||||
Reference in New Issue
Block a user