mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-13 13:36:15 +00:00
Feature: pdfplumber PDF loader with BaseBlobParser (#4552)
# Feature: pdfplumber PDF loader with BaseBlobParser
* Adds pdfplumber as a PDF loader
* Adds pdfplumber as a blob parser.
This commit is contained in:
@@ -6,6 +6,7 @@ from langchain.document_loaders.base import BaseBlobParser
|
||||
from langchain.document_loaders.blob_loaders import Blob
|
||||
from langchain.document_loaders.parsers.pdf import (
|
||||
PDFMinerParser,
|
||||
PDFPlumberParser,
|
||||
PyMuPDFParser,
|
||||
PyPDFium2Parser,
|
||||
PyPDFParser,
|
||||
@@ -78,3 +79,8 @@ def test_pypdfium2_parser() -> None:
|
||||
"""Test PyPDFium2 parser."""
|
||||
# Does not follow defaults to split by page.
|
||||
_assert_with_parser(PyPDFium2Parser())
|
||||
|
||||
|
||||
def test_pdfplumber_parser() -> None:
|
||||
"""Test PDFPlumber parser."""
|
||||
_assert_with_parser(PDFPlumberParser())
|
||||
|
@@ -8,4 +8,5 @@ def test_parsers_public_api_correct() -> None:
|
||||
"PDFMinerParser",
|
||||
"PyMuPDFParser",
|
||||
"PyPDFium2Parser",
|
||||
"PDFPlumberParser",
|
||||
}
|
||||
|
@@ -8,4 +8,5 @@ def test_parsers_public_api_correct() -> None:
|
||||
"PDFMinerParser",
|
||||
"PyMuPDFParser",
|
||||
"PyPDFium2Parser",
|
||||
"PDFPlumberParser",
|
||||
}
|
||||
|
Reference in New Issue
Block a user