mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-07 22:11:51 +00:00
community[patch]: move pdf text tests to integration (#18746)
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
"""Tests for the various PDF parsers."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Iterator
|
||||
|
||||
@@ -85,12 +86,3 @@ def test_pypdfium2_parser() -> None:
|
||||
"""Test PyPDFium2 parser."""
|
||||
# Does not follow defaults to split by page.
|
||||
_assert_with_parser(PyPDFium2Parser())
|
||||
|
||||
|
||||
@pytest.mark.requires("rapidocr_onnxruntime")
|
||||
def test_extract_images_text_from_pdf() -> None:
|
||||
"""Test extract image from pdf and recognize text with rapid ocr"""
|
||||
_assert_with_parser(PyPDFParser(extract_images=True))
|
||||
_assert_with_parser(PDFMinerParser(extract_images=True))
|
||||
_assert_with_parser(PyMuPDFParser(extract_images=True))
|
||||
_assert_with_parser(PyPDFium2Parser(extract_images=True))
|
||||
|
Reference in New Issue
Block a user