community[patch]: move pdf text tests to integration (#18746)

This commit is contained in:
Erick Friis
2024-03-07 10:34:22 -08:00
committed by GitHub
parent 4a7d73b39d
commit 1beb84b061
4 changed files with 34 additions and 9 deletions

View File

@@ -1,4 +1,5 @@
"""Tests for the various PDF parsers."""
from pathlib import Path
from typing import Iterator
@@ -85,12 +86,3 @@ def test_pypdfium2_parser() -> None:
"""Test PyPDFium2 parser."""
# Does not follow defaults to split by page.
_assert_with_parser(PyPDFium2Parser())
@pytest.mark.requires("rapidocr_onnxruntime")
def test_extract_images_text_from_pdf() -> None:
    """Check every PDF parser with image extraction switched on.

    Each parser is constructed with ``extract_images=True`` and handed to
    the shared ``_assert_with_parser`` helper, one parser at a time and in
    a fixed order.
    """
    # Order matters for reproducing failures: the first parser that trips
    # the shared assertions aborts the test before later ones are tried.
    parser_classes = (
        PyPDFParser,
        PDFMinerParser,
        PyMuPDFParser,
        PyPDFium2Parser,
    )
    for parser_cls in parser_classes:
        _assert_with_parser(parser_cls(extract_images=True))