community[patch]: move pdf text tests to integration (#18746)

2025-08-03 10:12:33 +00:00 · 2024-03-07 10:34:22 -08:00 · 2024-03-07 10:34:22 -08:00 · 1beb84b061
commit 1beb84b061
parent 4a7d73b39d
4 changed files with 34 additions and 9 deletions
--- a/libs/community/langchain_community/chat_models/anthropic.py
+++ b/libs/community/langchain_community/chat_models/anthropic.py
@ -1,5 +1,6 @@
 from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, cast
 from langchain_core._api.deprecation import deprecated
 from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
@ -70,6 +71,11 @@ def convert_messages_to_prompt_anthropic(
    return text.rstrip()
@deprecated(
    since="0.0.28",
    removal="0.2",
    alternative_import="langchain_anthropic.ChatAnthropic",
 )
 class ChatAnthropic(BaseChatModel, _AnthropicCommon):
    """`Anthropic` chat large language models.
--- a/libs/community/langchain_community/llms/anthropic.py
+++ b/libs/community/langchain_community/llms/anthropic.py
@ -11,6 +11,7 @@ from typing import (
    Optional,
 )
 from langchain_core._api.deprecation import deprecated
 from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
@ -147,6 +148,11 @@ class _AnthropicCommon(BaseLanguageModel):
        return stop
@deprecated(
    since="0.0.28",
    removal="0.2",
    alternative_import="langchain_anthropic.AnthropicLLM",
 )
 class Anthropic(LLM, _AnthropicCommon):
    """Anthropic large language models.
--- a/libs/community/tests/integration_tests/document_loaders/parsers/test_pdf_parsers.py
+++ b/libs/community/tests/integration_tests/document_loaders/parsers/test_pdf_parsers.py
@ -1,4 +1,5 @@
 """Tests for the various PDF parsers."""
 from pathlib import Path
 from typing import Iterator
@ -110,3 +111,23 @@ def test_pdfplumber_parser() -> None:
    _assert_with_parser(PDFPlumberParser())
    _assert_with_duplicate_parser(PDFPlumberParser())
    _assert_with_duplicate_parser(PDFPlumberParser(dedupe=True), dedupe=True)
 def test_extract_images_text_from_pdf_pypdfparser() -> None:
    """Check PyPDFParser with ``extract_images=True``.

    Covers extracting images from the PDF and recognizing their text with
    rapid OCR.
    """
    image_aware_parser = PyPDFParser(extract_images=True)
    _assert_with_parser(image_aware_parser)
 def test_extract_images_text_from_pdf_pdfminerparser() -> None:
    """Check PDFMinerParser with ``extract_images=True``.

    Covers extracting images from the PDF and recognizing their text with
    rapid OCR.
    """
    image_aware_parser = PDFMinerParser(extract_images=True)
    _assert_with_parser(image_aware_parser)
 def test_extract_images_text_from_pdf_pymupdfparser() -> None:
    """Check PyMuPDFParser with ``extract_images=True``.

    Covers extracting images from the PDF and recognizing their text with
    rapid OCR.
    """
    image_aware_parser = PyMuPDFParser(extract_images=True)
    _assert_with_parser(image_aware_parser)
 def test_extract_images_text_from_pdf_pypdfium2parser() -> None:
    """Check PyPDFium2Parser with ``extract_images=True``.

    Covers extracting images from the PDF and recognizing their text with
    rapid OCR.
    """
    image_aware_parser = PyPDFium2Parser(extract_images=True)
    _assert_with_parser(image_aware_parser)
--- a/libs/community/tests/unit_tests/document_loaders/parsers/test_pdf_parsers.py
+++ b/libs/community/tests/unit_tests/document_loaders/parsers/test_pdf_parsers.py
@ -1,4 +1,5 @@
 """Tests for the various PDF parsers."""
 from pathlib import Path
 from typing import Iterator
@ -85,12 +86,3 @@ def test_pypdfium2_parser() -> None:
    """Test PyPDFium2 parser."""
    # Does not follow defaults to split by page.
    _assert_with_parser(PyPDFium2Parser())
@pytest.mark.requires("rapidocr_onnxruntime")
 def test_extract_images_text_from_pdf() -> None:
    """Test extract image from pdf and recognize text with rapid ocr"""
    _assert_with_parser(PyPDFParser(extract_images=True))
    _assert_with_parser(PDFMinerParser(extract_images=True))
    _assert_with_parser(PyMuPDFParser(extract_images=True))
    _assert_with_parser(PyPDFium2Parser(extract_images=True))