community[patch]: move pdf text tests to integration (#18746)

2026-01-05 16:06:39 +00:00 · 2024-03-07 10:34:22 -08:00
parent 4a7d73b39d
commit 1beb84b061
4 changed files with 34 additions and 9 deletions
--- a/libs/community/langchain_community/chat_models/anthropic.py
+++ b/libs/community/langchain_community/chat_models/anthropic.py
@@ -1,5 +1,6 @@
 from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, cast

+from langchain_core._api.deprecation import deprecated
 from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
@@ -70,6 +71,11 @@ def convert_messages_to_prompt_anthropic(
    return text.rstrip()


+@deprecated(
+    since="0.0.28",
+    removal="0.2",
+    alternative_import="langchain_anthropic.ChatAnthropic",
+)
 class ChatAnthropic(BaseChatModel, _AnthropicCommon):
    """`Anthropic` chat large language models.

--- a/libs/community/langchain_community/llms/anthropic.py
+++ b/libs/community/langchain_community/llms/anthropic.py
@@ -11,6 +11,7 @@ from typing import (
    Optional,
 )

+from langchain_core._api.deprecation import deprecated
 from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
@@ -147,6 +148,11 @@ class _AnthropicCommon(BaseLanguageModel):
        return stop


+@deprecated(
+    since="0.0.28",
+    removal="0.2",
+    alternative_import="langchain_anthropic.AnthropicLLM",
+)
 class Anthropic(LLM, _AnthropicCommon):
    """Anthropic large language models.

--- a/libs/community/tests/integration_tests/document_loaders/parsers/test_pdf_parsers.py
+++ b/libs/community/tests/integration_tests/document_loaders/parsers/test_pdf_parsers.py
@@ -1,4 +1,5 @@
 """Tests for the various PDF parsers."""
+
 from pathlib import Path
 from typing import Iterator

@@ -110,3 +111,23 @@ def test_pdfplumber_parser() -> None:
    _assert_with_parser(PDFPlumberParser())
    _assert_with_duplicate_parser(PDFPlumberParser())
    _assert_with_duplicate_parser(PDFPlumberParser(dedupe=True), dedupe=True)
+
+
+def test_extract_images_text_from_pdf_pypdfparser() -> None:
+    """Test extract image from pdf and recognize text with rapid ocr - PyPDFParser"""
+    _assert_with_parser(PyPDFParser(extract_images=True))
+
+
+def test_extract_images_text_from_pdf_pdfminerparser() -> None:
+    """Test extract image from pdf and recognize text with rapid ocr - PDFMinerParser"""
+    _assert_with_parser(PDFMinerParser(extract_images=True))
+
+
+def test_extract_images_text_from_pdf_pymupdfparser() -> None:
+    """Test extract image from pdf and recognize text with rapid ocr - PyMuPDFParser"""
+    _assert_with_parser(PyMuPDFParser(extract_images=True))
+
+
+def test_extract_images_text_from_pdf_pypdfium2parser() -> None:
+    """Test extract image from pdf and recognize text with rapid ocr - PyPDFium2Parser"""  # noqa: E501
+    _assert_with_parser(PyPDFium2Parser(extract_images=True))
--- a/libs/community/tests/unit_tests/document_loaders/parsers/test_pdf_parsers.py
+++ b/libs/community/tests/unit_tests/document_loaders/parsers/test_pdf_parsers.py
@@ -1,4 +1,5 @@
 """Tests for the various PDF parsers."""
+
 from pathlib import Path
 from typing import Iterator

@@ -85,12 +86,3 @@ def test_pypdfium2_parser() -> None:
    """Test PyPDFium2 parser."""
    # Does not follow defaults to split by page.
    _assert_with_parser(PyPDFium2Parser())
-
-
-@pytest.mark.requires("rapidocr_onnxruntime")
-def test_extract_images_text_from_pdf() -> None:
-    """Test extract image from pdf and recognize text with rapid ocr"""
-    _assert_with_parser(PyPDFParser(extract_images=True))
-    _assert_with_parser(PDFMinerParser(extract_images=True))
-    _assert_with_parser(PyMuPDFParser(extract_images=True))
-    _assert_with_parser(PyPDFium2Parser(extract_images=True))