mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-01 09:04:03 +00:00
community[patch]: move pdf text tests to integration (#18746)
This commit is contained in:
parent
4a7d73b39d
commit
1beb84b061
@ -1,5 +1,6 @@
|
||||
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, cast
|
||||
|
||||
from langchain_core._api.deprecation import deprecated
|
||||
from langchain_core.callbacks import (
|
||||
AsyncCallbackManagerForLLMRun,
|
||||
CallbackManagerForLLMRun,
|
||||
@ -70,6 +71,11 @@ def convert_messages_to_prompt_anthropic(
|
||||
return text.rstrip()
|
||||
|
||||
|
||||
@deprecated(
|
||||
since="0.0.28",
|
||||
removal="0.2",
|
||||
alternative_import="langchain_anthropic.ChatAnthropic",
|
||||
)
|
||||
class ChatAnthropic(BaseChatModel, _AnthropicCommon):
|
||||
"""`Anthropic` chat large language models.
|
||||
|
||||
|
@ -11,6 +11,7 @@ from typing import (
|
||||
Optional,
|
||||
)
|
||||
|
||||
from langchain_core._api.deprecation import deprecated
|
||||
from langchain_core.callbacks import (
|
||||
AsyncCallbackManagerForLLMRun,
|
||||
CallbackManagerForLLMRun,
|
||||
@ -147,6 +148,11 @@ class _AnthropicCommon(BaseLanguageModel):
|
||||
return stop
|
||||
|
||||
|
||||
@deprecated(
|
||||
since="0.0.28",
|
||||
removal="0.2",
|
||||
alternative_import="langchain_anthropic.AnthropicLLM",
|
||||
)
|
||||
class Anthropic(LLM, _AnthropicCommon):
|
||||
"""Anthropic large language models.
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
"""Tests for the various PDF parsers."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Iterator
|
||||
|
||||
@ -110,3 +111,23 @@ def test_pdfplumber_parser() -> None:
|
||||
_assert_with_parser(PDFPlumberParser())
|
||||
_assert_with_duplicate_parser(PDFPlumberParser())
|
||||
_assert_with_duplicate_parser(PDFPlumberParser(dedupe=True), dedupe=True)
|
||||
|
||||
|
||||
def test_extract_images_text_from_pdf_pypdfparser() -> None:
    """Exercise PyPDFParser with image extraction turned on.

    Text inside embedded images is expected to be recognized with rapid ocr.
    """
    parser = PyPDFParser(extract_images=True)
    _assert_with_parser(parser)
|
||||
|
||||
|
||||
def test_extract_images_text_from_pdf_pdfminerparser() -> None:
    """Exercise PDFMinerParser with image extraction turned on.

    Text inside embedded images is expected to be recognized with rapid ocr.
    """
    parser = PDFMinerParser(extract_images=True)
    _assert_with_parser(parser)
|
||||
|
||||
|
||||
def test_extract_images_text_from_pdf_pymupdfparser() -> None:
    """Exercise PyMuPDFParser with image extraction turned on.

    Text inside embedded images is expected to be recognized with rapid ocr.
    """
    parser = PyMuPDFParser(extract_images=True)
    _assert_with_parser(parser)
|
||||
|
||||
|
||||
def test_extract_images_text_from_pdf_pypdfium2parser() -> None:
    """Exercise PyPDFium2Parser with image extraction turned on.

    Text inside embedded images is expected to be recognized with rapid ocr.
    """
    parser = PyPDFium2Parser(extract_images=True)
    _assert_with_parser(parser)
|
||||
|
@ -1,4 +1,5 @@
|
||||
"""Tests for the various PDF parsers."""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Iterator
|
||||
|
||||
@ -85,12 +86,3 @@ def test_pypdfium2_parser() -> None:
|
||||
"""Test PyPDFium2 parser."""
|
||||
# Does not follow defaults to split by page.
|
||||
_assert_with_parser(PyPDFium2Parser())
|
||||
|
||||
|
||||
@pytest.mark.requires("rapidocr_onnxruntime")
def test_extract_images_text_from_pdf() -> None:
    """Run the shared parser checks with image extraction enabled.

    Each parser is built with ``extract_images=True`` so that text inside
    embedded images is recognized with rapid ocr.
    """
    # Same parsers, same order as the individual checks they replace.
    parser_classes = (PyPDFParser, PDFMinerParser, PyMuPDFParser, PyPDFium2Parser)
    for parser_cls in parser_classes:
        _assert_with_parser(parser_cls(extract_images=True))
|
||||
|
Loading…
Reference in New Issue
Block a user