mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-03 10:12:33 +00:00
community[patch]: move pdf text tests to integration (#18746)
This commit is contained in:
parent
4a7d73b39d
commit
1beb84b061
@ -1,5 +1,6 @@
|
|||||||
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, cast
|
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, cast
|
||||||
|
|
||||||
|
from langchain_core._api.deprecation import deprecated
|
||||||
from langchain_core.callbacks import (
|
from langchain_core.callbacks import (
|
||||||
AsyncCallbackManagerForLLMRun,
|
AsyncCallbackManagerForLLMRun,
|
||||||
CallbackManagerForLLMRun,
|
CallbackManagerForLLMRun,
|
||||||
@ -70,6 +71,11 @@ def convert_messages_to_prompt_anthropic(
|
|||||||
return text.rstrip()
|
return text.rstrip()
|
||||||
|
|
||||||
|
|
||||||
|
@deprecated(
|
||||||
|
since="0.0.28",
|
||||||
|
removal="0.2",
|
||||||
|
alternative_import="langchain_anthropic.ChatAnthropic",
|
||||||
|
)
|
||||||
class ChatAnthropic(BaseChatModel, _AnthropicCommon):
|
class ChatAnthropic(BaseChatModel, _AnthropicCommon):
|
||||||
"""`Anthropic` chat large language models.
|
"""`Anthropic` chat large language models.
|
||||||
|
|
||||||
|
@ -11,6 +11,7 @@ from typing import (
|
|||||||
Optional,
|
Optional,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from langchain_core._api.deprecation import deprecated
|
||||||
from langchain_core.callbacks import (
|
from langchain_core.callbacks import (
|
||||||
AsyncCallbackManagerForLLMRun,
|
AsyncCallbackManagerForLLMRun,
|
||||||
CallbackManagerForLLMRun,
|
CallbackManagerForLLMRun,
|
||||||
@ -147,6 +148,11 @@ class _AnthropicCommon(BaseLanguageModel):
|
|||||||
return stop
|
return stop
|
||||||
|
|
||||||
|
|
||||||
|
@deprecated(
|
||||||
|
since="0.0.28",
|
||||||
|
removal="0.2",
|
||||||
|
alternative_import="langchain_anthropic.AnthropicLLM",
|
||||||
|
)
|
||||||
class Anthropic(LLM, _AnthropicCommon):
|
class Anthropic(LLM, _AnthropicCommon):
|
||||||
"""Anthropic large language models.
|
"""Anthropic large language models.
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
"""Tests for the various PDF parsers."""
|
"""Tests for the various PDF parsers."""
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
@ -110,3 +111,23 @@ def test_pdfplumber_parser() -> None:
|
|||||||
_assert_with_parser(PDFPlumberParser())
|
_assert_with_parser(PDFPlumberParser())
|
||||||
_assert_with_duplicate_parser(PDFPlumberParser())
|
_assert_with_duplicate_parser(PDFPlumberParser())
|
||||||
_assert_with_duplicate_parser(PDFPlumberParser(dedupe=True), dedupe=True)
|
_assert_with_duplicate_parser(PDFPlumberParser(dedupe=True), dedupe=True)
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_images_text_from_pdf_pypdfparser() -> None:
|
||||||
|
"""Test extract image from pdf and recognize text with rapid ocr - PyPDFParser"""
|
||||||
|
_assert_with_parser(PyPDFParser(extract_images=True))
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_images_text_from_pdf_pdfminerparser() -> None:
|
||||||
|
"""Test extract image from pdf and recognize text with rapid ocr - PDFMinerParser"""
|
||||||
|
_assert_with_parser(PDFMinerParser(extract_images=True))
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_images_text_from_pdf_pymupdfparser() -> None:
|
||||||
|
"""Test extract image from pdf and recognize text with rapid ocr - PyMuPDFParser"""
|
||||||
|
_assert_with_parser(PyMuPDFParser(extract_images=True))
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_images_text_from_pdf_pypdfium2parser() -> None:
|
||||||
|
"""Test extract image from pdf and recognize text with rapid ocr - PyPDFium2Parser""" # noqa: E501
|
||||||
|
_assert_with_parser(PyPDFium2Parser(extract_images=True))
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
"""Tests for the various PDF parsers."""
|
"""Tests for the various PDF parsers."""
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterator
|
from typing import Iterator
|
||||||
|
|
||||||
@ -85,12 +86,3 @@ def test_pypdfium2_parser() -> None:
|
|||||||
"""Test PyPDFium2 parser."""
|
"""Test PyPDFium2 parser."""
|
||||||
# Does not follow defaults to split by page.
|
# Does not follow defaults to split by page.
|
||||||
_assert_with_parser(PyPDFium2Parser())
|
_assert_with_parser(PyPDFium2Parser())
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.requires("rapidocr_onnxruntime")
|
|
||||||
def test_extract_images_text_from_pdf() -> None:
|
|
||||||
"""Test extract image from pdf and recognize text with rapid ocr"""
|
|
||||||
_assert_with_parser(PyPDFParser(extract_images=True))
|
|
||||||
_assert_with_parser(PDFMinerParser(extract_images=True))
|
|
||||||
_assert_with_parser(PyMuPDFParser(extract_images=True))
|
|
||||||
_assert_with_parser(PyPDFium2Parser(extract_images=True))
|
|
||||||
|
Loading…
Reference in New Issue
Block a user