feat: (core, standard-tests) support PDF inputs in ToolMessages (#33183)

This commit is contained in:
ccurme
2025-10-01 10:16:16 -04:00
committed by GitHub
parent 34f8031bd9
commit 002d623f2d
5 changed files with 222 additions and 1 deletions

View File

@@ -82,6 +82,7 @@ TOOL_MESSAGE_BLOCK_TYPES = (
"search_result",
"custom_tool_call_output",
"document",
"file",
)

View File

@@ -41,6 +41,10 @@ class TestAnthropicStandard(ChatModelIntegrationTests):
def supports_image_tool_message(self) -> bool:
return True
@property
def supports_pdf_tool_message(self) -> bool:
    """Whether the chat model supports ToolMessages that include PDF content.

    Returning ``True`` opts this test class in to the standard PDF
    ToolMessage test.
    """
    return True
@property
def supports_anthropic_inputs(self) -> bool:
    """Whether the model accepts Anthropic-style inputs.

    NOTE(review): presumably gates the standard ``test_anthropic_inputs``
    test -- confirm against the standard-tests base class.
    """
    return True

View File

@@ -1,11 +1,13 @@
"""Standard LangChain interface tests for Responses API"""
import base64
from pathlib import Path
from typing import cast
import httpx
import pytest
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from langchain_openai import ChatOpenAI
from tests.integration_tests.chat_models.test_base_standard import TestOpenAIStandard
@@ -52,6 +54,55 @@ class TestOpenAIResponses(TestOpenAIStandard):
input_ = "What was the 3rd highest building in 2000?"
return _invoke(llm, input_, stream)
@property
def supports_pdf_tool_message(self) -> bool:
    """Opt out of the standard PDF ToolMessage test.

    OpenAI requires a filename for PDF inputs. For now, that case is
    tested with a filename in OpenAI-specific tests.
    """
    return False
def test_openai_pdf_tool_messages(self, model: BaseChatModel) -> None:
    """Test that the model can process PDF inputs in ToolMessages.

    OpenAI requires a filename for PDF inputs, so the file content block
    built here includes a ``filename`` key.

    The test downloads a small PDF, wraps it in a base64 ``file`` content
    block inside a ``ToolMessage``, and invokes the tool-bound model on a
    history that ends with that message. The response is not inspected:
    the test passes as long as the invocation does not raise.

    Args:
        model: The chat model under test.

    Raises:
        httpx.HTTPStatusError: If the PDF download returns a non-2xx status.
    """
    url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
    # Fail fast on a bad download; otherwise an HTTP error page would be
    # base64-encoded and sent to the model as if it were a PDF.
    response = httpx.get(url)
    response.raise_for_status()
    pdf_data = base64.b64encode(response.content).decode("utf-8")

    tool_message = ToolMessage(
        content=[
            {
                "type": "file",
                "source_type": "base64",
                "data": pdf_data,
                "mime_type": "application/pdf",
                "filename": "my-pdf",  # specify filename
            },
        ],
        tool_call_id="1",
        name="random_pdf",
    )

    # History: user request -> AI tool call -> tool result carrying the PDF.
    messages = [
        HumanMessage("Get a random PDF using the tool and relay the title verbatim."),
        AIMessage(
            [],
            tool_calls=[
                {
                    "type": "tool_call",
                    "id": "1",
                    "name": "random_pdf",
                    "args": {},
                }
            ],
        ),
        tool_message,
    ]

    def random_pdf() -> str:
        """Return a random PDF."""
        return ""

    # Only checks that the call succeeds; the output is intentionally discarded.
    _ = model.bind_tools([random_pdf]).invoke(messages)
def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage:
if stream:

View File

@@ -521,6 +521,39 @@ class ChatModelIntegrationTests(ChatModelTests):
def supports_image_tool_message(self) -> bool:
return False
.. dropdown:: supports_pdf_tool_message
Boolean property indicating whether the chat model supports ToolMessages
that include PDF content, i.e.,
.. code-block:: python
ToolMessage(
content=[
{
"type": "file",
"source_type": "base64",
"data": pdf_data,
"mime_type": "application/pdf",
},
],
tool_call_id="1",
name="random_pdf",
)
(standard format).
If set to ``True``, the chat model will be tested with message sequences that
include ToolMessages of this form.
Example:
.. code-block:: python
@property
def supports_pdf_tool_message(self) -> bool:
return False
.. dropdown:: supported_usage_metadata_details
Property controlling what usage metadata details are emitted in both invoke
@@ -2707,6 +2740,95 @@ class ChatModelIntegrationTests(ChatModelTests):
_ = model.bind_tools([random_image]).invoke(messages)
def test_pdf_tool_message(self, model: BaseChatModel) -> None:
    """Test that the model can process ToolMessages with PDF inputs.

    This test should be skipped if the model does not support messages of the
    form:

    .. code-block:: python

        ToolMessage(
            content=[
                {
                    "type": "file",
                    "source_type": "base64",
                    "data": pdf_data,
                    "mime_type": "application/pdf",
                },
            ],
            tool_call_id="1",
            name="random_pdf",
        )

    containing PDF content blocks in standard format.

    This test can be skipped by setting the ``supports_pdf_tool_message`` property
    to False (see Configuration below).

    .. dropdown:: Configuration

        To disable this test, set ``supports_pdf_tool_message`` to False in your
        test class:

        .. code-block:: python

            class TestMyChatModelIntegration(ChatModelIntegrationTests):
                @property
                def supports_pdf_tool_message(self) -> bool:
                    return False

    .. dropdown:: Troubleshooting

        If this test fails, check that the model can correctly handle messages
        with PDF content blocks in ToolMessages, specifically base64-encoded
        PDFs. Otherwise, set the ``supports_pdf_tool_message`` property to
        False.

        A failure raised while downloading the sample PDF (an HTTP status
        error) indicates a network or fixture problem rather than a model
        problem.

    """
    if not self.supports_pdf_tool_message:
        pytest.skip("Model does not support PDF tool message.")

    url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
    # Fail fast on a bad download; otherwise an HTTP error page would be
    # base64-encoded and sent to the model as if it were a PDF.
    response = httpx.get(url)
    response.raise_for_status()
    pdf_data = base64.b64encode(response.content).decode("utf-8")

    tool_message = ToolMessage(
        content=[
            {
                "type": "file",
                "source_type": "base64",
                "data": pdf_data,
                "mime_type": "application/pdf",
            },
        ],
        tool_call_id="1",
        name="random_pdf",
    )

    # History: user request -> AI tool call -> tool result carrying the PDF.
    messages = [
        HumanMessage("Get a random PDF using the tool and relay the title verbatim."),
        AIMessage(
            [],
            tool_calls=[
                {
                    "type": "tool_call",
                    "id": "1",
                    "name": "random_pdf",
                    "args": {},
                }
            ],
        ),
        tool_message,
    ]

    def random_pdf() -> str:
        """Return a random PDF."""
        return ""

    # Only checks that the call succeeds; the output is intentionally discarded.
    _ = model.bind_tools([random_pdf]).invoke(messages)
def test_anthropic_inputs(self, model: BaseChatModel) -> None:
"""Test that model can process Anthropic-style message histories.

View File

@@ -231,6 +231,16 @@ class ChatModelTests(BaseStandardTests):
"""
return False
@property
def supports_pdf_tool_message(self) -> bool:
    """Whether the chat model supports ToolMessages that include PDF content.

    Returns ``False`` here.
    """
    return False
@property
def enable_vcr_tests(self) -> bool:
"""(bool) whether to enable VCR tests for the chat model.
@@ -645,6 +655,39 @@ class ChatModelUnitTests(ChatModelTests):
def supports_image_tool_message(self) -> bool:
return False
.. dropdown:: supports_pdf_tool_message
Boolean property indicating whether the chat model supports ToolMessages
that include PDF content, i.e.,
.. code-block:: python
ToolMessage(
content=[
{
"type": "file",
"source_type": "base64",
"data": pdf_data,
"mime_type": "application/pdf",
},
],
tool_call_id="1",
name="random_pdf",
)
(standard format).
If set to ``True``, the chat model will be tested with message sequences that
include ToolMessages of this form.
Example:
.. code-block:: python
@property
def supports_pdf_tool_message(self) -> bool:
return False
.. dropdown:: supported_usage_metadata_details
Property controlling what usage metadata details are emitted in both ``invoke``