feat(core): impute placeholder filenames for OpenAI file inputs (#36433)

This commit is contained in:
ccurme
2026-04-01 14:41:53 -04:00
committed by GitHub
parent 86238a775e
commit bdfd4462ac
4 changed files with 71 additions and 101 deletions

View File

@@ -103,11 +103,13 @@ def convert_to_openai_data_block(
# Backward compat
file["filename"] = extras["filename"]
else:
# Can't infer filename
# Can't infer filename; set a placeholder default for compatibility.
file["filename"] = "LC_AUTOGENERATED"
warnings.warn(
"OpenAI may require a filename for file uploads. Specify a filename"
" in the content block, e.g.: {'type': 'file', 'mime_type': "
"'...', 'base64': '...', 'filename': 'my-file.pdf'}",
"'...', 'base64': '...', 'filename': 'my-file.pdf'}. "
"Using placeholder filename 'LC_AUTOGENERATED'.",
stacklevel=1,
)
formatted_block = {"type": "file", "file": file}

View File

@@ -1,13 +1,11 @@
"""Standard LangChain interface tests"""
import base64
from pathlib import Path
from typing import Literal, cast
import httpx
import pytest
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.messages import AIMessage
from langchain_tests.integration_tests import ChatModelIntegrationTests
from langchain_openai import ChatOpenAI
@@ -84,45 +82,7 @@ class TestOpenAIStandard(ChatModelIntegrationTests):
@property
def supports_pdf_inputs(self) -> bool:
# OpenAI requires a filename for PDF inputs
# For now, we test with filename in OpenAI-specific tests
return False
@pytest.mark.flaky(retries=3, delay=1)
def test_openai_pdf_inputs(self, model: BaseChatModel) -> None:
"""Test that the model can process PDF inputs."""
url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
pdf_data = base64.b64encode(httpx.get(url, timeout=10.0).content).decode(
"utf-8"
)
message = HumanMessage(
[
{"type": "text", "text": "What is the document title, verbatim?"},
{
"type": "file",
"mime_type": "application/pdf",
"base64": pdf_data,
"filename": "my-pdf", # OpenAI requires a filename
},
]
)
_ = model.invoke([message])
# Test OpenAI Chat Completions format
message = HumanMessage(
[
{"type": "text", "text": "What is the document title, verbatim?"},
{
"type": "file",
"file": {
"filename": "test file.pdf",
"file_data": f"data:application/pdf;base64,{pdf_data}",
},
},
]
)
_ = model.invoke([message])
return True
def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage:

View File

@@ -1,13 +1,11 @@
"""Standard LangChain interface tests for Responses API"""
import base64
from pathlib import Path
from typing import cast
import httpx
import pytest
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from langchain_core.messages import AIMessage, HumanMessage
from langchain_openai import ChatOpenAI
from tests.integration_tests.chat_models.test_base_standard import TestOpenAIStandard
@@ -28,6 +26,10 @@ class TestOpenAIResponses(TestOpenAIStandard):
def supports_image_tool_message(self) -> bool:
return True
@property
def supports_pdf_tool_message(self) -> bool:
    """Report that PDF content inside tool messages is supported.

    Always returns `True`.

    NOTE(review): presumably read by the inherited standard test suite to
    decide whether its PDF-in-`ToolMessage` test runs -- confirm against
    `langchain_tests`.
    """
    return True
@pytest.mark.xfail(reason="Unsupported.")
def test_stop_sequence(self, model: BaseChatModel) -> None:
    """Run the inherited stop-sequence test, which is expected to fail here.

    The override only attaches the `xfail` marker (reason: "Unsupported.")
    and then delegates unchanged to the parent implementation.

    Args:
        model: The chat model instance under test.
    """
    super().test_stop_sequence(model)
@@ -57,7 +59,6 @@ class TestOpenAIResponses(TestOpenAIStandard):
@pytest.mark.flaky(retries=3, delay=1)
def test_openai_pdf_inputs(self, model: BaseChatModel) -> None:
"""Test that the model can process PDF inputs."""
super().test_openai_pdf_inputs(model)
# Responses API additionally supports files via URL
url = "https://www.berkshirehathaway.com/letters/2024ltr.pdf"
@@ -78,56 +79,6 @@ class TestOpenAIResponses(TestOpenAIStandard):
)
_ = model.invoke([message])
@property
def supports_pdf_tool_message(self) -> bool:
# OpenAI requires a filename for PDF inputs
# For now, we test with filename in OpenAI-specific tests
return False
def test_openai_pdf_tool_messages(self, model: BaseChatModel) -> None:
"""Test that the model can process PDF inputs in `ToolMessage` objects."""
url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
pdf_data = base64.b64encode(httpx.get(url, timeout=10.0).content).decode(
"utf-8"
)
tool_message = ToolMessage(
content_blocks=[
{
"type": "file",
"base64": pdf_data,
"mime_type": "application/pdf",
"extras": {"filename": "my-pdf"}, # specify filename
},
],
tool_call_id="1",
name="random_pdf",
)
messages = [
HumanMessage(
"Get a random PDF using the tool and relay the title verbatim."
),
AIMessage(
[],
tool_calls=[
{
"type": "tool_call",
"id": "1",
"name": "random_pdf",
"args": {},
}
],
),
tool_message,
]
def random_pdf() -> str:
"""Return a random PDF."""
return ""
_ = model.bind_tools([random_pdf]).invoke(messages)
def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage:
if stream:

View File

@@ -854,17 +854,19 @@ def test_format_message_content() -> None:
for content in contents:
assert expected == _format_message_content([content])
# Test warn if PDF is missing a filename
# Test warn if PDF is missing a filename and that we add a default filename
pdf_block = {
"type": "file",
"base64": "<base64 data>",
"mime_type": "application/pdf",
}
expected = [
# N.B. this format is invalid for OpenAI
{
"type": "file",
"file": {"file_data": "data:application/pdf;base64,<base64 data>"},
"file": {
"file_data": "data:application/pdf;base64,<base64 data>",
"filename": "LC_AUTOGENERATED",
},
}
]
with pytest.warns(match="filename"):
@@ -3530,6 +3532,61 @@ def test_context_overflow_error_backwards_compatibility() -> None:
assert isinstance(exc_info.value, ContextOverflowError)
def test_get_request_payload_responses_api_input_file_blocks_passthrough() -> None:
    """Check that Responses API `input_file` blocks reach the payload unchanged.

    A single human message carries one `input_text` block and three
    `input_file` blocks (referenced by URL, by file ID, and by inline base64
    data with a filename). The request payload must wrap them in exactly one
    user `message` item whose content equals the blocks as they were given.
    """
    text_block = {
        "type": "input_text",
        "text": "Analyze the letter and summarize key points.",
    }
    url_file_block = {
        "type": "input_file",
        "file_url": "https://www.berkshirehathaway.com/letters/2024ltr.pdf",
    }
    id_file_block = {
        "type": "input_file",
        "file_id": "file-6F2ksmvXxt4VdoqmHRw6kL",
    }
    inline_file_block = {
        "type": "input_file",
        "filename": "draconomicon.pdf",
        "file_data": "data:application/pdf;base64,Zm9v",
    }
    content_blocks = [text_block, url_file_block, id_file_block, inline_file_block]

    # Snapshot the blocks before the call so the expectation stays independent
    # of the objects handed to the model (all values are plain strings, so a
    # per-block dict copy is a full copy).
    expected_content = [dict(block) for block in content_blocks]

    llm = ChatOpenAI(model="gpt-5", use_responses_api=True)
    payload = llm._get_request_payload([HumanMessage(content=content_blocks)])

    expected_item = {
        "type": "message",
        "role": "user",
        "content": expected_content,
    }
    assert payload["input"] == [expected_item]
def test_tool_search_passthrough() -> None:
"""Test that tool_search dict is passed through as a built-in tool."""
llm = ChatOpenAI(model="gpt-4o")