From bdfd4462ac89f909fce97663ab17fc312544c17e Mon Sep 17 00:00:00 2001 From: ccurme Date: Wed, 1 Apr 2026 14:41:53 -0400 Subject: [PATCH] feat(core): impute placeholder filenames for OpenAI file inputs (#36433) --- .../messages/block_translators/openai.py | 6 +- .../chat_models/test_base_standard.py | 44 +------------ .../chat_models/test_responses_standard.py | 59 ++--------------- .../tests/unit_tests/chat_models/test_base.py | 63 ++++++++++++++++++- 4 files changed, 71 insertions(+), 101 deletions(-) diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py index 61d4e3c3d0a..be32c444911 100644 --- a/libs/core/langchain_core/messages/block_translators/openai.py +++ b/libs/core/langchain_core/messages/block_translators/openai.py @@ -103,11 +103,13 @@ def convert_to_openai_data_block( # Backward compat file["filename"] = extras["filename"] else: - # Can't infer filename + # Can't infer filename; set a placeholder default for compatibility. + file["filename"] = "LC_AUTOGENERATED" warnings.warn( "OpenAI may require a filename for file uploads. Specify a filename" " in the content block, e.g.: {'type': 'file', 'mime_type': " - "'...', 'base64': '...', 'filename': 'my-file.pdf'}", + "'...', 'base64': '...', 'filename': 'my-file.pdf'}. " + "Using placeholder filename 'LC_AUTOGENERATED'.", stacklevel=1, ) formatted_block = {"type": "file", "file": file} diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py index 291d72c4a0d..fb4cfa35b68 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py @@ -1,13 +1,11 @@ """Standard LangChain interface tests""" -import base64 from pathlib import Path from typing import Literal, cast -import httpx import pytest from langchain_core.language_models import BaseChatModel -from langchain_core.messages import AIMessage, HumanMessage +from langchain_core.messages import AIMessage from langchain_tests.integration_tests import ChatModelIntegrationTests from langchain_openai import ChatOpenAI @@ -84,45 +82,7 @@ class TestOpenAIStandard(ChatModelIntegrationTests): @property def supports_pdf_inputs(self) -> bool: - # OpenAI requires a filename for PDF inputs - # For now, we test with filename in OpenAI-specific tests - return False - - @pytest.mark.flaky(retries=3, delay=1) - def test_openai_pdf_inputs(self, model: BaseChatModel) -> None: - """Test that the model can process PDF inputs.""" - url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" - pdf_data = base64.b64encode(httpx.get(url, timeout=10.0).content).decode( - "utf-8" - ) - - message = HumanMessage( - [ - {"type": "text", "text": "What is the document title, verbatim?"}, - { - "type": "file", - "mime_type": "application/pdf", - "base64": pdf_data, - "filename": "my-pdf", # OpenAI requires a filename - }, - ] - ) - _ = model.invoke([message]) - - # Test OpenAI Chat Completions format - message = HumanMessage( - [ - {"type": "text", "text": "What is the document title, verbatim?"}, - { - "type": "file", - "file": { - "filename": "test file.pdf", - "file_data": f"data:application/pdf;base64,{pdf_data}", - }, - }, - ] - ) - _ = model.invoke([message]) + return True def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage: diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py index cee734f0f0f..14383b5ac6d 100644 --- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py +++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py @@ -1,13 +1,11 @@ """Standard LangChain interface tests for Responses API""" -import base64 from pathlib import Path from typing import cast -import httpx import pytest from langchain_core.language_models import BaseChatModel -from langchain_core.messages import AIMessage, HumanMessage, ToolMessage +from langchain_core.messages import AIMessage, HumanMessage from langchain_openai import ChatOpenAI from tests.integration_tests.chat_models.test_base_standard import TestOpenAIStandard @@ -28,6 +26,10 @@ class TestOpenAIResponses(TestOpenAIStandard): def supports_image_tool_message(self) -> bool: return True + @property + def supports_pdf_tool_message(self) -> bool: + return True + @pytest.mark.xfail(reason="Unsupported.") def test_stop_sequence(self, model: BaseChatModel) -> None: super().test_stop_sequence(model) @@ -57,7 +59,6 @@ class TestOpenAIResponses(TestOpenAIStandard): @pytest.mark.flaky(retries=3, delay=1) def test_openai_pdf_inputs(self, model: BaseChatModel) -> None: """Test that the model can process PDF inputs.""" - super().test_openai_pdf_inputs(model) # Responses API additionally supports files via URL url = "https://www.berkshirehathaway.com/letters/2024ltr.pdf" @@ -78,56 +79,6 @@ class TestOpenAIResponses(TestOpenAIStandard): ) _ = model.invoke([message]) - @property - def supports_pdf_tool_message(self) -> bool: - # OpenAI requires a filename for PDF inputs - # For now, we test with filename in OpenAI-specific tests - return False - - def test_openai_pdf_tool_messages(self, model: BaseChatModel) -> None: - """Test that the model can process PDF inputs in `ToolMessage` objects.""" - url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" - pdf_data = base64.b64encode(httpx.get(url, timeout=10.0).content).decode( - "utf-8" - ) - - tool_message = ToolMessage( - content_blocks=[ - { - "type": "file", - "base64": pdf_data, - "mime_type": "application/pdf", - "extras": {"filename": "my-pdf"}, # specify filename - }, - ], - tool_call_id="1", - name="random_pdf", - ) - - messages = [ - HumanMessage( - "Get a random PDF using the tool and relay the title verbatim." - ), - AIMessage( - [], - tool_calls=[ - { - "type": "tool_call", - "id": "1", - "name": "random_pdf", - "args": {}, - } - ], - ), - tool_message, - ] - - def random_pdf() -> str: - """Return a random PDF.""" - return "" - - _ = model.bind_tools([random_pdf]).invoke(messages) - def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage: if stream: diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index 291fee686b1..adbceddde5f 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -854,17 +854,19 @@ def test_format_message_content() -> None: for content in contents: assert expected == _format_message_content([content]) - # Test warn if PDF is missing a filename + # Test warn if PDF is missing a filename and that we add a default filename pdf_block = { "type": "file", "base64": "", "mime_type": "application/pdf", } expected = [ - # N.B. this format is invalid for OpenAI { "type": "file", - "file": {"file_data": "data:application/pdf;base64,"}, + "file": { + "file_data": "data:application/pdf;base64,", + "filename": "LC_AUTOGENERATED", + }, } ] with pytest.warns(match="filename"): @@ -3530,6 +3532,61 @@ def test_context_overflow_error_backwards_compatibility() -> None: assert isinstance(exc_info.value, ContextOverflowError) +def test_get_request_payload_responses_api_input_file_blocks_passthrough() -> None: + llm = ChatOpenAI(model="gpt-5", use_responses_api=True) + payload = llm._get_request_payload( + [ + HumanMessage( + content=[ + { + "type": "input_text", + "text": "Analyze the letter and summarize key points.", + }, + { + "type": "input_file", + "file_url": "https://www.berkshirehathaway.com/letters/2024ltr.pdf", + }, + { + "type": "input_file", + "file_id": "file-6F2ksmvXxt4VdoqmHRw6kL", + }, + { + "type": "input_file", + "filename": "draconomicon.pdf", + "file_data": "data:application/pdf;base64,Zm9v", + }, + ] + ) + ] + ) + + assert payload["input"] == [ + { + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": "Analyze the letter and summarize key points.", + }, + { + "type": "input_file", + "file_url": "https://www.berkshirehathaway.com/letters/2024ltr.pdf", + }, + { + "type": "input_file", + "file_id": "file-6F2ksmvXxt4VdoqmHRw6kL", + }, + { + "type": "input_file", + "filename": "draconomicon.pdf", + "file_data": "data:application/pdf;base64,Zm9v", + }, + ], + } + ] + + def test_tool_search_passthrough() -> None: """Test that tool_search dict is passed through as a built-in tool.""" llm = ChatOpenAI(model="gpt-4o")