feat(core): impute placeholder filenames for OpenAI file inputs (#36433)

2026-07-13 20:47:15 +00:00 · 2026-04-01 14:41:53 -04:00
parent 86238a775e
commit bdfd4462ac
4 changed files with 71 additions and 101 deletions
--- a/libs/core/langchain_core/messages/block_translators/openai.py
+++ b/libs/core/langchain_core/messages/block_translators/openai.py
@@ -103,11 +103,13 @@ def convert_to_openai_data_block(
                # Backward compat
                file["filename"] = extras["filename"]
            else:
-                # Can't infer filename
+                # Can't infer filename; set a placeholder default for compatibility.
+                file["filename"] = "LC_AUTOGENERATED"
                warnings.warn(
                    "OpenAI may require a filename for file uploads. Specify a filename"
                    " in the content block, e.g.: {'type': 'file', 'mime_type': "
-                    "'...', 'base64': '...', 'filename': 'my-file.pdf'}",
+                    "'...', 'base64': '...', 'filename': 'my-file.pdf'}. "
+                    "Using placeholder filename 'LC_AUTOGENERATED'.",
                    stacklevel=1,
                )
            formatted_block = {"type": "file", "file": file}
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
@@ -1,13 +1,11 @@
 """Standard LangChain interface tests"""

-import base64
 from pathlib import Path
 from typing import Literal, cast

-import httpx
 import pytest
 from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import AIMessage, HumanMessage
+from langchain_core.messages import AIMessage
 from langchain_tests.integration_tests import ChatModelIntegrationTests

 from langchain_openai import ChatOpenAI
@@ -84,45 +82,7 @@ class TestOpenAIStandard(ChatModelIntegrationTests):

    @property
    def supports_pdf_inputs(self) -> bool:
-        # OpenAI requires a filename for PDF inputs
-        # For now, we test with filename in OpenAI-specific tests
-        return False
-
-    @pytest.mark.flaky(retries=3, delay=1)
-    def test_openai_pdf_inputs(self, model: BaseChatModel) -> None:
-        """Test that the model can process PDF inputs."""
-        url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
-        pdf_data = base64.b64encode(httpx.get(url, timeout=10.0).content).decode(
-            "utf-8"
-        )
-
-        message = HumanMessage(
-            [
-                {"type": "text", "text": "What is the document title, verbatim?"},
-                {
-                    "type": "file",
-                    "mime_type": "application/pdf",
-                    "base64": pdf_data,
-                    "filename": "my-pdf",  # OpenAI requires a filename
-                },
-            ]
-        )
-        _ = model.invoke([message])
-
-        # Test OpenAI Chat Completions format
-        message = HumanMessage(
-            [
-                {"type": "text", "text": "What is the document title, verbatim?"},
-                {
-                    "type": "file",
-                    "file": {
-                        "filename": "test file.pdf",
-                        "file_data": f"data:application/pdf;base64,{pdf_data}",
-                    },
-                },
-            ]
-        )
-        _ = model.invoke([message])
+        return True


 def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage:
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py
@@ -1,13 +1,11 @@
 """Standard LangChain interface tests for Responses API"""

-import base64
 from pathlib import Path
 from typing import cast

-import httpx
 import pytest
 from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
+from langchain_core.messages import AIMessage, HumanMessage

 from langchain_openai import ChatOpenAI
 from tests.integration_tests.chat_models.test_base_standard import TestOpenAIStandard
@@ -28,6 +26,10 @@ class TestOpenAIResponses(TestOpenAIStandard):
    def supports_image_tool_message(self) -> bool:
        return True

+    @property
+    def supports_pdf_tool_message(self) -> bool:
+        return True
+
    @pytest.mark.xfail(reason="Unsupported.")
    def test_stop_sequence(self, model: BaseChatModel) -> None:
        super().test_stop_sequence(model)
@@ -57,7 +59,6 @@ class TestOpenAIResponses(TestOpenAIStandard):
    @pytest.mark.flaky(retries=3, delay=1)
    def test_openai_pdf_inputs(self, model: BaseChatModel) -> None:
        """Test that the model can process PDF inputs."""
-        super().test_openai_pdf_inputs(model)
        # Responses API additionally supports files via URL
        url = "https://www.berkshirehathaway.com/letters/2024ltr.pdf"

@@ -78,56 +79,6 @@ class TestOpenAIResponses(TestOpenAIStandard):
        )
        _ = model.invoke([message])

-    @property
-    def supports_pdf_tool_message(self) -> bool:
-        # OpenAI requires a filename for PDF inputs
-        # For now, we test with filename in OpenAI-specific tests
-        return False
-
-    def test_openai_pdf_tool_messages(self, model: BaseChatModel) -> None:
-        """Test that the model can process PDF inputs in `ToolMessage` objects."""
-        url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
-        pdf_data = base64.b64encode(httpx.get(url, timeout=10.0).content).decode(
-            "utf-8"
-        )
-
-        tool_message = ToolMessage(
-            content_blocks=[
-                {
-                    "type": "file",
-                    "base64": pdf_data,
-                    "mime_type": "application/pdf",
-                    "extras": {"filename": "my-pdf"},  # specify filename
-                },
-            ],
-            tool_call_id="1",
-            name="random_pdf",
-        )
-
-        messages = [
-            HumanMessage(
-                "Get a random PDF using the tool and relay the title verbatim."
-            ),
-            AIMessage(
-                [],
-                tool_calls=[
-                    {
-                        "type": "tool_call",
-                        "id": "1",
-                        "name": "random_pdf",
-                        "args": {},
-                    }
-                ],
-            ),
-            tool_message,
-        ]
-
-        def random_pdf() -> str:
-            """Return a random PDF."""
-            return ""
-
-        _ = model.bind_tools([random_pdf]).invoke(messages)
-

 def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage:
    if stream:
--- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py
+++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py
@@ -854,17 +854,19 @@ def test_format_message_content() -> None:
    for content in contents:
        assert expected == _format_message_content([content])

-    # Test warn if PDF is missing a filename
+    # Test that we warn if a PDF is missing a filename and add a placeholder filename
    pdf_block = {
        "type": "file",
        "base64": "<base64 data>",
        "mime_type": "application/pdf",
    }
    expected = [
-        # N.B. this format is invalid for OpenAI
        {
            "type": "file",
-            "file": {"file_data": "data:application/pdf;base64,<base64 data>"},
+            "file": {
+                "file_data": "data:application/pdf;base64,<base64 data>",
+                "filename": "LC_AUTOGENERATED",
+            },
        }
    ]
    with pytest.warns(match="filename"):
@@ -3530,6 +3532,61 @@ def test_context_overflow_error_backwards_compatibility() -> None:
    assert isinstance(exc_info.value, ContextOverflowError)


+def test_get_request_payload_responses_api_input_file_blocks_passthrough() -> None:
+    llm = ChatOpenAI(model="gpt-5", use_responses_api=True)
+    payload = llm._get_request_payload(
+        [
+            HumanMessage(
+                content=[
+                    {
+                        "type": "input_text",
+                        "text": "Analyze the letter and summarize key points.",
+                    },
+                    {
+                        "type": "input_file",
+                        "file_url": "https://www.berkshirehathaway.com/letters/2024ltr.pdf",
+                    },
+                    {
+                        "type": "input_file",
+                        "file_id": "file-6F2ksmvXxt4VdoqmHRw6kL",
+                    },
+                    {
+                        "type": "input_file",
+                        "filename": "draconomicon.pdf",
+                        "file_data": "data:application/pdf;base64,Zm9v",
+                    },
+                ]
+            )
+        ]
+    )
+
+    assert payload["input"] == [
+        {
+            "type": "message",
+            "role": "user",
+            "content": [
+                {
+                    "type": "input_text",
+                    "text": "Analyze the letter and summarize key points.",
+                },
+                {
+                    "type": "input_file",
+                    "file_url": "https://www.berkshirehathaway.com/letters/2024ltr.pdf",
+                },
+                {
+                    "type": "input_file",
+                    "file_id": "file-6F2ksmvXxt4VdoqmHRw6kL",
+                },
+                {
+                    "type": "input_file",
+                    "filename": "draconomicon.pdf",
+                    "file_data": "data:application/pdf;base64,Zm9v",
+                },
+            ],
+        }
+    ]
+
+
 def test_tool_search_passthrough() -> None:
    """Test that tool_search dict is passed through as a built-in tool."""
    llm = ChatOpenAI(model="gpt-4o")