core, standard-tests: support PDF and audio input in Chat Completions format (#30979)

Chat models currently implement support for:
- images in OpenAI Chat Completions format
- other multimodal types (e.g., PDF and audio) in a cross-provider
[standard
format](https://python.langchain.com/docs/how_to/multimodal_inputs/)

Here we update core to extend support to PDF and audio input in Chat
Completions format. **If an OAI-format PDF or audio content block is
passed into any chat model, it will be transformed into the LangChain
standard format**. We assume that any chat model supporting OAI-format
PDF or audio input has also implemented support for the standard format.
This commit is contained in:
ccurme
2025-04-23 14:32:51 -04:00
committed by GitHub
parent d4fc734250
commit faef3e5d50
5 changed files with 305 additions and 4 deletions

View File

@@ -2036,6 +2036,24 @@ class ChatModelIntegrationTests(ChatModelTests):
)
_ = model.invoke([message])
# Test OpenAI Chat Completions format
message = HumanMessage(
[
{
"type": "text",
"text": "Summarize this document:",
},
{
"type": "file",
"file": {
"filename": "test file.pdf",
"file_data": f"data:application/pdf;base64,{pdf_data}",
},
},
]
)
_ = model.invoke([message])
def test_audio_inputs(self, model: BaseChatModel) -> None:
"""Test that the model can process audio inputs.
@@ -2093,6 +2111,21 @@ class ChatModelIntegrationTests(ChatModelTests):
)
_ = model.invoke([message])
# Test OpenAI Chat Completions format
message = HumanMessage(
[
{
"type": "text",
"text": "Describe this audio:",
},
{
"type": "input_audio",
"input_audio": {"data": audio_data, "format": "wav"},
},
]
)
_ = model.invoke([message])
def test_image_inputs(self, model: BaseChatModel) -> None:
"""Test that the model can process image inputs.