core, standard-tests: support PDF and audio input in Chat Completions format (#30979)

Chat models currently implement support for:
- images in OpenAI Chat Completions format
- other multimodal types (e.g., PDF and audio) in a cross-provider
[standard
format](https://python.langchain.com/docs/how_to/multimodal_inputs/)

Here we update core to extend support to PDF and audio input in Chat
Completions format. **If an OAI-format PDF or audio content block is
passed into any chat model, it will be transformed into the LangChain
standard format**. We assume that any chat model supporting OAI-format
PDF or audio input has also implemented support for the standard format.
This commit is contained in:
ccurme
2025-04-23 14:32:51 -04:00
committed by GitHub
parent d4fc734250
commit faef3e5d50
5 changed files with 305 additions and 4 deletions

View File

@@ -2036,6 +2036,24 @@ class ChatModelIntegrationTests(ChatModelTests):
)
_ = model.invoke([message])
# Test OpenAI Chat Completions format
message = HumanMessage(
[
{
"type": "text",
"text": "Summarize this document:",
},
{
"type": "file",
"file": {
"filename": "test file.pdf",
"file_data": f"data:application/pdf;base64,{pdf_data}",
},
},
]
)
_ = model.invoke([message])
def test_audio_inputs(self, model: BaseChatModel) -> None:
"""Test that the model can process audio inputs.
@@ -2093,6 +2111,21 @@ class ChatModelIntegrationTests(ChatModelTests):
)
_ = model.invoke([message])
# Test OpenAI Chat Completions format
message = HumanMessage(
[
{
"type": "text",
"text": "Describe this audio:",
},
{
"type": "input_audio",
"input_audio": {"data": audio_data, "format": "wav"},
},
]
)
_ = model.invoke([message])
def test_image_inputs(self, model: BaseChatModel) -> None:
"""Test that the model can process image inputs.