mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-14 05:56:40 +00:00
core, standard-tests: support PDF and audio input in Chat Completions format (#30979)
Chat models currently implement support for (1) images in OpenAI Chat Completions format, and (2) other multimodal types (e.g., PDF and audio) in a cross-provider [standard format](https://python.langchain.com/docs/how_to/multimodal_inputs/). Here we update core to extend support to PDF and audio input in Chat Completions format. **If an OAI-format PDF or audio content block is passed into any chat model, it will be transformed to the LangChain standard format**. We assume that any chat model supporting OAI-format PDF or audio has implemented support for the standard format.
This commit is contained in:
@@ -455,3 +455,115 @@ def test_trace_images_in_openai_format() -> None:
|
||||
"url": "https://example.com/image.png",
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
def test_extend_support_to_openai_multimodal_formats() -> None:
    """Test that chat models normalize OpenAI file and audio inputs.

    The message mixes OpenAI Chat Completions content blocks. The assertions
    pin the following behavior:

    - ``file`` blocks carrying an inline ``file_data`` data URI come back as
      standard-format ``file`` blocks with ``source_type="base64"``, the
      decoded ``mime_type``, the raw base64 ``data`` and, when one was
      supplied, the ``filename``.
    - ``input_audio`` blocks come back as standard-format ``audio`` blocks
      with ``mime_type`` ``audio/<format>``.
    - ``text``, ``image_url`` and ``file`` blocks that only reference a
      ``file_id`` come back unchanged.
    - The caller's ``messages`` list is not mutated by ``invoke``.
    """
    import copy

    # NOTE(review): presumably ParrotFakeChatModel echoes the content of the
    # last input message, which is what lets the normalized blocks be read
    # back from ``response.content`` -- confirm against its definition.
    llm = ParrotFakeChatModel()
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Hello"},
                {
                    "type": "image_url",
                    "image_url": {"url": "https://example.com/image.png"},
                },
                {
                    "type": "image_url",
                    "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."},
                },
                {
                    "type": "file",
                    "file": {
                        "filename": "draconomicon.pdf",
                        "file_data": "data:application/pdf;base64,<base64 string>",
                    },
                },
                {
                    "type": "file",
                    "file": {
                        "file_data": "data:application/pdf;base64,<base64 string>",
                    },
                },
                {
                    "type": "file",
                    "file": {"file_id": "<file id>"},
                },
                {
                    "type": "input_audio",
                    "input_audio": {"data": "<base64 data>", "format": "wav"},
                },
            ],
        },
    ]
    # Snapshot the input before invoking so the no-mutation check compares
    # against what was actually passed in, rather than a hand-maintained
    # duplicate literal that could silently drift from ``messages``.
    original_messages = copy.deepcopy(messages)

    expected_content = [
        {"type": "text", "text": "Hello"},
        {
            "type": "image_url",
            "image_url": {"url": "https://example.com/image.png"},
        },
        {
            "type": "image_url",
            "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."},
        },
        {
            "type": "file",
            "source_type": "base64",
            "data": "<base64 string>",
            "mime_type": "application/pdf",
            "filename": "draconomicon.pdf",
        },
        {
            "type": "file",
            "source_type": "base64",
            "data": "<base64 string>",
            "mime_type": "application/pdf",
        },
        {
            "type": "file",
            "file": {"file_id": "<file id>"},
        },
        {
            "type": "audio",
            "source_type": "base64",
            "data": "<base64 data>",
            "mime_type": "audio/wav",
        },
    ]
    response = llm.invoke(messages)
    assert response.content == expected_content

    # Test no mutation
    assert messages == original_messages
|
||||
|
Reference in New Issue
Block a user