diff --git a/libs/core/langchain_core/messages/block_translators/openai.py b/libs/core/langchain_core/messages/block_translators/openai.py
index 0681ec8398c..ee237379490 100644
--- a/libs/core/langchain_core/messages/block_translators/openai.py
+++ b/libs/core/langchain_core/messages/block_translators/openai.py
@@ -5,7 +5,7 @@ from __future__ import annotations
 import json
 import warnings
 from collections.abc import Iterable
-from typing import TYPE_CHECKING, Any, Optional, Union, cast
+from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
 
 from langchain_core.language_models._utils import (
     _is_openai_data_block,
@@ -42,10 +42,23 @@ def convert_to_openai_image_block(block: dict[str, Any]) -> dict:
         raise ValueError(error_message)
 
 
-def convert_to_openai_data_block(block: dict) -> dict:
+def convert_to_openai_data_block(
+    block: dict, api: Literal["chat/completions", "responses"] = "chat/completions"
+) -> dict:
     """Format standard data content block to format expected by OpenAI."""
     if block["type"] == "image":
-        formatted_block = convert_to_openai_image_block(block)
+        chat_completions_block = convert_to_openai_image_block(block)
+        if api == "responses":
+            formatted_block = {
+                "type": "input_image",
+                "image_url": chat_completions_block["image_url"]["url"],
+            }
+            if chat_completions_block["image_url"].get("detail"):
+                formatted_block["detail"] = chat_completions_block["image_url"][
+                    "detail"
+                ]
+        else:
+            formatted_block = chat_completions_block
 
     elif block["type"] == "file":
         if "base64" in block or block.get("source_type") == "base64":
@@ -68,13 +81,23 @@ def convert_to_openai_data_block(block: dict) -> dict:
                     stacklevel=1,
                 )
             formatted_block = {"type": "file", "file": file}
+            if api == "responses":
+                formatted_block = {"type": "input_file", **formatted_block["file"]}
         elif "file_id" in block or block.get("source_type") == "id":
             # Handle v0 format: {"source_type": "id", "id": "...", ...}
             # Handle v1 format: {"file_id": "...", ...}
             file_id = block["id"] if "source_type" in block else block["file_id"]
             formatted_block = {"type": "file", "file": {"file_id": file_id}}
+            if api == "responses":
+                formatted_block = {"type": "input_file", **formatted_block["file"]}
+        elif "url" in block:
+            if api == "chat/completions":
+                error_msg = "OpenAI Chat Completions does not support file URLs."
+                raise ValueError(error_msg)
+            # Only supported by Responses API; return in that format
+            formatted_block = {"type": "input_file", "file_url": block["url"]}
         else:
-            error_msg = "Keys base64 or file_id required for file blocks."
+            error_msg = "Keys base64, url, or file_id required for file blocks."
             raise ValueError(error_msg)
 
     elif block["type"] == "audio":
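A quick sketch of what the updated helper now emits for each `api` value. The blocks and expected outputs are taken from the unit tests added below, so this is illustration rather than new behavior:

```python
# Mirrors the unit tests added in this PR; v1-style blocks with bare
# `url` / `base64` keys are assumed.
from langchain_core.messages.block_translators.openai import (
    convert_to_openai_data_block,
)

image = {"type": "image", "url": "https://example.com/test.png"}

# Chat Completions (the default) keeps the nested image_url object.
assert convert_to_openai_data_block(image) == {
    "type": "image_url",
    "image_url": {"url": "https://example.com/test.png"},
}

# The Responses API flattens it into an input_image block.
assert convert_to_openai_data_block(image, api="responses") == {
    "type": "input_image",
    "image_url": "https://example.com/test.png",
}

# File URLs are Responses-only; the default api raises ValueError.
file_block = {"type": "file", "url": "https://example.com/test.pdf"}
assert convert_to_openai_data_block(file_block, api="responses") == {
    "type": "input_file",
    "file_url": "https://example.com/test.pdf",
}
```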
diff --git a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py
index 873ca517dd6..6c4b04fca01 100644
--- a/libs/core/tests/unit_tests/messages/block_translators/test_openai.py
+++ b/libs/core/tests/unit_tests/messages/block_translators/test_openai.py
@@ -1,7 +1,12 @@
 from typing import Optional
 
+import pytest
+
 from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage
 from langchain_core.messages import content as types
+from langchain_core.messages.block_translators.openai import (
+    convert_to_openai_data_block,
+)
 from tests.unit_tests.language_models.chat_models.test_base import (
     _content_blocks_equal_ignore_id,
 )
@@ -442,3 +447,134 @@ def test_compat_responses_v03() -> None:
         {"type": "reasoning", "reasoning": "reasoning text", "id": "rs_abc"}
     ]
     assert chunk.content_blocks == expected_content
+
+
+def test_convert_to_openai_data_block() -> None:
+    # Chat completions
+    ## Image / url
+    block = {
+        "type": "image",
+        "url": "https://example.com/test.png",
+    }
+    expected = {
+        "type": "image_url",
+        "image_url": {"url": "https://example.com/test.png"},
+    }
+    result = convert_to_openai_data_block(block)
+    assert result == expected
+
+    ## Image / base64
+    block = {
+        "type": "image",
+        "base64": "",
+        "mime_type": "image/png",
+    }
+    expected = {
+        "type": "image_url",
+        "image_url": {"url": "data:image/png;base64,"},
+    }
+    result = convert_to_openai_data_block(block)
+    assert result == expected
+
+    ## File / url
+    block = {
+        "type": "file",
+        "url": "https://example.com/test.pdf",
+    }
+    with pytest.raises(ValueError, match="does not support"):
+        result = convert_to_openai_data_block(block)
+
+    ## File / base64
+    block = {
+        "type": "file",
+        "base64": "",
+        "mime_type": "application/pdf",
+        "filename": "test.pdf",
+    }
+    expected = {
+        "type": "file",
+        "file": {
+            "file_data": "data:application/pdf;base64,",
+            "filename": "test.pdf",
+        },
+    }
+    result = convert_to_openai_data_block(block)
+    assert result == expected
+
+    ## File / file ID
+    block = {
+        "type": "file",
+        "file_id": "file-abc123",
+    }
+    expected = {"type": "file", "file": {"file_id": "file-abc123"}}
+    result = convert_to_openai_data_block(block)
+    assert result == expected
+
+    ## Audio / base64
+    block = {
+        "type": "audio",
+        "base64": "",
+        "mime_type": "audio/wav",
+    }
+    expected = {
+        "type": "input_audio",
+        "input_audio": {"data": "", "format": "wav"},
+    }
+    result = convert_to_openai_data_block(block)
+    assert result == expected
+
+    # Responses
+    ## Image / url
+    block = {
+        "type": "image",
+        "url": "https://example.com/test.png",
+    }
+    expected = {"type": "input_image", "image_url": "https://example.com/test.png"}
+    result = convert_to_openai_data_block(block, api="responses")
+    assert result == expected
+
+    ## Image / base64
+    block = {
+        "type": "image",
+        "base64": "",
+        "mime_type": "image/png",
+    }
+    expected = {
+        "type": "input_image",
+        "image_url": "data:image/png;base64,",
+    }
+    result = convert_to_openai_data_block(block, api="responses")
+    assert result == expected
+
+    ## File / url
+    block = {
+        "type": "file",
+        "url": "https://example.com/test.pdf",
+    }
+    expected = {"type": "input_file", "file_url": "https://example.com/test.pdf"}
+    result = convert_to_openai_data_block(block, api="responses")
+    assert result == expected
+
+    ## File / base64
+    block = {
+        "type": "file",
+        "base64": "",
+        "mime_type": "application/pdf",
+        "filename": "test.pdf",
+    }
+    expected = {
+        "type": "input_file",
+        "file_data": "data:application/pdf;base64,",
"data:application/pdf;base64,", + "filename": "test.pdf", + } + result = convert_to_openai_data_block(block, api="responses") + assert result == expected + + ## File / file ID + block = { + "type": "file", + "file_id": "file-abc123", + } + expected = {"type": "input_file", "file_id": "file-abc123"} + result = convert_to_openai_data_block(block, api="responses") + assert result == expected diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index 3d5c6282da2..8116630830b 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -206,7 +206,11 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage: return ChatMessage(content=_dict.get("content", ""), role=role, id=id_) # type: ignore[arg-type] -def _format_message_content(content: Any, responses_ai_msg: bool = False) -> Any: +def _format_message_content( + content: Any, + api: Literal["chat/completions", "responses"] = "chat/completions", + role: Optional[str] = None, +) -> Any: """Format message content.""" if content and isinstance(content, list): formatted_content = [] @@ -223,9 +227,9 @@ def _format_message_content(content: Any, responses_ai_msg: bool = False) -> Any and is_data_content_block(block) # Responses API messages handled separately in _compat (parsed into # image generation calls) - and not responses_ai_msg + and not (api == "responses" and str(role).lower().startswith("ai")) ): - formatted_content.append(convert_to_openai_data_block(block)) + formatted_content.append(convert_to_openai_data_block(block, api=api)) # Anthropic image blocks elif ( isinstance(block, dict) @@ -258,13 +262,12 @@ def _format_message_content(content: Any, responses_ai_msg: bool = False) -> Any def _convert_message_to_dict( - message: BaseMessage, responses_ai_msg: bool = False + message: BaseMessage, + api: Literal["chat/completions", "responses"] = "chat/completions", ) -> dict: """Convert a LangChain message to dictionary format expected by OpenAI.""" message_dict: dict[str, Any] = { - "content": _format_message_content( - message.content, responses_ai_msg=responses_ai_msg - ) + "content": _format_message_content(message.content, api=api, role=message.type) } if (name := message.name or message.additional_kwargs.get("name")) is not None: message_dict["name"] = name @@ -306,7 +309,7 @@ def _convert_message_to_dict( isinstance(block, dict) and block.get("type") == "audio" and (id_ := block.get("id")) - and not responses_ai_msg + and api != "responses" ): # openai doesn't support passing the data back - only the id # https://platform.openai.com/docs/guides/audio/multi-turn-conversations @@ -3702,7 +3705,7 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: for lc_msg in messages: if isinstance(lc_msg, AIMessage): lc_msg = _convert_from_v03_ai_message(lc_msg) - msg = _convert_message_to_dict(lc_msg, responses_ai_msg=True) + msg = _convert_message_to_dict(lc_msg, api="responses") if isinstance(msg.get("content"), list) and all( isinstance(block, dict) for block in msg["content"] ): @@ -3717,7 +3720,7 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list: ] msg["content"] = _convert_from_v1_to_responses(msg["content"], tcs) else: - msg = _convert_message_to_dict(lc_msg) + msg = _convert_message_to_dict(lc_msg, api="responses") # Get content from non-standard content blocks if isinstance(msg["content"], list): for i, block in 
diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py
index 3d5c6282da2..8116630830b 100644
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -206,7 +206,11 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
         return ChatMessage(content=_dict.get("content", ""), role=role, id=id_)  # type: ignore[arg-type]
 
 
-def _format_message_content(content: Any, responses_ai_msg: bool = False) -> Any:
+def _format_message_content(
+    content: Any,
+    api: Literal["chat/completions", "responses"] = "chat/completions",
+    role: Optional[str] = None,
+) -> Any:
     """Format message content."""
     if content and isinstance(content, list):
         formatted_content = []
@@ -223,9 +227,9 @@ def _format_message_content(content: Any, responses_ai_msg: bool = False) -> Any:
                 and is_data_content_block(block)
                 # Responses API messages handled separately in _compat (parsed into
                 # image generation calls)
-                and not responses_ai_msg
+                and not (api == "responses" and str(role).lower().startswith("ai"))
             ):
-                formatted_content.append(convert_to_openai_data_block(block))
+                formatted_content.append(convert_to_openai_data_block(block, api=api))
             # Anthropic image blocks
             elif (
                 isinstance(block, dict)
@@ -258,13 +262,12 @@ def _format_message_content(content: Any, responses_ai_msg: bool = False) -> Any:
 
 
 def _convert_message_to_dict(
-    message: BaseMessage, responses_ai_msg: bool = False
+    message: BaseMessage,
+    api: Literal["chat/completions", "responses"] = "chat/completions",
 ) -> dict:
     """Convert a LangChain message to dictionary format expected by OpenAI."""
     message_dict: dict[str, Any] = {
-        "content": _format_message_content(
-            message.content, responses_ai_msg=responses_ai_msg
-        )
+        "content": _format_message_content(message.content, api=api, role=message.type)
     }
     if (name := message.name or message.additional_kwargs.get("name")) is not None:
         message_dict["name"] = name
@@ -306,7 +309,7 @@ def _convert_message_to_dict(
                 isinstance(block, dict)
                 and block.get("type") == "audio"
                 and (id_ := block.get("id"))
-                and not responses_ai_msg
+                and api != "responses"
             ):
                 # openai doesn't support passing the data back - only the id
                 # https://platform.openai.com/docs/guides/audio/multi-turn-conversations
@@ -3702,7 +3705,7 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list:
     for lc_msg in messages:
         if isinstance(lc_msg, AIMessage):
             lc_msg = _convert_from_v03_ai_message(lc_msg)
-            msg = _convert_message_to_dict(lc_msg, responses_ai_msg=True)
+            msg = _convert_message_to_dict(lc_msg, api="responses")
             if isinstance(msg.get("content"), list) and all(
                 isinstance(block, dict) for block in msg["content"]
             ):
@@ -3717,7 +3720,7 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list:
             ]
             msg["content"] = _convert_from_v1_to_responses(msg["content"], tcs)
         else:
-            msg = _convert_message_to_dict(lc_msg)
+            msg = _convert_message_to_dict(lc_msg, api="responses")
         # Get content from non-standard content blocks
         if isinstance(msg["content"], list):
             for i, block in enumerate(msg["content"]):
diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
index d96b3e69d24..122bd7a6490 100644
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_base_standard.py
@@ -95,7 +95,7 @@ class TestOpenAIStandard(ChatModelIntegrationTests):
 
         message = HumanMessage(
             [
-                {"type": "text", "text": "Summarize this document:"},
+                {"type": "text", "text": "What is the document title, verbatim?"},
                 {
                     "type": "file",
                     "mime_type": "application/pdf",
@@ -109,7 +109,7 @@ class TestOpenAIStandard(ChatModelIntegrationTests):
         # Test OpenAI Chat Completions format
         message = HumanMessage(
             [
-                {"type": "text", "text": "Summarize this document:"},
+                {"type": "text", "text": "What is the document title, verbatim?"},
                 {
                     "type": "file",
                     "file": {
diff --git a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py
index 4b6430e4ead..b33f3e56abc 100644
--- a/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py
+++ b/libs/partners/openai/tests/integration_tests/chat_models/test_responses_standard.py
@@ -5,7 +5,7 @@ from typing import cast
 import pytest
 
 from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import AIMessage
+from langchain_core.messages import AIMessage, HumanMessage
 from langchain_openai import ChatOpenAI
 from tests.integration_tests.chat_models.test_base_standard import TestOpenAIStandard
 
@@ -48,6 +48,29 @@ class TestOpenAIResponses(TestOpenAIStandard):
         input_ = "What was the 3rd highest building in 2000?"
         return _invoke(llm, input_, stream)
 
+    def test_openai_pdf_inputs(self, model: BaseChatModel) -> None:
+        """Test that the model can process PDF inputs."""
+        super().test_openai_pdf_inputs(model)
+        # Responses API additionally supports files via URL
+        url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
+
+        message = HumanMessage(
+            [
+                {"type": "text", "text": "What is the document title, verbatim?"},
+                {"type": "file", "url": url},
+            ]
+        )
+        _ = model.invoke([message])
+
+        # Test OpenAI Responses format
+        message = HumanMessage(
+            [
+                {"type": "text", "text": "What is the document title, verbatim?"},
+                {"type": "input_file", "file_url": url},
+            ]
+        )
+        _ = model.invoke([message])
+
 
 def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage:
     if stream:
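End to end, the user-facing effect mirrors the new integration test: with the Responses API, a PDF can now be passed by URL. A sketch of that usage (model name is an arbitrary choice; requires `OPENAI_API_KEY` and network access):

```python
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini", use_responses_api=True)

message = HumanMessage(
    [
        {"type": "text", "text": "What is the document title, verbatim?"},
        # File URLs are supported by the Responses API only; the same block
        # sent to Chat Completions now raises a clear ValueError.
        {
            "type": "file",
            "url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
        },
    ]
)
response = llm.invoke([message])
print(response.content)
```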