mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-21 06:33:41 +00:00
feat(openai): (v1) support pdfs passed via url in standard format (#32876)
This commit is contained in:
@@ -5,7 +5,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import warnings
|
||||
from collections.abc import Iterable
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union, cast
|
||||
from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
|
||||
|
||||
from langchain_core.language_models._utils import (
|
||||
_is_openai_data_block,
|
||||
@@ -42,10 +42,23 @@ def convert_to_openai_image_block(block: dict[str, Any]) -> dict:
|
||||
raise ValueError(error_message)
|
||||
|
||||
|
||||
def convert_to_openai_data_block(block: dict) -> dict:
|
||||
def convert_to_openai_data_block(
|
||||
block: dict, api: Literal["chat/completions", "responses"] = "chat/completions"
|
||||
) -> dict:
|
||||
"""Format standard data content block to format expected by OpenAI."""
|
||||
if block["type"] == "image":
|
||||
formatted_block = convert_to_openai_image_block(block)
|
||||
chat_completions_block = convert_to_openai_image_block(block)
|
||||
if api == "responses":
|
||||
formatted_block = {
|
||||
"type": "input_image",
|
||||
"image_url": chat_completions_block["image_url"]["url"],
|
||||
}
|
||||
if chat_completions_block["image_url"].get("detail"):
|
||||
formatted_block["detail"] = chat_completions_block["image_url"][
|
||||
"detail"
|
||||
]
|
||||
else:
|
||||
formatted_block = chat_completions_block
|
||||
|
||||
elif block["type"] == "file":
|
||||
if "base64" in block or block.get("source_type") == "base64":
|
||||
@@ -68,13 +81,23 @@ def convert_to_openai_data_block(block: dict) -> dict:
|
||||
stacklevel=1,
|
||||
)
|
||||
formatted_block = {"type": "file", "file": file}
|
||||
if api == "responses":
|
||||
formatted_block = {"type": "input_file", **formatted_block["file"]}
|
||||
elif "file_id" in block or block.get("source_type") == "id":
|
||||
# Handle v0 format: {"source_type": "id", "id": "...", ...}
|
||||
# Handle v1 format: {"file_id": "...", ...}
|
||||
file_id = block["id"] if "source_type" in block else block["file_id"]
|
||||
formatted_block = {"type": "file", "file": {"file_id": file_id}}
|
||||
if api == "responses":
|
||||
formatted_block = {"type": "input_file", **formatted_block["file"]}
|
||||
elif "url" in block:
|
||||
if api == "chat/completions":
|
||||
error_msg = "OpenAI Chat Completions does not support file URLs."
|
||||
raise ValueError(error_msg)
|
||||
# Only supported by Responses API; return in that format
|
||||
formatted_block = {"type": "input_file", "file_url": block["url"]}
|
||||
else:
|
||||
error_msg = "Keys base64 or file_id required for file blocks."
|
||||
error_msg = "Keys base64, url, or file_id required for file blocks."
|
||||
raise ValueError(error_msg)
|
||||
|
||||
elif block["type"] == "audio":
|
||||
|
||||
@@ -1,7 +1,12 @@
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage
|
||||
from langchain_core.messages import content as types
|
||||
from langchain_core.messages.block_translators.openai import (
|
||||
convert_to_openai_data_block,
|
||||
)
|
||||
from tests.unit_tests.language_models.chat_models.test_base import (
|
||||
_content_blocks_equal_ignore_id,
|
||||
)
|
||||
@@ -442,3 +447,132 @@ def test_compat_responses_v03() -> None:
|
||||
{"type": "reasoning", "reasoning": "reasoning text", "id": "rs_abc"}
|
||||
]
|
||||
assert chunk.content_blocks == expected_content
|
||||
|
||||
|
||||
def test_convert_to_openai_data_block() -> None:
    """Check ``convert_to_openai_data_block`` output for both OpenAI API formats.

    Exercises the default Chat Completions format (image, file and audio
    blocks) and the Responses format selected with ``api="responses"`` (image
    and file blocks). Each case builds a standard content block, converts it,
    and compares the result with the exact payload expected for that API.
    """
    # Chat completions
    ## Image / url
    block = {
        "type": "image",
        "url": "https://example.com/test.png",
    }
    expected = {
        "type": "image_url",
        "image_url": {"url": "https://example.com/test.png"},
    }
    result = convert_to_openai_data_block(block)
    assert result == expected

    ## Image / base64
    block = {
        "type": "image",
        "base64": "<base64 string>",
        "mime_type": "image/png",
    }
    expected = {
        "type": "image_url",
        "image_url": {"url": "data:image/png;base64,<base64 string>"},
    }
    result = convert_to_openai_data_block(block)
    assert result == expected

    ## File / url
    block = {
        "type": "file",
        "url": "https://example.com/test.pdf",
    }
    # File URLs are rejected for the default (Chat Completions) format.
    with pytest.raises(ValueError, match="does not support"):
        convert_to_openai_data_block(block)

    ## File / base64
    block = {
        "type": "file",
        "base64": "<base64 string>",
        "mime_type": "application/pdf",
        "filename": "test.pdf",
    }
    expected = {
        "type": "file",
        "file": {
            "file_data": "data:application/pdf;base64,<base64 string>",
            "filename": "test.pdf",
        },
    }
    result = convert_to_openai_data_block(block)
    assert result == expected

    ## File / file ID
    block = {
        "type": "file",
        "file_id": "file-abc123",
    }
    expected = {"type": "file", "file": {"file_id": "file-abc123"}}
    result = convert_to_openai_data_block(block)
    assert result == expected

    ## Audio / base64
    block = {
        "type": "audio",
        "base64": "<base64 string>",
        "mime_type": "audio/wav",
    }
    expected = {
        "type": "input_audio",
        "input_audio": {"data": "<base64 string>", "format": "wav"},
    }
    result = convert_to_openai_data_block(block)
    assert result == expected

    # Responses
    ## Image / url
    block = {
        "type": "image",
        "url": "https://example.com/test.png",
    }
    expected = {"type": "input_image", "image_url": "https://example.com/test.png"}
    result = convert_to_openai_data_block(block, api="responses")
    assert result == expected

    ## Image / base64
    block = {
        "type": "image",
        "base64": "<base64 string>",
        "mime_type": "image/png",
    }
    expected = {
        "type": "input_image",
        "image_url": "data:image/png;base64,<base64 string>",
    }
    result = convert_to_openai_data_block(block, api="responses")
    assert result == expected

    ## File / url
    block = {
        "type": "file",
        "url": "https://example.com/test.pdf",
    }
    expected = {"type": "input_file", "file_url": "https://example.com/test.pdf"}
    result = convert_to_openai_data_block(block, api="responses")
    assert result == expected

    ## File / base64
    block = {
        "type": "file",
        "base64": "<base64 string>",
        "mime_type": "application/pdf",
        "filename": "test.pdf",
    }
    expected = {
        "type": "input_file",
        "file_data": "data:application/pdf;base64,<base64 string>",
        "filename": "test.pdf",
    }
    result = convert_to_openai_data_block(block, api="responses")
    assert result == expected

    ## File / file ID
    block = {
        "type": "file",
        "file_id": "file-abc123",
    }
    expected = {"type": "input_file", "file_id": "file-abc123"}
    result = convert_to_openai_data_block(block, api="responses")
    assert result == expected
|
||||
|
||||
@@ -206,7 +206,11 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
|
||||
return ChatMessage(content=_dict.get("content", ""), role=role, id=id_) # type: ignore[arg-type]
|
||||
|
||||
|
||||
def _format_message_content(content: Any, responses_ai_msg: bool = False) -> Any:
|
||||
def _format_message_content(
|
||||
content: Any,
|
||||
api: Literal["chat/completions", "responses"] = "chat/completions",
|
||||
role: Optional[str] = None,
|
||||
) -> Any:
|
||||
"""Format message content."""
|
||||
if content and isinstance(content, list):
|
||||
formatted_content = []
|
||||
@@ -223,9 +227,9 @@ def _format_message_content(content: Any, responses_ai_msg: bool = False) -> Any
|
||||
and is_data_content_block(block)
|
||||
# Responses API messages handled separately in _compat (parsed into
|
||||
# image generation calls)
|
||||
and not responses_ai_msg
|
||||
and not (api == "responses" and str(role).lower().startswith("ai"))
|
||||
):
|
||||
formatted_content.append(convert_to_openai_data_block(block))
|
||||
formatted_content.append(convert_to_openai_data_block(block, api=api))
|
||||
# Anthropic image blocks
|
||||
elif (
|
||||
isinstance(block, dict)
|
||||
@@ -258,13 +262,12 @@ def _format_message_content(content: Any, responses_ai_msg: bool = False) -> Any
|
||||
|
||||
|
||||
def _convert_message_to_dict(
|
||||
message: BaseMessage, responses_ai_msg: bool = False
|
||||
message: BaseMessage,
|
||||
api: Literal["chat/completions", "responses"] = "chat/completions",
|
||||
) -> dict:
|
||||
"""Convert a LangChain message to dictionary format expected by OpenAI."""
|
||||
message_dict: dict[str, Any] = {
|
||||
"content": _format_message_content(
|
||||
message.content, responses_ai_msg=responses_ai_msg
|
||||
)
|
||||
"content": _format_message_content(message.content, api=api, role=message.type)
|
||||
}
|
||||
if (name := message.name or message.additional_kwargs.get("name")) is not None:
|
||||
message_dict["name"] = name
|
||||
@@ -306,7 +309,7 @@ def _convert_message_to_dict(
|
||||
isinstance(block, dict)
|
||||
and block.get("type") == "audio"
|
||||
and (id_ := block.get("id"))
|
||||
and not responses_ai_msg
|
||||
and api != "responses"
|
||||
):
|
||||
# openai doesn't support passing the data back - only the id
|
||||
# https://platform.openai.com/docs/guides/audio/multi-turn-conversations
|
||||
@@ -3702,7 +3705,7 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list:
|
||||
for lc_msg in messages:
|
||||
if isinstance(lc_msg, AIMessage):
|
||||
lc_msg = _convert_from_v03_ai_message(lc_msg)
|
||||
msg = _convert_message_to_dict(lc_msg, responses_ai_msg=True)
|
||||
msg = _convert_message_to_dict(lc_msg, api="responses")
|
||||
if isinstance(msg.get("content"), list) and all(
|
||||
isinstance(block, dict) for block in msg["content"]
|
||||
):
|
||||
@@ -3717,7 +3720,7 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list:
|
||||
]
|
||||
msg["content"] = _convert_from_v1_to_responses(msg["content"], tcs)
|
||||
else:
|
||||
msg = _convert_message_to_dict(lc_msg)
|
||||
msg = _convert_message_to_dict(lc_msg, api="responses")
|
||||
# Get content from non-standard content blocks
|
||||
if isinstance(msg["content"], list):
|
||||
for i, block in enumerate(msg["content"]):
|
||||
|
||||
@@ -95,7 +95,7 @@ class TestOpenAIStandard(ChatModelIntegrationTests):
|
||||
|
||||
message = HumanMessage(
|
||||
[
|
||||
{"type": "text", "text": "Summarize this document:"},
|
||||
{"type": "text", "text": "What is the document title, verbatim?"},
|
||||
{
|
||||
"type": "file",
|
||||
"mime_type": "application/pdf",
|
||||
@@ -109,7 +109,7 @@ class TestOpenAIStandard(ChatModelIntegrationTests):
|
||||
# Test OpenAI Chat Completions format
|
||||
message = HumanMessage(
|
||||
[
|
||||
{"type": "text", "text": "Summarize this document:"},
|
||||
{"type": "text", "text": "What is the document title, verbatim?"},
|
||||
{
|
||||
"type": "file",
|
||||
"file": {
|
||||
|
||||
@@ -5,7 +5,7 @@ from typing import cast
|
||||
|
||||
import pytest
|
||||
from langchain_core.language_models import BaseChatModel
|
||||
from langchain_core.messages import AIMessage
|
||||
from langchain_core.messages import AIMessage, HumanMessage
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
from tests.integration_tests.chat_models.test_base_standard import TestOpenAIStandard
|
||||
@@ -48,6 +48,29 @@ class TestOpenAIResponses(TestOpenAIStandard):
|
||||
input_ = "What was the 3rd highest building in 2000?"
|
||||
return _invoke(llm, input_, stream)
|
||||
|
||||
def test_openai_pdf_inputs(self, model: BaseChatModel) -> None:
    """Run the inherited PDF-input checks, then send the PDF by URL.

    The Responses API additionally supports files via URL, so the same
    question is sent twice: once with the standard ``file`` block and once
    with the OpenAI Responses ``input_file`` block. The replies are not
    inspected; the test passes when both invocations complete.
    """
    super().test_openai_pdf_inputs(model)

    pdf_url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
    question = {"type": "text", "text": "What is the document title, verbatim?"}
    file_blocks = (
        # Standard content-block format
        {"type": "file", "url": pdf_url},
        # OpenAI Responses format
        {"type": "input_file", "file_url": pdf_url},
    )

    for file_block in file_blocks:
        # Copy the text block so each message owns its own content dicts.
        model.invoke([HumanMessage([dict(question), file_block])])
|
||||
|
||||
|
||||
def _invoke(llm: ChatOpenAI, input_: str, stream: bool) -> AIMessage:
|
||||
if stream:
|
||||
|
||||
Reference in New Issue
Block a user