Eugene Yurtsev 2025-05-29 16:41:44 -04:00
parent dd4fc8ab8f
commit abb00c1000
4 changed files with 1497 additions and 1421 deletions

View File

@@ -118,6 +118,15 @@ global_ssl_context = ssl.create_default_context(cafile=certifi.where())
 _FUNCTION_CALL_IDS_MAP_KEY = "__openai_function_call_ids__"
 
+WellKnownTools = (
+    "file_search",
+    "web_search_preview",
+    "computer_use_preview",
+    "code_interpreter",
+    "mcp",
+    "image_generation",
+)
+
 def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
     """Convert a dictionary to a LangChain message.
@@ -1487,13 +1496,7 @@ class BaseChatOpenAI(BaseChatModel):
                         "type": "function",
                         "function": {"name": tool_choice},
                     }
-                elif tool_choice in (
-                    "file_search",
-                    "web_search_preview",
-                    "computer_use_preview",
-                    "code_interpreter",
-                    "mcp",
-                ):
+                elif tool_choice in WellKnownTools:
                     tool_choice = {"type": tool_choice}
                 # 'any' is not natively supported by OpenAI API.
                 # We support 'any' since other models use this instead of 'required'.
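
The rule this hunk deduplicates can be summarized with a small self-contained sketch; _coerce_tool_choice is a hypothetical helper written only to show the mapping, not a function in this codebase:

WellKnownTools = (
    "file_search",
    "web_search_preview",
    "computer_use_preview",
    "code_interpreter",
    "mcp",
    "image_generation",
)

def _coerce_tool_choice(tool_choice: str) -> dict:
    # A bare string naming a built-in tool becomes {"type": <name>};
    # any other string is treated as a function name.
    if tool_choice in WellKnownTools:
        return {"type": tool_choice}
    return {"type": "function", "function": {"name": tool_choice}}

assert _coerce_tool_choice("image_generation") == {"type": "image_generation"}
assert _coerce_tool_choice("get_weather") == {
    "type": "function",
    "function": {"name": "get_weather"},
}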
@@ -3050,6 +3053,11 @@ def _construct_responses_api_payload(
                 new_tools.append({"type": "function", **tool["function"]})
             else:
                 new_tools.append(tool)
+            if tool["type"] == "image_generation" and "partial_images" in tool:
+                raise NotImplementedError(
+                    "Partial image generation is not yet supported "
+                    "via the LangChain ChatOpenAI client."
+                )
         payload["tools"] = new_tools
     if tool_choice := payload.pop("tool_choice", None):
         # chat api: {"type": "function", "function": {"name": "..."}}
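
A sketch of what the new guard rejects. The error is raised while the request payload is being constructed, so it fires before any network call; the prompt and option values are illustrative:

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True)
tool = {"type": "image_generation", "partial_images": 2}
try:
    llm.bind_tools([tool]).invoke("Make a picture of a fuzzy cat")
except NotImplementedError as exc:
    print(exc)  # Partial image generation is not yet supported ...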
@@ -3139,6 +3147,7 @@ def _pop_summary_index_from_reasoning(reasoning: dict) -> dict:
 
 def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list:
     """Construct the input for the OpenAI Responses API."""
     input_ = []
     for lc_msg in messages:
         msg = _convert_message_to_dict(lc_msg)
@@ -3191,6 +3200,7 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list:
             computer_calls = []
             code_interpreter_calls = []
             mcp_calls = []
+            image_generation_calls = []
             tool_outputs = lc_msg.additional_kwargs.get("tool_outputs", [])
             for tool_output in tool_outputs:
                 if tool_output.get("type") == "computer_call":
@@ -3199,10 +3209,13 @@ def _construct_responses_api_input(messages: Sequence[BaseMessage]) -> list:
                     code_interpreter_calls.append(tool_output)
                 elif tool_output.get("type") == "mcp_call":
                     mcp_calls.append(tool_output)
+                elif tool_output.get("type") == "image_generation_call":
+                    image_generation_calls.append(tool_output)
                 else:
                     pass
             input_.extend(code_interpreter_calls)
             input_.extend(mcp_calls)
+            input_.extend(image_generation_calls)
             msg["content"] = msg.get("content") or []
             if lc_msg.additional_kwargs.get("refusal"):
                 if isinstance(msg["content"], str):
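
The practical effect of the two additions above: an AIMessage whose additional_kwargs["tool_outputs"] contains an image_generation_call is now replayed into the Responses API input when passed back as history. A hedged multi-turn sketch, assuming the llm_with_tools binding from earlier; the prompts are illustrative:

from langchain_core.messages import HumanMessage

first = llm_with_tools.invoke("Make a picture of a fuzzy cat")
# first.additional_kwargs["tool_outputs"] carries the image_generation_call.
followup = llm_with_tools.invoke(
    [
        HumanMessage("Make a picture of a fuzzy cat"),
        first,
        HumanMessage("Now give the cat a hat"),
    ]
)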
@@ -3489,6 +3502,7 @@ def _convert_responses_chunk_to_generation_chunk(
         "mcp_call",
         "mcp_list_tools",
         "mcp_approval_request",
+        "image_generation_call",
     ):
         additional_kwargs["tool_outputs"] = [
             chunk.item.model_dump(exclude_none=True, mode="json")
@@ -3516,6 +3530,9 @@ def _convert_responses_chunk_to_generation_chunk(
                 {"index": chunk.summary_index, "type": "summary_text", "text": ""}
             ]
         }
+    elif chunk.type == "response.image_generation_call.partial_image":
+        # Partial images are not supported yet.
+        pass
     elif chunk.type == "response.reasoning_summary_text.delta":
         additional_kwargs["reasoning"] = {
             "summary": [

View File

@@ -7,7 +7,7 @@ authors = []
 license = { text = "MIT" }
 requires-python = ">=3.9"
 dependencies = [
-    "langchain-core<1.0.0,>=0.3.61",
+    "langchain-core<1.0.0,>=0.3.63",
     "openai<2.0.0,>=1.68.2",
     "tiktoken<1,>=0.7",
 ]

View File

@@ -452,3 +452,61 @@ def test_mcp_builtin() -> None:
     _ = llm_with_tools.invoke(
         [approval_message], previous_response_id=response.response_metadata["id"]
     )
+
+
+@pytest.mark.vcr()
+def test_image_generation_streaming() -> None:
+    """Test image generation via both invocation and streaming."""
+    llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True)
+
+    # Test invocation
+    tool = {
+        "type": "image_generation",
+        # Keep the quality low for testing purposes, so the test runs faster.
+        "quality": "low",
+    }
+    llm_with_tools = llm.bind_tools([tool])
+    response = llm_with_tools.invoke("Make a picture of a fuzzy cat")
+    _check_response(response)
+    tool_output = response.additional_kwargs["tool_outputs"][0]
+    # Example tool output for an image:
+    # {
+    #     "background": "opaque",
+    #     "id": "ig_683716a8ddf0819888572b20621c7ae4029ec8c11f8dacf8",
+    #     "output_format": "png",
+    #     "quality": "high",
+    #     "revised_prompt": "A fluffy, fuzzy cat sitting calmly, with soft fur, bright "
+    #         "eyes, and a cute, friendly expression. The background is "
+    #         "simple and light to emphasize the cat's texture and "
+    #         "fluffiness.",
+    #     "size": "1024x1024",
+    #     "status": "completed",
+    #     "type": "image_generation_call",
+    #     "result": ...,  # base64-encoded image data
+    # }
+    expected_keys = {
+        "id",
+        "background",
+        "output_format",
+        "quality",
+        "result",
+        "revised_prompt",
+        "size",
+        "status",
+        "type",
+    }
+    assert set(tool_output.keys()).issubset(expected_keys)
+
+    # Test streaming
+    llm = ChatOpenAI(model="gpt-4.1", use_responses_api=True)
+    full: Optional[BaseMessageChunk] = None
+    tool = {"type": "image_generation", "quality": "low"}
+    for chunk in llm.stream("Make a picture of a fuzzy cat", tools=[tool]):
+        assert isinstance(chunk, AIMessageChunk)
+        full = chunk if full is None else full + chunk
+    complete_ai_message = cast(AIMessageChunk, full)
+    # At the moment the streaming API does not fully pick up annotations,
+    # so the following check is commented out:
+    # _check_response(complete_ai_message)
+    tool_output = complete_ai_message.additional_kwargs["tool_outputs"][0]
+    assert set(tool_output.keys()).issubset(expected_keys)
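
A follow-on usage sketch, not part of the test: since "result" holds base64-encoded image data and "output_format" names the encoding, the image can be written straight to disk. The filename is illustrative:

import base64

tool_output = response.additional_kwargs["tool_outputs"][0]
with open(f"fuzzy_cat.{tool_output['output_format']}", "wb") as f:
    f.write(base64.b64decode(tool_output["result"]))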

File diff suppressed because it is too large.