From 86428c63ac31999e31483e62b62df2ee9b4ab101 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Thu, 11 Jun 2026 00:51:50 -0400 Subject: [PATCH] fix(core,openai): normalize v1 streamed tool calls (#35983) OpenAI Chat Completions streaming has a v1 normalization gap when tool calls are streamed. When users opt into `output_version="v1"`, `.content_blocks` is expected to be the normalized cross-provider view of the message. For OpenAI Chat Completions streams, though, chunks still carry raw string `content` plus side-channel `tool_call_chunks` / `tool_calls`. Practically, an OpenAI stream chunk can look like this internally: ```python AIMessageChunk( content="", tool_call_chunks=[ { "name": "get_weather", "args": '{"location": "SF"}', "id": "call_123", "index": 0, "type": "tool_call_chunk", } ], response_metadata={"model_provider": "openai", "output_version": "v1"}, ) ``` That is not already-normalized v1 content like this: ```python AIMessageChunk( content=[ { "type": "tool_call_chunk", "name": "get_weather", "args": '{"location": "SF"}', "id": "call_123", "index": 0, } ], ) ``` Because `.content_blocks` currently short-circuits solely on `output_version="v1"`, it can return the raw string/empty list directly instead of running the OpenAI translator that incorporates `tool_call_chunks` / `tool_calls` into normalized v1 blocks. In practice, a streamed OpenAI tool call can be parsed successfully into `tool_calls`, but still be missing from the final aggregated `.content_blocks`. Downstream code that consumes the v1 block interface then sees no `tool_call` block and must know to inspect OpenAI-specific chunk fields instead. User story: > As a LangChain user streaming OpenAI Chat Completions with bound tools and `output_version="v1"`, I need the final aggregated message's `.content_blocks` to include normalized `tool_call` blocks, so that code written against the v1 content-block interface handles streamed tool calls consistently across providers. Expected final aggregated view: ```python message.content_blocks == [ { "type": "tool_call", "name": "get_weather", "args": {"location": "SF"}, "id": "call_123", } ] ``` Root causes: 1. The usage-only Chat Completions chunk uses `content=[]` in v1 mode while normal streaming chunks use `content=""`, creating inconsistent content types during chunk aggregation. 2. `AIMessage.content_blocks` and `AIMessageChunk.content_blocks` treat any `output_version="v1"` message as already-normalized, even when `content` is still raw string content from Chat Completions. 3. Content-bearing OpenAI stream chunks do not carry `output_version="v1"`, so the final merged chunk may not reliably take the v1 normalization path. Changes: - Keep usage-only Chat Completions chunks as `content=""` instead of overriding to `[]`, so streaming chunks merge consistently. - Propagate `output_version="v1"` to content-bearing chunks. - Only short-circuit v1 `.content_blocks` when `content` is already a list of blocks; otherwise fall through to the provider translator. - Add regression tests covering string-content v1 fallback, usage-only chunk content consistency, and streamed tool calls appearing as normalized final v1 blocks. --- libs/core/langchain_core/messages/ai.py | 18 +- .../core/tests/unit_tests/messages/test_ai.py | 66 ++++++ .../langchain_openai/chat_models/base.py | 11 +- .../tests/unit_tests/chat_models/test_base.py | 210 ++++++++++++++++++ 4 files changed, 302 insertions(+), 3 deletions(-) diff --git a/libs/core/langchain_core/messages/ai.py b/libs/core/langchain_core/messages/ai.py index 58de711821d..213f0f1c22e 100644 --- a/libs/core/langchain_core/messages/ai.py +++ b/libs/core/langchain_core/messages/ai.py @@ -247,7 +247,12 @@ class AIMessage(BaseMessage): first before falling back to best-effort parsing. For details, see the property on `BaseMessage`. """ - if self.response_metadata.get("output_version") == "v1": + if self.response_metadata.get("output_version") == "v1" and isinstance( + self.content, list + ): + # Only short-circuit when content is a list (assumed under v1 to + # already hold ContentBlock dicts; the cast is unchecked). See + # AIMessageChunk.content_blocks for full rationale. return cast("list[types.ContentBlock]", self.content) model_provider = self.response_metadata.get("model_provider") @@ -440,7 +445,16 @@ class AIMessageChunk(AIMessage, BaseMessageChunk): @property def content_blocks(self) -> list[types.ContentBlock]: """Return standard, typed `ContentBlock` dicts from the message.""" - if self.response_metadata.get("output_version") == "v1": + if self.response_metadata.get("output_version") == "v1" and isinstance( + self.content, list + ): + # Only short-circuit when content is already a list of ContentBlock + # dicts. Some streaming implementations keep content as a string + # even when output_version="v1" is set (e.g., OpenAI Chat + # Completions), so it must fall through to the model_provider + # translator which builds ContentBlock dicts from tool_calls / + # tool_call_chunks. Without this guard, string content would be + # returned directly, silently dropping tool calls. return cast("list[types.ContentBlock]", self.content) model_provider = self.response_metadata.get("model_provider") diff --git a/libs/core/tests/unit_tests/messages/test_ai.py b/libs/core/tests/unit_tests/messages/test_ai.py index a9c732c99da..8e410165f9e 100644 --- a/libs/core/tests/unit_tests/messages/test_ai.py +++ b/libs/core/tests/unit_tests/messages/test_ai.py @@ -490,6 +490,72 @@ def test_content_blocks() -> None: ] +def test_content_blocks_v1_string_content_falls_through() -> None: + """Test that content_blocks falls through to translator when content is a string. + + When output_version="v1" is set but content is a string (as in Chat + Completions streaming), content_blocks must not short-circuit. It should + fall through to the model_provider translator so tool calls are included. + Covers both `AIMessage` and `AIMessageChunk`. + """ + # AIMessage with string content + tool_calls + v1 metadata + msg = AIMessage( + content="Hello", + tool_calls=[ + create_tool_call(name="foo", args={"a": 1}, id="tc_1"), + ], + response_metadata={ + "output_version": "v1", + "model_provider": "openai", + }, + ) + blocks = msg.content_blocks + assert isinstance(blocks, list) + # Should contain a text block and a tool_call block, not the raw string + types_found = {b["type"] for b in blocks} + assert "text" in types_found + assert "tool_call" in types_found + + # AIMessageChunk with string content + tool_call_chunks + v1 metadata + chunk = AIMessageChunk( + content="Hello", + tool_call_chunks=[ + create_tool_call_chunk(name="foo", args='{"a": 1}', id="tc_1", index=0), + ], + response_metadata={ + "output_version": "v1", + "model_provider": "openai", + }, + ) + blocks = chunk.content_blocks + assert isinstance(blocks, list) + types_found = {b["type"] for b in blocks} + assert "text" in types_found + assert "tool_call_chunk" in types_found + + +def test_content_blocks_v1_list_content_short_circuits() -> None: + """Test that content_blocks short-circuits when v1 content is already a list. + + The `isinstance(self.content, list)` guard must preserve the fast path: + when content is already a list of `ContentBlock` dicts, `content_blocks` + returns it verbatim (the same object) without routing through the + translator. Covers both `AIMessage` and `AIMessageChunk`. + """ + content: list = [ + {"type": "text", "text": "Hello"}, + {"type": "tool_call", "name": "foo", "args": {"a": 1}, "id": "tc_1"}, + ] + for cls in (AIMessage, AIMessageChunk): + msg = cls( + content=content, + response_metadata={"output_version": "v1", "model_provider": "openai"}, + ) + # Short-circuit returns the stored content object itself; the translator + # would build and return a new list instead. + assert msg.content_blocks is msg.content + + def test_content_blocks_reasoning_extraction() -> None: """Test best-effort reasoning extraction from `additional_kwargs`.""" message = AIMessage( diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py index ba3464c21a2..41bb467f6de 100644 --- a/libs/partners/openai/langchain_openai/chat_models/base.py +++ b/libs/partners/openai/langchain_openai/chat_models/base.py @@ -1357,8 +1357,14 @@ class BaseChatOpenAI(BaseChatModel): message=default_chunk_class(content="", usage_metadata=usage_metadata), generation_info=base_generation_info, ) + # Keep content as "" (the default) rather than converting to []. + # Chat Completions content deltas are normalized to strings in + # _convert_delta_to_message_chunk. Starting with [] causes + # merge_content to silently drop string content (empty list is + # falsy, so no merge branch applies). The empty list also triggers + # the content_blocks isinstance(list) short-circuit, which would + # return [] and miss tool_call_chunks. if self.output_version == "v1": - generation_chunk.message.content = [] generation_chunk.message.response_metadata["output_version"] = "v1" return generation_chunk @@ -1389,6 +1395,9 @@ class BaseChatOpenAI(BaseChatModel): message_chunk.usage_metadata = usage_metadata message_chunk.response_metadata["model_provider"] = "openai" + # Propagate output_version so content_blocks can detect v1 mode. + if self.output_version == "v1": + message_chunk.response_metadata["output_version"] = "v1" return ChatGenerationChunk( message=message_chunk, generation_info=generation_info or None ) diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py index 2ccc8fd9742..f9ab9169058 100644 --- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py +++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py @@ -24,6 +24,7 @@ from langchain_core.messages import ( SystemMessage, ToolCall, ToolMessage, + message_chunk_to_message, ) from langchain_core.messages import content as types from langchain_core.messages.ai import UsageMetadata @@ -1385,6 +1386,215 @@ def test_output_version_compat() -> None: assert llm._use_responses_api({}) is True +def test_convert_chunk_to_generation_chunk_v1_keeps_string_content() -> None: + """v1 streaming keeps content as '' (not []) and stamps output_version. + + Covers both the usage-only (empty-choices) chunk and a content-bearing + chunk carrying a tool-call delta; the latter pins the per-content-chunk + `output_version` propagation. + """ + llm = ChatOpenAI(model="gpt-4o", output_version="v1") + + # Empty-choices chunk (usage-only) + empty_chunk: dict[str, Any] = { + "id": "chatcmpl-test", + "object": "chat.completion.chunk", + "created": 0, + "model": "gpt-4o", + "choices": [], + "usage": {"prompt_tokens": 5, "completion_tokens": 3, "total_tokens": 8}, + } + gen = llm._convert_chunk_to_generation_chunk(empty_chunk, AIMessageChunk, None) + assert gen is not None + assert gen.message.content == "" # NOT [] + assert gen.message.response_metadata.get("output_version") == "v1" + + # Content-bearing chunk with tool_call delta + tool_chunk: dict[str, Any] = { + "id": "chatcmpl-test", + "object": "chat.completion.chunk", + "created": 0, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "index": 0, + "id": "call_abc", + "function": {"name": "get_weather", "arguments": ""}, + } + ], + }, + "logprobs": None, + "finish_reason": None, + } + ], + "usage": None, + } + gen = llm._convert_chunk_to_generation_chunk(tool_chunk, AIMessageChunk, None) + assert gen is not None + assert isinstance(gen.message.content, str) + assert gen.message.response_metadata.get("output_version") == "v1" + assert gen.message.response_metadata.get("model_provider") == "openai" + + +def test_v1_streaming_tool_calls_in_content_blocks() -> None: + """End-to-end: streaming chunks with tool calls produce correct content_blocks.""" + stream_chunks: list[dict[str, Any]] = [ + # Initial empty-choices chunk + { + "id": "chatcmpl-test", + "object": "chat.completion.chunk", + "created": 0, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "delta": {"role": "assistant", "content": ""}, + "logprobs": None, + "finish_reason": None, + } + ], + "usage": None, + }, + # Text token streamed before the tool call + { + "id": "chatcmpl-test", + "object": "chat.completion.chunk", + "created": 0, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "delta": {"content": "Let me check the weather."}, + "logprobs": None, + "finish_reason": None, + } + ], + "usage": None, + }, + # Tool call start + { + "id": "chatcmpl-test", + "object": "chat.completion.chunk", + "created": 0, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "delta": { + "tool_calls": [ + { + "index": 0, + "id": "call_abc", + "function": { + "name": "get_weather", + "arguments": '{"loc', + }, + } + ] + }, + "logprobs": None, + "finish_reason": None, + } + ], + "usage": None, + }, + # Tool call args continuation + { + "id": "chatcmpl-test", + "object": "chat.completion.chunk", + "created": 0, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "delta": { + "tool_calls": [ + { + "index": 0, + "function": {"arguments": 'ation": "SF"}'}, + } + ] + }, + "logprobs": None, + "finish_reason": None, + } + ], + "usage": None, + }, + # Finish + { + "id": "chatcmpl-test", + "object": "chat.completion.chunk", + "created": 0, + "model": "gpt-4o", + "choices": [ + { + "index": 0, + "delta": {}, + "logprobs": None, + "finish_reason": "tool_calls", + } + ], + "usage": None, + }, + # Usage chunk + { + "id": "chatcmpl-test", + "object": "chat.completion.chunk", + "created": 0, + "model": "gpt-4o", + "choices": [], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 5, + "total_tokens": 15, + }, + }, + ] + + llm = ChatOpenAI(model="gpt-4o", output_version="v1") + + aggregated: AIMessageChunk | None = None + for raw_chunk in stream_chunks: + gen = llm._convert_chunk_to_generation_chunk(raw_chunk, AIMessageChunk, None) + if gen is None: + continue + chunk = cast(AIMessageChunk, gen.message) + aggregated = chunk if aggregated is None else aggregated + chunk + + assert aggregated is not None + # Tool calls should be present + assert len(aggregated.tool_call_chunks) == 1 + assert aggregated.tool_call_chunks[0]["name"] == "get_weather" + + # While still a chunk, content_blocks should include both the streamed text + # and the in-progress tool_call_chunk (text deltas must survive alongside + # tool calls through the merge). + blocks = aggregated.content_blocks + block_types = {b["type"] for b in blocks} + assert "tool_call_chunk" in block_types + assert "text" in block_types + + # Once finalized into a non-chunk AIMessage, the in-progress tool_call_chunk + # resolves to a normalized v1 `tool_call` block with parsed args. This is + # the cross-provider view downstream consumers code against. + final = message_chunk_to_message(aggregated) + final_blocks = final.content_blocks + assert {"type": "text", "text": "Let me check the weather."} in final_blocks + assert { + "type": "tool_call", + "name": "get_weather", + "args": {"location": "SF"}, + "id": "call_abc", + } in final_blocks + + def test_verbosity_parameter_payload() -> None: """Test verbosity parameter is included in request payload for Responses API.""" llm = ChatOpenAI(model="gpt-5", verbosity="high", use_responses_api=True)