From 649d82f20694e5dc45c83e4a6369f04a838fd074 Mon Sep 17 00:00:00 2001
From: Nick Hollon <nick.hollon@langchain.dev>
Date: Thu, 14 May 2026 11:11:30 -0700
Subject: [PATCH] fix(core): preserve reasoning blocks alongside tool_call in
 v3 stream (#37434)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #37420

***

`stream_events(version="v3")` (and the `astream_events` async twin)
silently dropped reasoning content from the final assembled `AIMessage`
whenever the same message also produced a tool_call. The bug reproduces
against Gemini 2.5 Pro with `include_thoughts=True`: reasoning streams
correctly through `ChatModelStream.reasoning`, but the persisted message
in the final graph state carries only the `tool_call` block.

## Root cause

`_iter_protocol_blocks` in the compat bridge groups per-chunk content
blocks by source-side identifier. When a provider doesn't supply an
`index` field on its content blocks — which the Google GenAI translator
does not for either `reasoning` or `tool_call` blocks — the bridge falls
back to positional `i` as the bucket key. Because Gemini typically emits
one block per chunk, every reasoning chunk and the later tool_call chunk
all key to `0`, and the type mismatch trips `_accumulate`'s
self-contained `else` branch. That branch clears accumulated reasoning
state and replaces it with the incoming tool_call, so reasoning never
reaches `content-block-finish`.

## Fix

When a block has no source-side `index`, key it by `("__lc_no_index__",
block_type, positional_i)` instead of bare `i`. Same-type chunks at the
same position still share a bucket and merge cleanly (streaming text and
reasoning unchanged); different-type chunks at the same position now
occupy distinct wire blocks and both reach `content-block-finish`.
Providers that supply explicit indices (Anthropic, OpenAI Responses) are
unaffected.

## Verification

Unit-tested at the compat-bridge layer for both sync
(`chunks_to_events`) and async (`achunks_to_events`) paths.

Verified live against Gemini 2.5 Pro (`gemini-2.5-pro`) with
`thinking_budget=2048`, `include_thoughts=True`, and a single
`get_weather` tool. Pre-fix:
`final_state.messages[tool_calling_ai_message].content == [{type:
tool_call, ...}]`. Post-fix: `[..., {type: reasoning, reasoning: "..."},
{type: tool_call, ...}]`, matching the shape `ainvoke` returns on the
same input.
---
 .../language_models/_compat_bridge.py         |  16 ++-
 .../language_models/test_compat_bridge.py     | 120 ++++++++++++++++++
 2 files changed, 135 insertions(+), 1 deletion(-)

diff --git a/libs/core/langchain_core/language_models/_compat_bridge.py b/libs/core/langchain_core/language_models/_compat_bridge.py
index 5c2cdf0ba0d..527bd652d4b 100644
--- a/libs/core/langchain_core/language_models/_compat_bridge.py
+++ b/libs/core/langchain_core/language_models/_compat_bridge.py
@@ -189,7 +189,21 @@ def _iter_protocol_blocks(msg: BaseMessage) -> list[tuple[Any, CompatBlock]]:
     for i, block in enumerate(raw):
         if not isinstance(block, dict):
             continue
-        key = block.get("index", i)
+        explicit_idx = block.get("index")
+        if explicit_idx is None:
+            # No source-side identity. Bucket by (sentinel, block type,
+            # positional `i`) so two blocks of different types at the
+            # same position across chunks (e.g. Gemini emitting a
+            # reasoning block in one chunk and a `tool_call` in the
+            # next, both at positional 0 because each chunk carries one
+            # block) get distinct wire blocks. Without this, the second
+            # type's incoming block hits `_accumulate`'s self-contained
+            # `else` branch and clobbers the first. Same-type chunks
+            # still share the bucket and merge cleanly, which is what
+            # streaming text / reasoning relies on.
+            key: Any = ("__lc_no_index__", block.get("type"), i)
+        else:
+            key = explicit_idx
         result.append((key, dict(block)))
 
     if not isinstance(msg, AIMessageChunk):
diff --git a/libs/core/tests/unit_tests/language_models/test_compat_bridge.py b/libs/core/tests/unit_tests/language_models/test_compat_bridge.py
index 27f1fc87ec5..e32591ef4e9 100644
--- a/libs/core/tests/unit_tests/language_models/test_compat_bridge.py
+++ b/libs/core/tests/unit_tests/language_models/test_compat_bridge.py
@@ -529,6 +529,126 @@ def test_chunks_to_events_no_provider_text_plus_tool_call() -> None:
     assert "tool_call" in types
 
 
+def test_chunks_to_events_reasoning_then_tool_call_no_index() -> None:
+    """Reasoning followed by a tool_call in separate no-index chunks survives.
+
+    Regression for langchain-ai/langchain#37420. Some providers (notably Gemini
+    via the `google_genai` translator) emit per-chunk content blocks without an
+    `index` field. The bridge's positional fallback used to key reasoning and
+    tool_call chunks identically (both at position 0 within their own chunk),
+    so the second-arriving block overwrote the first in the accumulator.
+    End result: the final assembled `AIMessage` had only the `tool_call`
+    and the reasoning was silently dropped from `.content`.
+    """
+    chunks = [
+        ChatGenerationChunk(
+            message=AIMessageChunk(
+                content=[{"type": "reasoning", "reasoning": "First "}],
+                response_metadata={
+                    "output_version": "v1",
+                    "model_provider": "google_genai",
+                },
+            )
+        ),
+        ChatGenerationChunk(
+            message=AIMessageChunk(
+                content=[{"type": "reasoning", "reasoning": "thought."}],
+                response_metadata={
+                    "output_version": "v1",
+                    "model_provider": "google_genai",
+                },
+            )
+        ),
+        ChatGenerationChunk(
+            message=AIMessageChunk(
+                content=[
+                    {
+                        "type": "tool_call",
+                        "id": "tc1",
+                        "name": "get_weather",
+                        "args": {"city": "San Francisco"},
+                    }
+                ],
+                response_metadata={
+                    "output_version": "v1",
+                    "model_provider": "google_genai",
+                },
+            )
+        ),
+    ]
+
+    events = list(chunks_to_events(iter(chunks), message_id="msg-1"))
+    finish_blocks: list[Any] = [
+        e["content"] for e in events if e["event"] == "content-block-finish"
+    ]
+    finish_types = [b.get("type") for b in finish_blocks]
+    assert "reasoning" in finish_types, (
+        f"Reasoning block was dropped during chunk accumulation. "
+        f"Finish events saw types: {finish_types}"
+    )
+    assert "tool_call" in finish_types
+
+    reasoning_finish = next(b for b in finish_blocks if b.get("type") == "reasoning")
+    assert reasoning_finish["reasoning"] == "First thought."
+
+    tool_call_finish = next(b for b in finish_blocks if b.get("type") == "tool_call")
+    assert tool_call_finish["id"] == "tc1"
+    assert tool_call_finish["name"] == "get_weather"
+    assert tool_call_finish["args"] == {"city": "San Francisco"}
+
+
+@pytest.mark.asyncio
+async def test_achunks_to_events_reasoning_then_tool_call_no_index() -> None:
+    """Async twin: reasoning and tool_call in separate no-index chunks both finish."""
+    chunks = [
+        ChatGenerationChunk(
+            message=AIMessageChunk(
+                content=[{"type": "reasoning", "reasoning": "First "}],
+                response_metadata={
+                    "output_version": "v1",
+                    "model_provider": "google_genai",
+                },
+            )
+        ),
+        ChatGenerationChunk(
+            message=AIMessageChunk(
+                content=[{"type": "reasoning", "reasoning": "thought."}],
+                response_metadata={
+                    "output_version": "v1",
+                    "model_provider": "google_genai",
+                },
+            )
+        ),
+        ChatGenerationChunk(
+            message=AIMessageChunk(
+                content=[
+                    {
+                        "type": "tool_call",
+                        "id": "tc1",
+                        "name": "get_weather",
+                        "args": {"city": "San Francisco"},
+                    }
+                ],
+                response_metadata={
+                    "output_version": "v1",
+                    "model_provider": "google_genai",
+                },
+            )
+        ),
+    ]
+
+    events = [
+        event
+        async for event in achunks_to_events(_aiter_chunks(chunks), message_id="msg-1")
+    ]
+    finish_blocks: list[Any] = [
+        e["content"] for e in events if e["event"] == "content-block-finish"
+    ]
+    finish_types = [b.get("type") for b in finish_blocks]
+    assert "reasoning" in finish_types
+    assert "tool_call" in finish_types
+
+
 def test_chunks_to_events_reasoning_in_additional_kwargs() -> None:
     """Reasoning packed into additional_kwargs surfaces as a reasoning block."""
     chunks = [