Compare commits

...

1 Commit

Author SHA1 Message Date
Christian Bromann
bd8ab5520b fix(core): strip null id/name from tool-call-chunk deltas in compat bridge
Many provider integrations (notably Anthropic's `input_json_delta`
path) attach the tool-call `id` and `name` only to the first
`tool_use` chunk; subsequent per-chunk slices carry `id=None,
name=None` and just the fresh `args` segment. The compat bridge
forwarded those `None` values verbatim, producing wire payloads like
`{"type": "tool_call_chunk", "id": null, "name": null, "args": "..."}`.

Consumers that fold deltas via a naive `{...target, ...delta}` spread
(e.g. the langgraph-js SDK's `MessageAssembler.applyContentDelta`)
interpret those as "identifier reset to null" and lose the id/name
captured from `content-block-start`. Downstream extractors then drop
the chunk until the final `content-block-finish` arrives — visible to
end users as tool-call cards appearing all-at-once at the end of a
turn instead of streaming in incrementally (the Deep Agent example
rendering four subagents in a single flicker rather than one after
another).

Introduce `_to_protocol_delta_block` and route every
`content-block-delta` emission (sync / async chunk streams and the
`message_to_events` replay path) through it. For `tool_call_chunk`
and `server_tool_call_chunk` shapes, drop `id` / `name` keys when
they would serialize to `null`. This matches the wire shape produced
by langgraph-js's `toProtocolDeltaBlock`, where identifiers are only
surfaced when they carry a real value.
2026-04-22 18:40:36 -07:00
2 changed files with 118 additions and 3 deletions

View File

@@ -202,6 +202,35 @@ def _should_emit_delta(block: CompatBlock) -> bool:
return False
def _to_protocol_delta_block(block: CompatBlock) -> ContentBlock:
"""Sanitize a per-chunk block for emission as a ``content-block-delta``.
Many provider integrations (notably Anthropic's ``input_json_delta``
path) emit subsequent tool-call chunks with ``id`` and ``name`` set
to ``None`` — the metadata only arrives on the first ``tool_use``
chunk and is implicit for the rest. Forwarding those ``None``
values to the wire produces ``"id": null, "name": null`` payloads
that can clobber previously-observed identifiers on accumulating
consumers (e.g. SDK message assemblers that fold deltas via
``{...target, ...delta}`` spread).
Normalize by dropping ``id``/``name`` keys when they would serialize
to ``null`` on tool-call-chunk-shaped deltas. This matches the wire
shape produced by the JS ``toProtocolDeltaBlock`` in
``langgraphjs``'s ``messages-v2`` pipeline, where id/name are only
surfaced when they carry a real value.
"""
btype = block.get("type")
if btype in ("tool_call_chunk", "server_tool_call_chunk"):
cleaned = dict(block)
if cleaned.get("id") is None:
cleaned.pop("id", None)
if cleaned.get("name") is None:
cleaned.pop("name", None)
return cast("ContentBlock", cleaned)
return _to_protocol_block(block)
def _accumulate(state: CompatBlock | None, delta: CompatBlock) -> CompatBlock:
"""Merge a per-chunk delta slice into accumulated per-index state.
@@ -448,7 +477,7 @@ def chunks_to_events(
yield ContentBlockDeltaData(
event="content-block-delta",
index=idx,
content_block=_to_protocol_block(block),
content_block=_to_protocol_delta_block(block),
)
state[idx] = _accumulate(state.get(idx), block)
@@ -510,7 +539,7 @@ async def achunks_to_events(
yield ContentBlockDeltaData(
event="content-block-delta",
index=idx,
content_block=_to_protocol_block(block),
content_block=_to_protocol_delta_block(block),
)
state[idx] = _accumulate(state.get(idx), block)
@@ -574,7 +603,7 @@ def message_to_events(
yield ContentBlockDeltaData(
event="content-block-delta",
index=idx,
content_block=_to_protocol_block(block),
content_block=_to_protocol_delta_block(block),
)
finalized = _finalize_block(block)
if finalized.get("type") == "tool_call":

View File

@@ -204,6 +204,92 @@ def test_chunks_to_events_tool_call_multichunk() -> None:
assert cast("MessageFinishData", events[-1])["reason"] == "tool_use"
def test_chunks_to_events_tool_call_delta_omits_null_id_name() -> None:
    """Tool-call deltas must never put a null ``id`` / ``name`` on the wire.

    Providers such as Anthropic attach ``id`` and ``name`` only to the
    first ``tool_use`` chunk; each following ``input_json_delta`` chunk
    has ``id=None, name=None``. If the bridge forwarded those values,
    consumers that merge deltas with a ``{...target, ...delta}`` spread
    (e.g. langgraph-js SDK's ``MessageAssembler.applyContentDelta``)
    would overwrite the identifiers they already hold. The keys must be
    absent from such deltas so only newly-observed fields are carried.
    """
    # One (id, name, args) triple per streamed slice of the same tool call:
    # identifiers arrive once, then only argument fragments follow.
    slices = [
        ("tc1", "search", ""),
        (None, None, '{"q":'),
        (None, None, ' "test"}'),
    ]
    chunks = [
        ChatGenerationChunk(
            message=AIMessageChunk(
                content="",
                id="msg-1",
                tool_call_chunks=[
                    {
                        "index": 0,
                        "id": call_id,
                        "name": call_name,
                        "args": args_piece,
                        "type": "tool_call_chunk",
                    }
                ],
            )
        )
        for call_id, call_name, args_piece in slices
    ]

    events = list(chunks_to_events(iter(chunks), message_id="msg-1"))
    delta_blocks = [
        cast("dict", event["content_block"])
        for event in events
        if event["event"] == "content-block-delta"
    ]

    # Every input chunk yields exactly one delta.
    assert len(delta_blocks) == 3

    for payload in delta_blocks:
        assert payload["type"] == "tool_call_chunk"
        # A present-but-null identifier would wipe out the value captured
        # from ``content-block-start`` on the consumer side.
        assert not ("id" in payload and payload["id"] is None)
        assert not ("name" in payload and payload["name"] is None)

    # The identifiers ride on the first delta only ...
    head, *tail = delta_blocks
    assert head["id"] == "tc1"
    assert head["name"] == "search"

    # ... and the later deltas leave the keys out altogether, so they
    # cannot overwrite what was seen earlier.
    for payload in tail:
        assert "id" not in payload
        assert "name" not in payload
def test_chunks_to_events_invalid_tool_call_keeps_stop_reason() -> None:
"""Malformed tool-args become invalid_tool_call; finish_reason stays `stop`."""
chunks = [