mirror of
https://github.com/hwchase17/langchain.git
synced 2026-04-23 20:23:59 +00:00
fix(core): strip null id/name from tool-call-chunk deltas in compat bridge
Many provider integrations (notably Anthropic's `input_json_delta`
path) attach the tool-call `id` and `name` only to the first
`tool_use` chunk; subsequent per-chunk slices carry `id=None,
name=None` and just the fresh `args` segment. The compat bridge
forwarded those `None` values verbatim, producing wire payloads like
`{"type": "tool_call_chunk", "id": null, "name": null, "args": "..."}`.
Consumers that fold deltas via a naive `{...target, ...delta}` spread
(e.g. the langgraph-js SDK's `MessageAssembler.applyContentDelta`)
interpret those as "identifier reset to null" and lose the id/name
captured from `content-block-start`. Downstream extractors then drop
the chunk until the final `content-block-finish` arrives — visible to
end users as tool-call cards appearing all at once at the end of a
turn instead of streaming in incrementally (for example, the Deep Agent
example renders four subagents in a single flicker rather than one
after another).
Introduce `_to_protocol_delta_block` and route every
`content-block-delta` emission (sync / async chunk streams and the
`message_to_events` replay path) through it. For `tool_call_chunk`
and `server_tool_call_chunk` shapes, drop `id` / `name` keys when
they would serialize to `null`. This matches the wire shape produced
by langgraph-js's `toProtocolDeltaBlock`, where identifiers are only
surfaced when they carry a real value.
This commit is contained in:
@@ -202,6 +202,35 @@ def _should_emit_delta(block: CompatBlock) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _to_protocol_delta_block(block: CompatBlock) -> ContentBlock:
|
||||
"""Sanitize a per-chunk block for emission as a ``content-block-delta``.
|
||||
|
||||
Many provider integrations (notably Anthropic's ``input_json_delta``
|
||||
path) emit subsequent tool-call chunks with ``id`` and ``name`` set
|
||||
to ``None`` — the metadata only arrives on the first ``tool_use``
|
||||
chunk and is implicit for the rest. Forwarding those ``None``
|
||||
values to the wire produces ``"id": null, "name": null`` payloads
|
||||
that can clobber previously-observed identifiers on accumulating
|
||||
consumers (e.g. SDK message assemblers that fold deltas via
|
||||
``{...target, ...delta}`` spread).
|
||||
|
||||
Normalize by dropping ``id``/``name`` keys when they would serialize
|
||||
to ``null`` on tool-call-chunk-shaped deltas. This matches the wire
|
||||
shape produced by the JS ``toProtocolDeltaBlock`` in
|
||||
``langgraphjs``'s ``messages-v2`` pipeline, where id/name are only
|
||||
surfaced when they carry a real value.
|
||||
"""
|
||||
btype = block.get("type")
|
||||
if btype in ("tool_call_chunk", "server_tool_call_chunk"):
|
||||
cleaned = dict(block)
|
||||
if cleaned.get("id") is None:
|
||||
cleaned.pop("id", None)
|
||||
if cleaned.get("name") is None:
|
||||
cleaned.pop("name", None)
|
||||
return cast("ContentBlock", cleaned)
|
||||
return _to_protocol_block(block)
|
||||
|
||||
|
||||
def _accumulate(state: CompatBlock | None, delta: CompatBlock) -> CompatBlock:
|
||||
"""Merge a per-chunk delta slice into accumulated per-index state.
|
||||
|
||||
@@ -448,7 +477,7 @@ def chunks_to_events(
|
||||
yield ContentBlockDeltaData(
|
||||
event="content-block-delta",
|
||||
index=idx,
|
||||
content_block=_to_protocol_block(block),
|
||||
content_block=_to_protocol_delta_block(block),
|
||||
)
|
||||
state[idx] = _accumulate(state.get(idx), block)
|
||||
|
||||
@@ -510,7 +539,7 @@ async def achunks_to_events(
|
||||
yield ContentBlockDeltaData(
|
||||
event="content-block-delta",
|
||||
index=idx,
|
||||
content_block=_to_protocol_block(block),
|
||||
content_block=_to_protocol_delta_block(block),
|
||||
)
|
||||
state[idx] = _accumulate(state.get(idx), block)
|
||||
|
||||
@@ -574,7 +603,7 @@ def message_to_events(
|
||||
yield ContentBlockDeltaData(
|
||||
event="content-block-delta",
|
||||
index=idx,
|
||||
content_block=_to_protocol_block(block),
|
||||
content_block=_to_protocol_delta_block(block),
|
||||
)
|
||||
finalized = _finalize_block(block)
|
||||
if finalized.get("type") == "tool_call":
|
||||
|
||||
@@ -204,6 +204,92 @@ def test_chunks_to_events_tool_call_multichunk() -> None:
|
||||
assert cast("MessageFinishData", events[-1])["reason"] == "tool_use"
|
||||
|
||||
|
||||
def test_chunks_to_events_tool_call_delta_omits_null_id_name() -> None:
    """Tool-call deltas must never put a null ``id`` / ``name`` on the wire.

    Providers such as Anthropic attach ``id`` and ``name`` only to the first
    ``tool_use`` chunk; each following ``input_json_delta`` chunk carries
    ``id=None, name=None``. If those values were forwarded as
    ``"id": null, "name": null``, consumers that fold deltas with a
    ``{...target, ...delta}`` spread (e.g. langgraph-js SDK's
    ``MessageAssembler.applyContentDelta``) could overwrite identifiers
    they had already captured. The bridge is expected to drop the keys so
    each delta carries only newly-observed fields.
    """
    # One (id, name, args) triple per streamed chunk, all at tool index 0.
    slices = [
        ("tc1", "search", ""),
        (None, None, '{"q":'),
        (None, None, ' "test"}'),
    ]
    chunks = [
        ChatGenerationChunk(
            message=AIMessageChunk(
                content="",
                id="msg-1",
                tool_call_chunks=[
                    {
                        "index": 0,
                        "id": call_id,
                        "name": call_name,
                        "args": args,
                        "type": "tool_call_chunk",
                    }
                ],
            )
        )
        for call_id, call_name, args in slices
    ]

    events = list(chunks_to_events(iter(chunks), message_id="msg-1"))
    delta_blocks = [
        cast("dict", event["content_block"])
        for event in events
        if event["event"] == "content-block-delta"
    ]

    # Each chunk yields exactly one delta: the first with id/name, the
    # other two with args only.
    assert len(delta_blocks) == 3

    for payload in delta_blocks:
        assert payload["type"] == "tool_call_chunk"
        # A present-but-``None`` identifier would null out the value the
        # consumer captured from ``content-block-start``.
        assert not ("id" in payload and payload["id"] is None)
        assert not ("name" in payload and payload["name"] is None)

    # Identifiers appear on the first delta only; later deltas leave the
    # keys out entirely so they cannot overwrite earlier values.
    head, *tail = delta_blocks
    assert head["id"] == "tc1"
    assert head["name"] == "search"
    for payload in tail:
        assert "id" not in payload
        assert "name" not in payload
|
||||
|
||||
|
||||
def test_chunks_to_events_invalid_tool_call_keeps_stop_reason() -> None:
|
||||
"""Malformed tool-args become invalid_tool_call; finish_reason stays `stop`."""
|
||||
chunks = [
|
||||
|
||||
Reference in New Issue
Block a user