feat(core): add content-block-centric streaming (v2) (#36834)

This commit is contained in:
Nick Hollon
2026-04-24 11:36:17 -04:00
committed by GitHub
parent 889a45b664
commit 9ce72eba9f
46 changed files with 8367 additions and 96 deletions

View File

@@ -6,7 +6,7 @@ import asyncio
import json
import os
from base64 import b64encode
from typing import Literal, cast
from typing import Any, Literal, cast
import anthropic
import httpx
@@ -28,6 +28,7 @@ from langchain_core.messages import (
from langchain_core.outputs import ChatGeneration, LLMResult
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import tool
from langchain_tests.utils.stream_lifecycle import assert_valid_event_stream
from pydantic import BaseModel, Field
from typing_extensions import TypedDict
@@ -902,8 +903,17 @@ def test_agent_loop(output_version: Literal["v0", "v1"]) -> None:
@pytest.mark.default_cassette("test_agent_loop_streaming.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["v0", "v1"])
def test_agent_loop_streaming(output_version: Literal["v0", "v1"]) -> None:
@pytest.mark.parametrize(
("output_version", "use_v2_stream"),
[
("v0", False),
("v1", False),
("v1", True),
],
)
def test_agent_loop_streaming(
output_version: Literal["v0", "v1"], *, use_v2_stream: bool
) -> None:
@tool
def get_weather(location: str) -> str:
"""Get the weather for a location."""
@@ -916,7 +926,10 @@ def test_agent_loop_streaming(output_version: Literal["v0", "v1"]) -> None:
)
llm_with_tools = llm.bind_tools([get_weather])
input_message = HumanMessage("What is the weather in San Francisco, CA?")
tool_call_message = llm_with_tools.invoke([input_message])
if use_v2_stream:
tool_call_message = llm_with_tools.stream_v2([input_message]).output
else:
tool_call_message = llm_with_tools.invoke([input_message])
assert isinstance(tool_call_message, AIMessage)
tool_calls = tool_call_message.tool_calls
@@ -924,20 +937,68 @@ def test_agent_loop_streaming(output_version: Literal["v0", "v1"]) -> None:
tool_call = tool_calls[0]
tool_message = get_weather.invoke(tool_call)
assert isinstance(tool_message, ToolMessage)
response = llm_with_tools.invoke(
[
input_message,
tool_call_message,
tool_message,
]
if use_v2_stream:
response = llm_with_tools.stream_v2(
[input_message, tool_call_message, tool_message]
).output
else:
response = llm_with_tools.invoke(
[
input_message,
tool_call_message,
tool_message,
]
)
assert isinstance(response, AIMessage)
@pytest.mark.default_cassette("test_agent_loop_streaming.yaml.gz")
@pytest.mark.vcr
async def test_agent_loop_streaming_astream_v2_v1() -> None:
"""Async multi-turn through `astream_v2`.
Mirrors `test_agent_loop_streaming` for `output_version="v1"` but
exercises `AsyncChatModelStream` end-to-end.
"""
@tool
def get_weather(location: str) -> str:
"""Get the weather for a location."""
return "It's sunny."
llm = ChatAnthropic(
model=MODEL_NAME,
streaming=True,
output_version="v1", # type: ignore[call-arg]
)
llm_with_tools = llm.bind_tools([get_weather])
input_message = HumanMessage("What is the weather in San Francisco, CA?")
tool_call_message = await (await llm_with_tools.astream_v2([input_message]))
assert isinstance(tool_call_message, AIMessage)
tool_calls = tool_call_message.tool_calls
assert len(tool_calls) == 1
tool_call = tool_calls[0]
tool_message = get_weather.invoke(tool_call)
assert isinstance(tool_message, ToolMessage)
response = await (
await llm_with_tools.astream_v2(
[input_message, tool_call_message, tool_message]
)
)
assert isinstance(response, AIMessage)
@pytest.mark.default_cassette("test_citations.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["v0", "v1"])
def test_citations(output_version: Literal["v0", "v1"]) -> None:
@pytest.mark.parametrize(
("output_version", "use_v2_stream"),
[
("v0", False),
("v1", False),
("v1", True),
],
)
def test_citations(output_version: Literal["v0", "v1"], *, use_v2_stream: bool) -> None:
llm = ChatAnthropic(model=MODEL_NAME, output_version=output_version) # type: ignore[call-arg]
messages = [
{
@@ -967,10 +1028,19 @@ def test_citations(output_version: Literal["v0", "v1"]) -> None:
assert any("citations" in block for block in response.content)
# Test streaming
full: BaseMessageChunk | None = None
for chunk in llm.stream(messages):
full = cast("BaseMessageChunk", chunk) if full is None else full + chunk
assert isinstance(full, AIMessageChunk)
full: BaseMessage
if use_v2_stream:
full = llm.stream_v2(messages).output
else:
aggregated: BaseMessageChunk | None = None
for chunk in llm.stream(messages):
aggregated = (
cast("BaseMessageChunk", chunk)
if aggregated is None
else aggregated + chunk
)
assert isinstance(aggregated, AIMessageChunk)
full = aggregated
assert isinstance(full.content, list)
assert not any("citation" in block for block in full.content)
if output_version == "v1":
@@ -1029,7 +1099,8 @@ def test_thinking() -> None:
@pytest.mark.default_cassette("test_thinking.yaml.gz")
@pytest.mark.vcr
def test_thinking_v1() -> None:
@pytest.mark.parametrize("use_v2_stream", [False, True])
def test_thinking_v1(*, use_v2_stream: bool) -> None:
llm = ChatAnthropic(
model="claude-sonnet-4-5-20250929", # type: ignore[call-arg]
max_tokens=5_000, # type: ignore[call-arg]
@@ -1051,10 +1122,19 @@ def test_thinking_v1() -> None:
assert isinstance(signature, str)
# Test streaming
full: BaseMessageChunk | None = None
for chunk in llm.stream([input_message]):
full = cast(BaseMessageChunk, chunk) if full is None else full + chunk
assert isinstance(full, AIMessageChunk)
full: BaseMessage
if use_v2_stream:
full = llm.stream_v2([input_message]).output
else:
aggregated: BaseMessageChunk | None = None
for chunk in llm.stream([input_message]):
aggregated = (
cast(BaseMessageChunk, chunk)
if aggregated is None
else aggregated + chunk
)
assert isinstance(aggregated, AIMessageChunk)
full = aggregated
assert isinstance(full.content, list)
assert any("reasoning" in block for block in full.content)
for block in full.content:
@@ -2516,3 +2596,96 @@ def test_compaction_streaming() -> None:
third_response = llm.invoke(messages)
content_blocks = third_response.content_blocks
assert [block["type"] for block in content_blocks] == ["text"]
class _Person(BaseModel):
"""A person with a name and age."""
name: str = Field(description="The person's name")
age: int = Field(description="The person's age in years")
def _stable_blocks(blocks: Any) -> list[dict[str, Any]]:
"""Drop fields that vary between API calls so blocks can be compared.
Tool-call ids, wire indices, and provider extras are not path- or call-
stable; strip them so the comparison targets the semantic content.
"""
volatile = {"id", "index", "extras"}
return [{k: v for k, v in b.items() if k not in volatile} for b in blocks]
@pytest.mark.default_cassette("test_streaming_tool_call_v1_v2_parity.yaml.gz")
@pytest.mark.vcr
def test_streaming_tool_call_v1_v2_parity() -> None:
"""`AIMessage` parity between `stream()` reduction and `stream_v2().output`.
Runs the same forced-tool-call prompt through both the legacy chunk
stream (reduced with `AIMessageChunk.__add__`) and the `stream_v2`
bridge path on a `v1`-output `ChatAnthropic`, then compares the
resulting messages on path-independent invariants:
- tool call name and args (ids vary between calls and are ignored)
- exactly one tool call, no invalid tool calls
- `content_blocks` (the v1 projection, stripped of volatile fields)
- a valid tool-use `finish_reason`
The v2 path is additionally validated against the full protocol
lifecycle via `assert_valid_event_stream`.
"""
llm = ChatAnthropic(
model=MODEL_NAME,
output_version="v1", # type: ignore[call-arg]
)
with_tool = llm.bind_tools(
[_Person],
tool_choice={"type": "tool", "name": "_Person"},
)
prompt = "Extract: Erick is 27 years old."
v1_full: AIMessageChunk | None = None
for chunk in with_tool.stream(prompt):
assert isinstance(chunk, AIMessageChunk)
v1_full = chunk if v1_full is None else v1_full + chunk
assert isinstance(v1_full, AIMessageChunk)
stream = with_tool.stream_v2(prompt)
events = list(stream)
assert_valid_event_stream(events)
v2_message = stream.output
assert isinstance(v2_message, AIMessage)
assert len(v1_full.tool_calls) == len(v2_message.tool_calls) == 1
assert not v1_full.invalid_tool_calls
assert not v2_message.invalid_tool_calls
v1_tc = v1_full.tool_calls[0]
v2_tc = v2_message.tool_calls[0]
assert v1_tc["name"] == v2_tc["name"] == "_Person"
assert v1_tc["args"] == v2_tc["args"] == {"name": "Erick", "age": 27}
v1_blocks = _stable_blocks(v1_full.content_blocks)
v2_blocks = _stable_blocks(v2_message.content_blocks)
assert v1_blocks == v2_blocks
assert v1_blocks == [
{
"type": "tool_call",
"name": "_Person",
"args": {"name": "Erick", "age": 27},
}
]
# The compat bridge passes the provider's raw terminal reason through
# unchanged — Anthropic surfaces it under `stop_reason` on both paths.
# Accept either key on both sides rather than asserting a specific
# normalization that the bridge does not perform.
v1_finish = v1_full.response_metadata.get(
"finish_reason"
) or v1_full.response_metadata.get("stop_reason")
v2_finish = v2_message.response_metadata.get(
"finish_reason"
) or v2_message.response_metadata.get("stop_reason")
assert v1_finish is not None
assert v2_finish is not None
assert any(k in v1_finish for k in ("tool_use", "tool_calls", "stop"))
assert any(k in v2_finish for k in ("tool_use", "tool_calls", "stop"))

View File

@@ -2843,3 +2843,161 @@ def test_no_task_budget_no_beta() -> None:
betas = payload.get("betas")
if betas:
assert "task-budgets-2026-03-13" not in betas
def test_anthropic_stream_v2_lifecycle() -> None:
"""Validate lifecycle events across a thinking + text + tool_use stream.
Anthropic emits raw `content_block_start` / `content_block_delta` /
`content_block_stop` events with integer `index` fields, interleaved
with `message_start` and `message_delta`. This test threads a
realistic event sequence through `_stream` via a mocked raw client
and asserts that `stream_v2` produces a spec-conformant event
stream: paired start/finish per block, no interleaving, sequential
`uint` wire indices.
"""
from unittest.mock import patch
from anthropic.types import (
InputJSONDelta,
RawContentBlockDeltaEvent,
RawContentBlockStartEvent,
RawContentBlockStopEvent,
RawMessageDeltaEvent,
RawMessageStartEvent,
RawMessageStopEvent,
TextDelta,
ThinkingBlock,
ThinkingDelta,
ToolUseBlock,
)
from anthropic.types.raw_message_delta_event import Delta as RawMessageDelta
from anthropic.types.raw_message_delta_event import (
MessageDeltaUsage as RawMessageDeltaUsage,
)
from langchain_tests.utils.stream_lifecycle import assert_valid_event_stream
msg = Message(
id="msg_1",
content=[],
model=MODEL_NAME,
role="assistant",
stop_reason=None,
stop_sequence=None,
usage=Usage(input_tokens=10, output_tokens=0),
type="message",
)
events = [
RawMessageStartEvent(message=msg, type="message_start"),
# thinking block (index=0)
RawContentBlockStartEvent(
content_block=ThinkingBlock(signature="", thinking="", type="thinking"),
index=0,
type="content_block_start",
),
RawContentBlockDeltaEvent(
delta=ThinkingDelta(thinking="Let me ", type="thinking_delta"),
index=0,
type="content_block_delta",
),
RawContentBlockDeltaEvent(
delta=ThinkingDelta(thinking="think.", type="thinking_delta"),
index=0,
type="content_block_delta",
),
RawContentBlockStopEvent(index=0, type="content_block_stop"),
# text block (index=1)
RawContentBlockStartEvent(
content_block=TextBlock(text="", type="text"),
index=1,
type="content_block_start",
),
RawContentBlockDeltaEvent(
delta=TextDelta(text="The answer ", type="text_delta"),
index=1,
type="content_block_delta",
),
RawContentBlockDeltaEvent(
delta=TextDelta(text="is 42.", type="text_delta"),
index=1,
type="content_block_delta",
),
RawContentBlockStopEvent(index=1, type="content_block_stop"),
# tool_use block (index=2)
RawContentBlockStartEvent(
content_block=ToolUseBlock(
id="toolu_1",
input={},
name="search",
type="tool_use",
),
index=2,
type="content_block_start",
),
RawContentBlockDeltaEvent(
delta=InputJSONDelta(partial_json='{"q":', type="input_json_delta"),
index=2,
type="content_block_delta",
),
RawContentBlockDeltaEvent(
delta=InputJSONDelta(partial_json=' "weather"}', type="input_json_delta"),
index=2,
type="content_block_delta",
),
RawContentBlockStopEvent(index=2, type="content_block_stop"),
# message_delta with final usage and stop_reason
RawMessageDeltaEvent(
delta=RawMessageDelta(stop_reason="tool_use", stop_sequence=None),
type="message_delta",
usage=RawMessageDeltaUsage(
output_tokens=50,
input_tokens=10,
cache_read_input_tokens=0,
cache_creation_input_tokens=0,
),
),
RawMessageStopEvent(type="message_stop"),
]
# Enable thinking so `coerce_content_to_string=False` in `_stream`,
# which gives every content block an integer `index` field — the
# structured path the protocol bridge actually exercises. Default
# (no tools / thinking / documents) coerces text to a plain string,
# which strips indices and is a separate code path not covered here.
llm = ChatAnthropic(
model=MODEL_NAME,
thinking={"type": "enabled", "budget_tokens": 1024},
)
def mock_create(_payload: Any) -> list:
return events
with patch.object(llm, "_create", mock_create):
stream_events = list(llm.stream_v2("Test query"))
assert_valid_event_stream(stream_events)
finishes = [e for e in stream_events if e["event"] == "content-block-finish"]
types = [f["content_block"]["type"] for f in finishes]
assert types == ["reasoning", "text", "tool_call"]
wire_indices = [f["index"] for f in finishes]
assert wire_indices == [0, 1, 2]
# Content accumulation reaches content-block-finish intact.
reasoning_block = cast("dict[str, Any]", finishes[0]["content_block"])
text_block = cast("dict[str, Any]", finishes[1]["content_block"])
tool_block = cast("dict[str, Any]", finishes[2]["content_block"])
assert reasoning_block["reasoning"] == "Let me think."
assert text_block["text"] == "The answer is 42."
assert tool_block["args"] == {"q": "weather"}
assert tool_block["name"] == "search"
# message-finish carries the tool_use stop reason inside metadata
# (protocol 0.0.9 moved the finish reason off the top-level event
# and into `metadata`, where the bridge deposits the provider's raw
# `stop_reason` alongside other response metadata).
message_finish = stream_events[-1]
assert message_finish["event"] == "message-finish"
assert message_finish["metadata"]["stop_reason"] == "tool_use"

View File

@@ -1,5 +1,5 @@
version = 1
revision = 2
revision = 3
requires-python = ">=3.10.0, <4.0.0"
resolution-markers = [
"python_full_version >= '3.13' and platform_python_implementation == 'PyPy'",
@@ -660,10 +660,11 @@ typing = [
[[package]]
name = "langchain-core"
version = "1.3.0a3"
version = "1.3.1"
source = { editable = "../../core" }
dependencies = [
{ name = "jsonpatch" },
{ name = "langchain-protocol" },
{ name = "langsmith" },
{ name = "packaging" },
{ name = "pydantic" },
@@ -676,6 +677,7 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "jsonpatch", specifier = ">=1.33.0,<2.0.0" },
{ name = "langchain-protocol", specifier = ">=0.0.10" },
{ name = "langsmith", specifier = ">=0.3.45,<1.0.0" },
{ name = "packaging", specifier = ">=23.2.0" },
{ name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
@@ -718,6 +720,18 @@ typing = [
{ name = "types-requests", specifier = ">=2.28.11.5,<3.0.0.0" },
]
[[package]]
name = "langchain-protocol"
version = "0.0.10"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/bf/c3/0d3911d3274f097040e92133f18a425980cd4085e72b6cd65add1f25327c/langchain_protocol-0.0.10.tar.gz", hash = "sha256:5bc530e0b350d3a15a3ab6889abb8132692a2c8a15eed536bce46624751acaaf", size = 6528, upload-time = "2026-04-23T17:31:34.212Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f8/11/6c89bc86b5494cfe29ee23420c398406cc147a09b5cf756e323070e358d7/langchain_protocol-0.0.10-py3-none-any.whl", hash = "sha256:040bb2ae966a06ffcd0051a1d1ca7e4926f12e951e83b07440cb80e0e8e12268", size = 6677, upload-time = "2026-04-23T17:31:33.367Z" },
]
[[package]]
name = "langchain-tests"
version = "1.1.6"