mirror of
https://github.com/hwchase17/langchain.git
synced 2026-06-09 10:17:00 +00:00
feat(core): add content-block-centric streaming (v2) (#36834)
This commit is contained in:
Binary file not shown.
@@ -6,7 +6,7 @@ import asyncio
|
||||
import json
|
||||
import os
|
||||
from base64 import b64encode
|
||||
from typing import Literal, cast
|
||||
from typing import Any, Literal, cast
|
||||
|
||||
import anthropic
|
||||
import httpx
|
||||
@@ -28,6 +28,7 @@ from langchain_core.messages import (
|
||||
from langchain_core.outputs import ChatGeneration, LLMResult
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.tools import tool
|
||||
from langchain_tests.utils.stream_lifecycle import assert_valid_event_stream
|
||||
from pydantic import BaseModel, Field
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
@@ -902,8 +903,17 @@ def test_agent_loop(output_version: Literal["v0", "v1"]) -> None:
|
||||
|
||||
@pytest.mark.default_cassette("test_agent_loop_streaming.yaml.gz")
|
||||
@pytest.mark.vcr
|
||||
@pytest.mark.parametrize("output_version", ["v0", "v1"])
|
||||
def test_agent_loop_streaming(output_version: Literal["v0", "v1"]) -> None:
|
||||
@pytest.mark.parametrize(
|
||||
("output_version", "use_v2_stream"),
|
||||
[
|
||||
("v0", False),
|
||||
("v1", False),
|
||||
("v1", True),
|
||||
],
|
||||
)
|
||||
def test_agent_loop_streaming(
|
||||
output_version: Literal["v0", "v1"], *, use_v2_stream: bool
|
||||
) -> None:
|
||||
@tool
|
||||
def get_weather(location: str) -> str:
|
||||
"""Get the weather for a location."""
|
||||
@@ -916,7 +926,10 @@ def test_agent_loop_streaming(output_version: Literal["v0", "v1"]) -> None:
|
||||
)
|
||||
llm_with_tools = llm.bind_tools([get_weather])
|
||||
input_message = HumanMessage("What is the weather in San Francisco, CA?")
|
||||
tool_call_message = llm_with_tools.invoke([input_message])
|
||||
if use_v2_stream:
|
||||
tool_call_message = llm_with_tools.stream_v2([input_message]).output
|
||||
else:
|
||||
tool_call_message = llm_with_tools.invoke([input_message])
|
||||
assert isinstance(tool_call_message, AIMessage)
|
||||
|
||||
tool_calls = tool_call_message.tool_calls
|
||||
@@ -924,20 +937,68 @@ def test_agent_loop_streaming(output_version: Literal["v0", "v1"]) -> None:
|
||||
tool_call = tool_calls[0]
|
||||
tool_message = get_weather.invoke(tool_call)
|
||||
assert isinstance(tool_message, ToolMessage)
|
||||
response = llm_with_tools.invoke(
|
||||
[
|
||||
input_message,
|
||||
tool_call_message,
|
||||
tool_message,
|
||||
]
|
||||
if use_v2_stream:
|
||||
response = llm_with_tools.stream_v2(
|
||||
[input_message, tool_call_message, tool_message]
|
||||
).output
|
||||
else:
|
||||
response = llm_with_tools.invoke(
|
||||
[
|
||||
input_message,
|
||||
tool_call_message,
|
||||
tool_message,
|
||||
]
|
||||
)
|
||||
assert isinstance(response, AIMessage)
|
||||
|
||||
|
||||
@pytest.mark.default_cassette("test_agent_loop_streaming.yaml.gz")
@pytest.mark.vcr
async def test_agent_loop_streaming_astream_v2_v1() -> None:
    """Drive a two-turn tool-calling loop through `astream_v2`.

    Async counterpart of `test_agent_loop_streaming` for
    `output_version="v1"`, exercising `AsyncChatModelStream` end-to-end:
    the first turn must produce exactly one tool call, and the second turn
    (fed the tool result) must resolve to an `AIMessage`.
    """

    @tool
    def get_weather(location: str) -> str:
        """Get the weather for a location."""
        return "It's sunny."

    chat_model = ChatAnthropic(
        model=MODEL_NAME,
        streaming=True,
        output_version="v1",  # type: ignore[call-arg]
    )
    tool_model = chat_model.bind_tools([get_weather])
    question = HumanMessage("What is the weather in San Francisco, CA?")

    # Turn 1: awaiting `astream_v2` yields the stream object; awaiting that
    # stream yields the message that is asserted on below.
    first_stream = await tool_model.astream_v2([question])
    tool_request = await first_stream
    assert isinstance(tool_request, AIMessage)
    requested_calls = tool_request.tool_calls
    assert len(requested_calls) == 1

    # Run the requested tool locally to produce the tool result message.
    tool_result = get_weather.invoke(requested_calls[0])
    assert isinstance(tool_result, ToolMessage)

    # Turn 2: replay the whole conversation, including the tool result.
    second_stream = await tool_model.astream_v2(
        [question, tool_request, tool_result]
    )
    final_answer = await second_stream
    assert isinstance(final_answer, AIMessage)
|
||||
|
||||
|
||||
@pytest.mark.default_cassette("test_citations.yaml.gz")
|
||||
@pytest.mark.vcr
|
||||
@pytest.mark.parametrize("output_version", ["v0", "v1"])
|
||||
def test_citations(output_version: Literal["v0", "v1"]) -> None:
|
||||
@pytest.mark.parametrize(
|
||||
("output_version", "use_v2_stream"),
|
||||
[
|
||||
("v0", False),
|
||||
("v1", False),
|
||||
("v1", True),
|
||||
],
|
||||
)
|
||||
def test_citations(output_version: Literal["v0", "v1"], *, use_v2_stream: bool) -> None:
|
||||
llm = ChatAnthropic(model=MODEL_NAME, output_version=output_version) # type: ignore[call-arg]
|
||||
messages = [
|
||||
{
|
||||
@@ -967,10 +1028,19 @@ def test_citations(output_version: Literal["v0", "v1"]) -> None:
|
||||
assert any("citations" in block for block in response.content)
|
||||
|
||||
# Test streaming
|
||||
full: BaseMessageChunk | None = None
|
||||
for chunk in llm.stream(messages):
|
||||
full = cast("BaseMessageChunk", chunk) if full is None else full + chunk
|
||||
assert isinstance(full, AIMessageChunk)
|
||||
full: BaseMessage
|
||||
if use_v2_stream:
|
||||
full = llm.stream_v2(messages).output
|
||||
else:
|
||||
aggregated: BaseMessageChunk | None = None
|
||||
for chunk in llm.stream(messages):
|
||||
aggregated = (
|
||||
cast("BaseMessageChunk", chunk)
|
||||
if aggregated is None
|
||||
else aggregated + chunk
|
||||
)
|
||||
assert isinstance(aggregated, AIMessageChunk)
|
||||
full = aggregated
|
||||
assert isinstance(full.content, list)
|
||||
assert not any("citation" in block for block in full.content)
|
||||
if output_version == "v1":
|
||||
@@ -1029,7 +1099,8 @@ def test_thinking() -> None:
|
||||
|
||||
@pytest.mark.default_cassette("test_thinking.yaml.gz")
|
||||
@pytest.mark.vcr
|
||||
def test_thinking_v1() -> None:
|
||||
@pytest.mark.parametrize("use_v2_stream", [False, True])
|
||||
def test_thinking_v1(*, use_v2_stream: bool) -> None:
|
||||
llm = ChatAnthropic(
|
||||
model="claude-sonnet-4-5-20250929", # type: ignore[call-arg]
|
||||
max_tokens=5_000, # type: ignore[call-arg]
|
||||
@@ -1051,10 +1122,19 @@ def test_thinking_v1() -> None:
|
||||
assert isinstance(signature, str)
|
||||
|
||||
# Test streaming
|
||||
full: BaseMessageChunk | None = None
|
||||
for chunk in llm.stream([input_message]):
|
||||
full = cast(BaseMessageChunk, chunk) if full is None else full + chunk
|
||||
assert isinstance(full, AIMessageChunk)
|
||||
full: BaseMessage
|
||||
if use_v2_stream:
|
||||
full = llm.stream_v2([input_message]).output
|
||||
else:
|
||||
aggregated: BaseMessageChunk | None = None
|
||||
for chunk in llm.stream([input_message]):
|
||||
aggregated = (
|
||||
cast(BaseMessageChunk, chunk)
|
||||
if aggregated is None
|
||||
else aggregated + chunk
|
||||
)
|
||||
assert isinstance(aggregated, AIMessageChunk)
|
||||
full = aggregated
|
||||
assert isinstance(full.content, list)
|
||||
assert any("reasoning" in block for block in full.content)
|
||||
for block in full.content:
|
||||
@@ -2516,3 +2596,96 @@ def test_compaction_streaming() -> None:
|
||||
third_response = llm.invoke(messages)
|
||||
content_blocks = third_response.content_blocks
|
||||
assert [block["type"] for block in content_blocks] == ["text"]
|
||||
|
||||
|
||||
class _Person(BaseModel):
    """A person with a name and age."""

    # NOTE: this model is bound as a tool in
    # `test_streaming_tool_call_v1_v2_parity`; the docstring and the field
    # descriptions below are part of the declared schema, so keep them stable.
    name: str = Field(description="The person's name")
    age: int = Field(description="The person's age in years")
|
||||
|
||||
|
||||
def _stable_blocks(blocks: Any) -> list[dict[str, Any]]:
|
||||
"""Drop fields that vary between API calls so blocks can be compared.
|
||||
|
||||
Tool-call ids, wire indices, and provider extras are not path- or call-
|
||||
stable; strip them so the comparison targets the semantic content.
|
||||
"""
|
||||
volatile = {"id", "index", "extras"}
|
||||
return [{k: v for k, v in b.items() if k not in volatile} for b in blocks]
|
||||
|
||||
|
||||
@pytest.mark.default_cassette("test_streaming_tool_call_v1_v2_parity.yaml.gz")
@pytest.mark.vcr
def test_streaming_tool_call_v1_v2_parity() -> None:
    """Check that `stream()` reduction and `stream_v2().output` agree.

    A forced-tool-call prompt is sent twice through a `v1`-output
    `ChatAnthropic`: once via the legacy chunk stream (chunks summed with
    `AIMessageChunk.__add__`) and once via `stream_v2`. The two resulting
    messages are compared on invariants that do not depend on the path:

    - exactly one tool call and no invalid tool calls
    - tool call name and args (ids differ between calls and are ignored)
    - `content_blocks` with volatile fields stripped by `_stable_blocks`
    - a terminal reason that looks like a tool-use finish

    The `stream_v2` events are also run through `assert_valid_event_stream`.
    """
    chat_model = ChatAnthropic(
        model=MODEL_NAME,
        output_version="v1",  # type: ignore[call-arg]
    )
    extractor = chat_model.bind_tools(
        [_Person],
        tool_choice={"type": "tool", "name": "_Person"},
    )
    prompt = "Extract: Erick is 27 years old."
    expected_args = {"name": "Erick", "age": 27}

    def _terminal_reason(message: AIMessage) -> Any:
        # The compat bridge passes the provider's raw terminal reason through
        # unchanged — Anthropic surfaces it under `stop_reason` on both paths.
        # Accept either key rather than asserting a specific normalization
        # that the bridge does not perform.
        metadata = message.response_metadata
        return metadata.get("finish_reason") or metadata.get("stop_reason")

    # Legacy path: fold the chunk stream into a single aggregated chunk.
    legacy: AIMessageChunk | None = None
    for piece in extractor.stream(prompt):
        assert isinstance(piece, AIMessageChunk)
        legacy = piece if legacy is None else legacy + piece
    assert isinstance(legacy, AIMessageChunk)

    # v2 path: drain the event stream, validate it, then read the output.
    v2_stream = extractor.stream_v2(prompt)
    v2_events = list(v2_stream)
    assert_valid_event_stream(v2_events)
    bridged = v2_stream.output
    assert isinstance(bridged, AIMessage)

    # Tool-call parity.
    assert len(legacy.tool_calls) == 1
    assert len(bridged.tool_calls) == 1
    assert not legacy.invalid_tool_calls
    assert not bridged.invalid_tool_calls

    legacy_call = legacy.tool_calls[0]
    bridged_call = bridged.tool_calls[0]
    assert legacy_call["name"] == "_Person"
    assert bridged_call["name"] == "_Person"
    assert legacy_call["args"] == expected_args
    assert bridged_call["args"] == expected_args

    # Content-block parity on the v1 projection.
    legacy_blocks = _stable_blocks(legacy.content_blocks)
    bridged_blocks = _stable_blocks(bridged.content_blocks)
    assert legacy_blocks == bridged_blocks
    assert legacy_blocks == [
        {
            "type": "tool_call",
            "name": "_Person",
            "args": {"name": "Erick", "age": 27},
        }
    ]

    # Terminal-reason parity.
    legacy_reason = _terminal_reason(legacy)
    bridged_reason = _terminal_reason(bridged)
    assert legacy_reason is not None
    assert bridged_reason is not None
    accepted_markers = ("tool_use", "tool_calls", "stop")
    for reason in (legacy_reason, bridged_reason):
        assert any(marker in reason for marker in accepted_markers)
|
||||
|
||||
@@ -2843,3 +2843,161 @@ def test_no_task_budget_no_beta() -> None:
|
||||
betas = payload.get("betas")
|
||||
if betas:
|
||||
assert "task-budgets-2026-03-13" not in betas
|
||||
|
||||
|
||||
def test_anthropic_stream_v2_lifecycle() -> None:
    """Check `stream_v2` lifecycle events for thinking + text + tool_use.

    Anthropic emits raw `content_block_start` / `content_block_delta` /
    `content_block_stop` events carrying integer `index` fields, interleaved
    with `message_start` and `message_delta`. A hand-built sequence of those
    events is fed in by patching `_create` on the model, and the test then
    asserts that `stream_v2` yields a spec-conformant event stream: paired
    start/finish per block, no interleaving, sequential `uint` wire indices.
    """
    from unittest.mock import patch

    from anthropic.types import (
        InputJSONDelta,
        RawContentBlockDeltaEvent,
        RawContentBlockStartEvent,
        RawContentBlockStopEvent,
        RawMessageDeltaEvent,
        RawMessageStartEvent,
        RawMessageStopEvent,
        TextDelta,
        ThinkingBlock,
        ThinkingDelta,
        ToolUseBlock,
    )
    from anthropic.types.raw_message_delta_event import Delta as RawMessageDelta
    from anthropic.types.raw_message_delta_event import (
        MessageDeltaUsage as RawMessageDeltaUsage,
    )
    from langchain_tests.utils.stream_lifecycle import assert_valid_event_stream

    # Small factories for the three per-block raw event kinds, so the event
    # script below reads as one line per wire event.
    def block_start(index: int, content_block: Any) -> RawContentBlockStartEvent:
        return RawContentBlockStartEvent(
            content_block=content_block,
            index=index,
            type="content_block_start",
        )

    def block_delta(index: int, delta: Any) -> RawContentBlockDeltaEvent:
        return RawContentBlockDeltaEvent(
            delta=delta,
            index=index,
            type="content_block_delta",
        )

    def block_stop(index: int) -> RawContentBlockStopEvent:
        return RawContentBlockStopEvent(index=index, type="content_block_stop")

    opening_message = Message(
        id="msg_1",
        content=[],
        model=MODEL_NAME,
        role="assistant",
        stop_reason=None,
        stop_sequence=None,
        usage=Usage(input_tokens=10, output_tokens=0),
        type="message",
    )

    events = [
        RawMessageStartEvent(message=opening_message, type="message_start"),
        # thinking block (index=0)
        block_start(0, ThinkingBlock(signature="", thinking="", type="thinking")),
        block_delta(0, ThinkingDelta(thinking="Let me ", type="thinking_delta")),
        block_delta(0, ThinkingDelta(thinking="think.", type="thinking_delta")),
        block_stop(0),
        # text block (index=1)
        block_start(1, TextBlock(text="", type="text")),
        block_delta(1, TextDelta(text="The answer ", type="text_delta")),
        block_delta(1, TextDelta(text="is 42.", type="text_delta")),
        block_stop(1),
        # tool_use block (index=2)
        block_start(
            2,
            ToolUseBlock(
                id="toolu_1",
                input={},
                name="search",
                type="tool_use",
            ),
        ),
        block_delta(
            2,
            InputJSONDelta(partial_json='{"q":', type="input_json_delta"),
        ),
        block_delta(
            2,
            InputJSONDelta(partial_json=' "weather"}', type="input_json_delta"),
        ),
        block_stop(2),
        # message_delta with final usage and stop_reason
        RawMessageDeltaEvent(
            delta=RawMessageDelta(stop_reason="tool_use", stop_sequence=None),
            type="message_delta",
            usage=RawMessageDeltaUsage(
                output_tokens=50,
                input_tokens=10,
                cache_read_input_tokens=0,
                cache_creation_input_tokens=0,
            ),
        ),
        RawMessageStopEvent(type="message_stop"),
    ]

    # Enable thinking so `coerce_content_to_string=False` in `_stream`,
    # which gives every content block an integer `index` field — the
    # structured path the protocol bridge actually exercises. Default
    # (no tools / thinking / documents) coerces text to a plain string,
    # which strips indices and is a separate code path not covered here.
    llm = ChatAnthropic(
        model=MODEL_NAME,
        thinking={"type": "enabled", "budget_tokens": 1024},
    )

    def mock_create(_payload: Any) -> list:
        # Stand-in for the raw client call: ignore the payload and replay
        # the scripted events.
        return events

    with patch.object(llm, "_create", mock_create):
        stream_events = list(llm.stream_v2("Test query"))

    assert_valid_event_stream(stream_events)

    finished = [
        event for event in stream_events if event["event"] == "content-block-finish"
    ]
    assert [item["content_block"]["type"] for item in finished] == [
        "reasoning",
        "text",
        "tool_call",
    ]
    assert [item["index"] for item in finished] == [0, 1, 2]

    # Content accumulation reaches content-block-finish intact.
    reasoning_block, text_block, tool_block = (
        cast("dict[str, Any]", item["content_block"]) for item in finished
    )
    assert reasoning_block["reasoning"] == "Let me think."
    assert text_block["text"] == "The answer is 42."
    assert tool_block["args"] == {"q": "weather"}
    assert tool_block["name"] == "search"

    # message-finish carries the tool_use stop reason inside metadata
    # (protocol 0.0.9 moved the finish reason off the top-level event
    # and into `metadata`, where the bridge deposits the provider's raw
    # `stop_reason` alongside other response metadata).
    last_event = stream_events[-1]
    assert last_event["event"] == "message-finish"
    assert last_event["metadata"]["stop_reason"] == "tool_use"
|
||||
|
||||
18
libs/partners/anthropic/uv.lock
generated
18
libs/partners/anthropic/uv.lock
generated
@@ -1,5 +1,5 @@
|
||||
version = 1
|
||||
revision = 2
|
||||
revision = 3
|
||||
requires-python = ">=3.10.0, <4.0.0"
|
||||
resolution-markers = [
|
||||
"python_full_version >= '3.13' and platform_python_implementation == 'PyPy'",
|
||||
@@ -660,10 +660,11 @@ typing = [
|
||||
|
||||
[[package]]
|
||||
name = "langchain-core"
|
||||
version = "1.3.0a3"
|
||||
version = "1.3.1"
|
||||
source = { editable = "../../core" }
|
||||
dependencies = [
|
||||
{ name = "jsonpatch" },
|
||||
{ name = "langchain-protocol" },
|
||||
{ name = "langsmith" },
|
||||
{ name = "packaging" },
|
||||
{ name = "pydantic" },
|
||||
@@ -676,6 +677,7 @@ dependencies = [
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "jsonpatch", specifier = ">=1.33.0,<2.0.0" },
|
||||
{ name = "langchain-protocol", specifier = ">=0.0.10" },
|
||||
{ name = "langsmith", specifier = ">=0.3.45,<1.0.0" },
|
||||
{ name = "packaging", specifier = ">=23.2.0" },
|
||||
{ name = "pydantic", specifier = ">=2.7.4,<3.0.0" },
|
||||
@@ -718,6 +720,18 @@ typing = [
|
||||
{ name = "types-requests", specifier = ">=2.28.11.5,<3.0.0.0" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "langchain-protocol"
|
||||
version = "0.0.10"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/bf/c3/0d3911d3274f097040e92133f18a425980cd4085e72b6cd65add1f25327c/langchain_protocol-0.0.10.tar.gz", hash = "sha256:5bc530e0b350d3a15a3ab6889abb8132692a2c8a15eed536bce46624751acaaf", size = 6528, upload-time = "2026-04-23T17:31:34.212Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/f8/11/6c89bc86b5494cfe29ee23420c398406cc147a09b5cf756e323070e358d7/langchain_protocol-0.0.10-py3-none-any.whl", hash = "sha256:040bb2ae966a06ffcd0051a1d1ca7e4926f12e951e83b07440cb80e0e8e12268", size = 6677, upload-time = "2026-04-23T17:31:33.367Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "langchain-tests"
|
||||
version = "1.1.6"
|
||||
|
||||
Reference in New Issue
Block a user