Files
langchain/libs/partners/anthropic/tests/integration_tests/test_chat_models.py

2692 lines
98 KiB
Python

"""Test ChatAnthropic chat model."""
from __future__ import annotations
import asyncio
import json
import os
from base64 import b64encode
from typing import Any, Literal, cast
import anthropic
import httpx
import pytest
import requests
from langchain.agents import create_agent
from langchain.agents.structured_output import ProviderStrategy
from langchain_core.callbacks import CallbackManager
from langchain_core.exceptions import OutputParserException
from langchain_core.messages import (
AIMessage,
AIMessageChunk,
BaseMessage,
BaseMessageChunk,
HumanMessage,
SystemMessage,
ToolMessage,
)
from langchain_core.outputs import ChatGeneration, LLMResult
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import tool
from langchain_tests.utils.stream_lifecycle import assert_valid_event_stream
from pydantic import BaseModel, Field
from typing_extensions import TypedDict
from langchain_anthropic import ChatAnthropic
from langchain_anthropic._compat import _convert_from_v1_to_anthropic
from tests.unit_tests._utils import FakeCallbackHandler
MODEL_NAME = "claude-haiku-4-5-20251001"
def test_stream() -> None:
"""Test streaming tokens from Anthropic."""
llm = ChatAnthropic(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg]
full: BaseMessageChunk | None = None
chunks_with_input_token_counts = 0
chunks_with_output_token_counts = 0
chunks_with_model_name = 0
for token in llm.stream("I'm Pickle Rick"):
assert isinstance(token.content, str)
full = cast("BaseMessageChunk", token) if full is None else full + token
assert isinstance(token, AIMessageChunk)
if token.usage_metadata is not None:
if token.usage_metadata.get("input_tokens"):
chunks_with_input_token_counts += 1
if token.usage_metadata.get("output_tokens"):
chunks_with_output_token_counts += 1
chunks_with_model_name += int("model_name" in token.response_metadata)
if chunks_with_input_token_counts != 1 or chunks_with_output_token_counts != 1:
msg = (
"Expected exactly one chunk with input or output token counts. "
"AIMessageChunk aggregation adds counts. Check that "
"this is behaving properly."
)
raise AssertionError(
msg,
)
assert chunks_with_model_name == 1
# check token usage is populated
assert isinstance(full, AIMessageChunk)
assert len(full.content_blocks) == 1
assert full.content_blocks[0]["type"] == "text"
assert full.content_blocks[0]["text"]
assert full.usage_metadata is not None
assert full.usage_metadata["input_tokens"] > 0
assert full.usage_metadata["output_tokens"] > 0
assert full.usage_metadata["total_tokens"] > 0
assert (
full.usage_metadata["input_tokens"] + full.usage_metadata["output_tokens"]
== full.usage_metadata["total_tokens"]
)
assert "stop_reason" in full.response_metadata
assert "stop_sequence" in full.response_metadata
assert "model_name" in full.response_metadata
async def test_astream() -> None:
"""Test streaming tokens from Anthropic."""
llm = ChatAnthropic(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg]
full: BaseMessageChunk | None = None
chunks_with_input_token_counts = 0
chunks_with_output_token_counts = 0
async for token in llm.astream("I'm Pickle Rick"):
assert isinstance(token.content, str)
full = cast("BaseMessageChunk", token) if full is None else full + token
assert isinstance(token, AIMessageChunk)
if token.usage_metadata is not None:
if token.usage_metadata.get("input_tokens"):
chunks_with_input_token_counts += 1
if token.usage_metadata.get("output_tokens"):
chunks_with_output_token_counts += 1
if chunks_with_input_token_counts != 1 or chunks_with_output_token_counts != 1:
msg = (
"Expected exactly one chunk with input or output token counts. "
"AIMessageChunk aggregation adds counts. Check that "
"this is behaving properly."
)
raise AssertionError(
msg,
)
# check token usage is populated
assert isinstance(full, AIMessageChunk)
assert len(full.content_blocks) == 1
assert full.content_blocks[0]["type"] == "text"
assert full.content_blocks[0]["text"]
assert full.usage_metadata is not None
assert full.usage_metadata["input_tokens"] > 0
assert full.usage_metadata["output_tokens"] > 0
assert full.usage_metadata["total_tokens"] > 0
assert (
full.usage_metadata["input_tokens"] + full.usage_metadata["output_tokens"]
== full.usage_metadata["total_tokens"]
)
assert "stop_reason" in full.response_metadata
assert "stop_sequence" in full.response_metadata
# Check expected raw API output
async_client = llm._async_client
params: dict = {
"model": MODEL_NAME,
"max_tokens": 1024,
"messages": [{"role": "user", "content": "hi"}],
"temperature": 0.0,
}
stream = await async_client.messages.create(**params, stream=True)
async for event in stream:
if event.type == "message_start":
assert event.message.usage.input_tokens > 1
# Different models may report different initial output token counts
# in the message_start event. Ensure it's a positive value.
assert event.message.usage.output_tokens >= 1
elif event.type == "message_delta":
assert event.usage.output_tokens >= 1
else:
pass
async def test_stream_usage() -> None:
"""Test usage metadata can be excluded."""
model = ChatAnthropic(model_name=MODEL_NAME, stream_usage=False) # type: ignore[call-arg]
async for token in model.astream("hi"):
assert isinstance(token, AIMessageChunk)
assert token.usage_metadata is None
async def test_stream_usage_override() -> None:
# check we override with kwarg
model = ChatAnthropic(model_name=MODEL_NAME) # type: ignore[call-arg]
assert model.stream_usage
async for token in model.astream("hi", stream_usage=False):
assert isinstance(token, AIMessageChunk)
assert token.usage_metadata is None
async def test_abatch() -> None:
"""Test streaming tokens."""
llm = ChatAnthropic(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg]
result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"])
for token in result:
assert isinstance(token.content, str)
async def test_abatch_tags() -> None:
"""Test batch tokens."""
llm = ChatAnthropic(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg]
result = await llm.abatch(
["I'm Pickle Rick", "I'm not Pickle Rick"],
config={"tags": ["foo"]},
)
for token in result:
assert isinstance(token.content, str)
async def test_async_tool_use() -> None:
llm = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
)
llm_with_tools = llm.bind_tools(
[
{
"name": "get_weather",
"description": "Get weather report for a city",
"input_schema": {
"type": "object",
"properties": {"location": {"type": "string"}},
},
},
],
)
response = await llm_with_tools.ainvoke("what's the weather in san francisco, ca")
assert isinstance(response, AIMessage)
assert isinstance(response.content, list)
assert isinstance(response.tool_calls, list)
assert len(response.tool_calls) == 1
tool_call = response.tool_calls[0]
assert tool_call["name"] == "get_weather"
assert isinstance(tool_call["args"], dict)
assert "location" in tool_call["args"]
# Test streaming
first = True
chunks: list[BaseMessage | BaseMessageChunk] = []
async for chunk in llm_with_tools.astream(
"what's the weather in san francisco, ca",
):
chunks = [*chunks, chunk]
if first:
gathered = chunk
first = False
else:
gathered = gathered + chunk # type: ignore[assignment]
assert len(chunks) > 1
assert isinstance(gathered, AIMessageChunk)
assert isinstance(gathered.tool_call_chunks, list)
assert len(gathered.tool_call_chunks) == 1
tool_call_chunk = gathered.tool_call_chunks[0]
assert tool_call_chunk["name"] == "get_weather"
assert isinstance(tool_call_chunk["args"], str)
assert "location" in json.loads(tool_call_chunk["args"])
def test_batch() -> None:
"""Test batch tokens."""
llm = ChatAnthropic(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg]
result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"])
for token in result:
assert isinstance(token.content, str)
async def test_ainvoke() -> None:
"""Test invoke tokens."""
llm = ChatAnthropic(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg]
result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]})
assert isinstance(result.content, str)
assert "model_name" in result.response_metadata
def test_invoke() -> None:
"""Test invoke tokens."""
llm = ChatAnthropic(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg]
result = llm.invoke("I'm Pickle Rick", config={"tags": ["foo"]})
assert isinstance(result.content, str)
def test_system_invoke() -> None:
"""Test invoke tokens with a system message."""
llm = ChatAnthropic(model_name=MODEL_NAME) # type: ignore[call-arg, call-arg]
prompt = ChatPromptTemplate.from_messages(
[
(
"system",
"You are an expert cartographer. If asked, you are a cartographer. "
"STAY IN CHARACTER",
),
("human", "Are you a mathematician?"),
],
)
chain = prompt | llm
result = chain.invoke({})
assert isinstance(result.content, str)
def test_handle_empty_aimessage() -> None:
# Anthropic can generate empty AIMessages, which are not valid unless in the last
# message in a sequence.
llm = ChatAnthropic(model=MODEL_NAME)
messages = [
HumanMessage("Hello"),
AIMessage([]),
HumanMessage("My name is Bob."),
]
_ = llm.invoke(messages)
# Test tool call sequence
llm_with_tools = llm.bind_tools(
[
{
"name": "get_weather",
"description": "Get weather report for a city",
"input_schema": {
"type": "object",
"properties": {"location": {"type": "string"}},
},
},
],
)
_ = llm_with_tools.invoke(
[
HumanMessage("What's the weather in Boston?"),
AIMessage(
content=[],
tool_calls=[
{
"name": "get_weather",
"args": {"location": "Boston"},
"id": "toolu_01V6d6W32QGGSmQm4BT98EKk",
"type": "tool_call",
},
],
),
ToolMessage(
content="It's sunny.", tool_call_id="toolu_01V6d6W32QGGSmQm4BT98EKk"
),
AIMessage([]),
HumanMessage("Thanks!"),
]
)
def test_anthropic_call() -> None:
"""Test valid call to anthropic."""
chat = ChatAnthropic(model=MODEL_NAME) # type: ignore[call-arg]
message = HumanMessage(content="Hello")
response = chat.invoke([message])
assert isinstance(response, AIMessage)
assert isinstance(response.content, str)
def test_anthropic_generate() -> None:
"""Test generate method of anthropic."""
chat = ChatAnthropic(model=MODEL_NAME) # type: ignore[call-arg]
chat_messages: list[list[BaseMessage]] = [
[HumanMessage(content="How many toes do dogs have?")],
]
messages_copy = [messages.copy() for messages in chat_messages]
result: LLMResult = chat.generate(chat_messages)
assert isinstance(result, LLMResult)
for response in result.generations[0]:
assert isinstance(response, ChatGeneration)
assert isinstance(response.text, str)
assert response.text == response.message.content
assert chat_messages == messages_copy
def test_anthropic_streaming() -> None:
"""Test streaming tokens from anthropic."""
chat = ChatAnthropic(model=MODEL_NAME) # type: ignore[call-arg]
message = HumanMessage(content="Hello")
response = chat.stream([message])
for token in response:
assert isinstance(token, AIMessageChunk)
assert isinstance(token.content, str)
def test_anthropic_streaming_callback() -> None:
"""Test that streaming correctly invokes on_llm_new_token callback."""
callback_handler = FakeCallbackHandler()
callback_manager = CallbackManager([callback_handler])
chat = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
callbacks=callback_manager,
verbose=True,
)
message = HumanMessage(content="Write me a sentence with 10 words.")
for token in chat.stream([message]):
assert isinstance(token, AIMessageChunk)
assert isinstance(token.content, str)
assert callback_handler.llm_streams > 1
async def test_anthropic_async_streaming_callback() -> None:
"""Test that streaming correctly invokes on_llm_new_token callback."""
callback_handler = FakeCallbackHandler()
callback_manager = CallbackManager([callback_handler])
chat = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
callbacks=callback_manager,
verbose=True,
)
chat_messages: list[BaseMessage] = [
HumanMessage(content="How many toes do dogs have?"),
]
async for token in chat.astream(chat_messages):
assert isinstance(token, AIMessageChunk)
assert isinstance(token.content, str)
assert callback_handler.llm_streams > 1
def test_anthropic_multimodal() -> None:
"""Test that multimodal inputs are handled correctly."""
chat = ChatAnthropic(model=MODEL_NAME) # type: ignore[call-arg]
messages: list[BaseMessage] = [
HumanMessage(
content=[
{
"type": "image_url",
"image_url": {
# langchain logo
"url": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAMCAggHCQgGCQgICAcICAgICAgICAYICAgHDAgHCAgICAgIBggICAgICAgICBYICAgICwkKCAgNDQoIDggICQgBAwQEBgUGCgYGCBALCg0QCg0NEA0KCg8LDQoKCgoLDgoQDQoLDQoKCg4NDQ0NDgsQDw0OCg4NDQ4NDQoJDg8OCP/AABEIALAAsAMBEQACEQEDEQH/xAAdAAEAAgEFAQAAAAAAAAAAAAAABwgJAQIEBQYD/8QANBAAAgIBAwIDBwQCAgIDAAAAAQIAAwQFERIIEwYhMQcUFyJVldQjQVGBcZEJMzJiFRYk/8QAGwEBAAMAAwEAAAAAAAAAAAAAAAQFBgEDBwL/xAA5EQACAQIDBQQJBAIBBQAAAAAAAQIDEQQhMQVBUWGREhRxgRMVIjJSU8HR8CNyobFCguEGJGKi4v/aAAwDAQACEQMRAD8ApfJplBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBANl16qOTEKB6kkAD+z5Tkcj0On+z7Ub1FlOmanejeavj6dqV6kfsQ1OK4IP8AIM6pVYR1kuqJdLCV6qvCnJ/6v66nL+Ems/RNc+y63+BOvvFL411O/wBW4r5T6D4Saz9E1z7Lrf4Ed4pfGuo9W4r5T6D4Saz9E1z7Lrf4Ed4pfGuo9W4r5T6D4Saz9E1z7Lrf4Ed4pfGuo9W4r5T6D4Saz9E1z7Lrf4Ed4pfGuo9W4r5T6D4Saz9E1z7Lrf4Ed4pfGuo9W4r5T6D4Saz9E1z7Lrf4Ed4pfGuo9W4r5T6D4Saz9E1z7Lrf4Ed4pfGuo9W4r5T6HE1D2e6lQpsu0zU6EXzZ8jTtSoUD9yWuxUAA/kmdkasJaSXVHRVwlekrzpyX+r+mh56m9WHJSGU+hUgg/wBjynaRORvnAEAQBAEAQBAEAQCbennpVzfER95LHE0tX4tlsnJr2B2srw6yQLCpBQ3Me1W+4/VZLKlh4jFRo5ay4cPH7f0XWA2XUxft37MONs34ffRcy/Xsu6bdG0UK2Nh1tkAbHMyAt+Wx2HIi11/SDcQe3jrTXv6IJRVcRUqe88uC0Nxhdn0MMv0458XnJ+e7wVlyJPJkYsTSAIAgCAIAgCAIBqDAIx9qHTbo2tBmycOtcgjYZmOBRlqdjxJtQDuhdye3ette/qhkmliKlP3XlwehXYrZ9DEr9SOfFZS6rXwd1yKCdQ3Srm+HT7yGOXpbPxXLVOLUMTtXXmVgkVliQgvU9qx9h+kz11Ne4fFRrZaS4cfD7f2YfH7LqYT279qHHevH76PlvhKTClEAQBAEAQBAJp6WOn0+I80i7mumYnF8x1LIbSSe3iV2DYq13ElnQ8q6gdijWUuIeKxHoY5e89PuXWy8D3qp7S9iOvN/D9+XiZRNN06uiuvHqrSqmpFrqqrVUrrrUBUREUBVVVAAUAAATNNtu7PR4xUUoxVkskloktxyCZwfRj26jetHPtzrMXSM4Uabj7Vrfj10O2ZdsDbb3bqrCKEYmpeyED8Hs53LZVwvsPg4qN6kbt+OS8t5hdobYqOo44edorK6SzfmtFpz14H16f8Arkz6cmrD1e9crBvsFZy3ropvxC2yo7NTXXXbjhtuXcTmisz91hX2yr4KLjemrNbuPXeMDtuoqihiGnF/5ZJx55ZNceF76GQSUJuhAEAQBAEAhb239WWl+H391s7mXnbAnExu2WqUjdWyLHda6Qw2IXdrCCGFZX5pMo4WdXNZLiyoxm1KOFfZl7UuCtdeN2kvzcRB4d/5JMV7OOVpWRRSWAFmPk1ZTKN9uT1PRi+QHnsj2H12DHYGXLZzS9mV3zVvuVFL/qGDlapSaXFST6qyfS/3tb4M8a4up49WoYlyZGLcCUsTf1B2ZGVgHrsRgVNbqrIwIYAjaVc4Sg+zJWZqaVWFWCnB3T0/PodnqOnV312Y9taW02o1dtViq9dlbAq6OjAqyspIKkEEGfKbTuj7lFSTjJXTyaejXAxd9U/T6fDmYBTzbTMvm+G7FnNRBHcxLLDuWankCrueVlRG5dq7nOlwuI9NHP3lr9zzjamA7rU9n3Jacn8P25eBC0mFKIAgCAIBtdwASfQDc/4nIbsZXulr2ZDR9HwsYpxybqxmZe4Xl71cquyMR69hO3jg+fy0r5n1OWxNX0lRvdovBflz1DZuG7vh4xtZtXl+55vpp5EsyKWZ5X2seH783TdRwsZgmVk4OVRQzMUUXPRYle7gEoCxA5gEqDvsdp2U5KM03omv7I+Ig6lKUIuzaaXmigPtb6HNQ0bEytTGXjZeLiKlhWuu6rINPMLbY1bFqkXHQ908b7CyK+wUqFe+pY2FSSjZpvnl+MwmJ2JVw9OVTtqUYq+Sadt+WaVtd9+W+uLLv5HzB8j/AIlgZ8yRdGfUXXq2JXpGTZtquFUE+cnfMxU2Wu9CzEvaicEsG+/MdzYLbsmexmHdOXaS9l/w+H2PQ9kY9V6apyftxVtdUtJc3x58iykrjQCAIAgFdurzqbPh+lMHFKHVspC6FuLLh427Icp0O4d2ZWREb5WZLGbktJrssMJhvSu8vdX8vh9zP7X2i8LBRp27b46Rj8Vt73JebyVnCfSz0jNqh/8AsGsrZZRcxuoxrms7ua7HmcvLYkOaXJ5Ctjvkb8n/AE+K3TcVi+x+nS6rdyX33eJTbL2S636+JTaeaTveTf8AlLlwjv35ZFmfHnSnoWo47Yo0/FxLOBWnJw8ejHuobb5GVqkUOqnY9qwOjDyI9CKyGKqwd+03ybdjS19mYarHs+jSe5pJNdP6KudBPiTIwNYz/D1jA1WJk91AWKLqGJctDWVg+QFlfdQtsGcVY+//AFgSzx0VKmqi5dJK/wCeZm9iVJ0sRPDye6WWdu1BpXWeV78M8uGd/wCURuCJuqX2YjWNHzMYJyyaKzmYm3Hl71SrOqKW8h307mOT5fLc3mPUSsNV9HUT3aPwf5crNpYbvGHlG2azj+5Zrrp5mKFHBAI9CNx/iak8vTubpwBAEAQDtPCekLk5WHiON0yczFx3H8pbkVVMP7VyJ8zfZi3wTfRHdRh26kI8ZRXk5IzREf6mPPXTSAIB1/iPQa8yjIwrVD05NFuPYrAFWrsrat1YHyIKsRsf2nMXZpo+ZR7UXF77rqYW2xHrJqsHG2smu1T6rapKWKf8OCP6mxvfNHj1nH2XqsnfW6yOVpGr241teVRY9ORS4sqtrPF67B6Mp/2NiCGBIIYMQeGlJWaujsp1JU5KcHZrQyZdK/U3X4ipONdwq1fGQNkVL5JkVbhfe8cE/wDgWKq1e5NFjKD8ttLPm8ThnSd17r0+35qej7N2hHFQs8prVfVcv6J4kIuBAKtdWnV8uj89I090fVeP/wCi8hXq05CvIcg26PmMpDCpgVqUrZaCGqrussLhPSe3P3f7/wCOf4s9tTaXd16On77/APXn48EU58OYl+RremrrRyHbJzdPbI9+LvZZjW21vUlgs5FMe4OqmshVrrscca9jtcSaVKXotydrcVr58zH04znioLFXd3G/a17L08E3u5vJEveGeobX/Cuq2YmttbbjX3NflUu7ZC1VW2OTlaZZuzDHrIbbGXZOFbV9qmwfLElh6Venelqsl4rc+fP6FtT2hicHiHDEu8W7u+ii8lKObtHL3fH/AC1tn1AdReJ4exVvJW/MyEJwcVWG9x2G1zkb8MVNwTbt83kqhmYCVVDDyqytot7/ADeanG46GFh2nm37q4/8c/qVr/4/fZ9k5Obm+J7+Xa430V2soVcrNuuW3LtT+RQUNZKjj3L2QHlRYqWOPqJRVJcvJJWRnth4epKpLE1FqnZ8XJ3b8MuG/LQvdKQ2ZqB/qAYXfFmkLjZWZiINkxszKx0H8JVkW1KP6VAJsIPtRT4pPqjyKtDsVJx4SkvJSdjq59HSIAgCAdp4T1dcbKw8tzsmNmYuQ5/hKsiq1j/SoTPma7UWuKa6o7qM+xUhLhKL8lJXM0RP+pjz100gCAIBjA6x/Y9ZpGq35KofcdSssy8ewA8Vvcl8rHJ3OzrazXAeQNVq8d+3Zx0mDrKpTS3rLy3P6HnG18I6FdzS9mWa/c9V9fPkQTJxRnf+AfHeRpOXj6pjHa/GsDhd+K2p6W0WHY/p31lqidiVDchsyqR8VIKpFxlo/wAv5EjD15UKiqw1X8revMy++DfFtOo4uNqNDcsfKprvrJ8iFZQeLD1Dod0KnzVlI/aZKcXCTi9UerUqkasFOLumk14M8T1L+0uzRdHzdRp8skKlGO2wPC+6xKUt2PkezzN3E7g8NtjvO7D01UqKL03+CzIe0MQ8Ph5VI66Lxbsv7Ks9D3ThTqG/iXOBvSvJsGHTae4L8lWDXZ2QzMzXMt7MoWzzNyW2PzPaYWeNxDj+nDLLPw4dPsZ7Y+CVb/ua3tO7tfitZPzyS5XJS6zOlu3XAmrYSh9Rpq7N2OzKozMYF3RUZyEXIqZ325lVtVyrMOFUjYPEql7MtP6f2J+1tmvE2qU/fWWusfo1/P8AVWfbjruoWabpFGrl/wD5Wq/UOyMhO3mV6QFxaU98BCuzW5dNxW2wcraqeZawku1pQjFVJOn7uWmna1y8uhmMdUqOhSjiPfTlr73o0rXfi1k96V7nq/YP0n6lr99OdqgysfS6qqKw2QbK8rKx6kWrHxcdG2toxlrUA3lU+Q71c3ta+rpr4qFJONOzlnpom9/N8vpkTMBsyriZKeITUEla+rSyUbapLyvzeZkT0fR6saqvFprSmilFrqqrUJXXWo2VEUABVUDbYSgbbd3qbyMVFWSskcucH0ag/wCoBhd8WauuTlZmWh3TIzMrIQ/yluRbap/tXBmwguzFLgkuiPIq0+3UnLjKT8nJ2Orn0dIgCAIBtdAQQfQjY/4nIauZXulr2nDWNHw8kvyyaKxh5e/Hl71SqozsF8h307eQB5fLcvkPQZbE0vR1Gt2q8H+WPUNm4nvGHjK92spfuWT66+ZLMilmIAgHm/aL4ExtVxL9PyaVvptRtkb1WwA9uyths1dqNsRYhDKf39Z905uElKLszor0YVoOE1dP86mH7R/DORdi5OeKz2sI4iZZIKtU+Q11dPJSvl+rS1ZBIKsyDY7krrXJKSjxvbyzPKY0ZuMprSNlLim21p4rPh1t6fA9ieq34Ka1RhW5OA7XKbMcC6ypq7DU/doT9cLyBPNK7ECglmT0nW60FLsN2fPnnroSI4KvKl6aMLxz0zeTavbW3hfy3Wq/4+fbVQKbPDd9wW7vWZGnK2wW2l17l9FTehsS0W5PA/M62uV5CqzhV4+i7+kS5Px4/T8z02wcXHsvDyed24+DzaXg7u3PLLSderP2f3arombi0KXyEFWVVWBu1jU2pc1SD93sqWxAP3dlkHC1FCqm9NOuRd7ToOvhpwjrk14xadv4K7dEPU5gYOI2iZ+RXiql1l2Hk2fJjtVae5ZVbaSUrsW42WB7O2jpYqg8k+exxuGnKXbgr8eOWXmUGxtpUqdP0FV9m12m9Gm72/8AFp8dfEmb22dZmlaXjv7nk42pag4K0U49q3U1t5fqZV1LFErTfl2g4st/8VCjnZXDo4Oc37ScVvv9L/iLXG7Xo0IfpyU57kndeLa0X8vRcq59OnsAzPFWY3iTVmezBa3uMbQOWo2qdhSibcUwa+IrPEBSq9pB/wBjV2GIrxoR9HT1/r/6M/s7A1MbU7ziHeN75/5tbuUF/Oml28h0oDfCAIBE/VL7TRo+j5uSr8cm6s4eJtx5e9XKyK6hvJuwncyCPP5aW8j6GVhqXpKiW7V+C/LFZtLE93w8pXzeUf3PJdNfIxQIgAAHoBsP8TUnl6VjdOAIAgCAIBNPSx1BHw5mE3c20zL4JmIoZjUQT28uusblmp5EMiDlZUTsHaulDDxWH9NHL3lp9i62Xj+61Pa9yWvJ/F9+XgZRNN1Ku+uvIqsS2m1FsqtrZXrsrYBkdHUlWVlIIYEggzNNNOzPR4yUkpRd081bRp7zkTg+jUQCH9Q8FeJjnNdVrmImmPx/QfTKXuqAVOXa2ZeTO5tAe29hWq1bpeS8lKdLs2cH2v3Zfn5kVjpYr0t1VXY4djNaaZ+OumWpGh9j2vaVi6pp+NVpep4+ouxQXY9ZzMnKybbGy8rVbNsHENdKMdiot2Raa0pbtjud/pac5RlK6a4PJJaJasivD4inCcIdmSle11m3JttyeStn/RJ/sG8A6no2LgaTaultiY+MwuuxmzUyDlFue4rek1XGxmd3yWspLvuwoTnskevONSTkr58bafm7dxJuDpVaNONOXZsln2b6+evjv4I6jVejTRLMp9TqTLw8xrRkV24eVZT7vkcuZtorKvUjM25KMj1+Z2RdzOxYuoo9l2a5rVcOJGnsnDubqxTjLVOMmrPilnG/k1yJxrXYAbkkADkdtyf5OwA3Pr5AD+APSQi5K7e1zod0nVrnzanu07KtZnuOMK3x7rWO7WPjuNlsY7sWoenmzMzB2YtLCljZ012XmuevUoMVsWhXk5puEnra1m+Nnl0tffmeY8Df8dum49iXZmZkZ4Q79gImJjv/AALQj23Mv/qt6BvRuQJU9lTaE5K0Vb+X9iNQ2BRg71JOfKyUemb/AJ/gtXhYSVIlNaLXVWqpXWiqqIigBURVACqoAAUAAASrbvmzTpJKy0PtByIBx9R1KuiuzItsSqmpGsttsZUrrrUFnd3YhVVVBJYkAATlJt2R8ykopyk7JZtvRJbzF31T9QR8R5gNPNdMxOSYaMGQ2kkdzLsrOxVruICo45V1AbhGsuQaXC4f0Mc/eev2PONqY7vVT2fcjpzfxfbl4kLSYUogCAIAgCAIBNvTz1VZvh0+7FTl6Wz8mxGfi1DE72WYdhBFZYkuaGHasfc/os9lrQ8RhY1s9JcePj9/7LrAbUnhPYt2ocN68Pto+W+/fsv6ktG1oKuNmVrkEbnDyCKMtTsOQFTkd0LuB3KGtr39HMoquHqU/eWXFaG4wu0KGJX6cs+DykvJ6+KuuZJxEjFiaQBAEAQBAEAQBANQIBGHtR6ktG0UMuTmVtkAbjDxyt+Wx2PEGpG/SDcSO5kNTXv6uJJpYepV91ZcXoV2K2hQwy/UlnwWcn5bvF2XMoL1DdVWb4iPuwU4mlq/JcRX5NewO9dmZYABYVIDilR2q32P6rJXat7h8LGjnrLjw8Pv/Rh8ftSpi/Yt2YcL5vx+2i5kJSYUogCAIAgCAIAgCAbLqFYcWAZT6hgCD/R8pyOZ6HT/AGg6lQorp1PU6EXyVMfUdSoUD9gFpykAA/gCdUqUJaxXREuli69JWhUkv9n9Tl/FvWfreufetb/PnX3el8C6Hf6yxXzX1Hxb1n63rn3rW/z47vS+BdB6yxXzX1Hxb1n63rn3rW/z47vS+BdB6yxXzX1Hxb1n63rn3rW/z47vS+BdB6yxXzX1Hxb1n63rn3rW/wA+O70vgXQessV819R8W9Z+t65961v8+O70vgXQessV819R8W9Z+t65961v8+O70vgXQessV819R8W9Z+t65961v8+O70vgXQessV819Tiah7QdRvU13anqd6N5MmRqOpXqR+4K3ZTgg/wROyNKEdIrojoqYuvVVp1JP/Z/TU89TQqjioCgegAAA/oeU7SJzN84AgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgH/9k=", # noqa: E501
},
},
{"type": "text", "text": "What is this a logo for?"},
],
),
]
response = chat.invoke(messages)
assert isinstance(response, AIMessage)
assert isinstance(response.content, str)
num_tokens = chat.get_num_tokens_from_messages(messages)
assert num_tokens > 0
def test_streaming() -> None:
"""Test streaming tokens from Anthropic."""
callback_handler = FakeCallbackHandler()
callback_manager = CallbackManager([callback_handler])
llm = ChatAnthropic( # type: ignore[call-arg, call-arg]
model_name=MODEL_NAME,
streaming=True,
callbacks=callback_manager,
)
response = llm.generate([[HumanMessage(content="I'm Pickle Rick")]])
assert callback_handler.llm_streams > 0
assert isinstance(response, LLMResult)
async def test_astreaming() -> None:
"""Test streaming tokens from Anthropic."""
callback_handler = FakeCallbackHandler()
callback_manager = CallbackManager([callback_handler])
llm = ChatAnthropic( # type: ignore[call-arg, call-arg]
model_name=MODEL_NAME,
streaming=True,
callbacks=callback_manager,
)
response = await llm.agenerate([[HumanMessage(content="I'm Pickle Rick")]])
assert callback_handler.llm_streams > 0
assert isinstance(response, LLMResult)
def test_tool_use() -> None:
llm = ChatAnthropic(
model="claude-sonnet-4-5-20250929", # type: ignore[call-arg]
temperature=0,
)
tool_definition = {
"name": "get_weather",
"description": "Get weather report for a city",
"input_schema": {
"type": "object",
"properties": {"location": {"type": "string"}},
},
}
llm_with_tools = llm.bind_tools([tool_definition])
query = "how are you? what's the weather in san francisco, ca"
response = llm_with_tools.invoke(query)
assert isinstance(response, AIMessage)
assert isinstance(response.content, list)
assert isinstance(response.tool_calls, list)
assert len(response.tool_calls) == 1
tool_call = response.tool_calls[0]
assert tool_call["name"] == "get_weather"
assert isinstance(tool_call["args"], dict)
assert "location" in tool_call["args"]
content_blocks = response.content_blocks
assert len(content_blocks) == 2
assert content_blocks[0]["type"] == "text"
assert content_blocks[0]["text"]
assert content_blocks[1]["type"] == "tool_call"
assert content_blocks[1]["name"] == "get_weather"
assert content_blocks[1]["args"] == tool_call["args"]
# Test streaming
llm = ChatAnthropic(model="claude-sonnet-4-5-20250929") # type: ignore[call-arg]
llm_with_tools = llm.bind_tools([tool_definition])
first = True
chunks: list[BaseMessage | BaseMessageChunk] = []
for chunk in llm_with_tools.stream(query):
chunks = [*chunks, chunk]
if first:
gathered = chunk
first = False
else:
gathered = gathered + chunk # type: ignore[assignment]
for block in chunk.content_blocks:
assert block["type"] in ("text", "tool_call_chunk")
assert len(chunks) > 1
assert isinstance(gathered.content, list)
assert len(gathered.content) == 2
tool_use_block = None
for content_block in gathered.content:
assert isinstance(content_block, dict)
if content_block["type"] == "tool_use":
tool_use_block = content_block
break
assert tool_use_block is not None
assert tool_use_block["name"] == "get_weather"
assert "location" in json.loads(tool_use_block["partial_json"])
assert isinstance(gathered, AIMessageChunk)
assert isinstance(gathered.tool_calls, list)
assert len(gathered.tool_calls) == 1
tool_call = gathered.tool_calls[0]
assert tool_call["name"] == "get_weather"
assert isinstance(tool_call["args"], dict)
assert "location" in tool_call["args"]
assert tool_call["id"] is not None
content_blocks = gathered.content_blocks
assert len(content_blocks) == 2
assert content_blocks[0]["type"] == "text"
assert content_blocks[0]["text"]
assert content_blocks[1]["type"] == "tool_call"
assert content_blocks[1]["name"] == "get_weather"
assert content_blocks[1]["args"]
# Test passing response back to model
stream = llm_with_tools.stream(
[
query,
gathered,
ToolMessage(content="sunny and warm", tool_call_id=tool_call["id"]),
],
)
chunks = []
first = True
for chunk in stream:
chunks = [*chunks, chunk]
if first:
gathered = chunk
first = False
else:
gathered = gathered + chunk # type: ignore[assignment]
assert len(chunks) > 1
def test_builtin_tools_text_editor() -> None:
llm = ChatAnthropic(model="claude-sonnet-4-5-20250929") # type: ignore[call-arg]
tool = {"type": "text_editor_20250728", "name": "str_replace_based_edit_tool"}
llm_with_tools = llm.bind_tools([tool])
response = llm_with_tools.invoke(
"There's a syntax error in my primes.py file. Can you help me fix it?",
)
assert isinstance(response, AIMessage)
assert response.tool_calls
content_blocks = response.content_blocks
assert len(content_blocks) == 2
assert content_blocks[0]["type"] == "text"
assert content_blocks[0]["text"]
assert content_blocks[1]["type"] == "tool_call"
assert content_blocks[1]["name"] == "str_replace_based_edit_tool"
def test_builtin_tools_computer_use() -> None:
"""Test computer use tool integration.
Beta header should be automatically appended based on tool type.
This test only verifies tool call generation.
"""
llm = ChatAnthropic(
model="claude-sonnet-4-5-20250929", # type: ignore[call-arg]
)
tool = {
"type": "computer_20250124",
"name": "computer",
"display_width_px": 1024,
"display_height_px": 768,
"display_number": 1,
}
llm_with_tools = llm.bind_tools([tool])
response = llm_with_tools.invoke(
"Can you take a screenshot to see what's on the screen?",
)
assert isinstance(response, AIMessage)
assert response.tool_calls
content_blocks = response.content_blocks
assert len(content_blocks) >= 2
assert content_blocks[0]["type"] == "text"
assert content_blocks[0]["text"]
# Check that we have a tool_call for computer use
tool_call_blocks = [b for b in content_blocks if b["type"] == "tool_call"]
assert len(tool_call_blocks) >= 1
assert tool_call_blocks[0]["name"] == "computer"
# Verify tool call has expected action (screenshot in this case)
tool_call = response.tool_calls[0]
assert tool_call["name"] == "computer"
assert "action" in tool_call["args"]
assert tool_call["args"]["action"] == "screenshot"
class GenerateUsername(BaseModel):
"""Get a username based on someone's name and hair color."""
name: str
hair_color: str
def test_disable_parallel_tool_calling() -> None:
llm = ChatAnthropic(model=MODEL_NAME) # type: ignore[call-arg]
llm_with_tools = llm.bind_tools([GenerateUsername], parallel_tool_calls=False)
result = llm_with_tools.invoke(
"Use the GenerateUsername tool to generate user names for:\n\n"
"Sally with green hair\n"
"Bob with blue hair",
)
assert isinstance(result, AIMessage)
assert len(result.tool_calls) == 1
def test_anthropic_with_empty_text_block() -> None:
"""Anthropic SDK can return an empty text block."""
@tool
def type_letter(letter: str) -> str:
"""Type the given letter."""
return "OK"
model = ChatAnthropic(model=MODEL_NAME, temperature=0).bind_tools( # type: ignore[call-arg]
[type_letter],
)
messages = [
SystemMessage(
content="Repeat the given string using the provided tools. Do not write "
"anything else or provide any explanations. For example, "
"if the string is 'abc', you must print the "
"letters 'a', 'b', and 'c' one at a time and in that order. ",
),
HumanMessage(content="dog"),
AIMessage(
content=[
{"text": "", "type": "text"},
{
"id": "toolu_01V6d6W32QGGSmQm4BT98EKk",
"input": {"letter": "d"},
"name": "type_letter",
"type": "tool_use",
},
],
tool_calls=[
{
"name": "type_letter",
"args": {"letter": "d"},
"id": "toolu_01V6d6W32QGGSmQm4BT98EKk",
"type": "tool_call",
},
],
),
ToolMessage(content="OK", tool_call_id="toolu_01V6d6W32QGGSmQm4BT98EKk"),
]
model.invoke(messages)
def test_with_structured_output() -> None:
llm = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
)
structured_llm = llm.with_structured_output(
{
"name": "get_weather",
"description": "Get weather report for a city",
"input_schema": {
"type": "object",
"properties": {"location": {"type": "string"}},
},
},
)
response = structured_llm.invoke("what's the weather in san francisco, ca")
assert isinstance(response, dict)
assert response["location"]
class Person(BaseModel):
"""Person data."""
name: str
age: int
nicknames: list[str] | None
class PersonDict(TypedDict):
"""Person data as a TypedDict."""
name: str
age: int
nicknames: list[str] | None
@pytest.mark.parametrize("schema", [Person, Person.model_json_schema(), PersonDict])
def test_response_format(schema: dict | type) -> None:
model = ChatAnthropic(
model="claude-sonnet-4-5", # type: ignore[call-arg]
)
query = "Chester (a.k.a. Chet) is 100 years old."
response = model.invoke(query, response_format=schema)
parsed = json.loads(response.text)
if isinstance(schema, type) and issubclass(schema, BaseModel):
schema.model_validate(parsed)
else:
assert isinstance(parsed, dict)
assert parsed["name"]
assert parsed["age"]
@pytest.mark.vcr
def test_response_format_in_agent() -> None:
class Weather(BaseModel):
temperature: float
units: str
# no tools
agent = create_agent(
"anthropic:claude-sonnet-4-5", response_format=ProviderStrategy(Weather)
)
result = agent.invoke({"messages": [{"role": "user", "content": "75 degrees F."}]})
assert len(result["messages"]) == 2
parsed = json.loads(result["messages"][-1].text)
assert Weather(**parsed) == result["structured_response"]
# with tools
def get_weather(location: str) -> str:
"""Get the weather at a location."""
return "75 degrees Fahrenheit."
agent = create_agent(
"anthropic:claude-sonnet-4-5",
tools=[get_weather],
response_format=ProviderStrategy(Weather),
)
result = agent.invoke(
{"messages": [{"role": "user", "content": "What's the weather in SF?"}]},
)
assert len(result["messages"]) == 4
assert result["messages"][1].tool_calls
parsed = json.loads(result["messages"][-1].text)
assert Weather(**parsed) == result["structured_response"]
@pytest.mark.vcr
def test_strict_tool_use() -> None:
model = ChatAnthropic(
model="claude-sonnet-4-5", # type: ignore[call-arg]
)
def get_weather(location: str, unit: Literal["C", "F"]) -> str:
"""Get the weather at a location."""
return "75 degrees Fahrenheit."
model_with_tools = model.bind_tools([get_weather], strict=True)
response = model_with_tools.invoke("What's the weather in Boston, in Celsius?")
assert response.tool_calls
def test_get_num_tokens_from_messages() -> None:
llm = ChatAnthropic(model=MODEL_NAME) # type: ignore[call-arg]
# Test simple case
messages = [
SystemMessage(content="You are a scientist"),
HumanMessage(content="Hello, Claude"),
]
num_tokens = llm.get_num_tokens_from_messages(messages)
assert num_tokens > 0
# Test tool use
@tool(parse_docstring=True)
def get_weather(location: str) -> str:
"""Get the current weather in a given location.
Args:
location: The city and state, e.g. San Francisco, CA
"""
return "Sunny"
messages = [
HumanMessage(content="What's the weather like in San Francisco?"),
]
num_tokens = llm.get_num_tokens_from_messages(messages, tools=[get_weather])
assert num_tokens > 0
messages = [
HumanMessage(content="What's the weather like in San Francisco?"),
AIMessage(
content=[
{"text": "Let's see.", "type": "text"},
{
"id": "toolu_01V6d6W32QGGSmQm4BT98EKk",
"input": {"location": "SF"},
"name": "get_weather",
"type": "tool_use",
},
],
tool_calls=[
{
"name": "get_weather",
"args": {"location": "SF"},
"id": "toolu_01V6d6W32QGGSmQm4BT98EKk",
"type": "tool_call",
},
],
),
ToolMessage(content="Sunny", tool_call_id="toolu_01V6d6W32QGGSmQm4BT98EKk"),
]
num_tokens = llm.get_num_tokens_from_messages(messages, tools=[get_weather])
assert num_tokens > 0
class GetWeather(BaseModel):
"""Get the current weather in a given location."""
location: str = Field(..., description="The city and state, e.g. San Francisco, CA")
@pytest.mark.parametrize("tool_choice", ["GetWeather", "auto", "any"])
def test_anthropic_bind_tools_tool_choice(tool_choice: str) -> None:
chat_model = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
)
chat_model_with_tools = chat_model.bind_tools([GetWeather], tool_choice=tool_choice)
response = chat_model_with_tools.invoke("what's the weather in ny and la")
assert isinstance(response, AIMessage)
def test_pdf_document_input() -> None:
url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
data = b64encode(requests.get(url, timeout=10).content).decode()
result = ChatAnthropic(model=MODEL_NAME).invoke( # type: ignore[call-arg]
[
HumanMessage(
[
"summarize this document",
{
"type": "document",
"source": {
"type": "base64",
"data": data,
"media_type": "application/pdf",
},
},
],
),
],
)
assert isinstance(result, AIMessage)
assert isinstance(result.content, str)
assert len(result.content) > 0
@pytest.mark.default_cassette("test_agent_loop.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["v0", "v1"])
def test_agent_loop(output_version: Literal["v0", "v1"]) -> None:
@tool
def get_weather(location: str) -> str:
"""Get the weather for a location."""
return "It's sunny."
llm = ChatAnthropic(model=MODEL_NAME, output_version=output_version) # type: ignore[call-arg]
llm_with_tools = llm.bind_tools([get_weather])
input_message = HumanMessage("What is the weather in San Francisco, CA?")
tool_call_message = llm_with_tools.invoke([input_message])
assert isinstance(tool_call_message, AIMessage)
tool_calls = tool_call_message.tool_calls
assert len(tool_calls) == 1
tool_call = tool_calls[0]
tool_message = get_weather.invoke(tool_call)
assert isinstance(tool_message, ToolMessage)
response = llm_with_tools.invoke(
[
input_message,
tool_call_message,
tool_message,
]
)
assert isinstance(response, AIMessage)
@pytest.mark.default_cassette("test_agent_loop_streaming.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize(
("output_version", "use_v2_stream"),
[
("v0", False),
("v1", False),
("v1", True),
],
)
def test_agent_loop_streaming(
output_version: Literal["v0", "v1"], *, use_v2_stream: bool
) -> None:
@tool
def get_weather(location: str) -> str:
"""Get the weather for a location."""
return "It's sunny."
llm = ChatAnthropic(
model=MODEL_NAME,
streaming=True,
output_version=output_version, # type: ignore[call-arg]
)
llm_with_tools = llm.bind_tools([get_weather])
input_message = HumanMessage("What is the weather in San Francisco, CA?")
if use_v2_stream:
tool_call_message = llm_with_tools.stream_v2([input_message]).output
else:
tool_call_message = llm_with_tools.invoke([input_message])
assert isinstance(tool_call_message, AIMessage)
tool_calls = tool_call_message.tool_calls
assert len(tool_calls) == 1
tool_call = tool_calls[0]
tool_message = get_weather.invoke(tool_call)
assert isinstance(tool_message, ToolMessage)
if use_v2_stream:
response = llm_with_tools.stream_v2(
[input_message, tool_call_message, tool_message]
).output
else:
response = llm_with_tools.invoke(
[
input_message,
tool_call_message,
tool_message,
]
)
assert isinstance(response, AIMessage)
@pytest.mark.default_cassette("test_agent_loop_streaming.yaml.gz")
@pytest.mark.vcr
async def test_agent_loop_streaming_astream_v2_v1() -> None:
"""Async multi-turn through `astream_v2`.
Mirrors `test_agent_loop_streaming` for `output_version="v1"` but
exercises `AsyncChatModelStream` end-to-end.
"""
@tool
def get_weather(location: str) -> str:
"""Get the weather for a location."""
return "It's sunny."
llm = ChatAnthropic(
model=MODEL_NAME,
streaming=True,
output_version="v1", # type: ignore[call-arg]
)
llm_with_tools = llm.bind_tools([get_weather])
input_message = HumanMessage("What is the weather in San Francisco, CA?")
tool_call_message = await (await llm_with_tools.astream_v2([input_message]))
assert isinstance(tool_call_message, AIMessage)
tool_calls = tool_call_message.tool_calls
assert len(tool_calls) == 1
tool_call = tool_calls[0]
tool_message = get_weather.invoke(tool_call)
assert isinstance(tool_message, ToolMessage)
response = await (
await llm_with_tools.astream_v2(
[input_message, tool_call_message, tool_message]
)
)
assert isinstance(response, AIMessage)
@pytest.mark.default_cassette("test_citations.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize(
("output_version", "use_v2_stream"),
[
("v0", False),
("v1", False),
("v1", True),
],
)
def test_citations(output_version: Literal["v0", "v1"], *, use_v2_stream: bool) -> None:
llm = ChatAnthropic(model=MODEL_NAME, output_version=output_version) # type: ignore[call-arg]
messages = [
{
"role": "user",
"content": [
{
"type": "document",
"source": {
"type": "content",
"content": [
{"type": "text", "text": "The grass is green"},
{"type": "text", "text": "The sky is blue"},
],
},
"citations": {"enabled": True},
},
{"type": "text", "text": "What color is the grass and sky?"},
],
},
]
response = llm.invoke(messages)
assert isinstance(response, AIMessage)
assert isinstance(response.content, list)
if output_version == "v1":
assert any("annotations" in block for block in response.content)
else:
assert any("citations" in block for block in response.content)
# Test streaming
full: BaseMessage
if use_v2_stream:
full = llm.stream_v2(messages).output
else:
aggregated: BaseMessageChunk | None = None
for chunk in llm.stream(messages):
aggregated = (
cast("BaseMessageChunk", chunk)
if aggregated is None
else aggregated + chunk
)
assert isinstance(aggregated, AIMessageChunk)
full = aggregated
assert isinstance(full.content, list)
assert not any("citation" in block for block in full.content)
if output_version == "v1":
assert any("annotations" in block for block in full.content)
else:
assert any("citations" in block for block in full.content)
# Test pass back in
next_message = {
"role": "user",
"content": "Can you comment on the citations you just made?",
}
_ = llm.invoke([*messages, full, next_message])
@pytest.mark.vcr
def test_thinking() -> None:
llm = ChatAnthropic(
model="claude-sonnet-4-5-20250929", # type: ignore[call-arg]
max_tokens=5_000, # type: ignore[call-arg]
thinking={"type": "enabled", "budget_tokens": 2_000},
)
input_message = {"role": "user", "content": "Hello"}
response = llm.invoke([input_message])
assert any("thinking" in block for block in response.content)
for block in response.content:
assert isinstance(block, dict)
if block["type"] == "thinking":
assert set(block.keys()) == {"type", "thinking", "signature"}
assert block["thinking"]
assert isinstance(block["thinking"], str)
assert block["signature"]
assert isinstance(block["signature"], str)
# Test streaming
full: BaseMessageChunk | None = None
for chunk in llm.stream([input_message]):
full = cast("BaseMessageChunk", chunk) if full is None else full + chunk
assert isinstance(full, AIMessageChunk)
assert isinstance(full.content, list)
assert any("thinking" in block for block in full.content)
for block in full.content:
assert isinstance(block, dict)
if block["type"] == "thinking":
assert set(block.keys()) == {"type", "thinking", "signature", "index"}
assert block["thinking"]
assert isinstance(block["thinking"], str)
assert block["signature"]
assert isinstance(block["signature"], str)
# Test pass back in
next_message = {"role": "user", "content": "How are you?"}
_ = llm.invoke([input_message, full, next_message])
@pytest.mark.default_cassette("test_thinking.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("use_v2_stream", [False, True])
def test_thinking_v1(*, use_v2_stream: bool) -> None:
llm = ChatAnthropic(
model="claude-sonnet-4-5-20250929", # type: ignore[call-arg]
max_tokens=5_000, # type: ignore[call-arg]
thinking={"type": "enabled", "budget_tokens": 2_000},
output_version="v1",
)
input_message = {"role": "user", "content": "Hello"}
response = llm.invoke([input_message])
assert any("reasoning" in block for block in response.content)
for block in response.content:
assert isinstance(block, dict)
if block["type"] == "reasoning":
assert set(block.keys()) == {"type", "reasoning", "extras"}
assert block["reasoning"]
assert isinstance(block["reasoning"], str)
signature = block["extras"]["signature"]
assert signature
assert isinstance(signature, str)
# Test streaming
full: BaseMessage
if use_v2_stream:
full = llm.stream_v2([input_message]).output
else:
aggregated: BaseMessageChunk | None = None
for chunk in llm.stream([input_message]):
aggregated = (
cast(BaseMessageChunk, chunk)
if aggregated is None
else aggregated + chunk
)
assert isinstance(aggregated, AIMessageChunk)
full = aggregated
assert isinstance(full.content, list)
assert any("reasoning" in block for block in full.content)
for block in full.content:
assert isinstance(block, dict)
if block["type"] == "reasoning":
assert set(block.keys()) == {"type", "reasoning", "extras", "index"}
assert block["reasoning"]
assert isinstance(block["reasoning"], str)
signature = block["extras"]["signature"]
assert signature
assert isinstance(signature, str)
# Test pass back in
next_message = {"role": "user", "content": "How are you?"}
_ = llm.invoke([input_message, full, next_message])
@pytest.mark.default_cassette("test_redacted_thinking.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["v0", "v1"])
def test_redacted_thinking(output_version: Literal["v0", "v1"]) -> None:
llm = ChatAnthropic(
# It appears that Sonnet 4.5 either: isn't returning redacted thinking blocks,
# or the magic string is broken? Retry later once 3-7 finally removed
model="claude-3-7-sonnet-latest", # type: ignore[call-arg]
max_tokens=5_000, # type: ignore[call-arg]
thinking={"type": "enabled", "budget_tokens": 2_000},
output_version=output_version,
)
query = "ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB" # noqa: E501
input_message = {"role": "user", "content": query}
response = llm.invoke([input_message])
value = None
for block in response.content:
assert isinstance(block, dict)
if block["type"] == "redacted_thinking":
value = block
elif (
block["type"] == "non_standard"
and block["value"]["type"] == "redacted_thinking"
):
value = block["value"]
else:
pass
if value:
assert set(value.keys()) == {"type", "data"}
assert value["data"]
assert isinstance(value["data"], str)
assert value is not None
# Test streaming
full: BaseMessageChunk | None = None
for chunk in llm.stream([input_message]):
full = cast("BaseMessageChunk", chunk) if full is None else full + chunk
assert isinstance(full, AIMessageChunk)
assert isinstance(full.content, list)
value = None
for block in full.content:
assert isinstance(block, dict)
if block["type"] == "redacted_thinking":
value = block
assert set(value.keys()) == {"type", "data", "index"}
assert "index" in block
elif (
block["type"] == "non_standard"
and block["value"]["type"] == "redacted_thinking"
):
value = block["value"]
assert isinstance(value, dict)
assert set(value.keys()) == {"type", "data"}
assert "index" in block
else:
pass
if value:
assert value["data"]
assert isinstance(value["data"], str)
assert value is not None
# Test pass back in
next_message = {"role": "user", "content": "What?"}
_ = llm.invoke([input_message, full, next_message])
def test_structured_output_thinking_enabled() -> None:
llm = ChatAnthropic(
model="claude-sonnet-4-5-20250929", # type: ignore[call-arg]
max_tokens=5_000, # type: ignore[call-arg]
thinking={"type": "enabled", "budget_tokens": 2_000},
)
with pytest.warns(match="structured output"):
structured_llm = llm.with_structured_output(GenerateUsername)
query = "Generate a username for Sally with green hair"
response = structured_llm.invoke(query)
assert isinstance(response, GenerateUsername)
with pytest.raises(OutputParserException):
structured_llm.invoke("Hello")
# Test streaming
for chunk in structured_llm.stream(query):
assert isinstance(chunk, GenerateUsername)
def test_structured_output_thinking_force_tool_use() -> None:
# Structured output currently relies on forced tool use, which is not supported
# when `thinking` is enabled. When this test fails, it means that the feature
# is supported and the workarounds in `with_structured_output` should be removed.
client = anthropic.Anthropic()
with pytest.raises(anthropic.BadRequestError):
_ = client.messages.create(
model="claude-sonnet-4-5-20250929",
max_tokens=5_000,
thinking={"type": "enabled", "budget_tokens": 2_000},
tool_choice={"type": "tool", "name": "get_weather"},
tools=[
{
"name": "get_weather",
"description": "Get the weather at a location.",
"input_schema": {
"type": "object",
"properties": {
"location": {"type": "string"},
},
"required": ["location"],
},
}
],
messages=[
{
"role": "user",
"content": "What's the weather in San Francisco?",
}
],
)
def test_effort_parameter() -> None:
"""Test that effort parameter can be passed without errors.
Only Opus 4.5 supports currently.
"""
llm = ChatAnthropic(
model="claude-opus-4-5-20251101",
effort="medium",
max_tokens=100,
)
result = llm.invoke("Say hello in one sentence")
# Verify we got a response
assert isinstance(result.content, str)
assert len(result.content) > 0
# Verify response metadata is present
assert "model_name" in result.response_metadata
assert result.usage_metadata is not None
assert result.usage_metadata["input_tokens"] > 0
assert result.usage_metadata["output_tokens"] > 0
def test_image_tool_calling() -> None:
"""Test tool calling with image inputs."""
class color_picker(BaseModel): # noqa: N801
"""Input your fav color and get a random fact about it."""
fav_color: str
human_content: list[dict] = [
{
"type": "text",
"text": "what's your favorite color in this image",
},
]
image_url = "https://raw.githubusercontent.com/langchain-ai/docs/4d11d08b6b0e210bd456943f7a22febbd168b543/src/images/agentic-rag-output.png"
image_data = b64encode(httpx.get(image_url, timeout=10.0).content).decode("utf-8")
human_content.append(
{
"type": "image",
"source": {
"type": "base64",
"media_type": "image/png",
"data": image_data,
},
},
)
messages = [
SystemMessage("you're a good assistant"),
HumanMessage(human_content), # type: ignore[arg-type]
AIMessage(
[
{"type": "text", "text": "Hmm let me think about that"},
{
"type": "tool_use",
"input": {"fav_color": "purple"},
"id": "foo",
"name": "color_picker",
},
],
),
HumanMessage(
[
{
"type": "tool_result",
"tool_use_id": "foo",
"content": [
{
"type": "text",
"text": "purple is a great pick! that's my sister's favorite color", # noqa: E501
},
],
"is_error": False,
},
{"type": "text", "text": "what's my sister's favorite color"},
],
),
]
llm = ChatAnthropic(model=MODEL_NAME) # type: ignore[call-arg]
_ = llm.bind_tools([color_picker]).invoke(messages)
@pytest.mark.default_cassette("test_web_search.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["v0", "v1"])
def test_web_search(output_version: Literal["v0", "v1"]) -> None:
llm = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
max_tokens=1024,
output_version=output_version,
)
tool = {"type": "web_search_20250305", "name": "web_search", "max_uses": 1}
llm_with_tools = llm.bind_tools([tool])
input_message = {
"role": "user",
"content": [
{
"type": "text",
"text": "How do I update a web app to TypeScript 5.5?",
},
],
}
response = llm_with_tools.invoke([input_message])
assert all(isinstance(block, dict) for block in response.content)
block_types = {block["type"] for block in response.content} # type: ignore[index]
if output_version == "v0":
assert block_types == {"text", "server_tool_use", "web_search_tool_result"}
else:
assert block_types == {"text", "server_tool_call", "server_tool_result"}
# Test streaming
full: BaseMessageChunk | None = None
for chunk in llm_with_tools.stream([input_message]):
assert isinstance(chunk, AIMessageChunk)
full = chunk if full is None else full + chunk
assert isinstance(full, AIMessageChunk)
assert isinstance(full.content, list)
block_types = {block["type"] for block in full.content} # type: ignore[index]
if output_version == "v0":
assert block_types == {"text", "server_tool_use", "web_search_tool_result"}
else:
assert block_types == {"text", "server_tool_call", "server_tool_result"}
# Test we can pass back in
next_message = {
"role": "user",
"content": "Please repeat the last search, but focus on sources from 2024.",
}
_ = llm_with_tools.invoke(
[input_message, full, next_message],
)
@pytest.mark.vcr
def test_web_fetch() -> None:
"""Note: this is a beta feature.
TODO: Update to remove beta once it's generally available.
"""
llm = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
max_tokens=1024,
betas=["web-fetch-2025-09-10"],
)
tool = {"type": "web_fetch_20250910", "name": "web_fetch", "max_uses": 1}
llm_with_tools = llm.bind_tools([tool])
input_message = {
"role": "user",
"content": [
{
"type": "text",
"text": "Fetch the content at https://docs.langchain.com and analyze",
},
],
}
response = llm_with_tools.invoke([input_message])
assert all(isinstance(block, dict) for block in response.content)
block_types = {
block["type"] for block in response.content if isinstance(block, dict)
}
# A successful fetch call should include:
# 1. text response from the model (e.g. "I'll fetch that for you")
# 2. server_tool_use block indicating the tool was called (using tool "web_fetch")
# 3. web_fetch_tool_result block with the results of said fetch
assert block_types == {"text", "server_tool_use", "web_fetch_tool_result"}
# Verify web fetch result structure
web_fetch_results = [
block
for block in response.content
if isinstance(block, dict) and block.get("type") == "web_fetch_tool_result"
]
assert len(web_fetch_results) == 1 # Since max_uses=1
fetch_result = web_fetch_results[0]
assert "content" in fetch_result
assert "url" in fetch_result["content"]
assert "retrieved_at" in fetch_result["content"]
# Fetch with citations enabled
tool_with_citations = tool.copy()
tool_with_citations["citations"] = {"enabled": True}
llm_with_citations = llm.bind_tools([tool_with_citations])
citation_message = {
"role": "user",
"content": (
"Fetch https://docs.langchain.com and provide specific quotes with "
"citations"
),
}
citation_response = llm_with_citations.invoke([citation_message])
citation_results = [
block
for block in citation_response.content
if isinstance(block, dict) and block.get("type") == "web_fetch_tool_result"
]
assert len(citation_results) == 1 # Since max_uses=1
citation_result = citation_results[0]
assert citation_result["content"]["content"]["citations"]["enabled"]
text_blocks = [
block
for block in citation_response.content
if isinstance(block, dict) and block.get("type") == "text"
]
# Check that the response contains actual citations in the content
has_citations = False
for block in text_blocks:
citations = block.get("citations", [])
for citation in citations:
if citation.get("type") and citation.get("start_char_index"):
has_citations = True
break
assert has_citations, (
"Expected inline citation tags in response when citations are enabled for "
"web fetch"
)
# Max content tokens param
tool_with_limit = tool.copy()
tool_with_limit["max_content_tokens"] = 1000
llm_with_limit = llm.bind_tools([tool_with_limit])
limit_response = llm_with_limit.invoke([input_message])
# Response should still work even with content limits
assert any(
block["type"] == "web_fetch_tool_result"
for block in limit_response.content
if isinstance(block, dict)
)
# Domains filtering (note: only one can be set at a time)
tool_with_allowed_domains = tool.copy()
tool_with_allowed_domains["allowed_domains"] = ["docs.langchain.com"]
llm_with_allowed = llm.bind_tools([tool_with_allowed_domains])
allowed_response = llm_with_allowed.invoke([input_message])
assert any(
block["type"] == "web_fetch_tool_result"
for block in allowed_response.content
if isinstance(block, dict)
)
# Test that a disallowed domain doesn't work
tool_with_disallowed_domains = tool.copy()
tool_with_disallowed_domains["allowed_domains"] = [
"example.com"
] # Not docs.langchain.com
llm_with_disallowed = llm.bind_tools([tool_with_disallowed_domains])
disallowed_response = llm_with_disallowed.invoke([input_message])
# We should get an error result since the domain (docs.langchain.com) is not allowed
disallowed_results = [
block
for block in disallowed_response.content
if isinstance(block, dict) and block.get("type") == "web_fetch_tool_result"
]
if disallowed_results:
disallowed_result = disallowed_results[0]
if disallowed_result.get("content", {}).get("type") == "web_fetch_tool_error":
assert disallowed_result["content"]["error_code"] in [
"invalid_url",
"fetch_failed",
]
# Blocked domains filtering
tool_with_blocked_domains = tool.copy()
tool_with_blocked_domains["blocked_domains"] = ["example.com"]
llm_with_blocked = llm.bind_tools([tool_with_blocked_domains])
blocked_response = llm_with_blocked.invoke([input_message])
assert any(
block["type"] == "web_fetch_tool_result"
for block in blocked_response.content
if isinstance(block, dict)
)
# Test fetching from a blocked domain fails
blocked_domain_message = {
"role": "user",
"content": "Fetch https://example.com and analyze",
}
tool_with_blocked_example = tool.copy()
tool_with_blocked_example["blocked_domains"] = ["example.com"]
llm_with_blocked_example = llm.bind_tools([tool_with_blocked_example])
blocked_domain_response = llm_with_blocked_example.invoke([blocked_domain_message])
# Should get an error when trying to access a blocked domain
blocked_domain_results = [
block
for block in blocked_domain_response.content
if isinstance(block, dict) and block.get("type") == "web_fetch_tool_result"
]
if blocked_domain_results:
blocked_result = blocked_domain_results[0]
if blocked_result.get("content", {}).get("type") == "web_fetch_tool_error":
assert blocked_result["content"]["error_code"] in [
"invalid_url",
"fetch_failed",
]
# Max uses parameter - test exceeding the limit
multi_fetch_message = {
"role": "user",
"content": (
"Fetch https://docs.langchain.com and then try to fetch "
"https://langchain.com"
),
}
max_uses_response = llm_with_tools.invoke([multi_fetch_message])
# Should contain at least one fetch result and potentially an error for the second
fetch_results = [
block
for block in max_uses_response.content
if isinstance(block, dict) and block.get("type") == "web_fetch_tool_result"
] # type: ignore[index]
assert len(fetch_results) >= 1
error_results = [
r
for r in fetch_results
if r.get("content", {}).get("type") == "web_fetch_tool_error"
]
if error_results:
assert any(
r["content"]["error_code"] == "max_uses_exceeded" for r in error_results
)
# Streaming
full: BaseMessageChunk | None = None
for chunk in llm_with_tools.stream([input_message]):
assert isinstance(chunk, AIMessageChunk)
full = chunk if full is None else full + chunk
assert isinstance(full, AIMessageChunk)
assert isinstance(full.content, list)
block_types = {block["type"] for block in full.content if isinstance(block, dict)}
assert block_types == {"text", "server_tool_use", "web_fetch_tool_result"}
# Test that URLs from context can be used in follow-up
next_message = {
"role": "user",
"content": "What does the site you just fetched say about models?",
}
follow_up_response = llm_with_tools.invoke(
[input_message, full, next_message],
)
# Should work without issues since URL was already in context
assert isinstance(follow_up_response.content, (list, str))
# Error handling - test with an invalid URL format
error_message = {
"role": "user",
"content": "Try to fetch this invalid URL: not-a-valid-url",
}
error_response = llm_with_tools.invoke([error_message])
# Should handle the error gracefully
assert isinstance(error_response.content, (list, str))
# PDF document fetching
pdf_message = {
"role": "user",
"content": (
"Fetch this PDF: "
"https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf "
"and summarize its content",
),
}
pdf_response = llm_with_tools.invoke([pdf_message])
assert any(
block["type"] == "web_fetch_tool_result"
for block in pdf_response.content
if isinstance(block, dict)
)
# Verify PDF content structure (should have base64 data for PDFs)
pdf_results = [
block
for block in pdf_response.content
if isinstance(block, dict) and block.get("type") == "web_fetch_tool_result"
]
if pdf_results:
pdf_result = pdf_results[0]
content = pdf_result.get("content", {})
if content.get("content", {}).get("source", {}).get("type") == "base64":
assert content["content"]["source"]["media_type"] == "application/pdf"
assert "data" in content["content"]["source"]
@pytest.mark.default_cassette("test_web_fetch_v1.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["v0", "v1"])
def test_web_fetch_v1(output_version: Literal["v0", "v1"]) -> None:
"""Test that http calls are unchanged between v0 and v1."""
llm = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
betas=["web-fetch-2025-09-10"],
output_version=output_version,
)
if output_version == "v0":
call_key = "server_tool_use"
result_key = "web_fetch_tool_result"
else:
# v1
call_key = "server_tool_call"
result_key = "server_tool_result"
tool = {
"type": "web_fetch_20250910",
"name": "web_fetch",
"max_uses": 1,
"citations": {"enabled": True},
}
llm_with_tools = llm.bind_tools([tool])
input_message = {
"role": "user",
"content": [
{
"type": "text",
"text": "Fetch the content at https://docs.langchain.com and analyze",
},
],
}
response = llm_with_tools.invoke([input_message])
assert all(isinstance(block, dict) for block in response.content)
block_types = {block["type"] for block in response.content} # type: ignore[index]
assert block_types == {"text", call_key, result_key}
# Test streaming
full: BaseMessageChunk | None = None
for chunk in llm_with_tools.stream([input_message]):
assert isinstance(chunk, AIMessageChunk)
full = chunk if full is None else full + chunk
assert isinstance(full, AIMessageChunk)
assert isinstance(full.content, list)
block_types = {block["type"] for block in full.content} # type: ignore[index]
assert block_types == {"text", call_key, result_key}
# Test we can pass back in
next_message = {
"role": "user",
"content": "What does the site you just fetched say about models?",
}
_ = llm_with_tools.invoke(
[input_message, full, next_message],
)
@pytest.mark.default_cassette("test_code_execution_old.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["v0", "v1"])
def test_code_execution_old(output_version: Literal["v0", "v1"]) -> None:
"""Note: this tests the `code_execution_20250522` tool, which is now legacy.
See the `test_code_execution` test below to test the current
`code_execution_20250825` tool.
Migration guide: https://platform.claude.com/docs/en/agents-and-tools/tool-use/code-execution-tool#upgrade-to-latest-tool-version
"""
llm = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
betas=["code-execution-2025-05-22"],
output_version=output_version,
)
tool = {"type": "code_execution_20250522", "name": "code_execution"}
llm_with_tools = llm.bind_tools([tool])
input_message = {
"role": "user",
"content": [
{
"type": "text",
"text": (
"Calculate the mean and standard deviation of "
"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
),
},
],
}
response = llm_with_tools.invoke([input_message])
assert all(isinstance(block, dict) for block in response.content)
block_types = {block["type"] for block in response.content} # type: ignore[index]
if output_version == "v0":
assert block_types == {"text", "server_tool_use", "code_execution_tool_result"}
else:
assert block_types == {"text", "server_tool_call", "server_tool_result"}
# Test streaming
full: BaseMessageChunk | None = None
for chunk in llm_with_tools.stream([input_message]):
assert isinstance(chunk, AIMessageChunk)
full = chunk if full is None else full + chunk
assert isinstance(full, AIMessageChunk)
assert isinstance(full.content, list)
block_types = {block["type"] for block in full.content} # type: ignore[index]
if output_version == "v0":
assert block_types == {"text", "server_tool_use", "code_execution_tool_result"}
else:
assert block_types == {"text", "server_tool_call", "server_tool_result"}
# Test we can pass back in
next_message = {
"role": "user",
"content": "Please add more comments to the code.",
}
_ = llm_with_tools.invoke(
[input_message, full, next_message],
)
@pytest.mark.default_cassette("test_code_execution.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["v0", "v1"])
def test_code_execution(output_version: Literal["v0", "v1"]) -> None:
"""Note: this is a beta feature.
TODO: Update to remove beta once generally available.
"""
llm = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
betas=["code-execution-2025-08-25"],
output_version=output_version,
)
tool = {"type": "code_execution_20250825", "name": "code_execution"}
llm_with_tools = llm.bind_tools([tool])
input_message = {
"role": "user",
"content": [
{
"type": "text",
"text": (
"Calculate the mean and standard deviation of "
"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
),
},
],
}
response = llm_with_tools.invoke([input_message])
assert all(isinstance(block, dict) for block in response.content)
block_types = {block["type"] for block in response.content} # type: ignore[index]
if output_version == "v0":
assert block_types == {
"text",
"server_tool_use",
"bash_code_execution_tool_result",
}
else:
assert block_types == {"text", "server_tool_call", "server_tool_result"}
# Test streaming
full: BaseMessageChunk | None = None
for chunk in llm_with_tools.stream([input_message]):
assert isinstance(chunk, AIMessageChunk)
full = chunk if full is None else full + chunk
assert isinstance(full, AIMessageChunk)
assert isinstance(full.content, list)
block_types = {block["type"] for block in full.content} # type: ignore[index]
if output_version == "v0":
assert block_types == {
"text",
"server_tool_use",
"bash_code_execution_tool_result",
}
else:
assert block_types == {"text", "server_tool_call", "server_tool_result"}
# Test we can pass back in
next_message = {
"role": "user",
"content": "Please add more comments to the code.",
}
_ = llm_with_tools.invoke(
[input_message, full, next_message],
)
@pytest.mark.default_cassette("test_remote_mcp.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["v0", "v1"])
def test_remote_mcp(output_version: Literal["v0", "v1"]) -> None:
"""Note: this is a beta feature.
TODO: Update to remove beta once generally available.
"""
mcp_servers = [
{
"type": "url",
"url": "https://mcp.deepwiki.com/mcp",
"name": "deepwiki",
"authorization_token": "PLACEHOLDER",
},
]
llm = ChatAnthropic(
model="claude-sonnet-4-5-20250929", # type: ignore[call-arg]
mcp_servers=mcp_servers,
output_version=output_version,
).bind_tools([{"type": "mcp_toolset", "mcp_server_name": "deepwiki"}])
input_message = {
"role": "user",
"content": [
{
"type": "text",
"text": (
"What transport protocols does the 2025-03-26 version of the MCP "
"spec (modelcontextprotocol/modelcontextprotocol) support?"
),
},
],
}
response = llm.invoke([input_message])
assert all(isinstance(block, dict) for block in response.content)
block_types = {block["type"] for block in response.content} # type: ignore[index]
if output_version == "v0":
assert block_types == {"text", "mcp_tool_use", "mcp_tool_result"}
else:
assert block_types == {"text", "server_tool_call", "server_tool_result"}
# Test streaming
full: BaseMessageChunk | None = None
for chunk in llm.stream([input_message]):
assert isinstance(chunk, AIMessageChunk)
full = chunk if full is None else full + chunk
assert isinstance(full, AIMessageChunk)
assert isinstance(full.content, list)
assert all(isinstance(block, dict) for block in full.content)
block_types = {block["type"] for block in full.content} # type: ignore[index]
if output_version == "v0":
assert block_types == {"text", "mcp_tool_use", "mcp_tool_result"}
else:
assert block_types == {"text", "server_tool_call", "server_tool_result"}
# Test we can pass back in
next_message = {
"role": "user",
"content": "Please query the same tool again, but add 'please' to your query.",
}
_ = llm.invoke(
[input_message, full, next_message],
)
@pytest.mark.parametrize("block_format", ["anthropic", "standard"])
def test_files_api_image(block_format: str) -> None:
"""Note: this is a beta feature.
TODO: Update to remove beta once generally available.
"""
image_file_id = os.getenv("ANTHROPIC_FILES_API_IMAGE_ID")
if not image_file_id:
pytest.skip()
llm = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
betas=["files-api-2025-04-14"],
)
if block_format == "anthropic":
block = {
"type": "image",
"source": {
"type": "file",
"file_id": image_file_id,
},
}
else:
# standard block format
block = {
"type": "image",
"file_id": image_file_id,
}
input_message = {
"role": "user",
"content": [
{"type": "text", "text": "Describe this image."},
block,
],
}
_ = llm.invoke([input_message])
@pytest.mark.parametrize("block_format", ["anthropic", "standard"])
def test_files_api_pdf(block_format: str) -> None:
"""Note: this is a beta feature.
TODO: Update to remove beta once generally available.
"""
pdf_file_id = os.getenv("ANTHROPIC_FILES_API_PDF_ID")
if not pdf_file_id:
pytest.skip()
llm = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
betas=["files-api-2025-04-14"],
)
if block_format == "anthropic":
block = {"type": "document", "source": {"type": "file", "file_id": pdf_file_id}}
else:
# standard block format
block = {
"type": "file",
"file_id": pdf_file_id,
}
input_message = {
"role": "user",
"content": [
{"type": "text", "text": "Describe this document."},
block,
],
}
_ = llm.invoke([input_message])
@pytest.mark.vcr
def test_search_result_tool_message() -> None:
"""Test that we can pass a search result tool message to the model."""
llm = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
)
@tool
def retrieval_tool(query: str) -> list[dict]:
"""Retrieve information from a knowledge base."""
return [
{
"type": "search_result",
"title": "Leave policy",
"source": "HR Leave Policy 2025",
"citations": {"enabled": True},
"content": [
{
"type": "text",
"text": (
"To request vacation days, submit a leave request form "
"through the HR portal. Approval will be sent by email."
),
},
],
},
]
tool_call = {
"type": "tool_call",
"name": "retrieval_tool",
"args": {"query": "vacation days request process"},
"id": "toolu_abc123",
}
tool_message = retrieval_tool.invoke(tool_call)
assert isinstance(tool_message, ToolMessage)
assert isinstance(tool_message.content, list)
messages = [
HumanMessage("How do I request vacation days?"),
AIMessage(
[{"type": "text", "text": "Let me look that up for you."}],
tool_calls=[tool_call],
),
tool_message,
]
result = llm.invoke(messages)
assert isinstance(result, AIMessage)
assert isinstance(result.content, list)
assert any("citations" in block for block in result.content)
assert (
_convert_from_v1_to_anthropic(result.content_blocks, [], "anthropic")
== result.content
)
def test_search_result_top_level() -> None:
llm = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
)
input_message = HumanMessage(
[
{
"type": "search_result",
"title": "Leave policy",
"source": "HR Leave Policy 2025 - page 1",
"citations": {"enabled": True},
"content": [
{
"type": "text",
"text": (
"To request vacation days, submit a leave request form "
"through the HR portal. Approval will be sent by email."
),
},
],
},
{
"type": "search_result",
"title": "Leave policy",
"source": "HR Leave Policy 2025 - page 2",
"citations": {"enabled": True},
"content": [
{
"type": "text",
"text": "Managers have 3 days to approve a request.",
},
],
},
{
"type": "text",
"text": "How do I request vacation days?",
},
],
)
result = llm.invoke([input_message])
assert isinstance(result, AIMessage)
assert isinstance(result.content, list)
assert any("citations" in block for block in result.content)
assert (
_convert_from_v1_to_anthropic(result.content_blocks, [], "anthropic")
== result.content
)
def test_memory_tool() -> None:
llm = ChatAnthropic(
model="claude-sonnet-4-5-20250929", # type: ignore[call-arg]
betas=["context-management-2025-06-27"],
)
llm_with_tools = llm.bind_tools([{"type": "memory_20250818", "name": "memory"}])
response = llm_with_tools.invoke("What are my interests?")
assert isinstance(response, AIMessage)
assert response.tool_calls
assert response.tool_calls[0]["name"] == "memory"
@pytest.mark.vcr
def test_context_management() -> None:
# TODO: update example to trigger action
llm = ChatAnthropic(
model="claude-sonnet-4-5-20250929", # type: ignore[call-arg]
betas=["context-management-2025-06-27"],
context_management={
"edits": [
{
"type": "clear_tool_uses_20250919",
"trigger": {"type": "input_tokens", "value": 10},
"clear_at_least": {"type": "input_tokens", "value": 5},
}
]
},
max_tokens=1024, # type: ignore[call-arg]
)
llm_with_tools = llm.bind_tools(
[{"type": "web_search_20250305", "name": "web_search"}]
)
input_message = {"role": "user", "content": "Search for recent developments in AI"}
response = llm_with_tools.invoke([input_message])
assert response.response_metadata.get("context_management")
# Test streaming
full: BaseMessageChunk | None = None
for chunk in llm_with_tools.stream([input_message]):
assert isinstance(chunk, AIMessageChunk)
full = chunk if full is None else full + chunk
assert isinstance(full, AIMessageChunk)
assert full.response_metadata.get("context_management")
@pytest.mark.default_cassette("test_tool_search.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["v0", "v1"])
def test_tool_search(output_version: str) -> None:
"""Test tool search with LangChain tools using extras parameter."""
@tool(parse_docstring=True, extras={"defer_loading": True})
def get_weather(location: str, unit: str = "fahrenheit") -> str:
"""Get the current weather for a location.
Args:
location: City name
unit: Temperature unit (celsius or fahrenheit)
"""
return f"The weather in {location} is sunny and 72°{unit[0].upper()}"
@tool(parse_docstring=True, extras={"defer_loading": True})
def search_files(query: str) -> str:
"""Search through files in the workspace.
Args:
query: Search query
"""
return f"Found 3 files matching '{query}'"
model = ChatAnthropic(
model="claude-opus-4-5-20251101", output_version=output_version
)
agent = create_agent( # type: ignore[var-annotated]
model,
tools=[
{
"type": "tool_search_tool_regex_20251119",
"name": "tool_search_tool_regex",
},
get_weather,
search_files,
],
)
# Test with actual API call
input_message = {
"role": "user",
"content": "What's the weather in San Francisco? Find and use a tool.",
}
result = agent.invoke({"messages": [input_message]})
first_response = result["messages"][1]
content_types = [block["type"] for block in first_response.content]
if output_version == "v0":
assert content_types == [
"text",
"server_tool_use",
"tool_search_tool_result",
"text",
"tool_use",
]
else:
# v1
assert content_types == [
"text",
"server_tool_call",
"server_tool_result",
"text",
"tool_call",
]
answer = result["messages"][-1]
assert not answer.tool_calls
assert answer.text
@pytest.mark.default_cassette("test_programmatic_tool_use.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["v0", "v1"])
def test_programmatic_tool_use(output_version: str) -> None:
"""Test programmatic tool use.
Implicitly checks that `allowed_callers` in tool extras works.
"""
@tool(extras={"allowed_callers": ["code_execution_20250825"]})
def get_weather(location: str) -> str:
"""Get the weather at a location."""
return "It's sunny."
tools: list = [
{"type": "code_execution_20250825", "name": "code_execution"},
get_weather,
]
model = ChatAnthropic(
model="claude-sonnet-4-5",
betas=["advanced-tool-use-2025-11-20"],
reuse_last_container=True,
output_version=output_version,
)
agent = create_agent(model, tools=tools) # type: ignore[var-annotated]
input_query = {
"role": "user",
"content": "What's the weather in Boston?",
}
result = agent.invoke({"messages": [input_query]})
assert len(result["messages"]) == 4
tool_call_message = result["messages"][1]
response_message = result["messages"][-1]
if output_version == "v0":
server_tool_use_block = next(
block
for block in tool_call_message.content
if block["type"] == "server_tool_use"
)
assert server_tool_use_block
tool_use_block = next(
block for block in tool_call_message.content if block["type"] == "tool_use"
)
assert "caller" in tool_use_block
code_execution_result = next(
block
for block in response_message.content
if block["type"] == "code_execution_tool_result"
)
assert code_execution_result["content"]["return_code"] == 0
else:
server_tool_call_block = next(
block
for block in tool_call_message.content
if block["type"] == "server_tool_call"
)
assert server_tool_call_block
tool_call_block = next(
block for block in tool_call_message.content if block["type"] == "tool_call"
)
assert "caller" in tool_call_block["extras"]
server_tool_result = next(
block
for block in response_message.content
if block["type"] == "server_tool_result"
)
assert server_tool_result["output"]["return_code"] == 0
@pytest.mark.default_cassette("test_programmatic_tool_use_streaming.yaml.gz")
@pytest.mark.vcr
@pytest.mark.parametrize("output_version", ["v0", "v1"])
def test_programmatic_tool_use_streaming(output_version: str) -> None:
@tool(extras={"allowed_callers": ["code_execution_20250825"]})
def get_weather(location: str) -> str:
"""Get the weather at a location."""
return "It's sunny."
tools: list = [
{"type": "code_execution_20250825", "name": "code_execution"},
get_weather,
]
model = ChatAnthropic(
model="claude-sonnet-4-5",
betas=["advanced-tool-use-2025-11-20"],
reuse_last_container=True,
streaming=True,
output_version=output_version,
)
agent = create_agent(model, tools=tools) # type: ignore[var-annotated]
input_query = {
"role": "user",
"content": "What's the weather in Boston?",
}
result = agent.invoke({"messages": [input_query]})
assert len(result["messages"]) == 4
tool_call_message = result["messages"][1]
response_message = result["messages"][-1]
if output_version == "v0":
server_tool_use_block = next(
block
for block in tool_call_message.content
if block["type"] == "server_tool_use"
)
assert server_tool_use_block
tool_use_block = next(
block for block in tool_call_message.content if block["type"] == "tool_use"
)
assert "caller" in tool_use_block
code_execution_result = next(
block
for block in response_message.content
if block["type"] == "code_execution_tool_result"
)
assert code_execution_result["content"]["return_code"] == 0
else:
server_tool_call_block = next(
block
for block in tool_call_message.content
if block["type"] == "server_tool_call"
)
assert server_tool_call_block
tool_call_block = next(
block for block in tool_call_message.content if block["type"] == "tool_call"
)
assert "caller" in tool_call_block["extras"]
server_tool_result = next(
block
for block in response_message.content
if block["type"] == "server_tool_result"
)
assert server_tool_result["output"]["return_code"] == 0
def test_async_shared_client() -> None:
llm = ChatAnthropic(model=MODEL_NAME) # type: ignore[call-arg]
_ = asyncio.run(llm.ainvoke("Hello"))
_ = asyncio.run(llm.ainvoke("Hello"))
def test_fine_grained_tool_streaming() -> None:
"""Test fine-grained tool streaming reduces latency for tool parameter streaming.
Fine-grained tool streaming enables Claude to stream tool parameter values.
https://platform.claude.com/docs/en/agents-and-tools/tool-use/fine-grained-tool-streaming
"""
llm = ChatAnthropic(
model=MODEL_NAME, # type: ignore[call-arg]
temperature=0,
betas=["fine-grained-tool-streaming-2025-05-14"],
)
# Define a tool that requires a longer text parameter
tool_definition = {
"name": "write_document",
"description": "Write a document with the given content",
"input_schema": {
"type": "object",
"properties": {
"title": {"type": "string", "description": "Document title"},
"content": {
"type": "string",
"description": "The full document content",
},
},
"required": ["title", "content"],
},
}
llm_with_tools = llm.bind_tools([tool_definition])
query = (
"Write a document about the benefits of streaming APIs. "
"Include at least 3 paragraphs."
)
# Test streaming with fine-grained tool streaming
first = True
chunks: list[BaseMessage | BaseMessageChunk] = []
tool_call_chunks = []
for chunk in llm_with_tools.stream(query):
chunks.append(chunk)
if first:
gathered = chunk
first = False
else:
gathered = gathered + chunk # type: ignore[assignment]
# Collect tool call chunks
tool_call_chunks.extend(
[
block
for block in chunk.content_blocks
if block["type"] == "tool_call_chunk"
]
)
# Verify we got chunks
assert len(chunks) > 1
# Verify final message has tool call
assert isinstance(gathered, AIMessageChunk)
assert isinstance(gathered.tool_calls, list)
assert len(gathered.tool_calls) >= 1
# Find the write_document tool call
write_doc_call = None
for tool_call in gathered.tool_calls:
if tool_call["name"] == "write_document":
write_doc_call = tool_call
break
assert write_doc_call is not None, "write_document tool call not found"
assert isinstance(write_doc_call["args"], dict)
assert "title" in write_doc_call["args"]
assert "content" in write_doc_call["args"]
assert (
len(write_doc_call["args"]["content"]) > 100
) # Should have substantial content
# Verify tool_call_chunks were received
# With fine-grained streaming, we should get tool call chunks
assert len(tool_call_chunks) > 0
# Verify content_blocks in final message
content_blocks = gathered.content_blocks
assert len(content_blocks) >= 1
# Should have at least one tool_call block
tool_call_blocks = [b for b in content_blocks if b["type"] == "tool_call"]
assert len(tool_call_blocks) >= 1
write_doc_block = None
for block in tool_call_blocks:
if block["name"] == "write_document":
write_doc_block = block
break
assert write_doc_block is not None
assert write_doc_block["name"] == "write_document"
assert "args" in write_doc_block
@pytest.mark.vcr
def test_compaction() -> None:
"""Test the compaction beta feature."""
llm = ChatAnthropic(
model="claude-opus-4-6", # type: ignore[call-arg]
betas=["compact-2026-01-12"],
max_tokens=4096,
context_management={
"edits": [
{
"type": "compact_20260112",
"trigger": {"type": "input_tokens", "value": 50000},
"pause_after_compaction": True,
}
]
},
)
input_message = {
"role": "user",
"content": f"Generate a one-sentence summary of this:\n\n{'a' * 100000}",
}
messages: list = [input_message]
first_response = llm.invoke(messages)
messages.append(first_response)
second_message = {
"role": "user",
"content": f"Generate a one-sentence summary of this:\n\n{'b' * 100000}",
}
messages.append(second_message)
second_response = llm.invoke(messages)
messages.append(second_response)
content_blocks = second_response.content_blocks
compaction_block = next(
(block for block in content_blocks if block["type"] == "non_standard"),
None,
)
assert compaction_block
assert compaction_block["value"].get("type") == "compaction"
third_message = {
"role": "user",
"content": "What are we talking about?",
}
messages.append(third_message)
third_response = llm.invoke(messages)
content_blocks = third_response.content_blocks
assert [block["type"] for block in content_blocks] == ["text"]
@pytest.mark.vcr
def test_compaction_streaming() -> None:
"""Test the compaction beta feature."""
llm = ChatAnthropic(
model="claude-opus-4-6", # type: ignore[call-arg]
betas=["compact-2026-01-12"],
max_tokens=4096,
context_management={
"edits": [
{
"type": "compact_20260112",
"trigger": {"type": "input_tokens", "value": 50000},
"pause_after_compaction": False,
}
]
},
streaming=True,
)
input_message = {
"role": "user",
"content": f"Generate a one-sentence summary of this:\n\n{'a' * 100000}",
}
messages: list = [input_message]
first_response = llm.invoke(messages)
messages.append(first_response)
second_message = {
"role": "user",
"content": f"Generate a one-sentence summary of this:\n\n{'b' * 100000}",
}
messages.append(second_message)
second_response = llm.invoke(messages)
messages.append(second_response)
content_blocks = second_response.content_blocks
compaction_block = next(
(block for block in content_blocks if block["type"] == "non_standard"),
None,
)
assert compaction_block
assert compaction_block["value"].get("type") == "compaction"
third_message = {
"role": "user",
"content": "What are we talking about?",
}
messages.append(third_message)
third_response = llm.invoke(messages)
content_blocks = third_response.content_blocks
assert [block["type"] for block in content_blocks] == ["text"]
class _Person(BaseModel):
"""A person with a name and age."""
name: str = Field(description="The person's name")
age: int = Field(description="The person's age in years")
def _stable_blocks(blocks: Any) -> list[dict[str, Any]]:
"""Drop fields that vary between API calls so blocks can be compared.
Tool-call ids, wire indices, and provider extras are not path- or call-
stable; strip them so the comparison targets the semantic content.
"""
volatile = {"id", "index", "extras"}
return [{k: v for k, v in b.items() if k not in volatile} for b in blocks]
@pytest.mark.default_cassette("test_streaming_tool_call_v1_v2_parity.yaml.gz")
@pytest.mark.vcr
def test_streaming_tool_call_v1_v2_parity() -> None:
"""`AIMessage` parity between `stream()` reduction and `stream_v2().output`.
Runs the same forced-tool-call prompt through both the legacy chunk
stream (reduced with `AIMessageChunk.__add__`) and the `stream_v2`
bridge path on a `v1`-output `ChatAnthropic`, then compares the
resulting messages on path-independent invariants:
- tool call name and args (ids vary between calls and are ignored)
- exactly one tool call, no invalid tool calls
- `content_blocks` (the v1 projection, stripped of volatile fields)
- a valid tool-use `finish_reason`
The v2 path is additionally validated against the full protocol
lifecycle via `assert_valid_event_stream`.
"""
llm = ChatAnthropic(
model=MODEL_NAME,
output_version="v1", # type: ignore[call-arg]
)
with_tool = llm.bind_tools(
[_Person],
tool_choice={"type": "tool", "name": "_Person"},
)
prompt = "Extract: Erick is 27 years old."
v1_full: AIMessageChunk | None = None
for chunk in with_tool.stream(prompt):
assert isinstance(chunk, AIMessageChunk)
v1_full = chunk if v1_full is None else v1_full + chunk
assert isinstance(v1_full, AIMessageChunk)
stream = with_tool.stream_v2(prompt)
events = list(stream)
assert_valid_event_stream(events)
v2_message = stream.output
assert isinstance(v2_message, AIMessage)
assert len(v1_full.tool_calls) == len(v2_message.tool_calls) == 1
assert not v1_full.invalid_tool_calls
assert not v2_message.invalid_tool_calls
v1_tc = v1_full.tool_calls[0]
v2_tc = v2_message.tool_calls[0]
assert v1_tc["name"] == v2_tc["name"] == "_Person"
assert v1_tc["args"] == v2_tc["args"] == {"name": "Erick", "age": 27}
v1_blocks = _stable_blocks(v1_full.content_blocks)
v2_blocks = _stable_blocks(v2_message.content_blocks)
assert v1_blocks == v2_blocks
assert v1_blocks == [
{
"type": "tool_call",
"name": "_Person",
"args": {"name": "Erick", "age": 27},
}
]
# The compat bridge passes the provider's raw terminal reason through
# unchanged — Anthropic surfaces it under `stop_reason` on both paths.
# Accept either key on both sides rather than asserting a specific
# normalization that the bridge does not perform.
v1_finish = v1_full.response_metadata.get(
"finish_reason"
) or v1_full.response_metadata.get("stop_reason")
v2_finish = v2_message.response_metadata.get(
"finish_reason"
) or v2_message.response_metadata.get("stop_reason")
assert v1_finish is not None
assert v2_finish is not None
assert any(k in v1_finish for k in ("tool_use", "tool_calls", "stop"))
assert any(k in v2_finish for k in ("tool_use", "tool_calls", "stop"))