# langchain/libs/partners/anthropic/tests/unit_tests/test_chat_models.py

"""Test chat model integration."""

from __future__ import annotations

import copy
import os
import warnings
from collections.abc import Callable
from typing import Any, Literal, cast
from unittest.mock import MagicMock, patch

import anthropic
import pytest
from anthropic.types import Message, TextBlock, Usage
from blockbuster import blockbuster_ctx
from langchain_core.exceptions import ContextOverflowError
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
from langchain_core.runnables import RunnableBinding
from langchain_core.tools import BaseTool, tool
from langchain_core.tracers.base import BaseTracer
from langchain_core.tracers.schemas import Run
from pydantic import BaseModel, Field, SecretStr, ValidationError
from pytest import CaptureFixture, MonkeyPatch

from langchain_anthropic import ChatAnthropic
from langchain_anthropic.chat_models import (
    _create_usage_metadata,
    _format_image,
    _format_messages,
    _is_builtin_tool,
    _merge_messages,
    _thinking_in_params,
    convert_to_anthropic_tool,
)

# Placeholder API key set in the environment; several tests below construct
# ChatAnthropic without passing a key explicitly.
os.environ["ANTHROPIC_API_KEY"] = "foo"

# Model identifier shared by the tests in this module.
MODEL_NAME = "claude-sonnet-4-5-20250929"


def test_initialization() -> None:
    """Check that two spellings of the constructor arguments set the same fields."""
    first = ChatAnthropic(model_name=MODEL_NAME, api_key="xyz", timeout=2)  # type: ignore[arg-type, call-arg]
    second = ChatAnthropic(  # type: ignore[call-arg, call-arg, call-arg]
        model=MODEL_NAME,
        anthropic_api_key="xyz",
        default_request_timeout=2,
        base_url="https://api.anthropic.com",
    )

    for chat in (first, second):
        assert chat.model == MODEL_NAME
        secret = cast("SecretStr", chat.anthropic_api_key)
        assert secret.get_secret_value() == "xyz"
        assert chat.default_request_timeout == 2.0
        assert chat.anthropic_api_url == "https://api.anthropic.com"


def test_user_agent_header_in_client_params() -> None:
    """``_client_params`` must carry a langchain-anthropic ``User-Agent`` header."""
    chat = ChatAnthropic(model=MODEL_NAME, api_key="test-key")  # type: ignore[arg-type]
    client_params = chat._client_params
    assert "default_headers" in client_params

    headers = client_params["default_headers"]
    assert "User-Agent" in headers
    assert headers["User-Agent"].startswith("langchain-anthropic/")


@pytest.mark.parametrize("async_api", [True, False])
def test_streaming_attribute_should_stream(async_api: bool) -> None:  # noqa: FBT001
    """With ``streaming=True``, ``_should_stream`` is truthy for sync and async."""
    streaming_llm = ChatAnthropic(model=MODEL_NAME, streaming=True)
    should_stream = streaming_llm._should_stream(async_api=async_api)
    assert should_stream


def test_anthropic_client_caching() -> None:
    """Test that the underlying Anthropic client is cached across instances."""
    reference = ChatAnthropic(model=MODEL_NAME)
    twin = ChatAnthropic(model=MODEL_NAME)

    # Identical settings: both instances expose the same inner client object.
    cached = reference._client._client
    assert cached is twin._client._client

    # A different base URL must yield a different inner client.
    other_url = ChatAnthropic(model=MODEL_NAME, base_url="foo")
    assert cached is not other_url._client._client

    # Passing timeout=None still shares the client built without a timeout.
    no_timeout = ChatAnthropic(model=MODEL_NAME, timeout=None)
    assert cached is no_timeout._client._client

    # An explicit timeout value must yield a different inner client.
    short_timeout = ChatAnthropic(model=MODEL_NAME, timeout=3)
    assert cached is not short_timeout._client._client


def test_anthropic_proxy_support() -> None:
    """Check that sync and async clients can be built with a proxy configured."""
    proxy = "http://proxy.example.com:8080"
    proxied = ChatAnthropic(model=MODEL_NAME, anthropic_proxy=proxy)

    # Sync client builds with the proxy set.
    assert proxied._client is not None

    # Async client builds too - this access should not raise TypeError.
    assert proxied._async_client is not None

    # A model without a proxy and one with a proxy must not end up sharing
    # the same cached inner client.
    direct = ChatAnthropic(model=MODEL_NAME)
    proxied_again = ChatAnthropic(model=MODEL_NAME, anthropic_proxy=proxy)
    assert direct._client._client is not proxied_again._client._client


def test_anthropic_proxy_from_environment() -> None:
    """The proxy is read from ``ANTHROPIC_PROXY`` unless one is passed explicitly."""
    env_proxy = "http://env-proxy.example.com:8080"

    # Case 1: only the environment variable is set.
    with patch.dict(os.environ, {"ANTHROPIC_PROXY": env_proxy}):
        from_env = ChatAnthropic(model=MODEL_NAME)
        assert from_env.anthropic_proxy == env_proxy

        # Both clients should be constructible with the env-provided proxy.
        clients = (from_env._client, from_env._async_client)
        assert clients[0] is not None
        assert clients[1] is not None

    # Case 2: an explicit argument takes precedence over the environment.
    explicit = "http://explicit-proxy.com"
    with patch.dict(os.environ, {"ANTHROPIC_PROXY": "http://env-proxy.com"}):
        overridden = ChatAnthropic(model=MODEL_NAME, anthropic_proxy=explicit)
        assert overridden.anthropic_proxy == explicit


def test_set_default_max_tokens() -> None:
    """Check the default ``max_tokens`` per model and that explicit values are kept."""
    # Model identifier -> max_tokens expected when none is passed.
    expected_defaults = {
        "claude-sonnet-4-5-20250929": 64000,
        "claude-opus-4-20250514": 32000,
        "claude-sonnet-4-20250514": 64000,
        "claude-3-7-sonnet-20250219": 64000,
        "claude-3-5-haiku-20241022": 8192,
        # claude-3-haiku is expected to fall back to 4096
        "claude-3-haiku-20240307": 4096,
    }
    for model_id, default_limit in expected_defaults.items():
        llm = ChatAnthropic(model=model_id, anthropic_api_key="test")
        assert llm.max_tokens == default_limit

    # Explicitly supplied max_tokens values must be preserved as given.
    for explicit_limit in (2048, 4096):
        llm = ChatAnthropic(
            model=MODEL_NAME,
            max_tokens=explicit_limit,
            anthropic_api_key="test",
        )
        assert llm.max_tokens == explicit_limit


@pytest.mark.requires("anthropic")
def test_anthropic_model_name_param() -> None:
    """Passing ``model_name`` populates the ``model`` attribute."""
    chat = ChatAnthropic(model_name=MODEL_NAME)  # type: ignore[call-arg, call-arg]
    configured_model = chat.model
    assert configured_model == MODEL_NAME


@pytest.mark.requires("anthropic")
def test_anthropic_model_param() -> None:
    """Passing ``model`` populates the ``model`` attribute."""
    chat = ChatAnthropic(model=MODEL_NAME)  # type: ignore[call-arg]
    configured_model = chat.model
    assert configured_model == MODEL_NAME


@pytest.mark.requires("anthropic")
def test_anthropic_model_kwargs() -> None:
    """A ``model_kwargs`` dict of unknown keys is stored unchanged."""
    chat = ChatAnthropic(model_name=MODEL_NAME, model_kwargs={"foo": "bar"})  # type: ignore[call-arg, call-arg]
    stored_kwargs = chat.model_kwargs
    assert stored_kwargs == {"foo": "bar"}


@pytest.mark.requires("anthropic")
def test_anthropic_fields_in_model_kwargs() -> None:
    """For backwards compatibility, fields may be supplied through model_kwargs."""
    # Both spellings are expected to end up in ``max_tokens``.
    for key in ("max_tokens_to_sample", "max_tokens"):
        chat = ChatAnthropic(model=MODEL_NAME, model_kwargs={key: 5})  # type: ignore[call-arg]
        assert chat.max_tokens == 5


@pytest.mark.requires("anthropic")
def test_anthropic_incorrect_field() -> None:
    """An unknown constructor argument warns and is kept in ``model_kwargs``."""
    with pytest.warns(match="not default parameter"):
        chat = ChatAnthropic(model=MODEL_NAME, foo="bar")  # type: ignore[call-arg, call-arg]

    extras = chat.model_kwargs
    assert extras == {"foo": "bar"}


@pytest.mark.requires("anthropic")
def test_anthropic_initialization() -> None:
    """Construct the chat model with the API key passed as an argument.

    The key is supplied as a parameter rather than through an environment
    variable; the test passes as long as construction does not raise.
    """
    ChatAnthropic(  # type: ignore[call-arg, call-arg]
        model=MODEL_NAME,
        anthropic_api_key="test",
    )


def test__format_output() -> None:
    """``_format_output`` turns an Anthropic ``Message`` into the expected AIMessage."""
    chat = ChatAnthropic(model=MODEL_NAME, anthropic_api_key="test")  # type: ignore[call-arg, call-arg]

    raw_message = Message(
        id="foo",
        content=[TextBlock(type="text", text="bar")],
        model="baz",
        role="assistant",
        stop_reason=None,
        stop_sequence=None,
        usage=Usage(input_tokens=2, output_tokens=1),
        type="message",
    )
    chat_result = chat._format_output(raw_message)

    # total_tokens is the sum of the input (2) and output (1) token counts.
    wanted = AIMessage(  # type: ignore[misc]
        "bar",
        usage_metadata={
            "input_tokens": 2,
            "output_tokens": 1,
            "total_tokens": 3,
            "input_token_details": {},
        },
        response_metadata={"model_provider": "anthropic"},
    )
    assert chat_result.generations[0].message == wanted


def test__format_output_cached() -> None:
    """``_format_output`` folds cache token counts into the usage metadata."""
    chat = ChatAnthropic(model=MODEL_NAME, anthropic_api_key="test")  # type: ignore[call-arg, call-arg]

    cached_usage = Usage(
        input_tokens=2,
        output_tokens=1,
        cache_creation_input_tokens=3,
        cache_read_input_tokens=4,
    )
    raw_message = Message(
        id="foo",
        content=[TextBlock(type="text", text="bar")],
        model="baz",
        role="assistant",
        stop_reason=None,
        stop_sequence=None,
        usage=cached_usage,
        type="message",
    )
    chat_result = chat._format_output(raw_message)

    # Expected input_tokens (9) = 2 plain + 3 cache-creation + 4 cache-read.
    wanted = AIMessage(  # type: ignore[misc]
        "bar",
        usage_metadata={
            "input_tokens": 9,
            "output_tokens": 1,
            "total_tokens": 10,
            "input_token_details": {"cache_creation": 3, "cache_read": 4},
        },
        response_metadata={"model_provider": "anthropic"},
    )
    assert chat_result.generations[0].message == wanted


def test__merge_messages() -> None:
    """Check that `_merge_messages` folds tool and human messages together.

    Two scenarios are covered: a full conversation in which three tool
    messages and a trailing human message are expected to collapse into one
    `HumanMessage`, and a list made up only of tool messages.
    """
    # Input conversation: system, human, an AI turn containing three tool_use
    # blocks, one ToolMessage per tool call, then a follow-up human message.
    messages = [
        SystemMessage("foo"),  # type: ignore[misc]
        HumanMessage("bar"),  # type: ignore[misc]
        AIMessage(  # type: ignore[misc]
            [
                {"text": "baz", "type": "text"},
                {
                    "tool_input": {"a": "b"},
                    "type": "tool_use",
                    "id": "1",
                    "text": None,
                    "name": "buz",
                },
                {"text": "baz", "type": "text"},
                {
                    "tool_input": {"a": "c"},
                    "type": "tool_use",
                    "id": "2",
                    "text": None,
                    "name": "blah",
                },
                {
                    "tool_input": {"a": "c"},
                    "type": "tool_use",
                    "id": "3",
                    "text": None,
                    "name": "blah",
                },
            ],
        ),
        # Tool result flagged as an error (expected to map to is_error=True).
        ToolMessage("buz output", tool_call_id="1", status="error"),  # type: ignore[misc]
        # Tool result whose content is a list holding an image block.
        ToolMessage(
            content=[
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/jpeg",
                        "data": "fake_image_data",
                    },
                },
            ],
            tool_call_id="2",
        ),  # type: ignore[misc]
        # Tool result with empty list content.
        ToolMessage([], tool_call_id="3"),  # type: ignore[misc]
        HumanMessage("next thing"),  # type: ignore[misc]
    ]
    # The system, human and AI messages are expected unchanged; the three tool
    # messages and the final human message are expected as one HumanMessage
    # made of tool_result blocks followed by a text block.
    expected = [
        SystemMessage("foo"),  # type: ignore[misc]
        HumanMessage("bar"),  # type: ignore[misc]
        AIMessage(  # type: ignore[misc]
            [
                {"text": "baz", "type": "text"},
                {
                    "tool_input": {"a": "b"},
                    "type": "tool_use",
                    "id": "1",
                    "text": None,
                    "name": "buz",
                },
                {"text": "baz", "type": "text"},
                {
                    "tool_input": {"a": "c"},
                    "type": "tool_use",
                    "id": "2",
                    "text": None,
                    "name": "blah",
                },
                {
                    "tool_input": {"a": "c"},
                    "type": "tool_use",
                    "id": "3",
                    "text": None,
                    "name": "blah",
                },
            ],
        ),
        HumanMessage(  # type: ignore[misc]
            [
                {
                    "type": "tool_result",
                    "content": "buz output",
                    "tool_use_id": "1",
                    "is_error": True,
                },
                {
                    "type": "tool_result",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/jpeg",
                                "data": "fake_image_data",
                            },
                        },
                    ],
                    "tool_use_id": "2",
                    "is_error": False,
                },
                {
                    "type": "tool_result",
                    "content": [],
                    "tool_use_id": "3",
                    "is_error": False,
                },
                {"type": "text", "text": "next thing"},
            ],
        ),
    ]
    actual = _merge_messages(messages)
    assert expected == actual

    # Test tool message case
    # The second ToolMessage already holds a tool_result block; it is expected
    # to be passed through as-is (note: no `is_error` key is added to it).
    messages = [
        ToolMessage("buz output", tool_call_id="1"),  # type: ignore[misc]
        ToolMessage(  # type: ignore[misc]
            content=[
                {"type": "tool_result", "content": "blah output", "tool_use_id": "2"},
            ],
            tool_call_id="2",
        ),
    ]
    expected = [
        HumanMessage(  # type: ignore[misc]
            [
                {
                    "type": "tool_result",
                    "content": "buz output",
                    "tool_use_id": "1",
                    "is_error": False,
                },
                {"type": "tool_result", "content": "blah output", "tool_use_id": "2"},
            ],
        ),
    ]
    actual = _merge_messages(messages)
    assert expected == actual


def test__merge_messages_mutation() -> None:
    """Merging consecutive human messages must not modify the input list."""
    inputs = [
        HumanMessage([{"type": "text", "text": "bar"}]),  # type: ignore[misc]
        HumanMessage("next thing"),  # type: ignore[misc]
    ]
    # Independent copy taken before the call, used to detect in-place changes.
    snapshot = copy.deepcopy(inputs)

    merged = _merge_messages(inputs)

    wanted = [
        HumanMessage(  # type: ignore[misc]
            [{"type": "text", "text": "bar"}, {"type": "text", "text": "next thing"}],
        ),
    ]
    assert wanted == merged
    assert inputs == snapshot


def test__merge_messages_tool_message_cache_control() -> None:
    """Check that cache_control moves from content blocks up to the tool_result."""
    # Scenario 1: a single content block carrying cache_control.
    single_block = [
        ToolMessage(
            content=[
                {
                    "type": "text",
                    "text": "tool output",
                    "cache_control": {"type": "ephemeral"},
                }
            ],
            tool_call_id="1",
        )
    ]
    snapshot = [copy.deepcopy(msg) for msg in single_block]
    wanted_single = [
        HumanMessage(
            [
                {
                    "type": "tool_result",
                    "content": [{"type": "text", "text": "tool output"}],
                    "tool_use_id": "1",
                    "is_error": False,
                    "cache_control": {"type": "ephemeral"},
                }
            ]
        )
    ]
    assert wanted_single == _merge_messages(single_block)
    # The input messages must be left untouched.
    assert single_block == snapshot

    # Scenario 2: several content blocks, cache_control only on the last one.
    multi_block = [
        ToolMessage(
            content=[
                {"type": "text", "text": "first output"},
                {
                    "type": "text",
                    "text": "second output",
                    "cache_control": {"type": "ephemeral"},
                },
            ],
            tool_call_id="2",
        )
    ]
    wanted_multi = [
        HumanMessage(
            [
                {
                    "type": "tool_result",
                    "content": [
                        {"type": "text", "text": "first output"},
                        {"type": "text", "text": "second output"},
                    ],
                    "tool_use_id": "2",
                    "is_error": False,
                    "cache_control": {"type": "ephemeral"},
                }
            ]
        )
    ]
    assert wanted_multi == _merge_messages(multi_block)

    # Scenario 3: plain string content with no cache_control at all.
    plain = [ToolMessage(content="simple output", tool_call_id="3")]
    wanted_plain = [
        HumanMessage(
            [
                {
                    "type": "tool_result",
                    "content": "simple output",
                    "tool_use_id": "3",
                    "is_error": False,
                }
            ]
        )
    ]
    assert wanted_plain == _merge_messages(plain)


def test__format_image() -> None:
    """``_format_image`` raises ``ValueError`` for the dummy image URL below."""
    # NOTE(review): presumably rejected because the string is neither a
    # data: URL nor has a scheme - confirm against _format_image.
    unsupported_url = "dummyimage.com/600x400/000/fff"
    with pytest.raises(ValueError):
        _format_image(unsupported_url)


@pytest.fixture
def pydantic() -> type[BaseModel]:
    """Return a Pydantic model class describing the dummy tool's two arguments."""

    # The class name and docstring match the "dummy_function" / "Dummy function."
    # values used by the other fixtures in this file; the lowercase class name
    # is deliberate (hence the N801 suppression).
    class dummy_function(BaseModel):  # noqa: N801
        """Dummy function."""

        arg1: int = Field(..., description="foo")
        arg2: Literal["bar", "baz"] = Field(..., description="one of 'bar', 'baz'")

    return dummy_function


@pytest.fixture
def function() -> Callable:
    """Return a plain Python function describing the dummy tool."""

    # The inner function has no body beyond its docstring; its name, signature
    # and docstring carry the same name/description/argument text as the other
    # dummy-tool fixtures in this file.
    def dummy_function(arg1: int, arg2: Literal["bar", "baz"]) -> None:
        """Dummy function.

        Args:
            arg1: foo
            arg2: one of 'bar', 'baz'

        """

    return dummy_function


@pytest.fixture
def dummy_tool() -> BaseTool:
    """Return a `BaseTool` instance describing the dummy tool."""

    # Argument schema: same two fields and descriptions as the other fixtures.
    class Schema(BaseModel):
        arg1: int = Field(..., description="foo")
        arg2: Literal["bar", "baz"] = Field(..., description="one of 'bar', 'baz'")

    class DummyFunction(BaseTool):  # type: ignore[override]
        args_schema: type[BaseModel] = Schema
        name: str = "dummy_function"
        description: str = "Dummy function."

        def _run(self, *args: Any, **kwargs: Any) -> Any:
            # Intentionally a no-op: the method body does nothing and returns None.
            pass

    return DummyFunction()


@pytest.fixture
def json_schema() -> dict:
    """Return the dummy tool expressed as a JSON-schema style dict."""
    argument_properties = {
        "arg1": {"description": "foo", "type": "integer"},
        "arg2": {
            "description": "one of 'bar', 'baz'",
            "enum": ["bar", "baz"],
            "type": "string",
        },
    }
    schema: dict = {
        "title": "dummy_function",
        "description": "Dummy function.",
        "type": "object",
        "properties": argument_properties,
        "required": ["arg1", "arg2"],
    }
    return schema


@pytest.fixture
def openai_function() -> dict:
    """Return the dummy tool as a dict with name, description and parameters."""
    parameters = {
        "type": "object",
        "properties": {
            "arg1": {"description": "foo", "type": "integer"},
            "arg2": {
                "description": "one of 'bar', 'baz'",
                "enum": ["bar", "baz"],
                "type": "string",
            },
        },
        "required": ["arg1", "arg2"],
    }
    spec: dict = {
        "name": "dummy_function",
        "description": "Dummy function.",
        "parameters": parameters,
    }
    return spec


def test_convert_to_anthropic_tool(
    pydantic: type[BaseModel],
    function: Callable,
    dummy_tool: BaseTool,
    json_schema: dict,
    openai_function: dict,
) -> None:
    """Every tool representation must convert to the same Anthropic tool dict."""
    anthropic_tool = {
        "name": "dummy_function",
        "description": "Dummy function.",
        "input_schema": {
            "type": "object",
            "properties": {
                "arg1": {"description": "foo", "type": "integer"},
                "arg2": {
                    "description": "one of 'bar', 'baz'",
                    "enum": ["bar", "baz"],
                    "type": "string",
                },
            },
            "required": ["arg1", "arg2"],
        },
    }

    # The Anthropic-format dict itself is also passed in as one of the inputs.
    representations = (
        pydantic,
        function,
        dummy_tool,
        json_schema,
        anthropic_tool,
        openai_function,
    )
    for representation in representations:
        converted = convert_to_anthropic_tool(representation)
        assert converted == anthropic_tool


def test__format_messages_with_tool_calls() -> None:
    """Check `_format_messages` for AI tool calls, tool results and empty AI turns.

    The first part formats a conversation with two tool-calling AI messages and
    three tool results. The second part checks how an `AIMessage` with empty
    content is treated depending on its position in the conversation.
    """
    system = SystemMessage("fuzz")  # type: ignore[misc]
    human = HumanMessage("foo")  # type: ignore[misc]
    ai = AIMessage(
        "",  # with empty string
        tool_calls=[{"name": "bar", "id": "1", "args": {"baz": "buzz"}}],
    )
    ai2 = AIMessage(
        [],  # with empty list
        tool_calls=[{"name": "bar", "id": "2", "args": {"baz": "buzz"}}],
    )
    tool = ToolMessage(
        "blurb",
        tool_call_id="1",
    )
    # Tool result given as an image_url block with a data URL; the expected
    # output below shows it as a base64 "image" block.
    tool_image_url = ToolMessage(
        [{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,...."}}],
        tool_call_id="2",
    )
    # Tool result already given as a base64 "image" block.
    tool_image = ToolMessage(
        [
            {
                "type": "image",
                "source": {
                    "data": "....",
                    "type": "base64",
                    "media_type": "image/jpeg",
                },
            },
        ],
        tool_call_id="3",
    )
    messages = [system, human, ai, tool, ai2, tool_image_url, tool_image]
    # Expected result: (system prompt, list of role/content dicts). The two
    # consecutive image tool results are expected inside a single user entry.
    expected = (
        "fuzz",
        [
            {"role": "user", "content": "foo"},
            {
                "role": "assistant",
                "content": [
                    {
                        "type": "tool_use",
                        "name": "bar",
                        "id": "1",
                        "input": {"baz": "buzz"},
                    },
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "content": "blurb",
                        "tool_use_id": "1",
                        "is_error": False,
                    },
                ],
            },
            {
                "role": "assistant",
                "content": [
                    {
                        "type": "tool_use",
                        "name": "bar",
                        "id": "2",
                        "input": {"baz": "buzz"},
                    },
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "content": [
                            {
                                "type": "image",
                                "source": {
                                    "data": "....",
                                    "type": "base64",
                                    "media_type": "image/jpeg",
                                },
                            },
                        ],
                        "tool_use_id": "2",
                        "is_error": False,
                    },
                    {
                        "type": "tool_result",
                        "content": [
                            {
                                "type": "image",
                                "source": {
                                    "data": "....",
                                    "type": "base64",
                                    "media_type": "image/jpeg",
                                },
                            },
                        ],
                        "tool_use_id": "3",
                        "is_error": False,
                    },
                ],
            },
        ],
    )
    actual = _format_messages(messages)
    assert expected == actual

    # Check handling of empty AIMessage
    empty_contents: list[str | list[str | dict]] = ["", []]
    for empty_content in empty_contents:
        ## Permit message in final position
        _, anthropic_messages = _format_messages([human, AIMessage(empty_content)])
        expected_messages = [
            {"role": "user", "content": "foo"},
            {"role": "assistant", "content": empty_content},
        ]
        assert expected_messages == anthropic_messages

        ## Remove message otherwise
        _, anthropic_messages = _format_messages(
            [human, AIMessage(empty_content), human]
        )
        expected_messages = [
            {"role": "user", "content": "foo"},
            {"role": "user", "content": "foo"},
        ]
        assert expected_messages == anthropic_messages

        # With an empty AI turn between a tool result and a human message, only
        # the roles are checked: no assistant entry is expected for the empty
        # turn, leaving two consecutive user entries at the end.
        actual = _format_messages(
            [system, human, ai, tool, AIMessage(empty_content), human]
        )
        assert actual[0] == "fuzz"
        assert [message["role"] for message in actual[1]] == [
            "user",
            "assistant",
            "user",
            "user",
        ]


def test__format_tool_use_block() -> None:
    """tool_use content blocks are formatted even without matching tool_calls."""
    ai_message = AIMessage(
        [
            # Block whose input is already complete.
            {
                "type": "tool_use",
                "name": "foo_1",
                "id": "1",
                "input": {"bar_1": "baz_1"},
            },
            # Block with empty input plus partial_json/index keys; the expected
            # output below carries the decoded JSON as input and drops both keys.
            {
                "type": "tool_use",
                "name": "foo_2",
                "id": "2",
                "input": {},
                "partial_json": '{"bar_2": "baz_2"}',
                "index": 1,
            },
        ]
    )
    wanted_message = {
        "role": "assistant",
        "content": [
            {
                "type": "tool_use",
                "name": "foo_1",
                "id": "1",
                "input": {"bar_1": "baz_1"},
            },
            {
                "type": "tool_use",
                "name": "foo_2",
                "id": "2",
                "input": {"bar_2": "baz_2"},
            },
        ],
    }

    formatted = _format_messages([ai_message])
    # No system message was supplied, so the first element is None.
    assert formatted == (None, [wanted_message])


def test__format_messages_with_str_content_and_tool_calls() -> None:
    """String content plus tool_calls yields a text block followed by tool_use."""
    # When an AIMessage has both string content and tool_calls, both are
    # expected in the formatted output, with the content first.
    assistant_turn = AIMessage(  # type: ignore[misc]
        "thought",
        tool_calls=[{"name": "bar", "id": "1", "args": {"baz": "buzz"}}],
    )
    conversation = [
        SystemMessage("fuzz"),  # type: ignore[misc]
        HumanMessage("foo"),  # type: ignore[misc]
        assistant_turn,
        ToolMessage("blurb", tool_call_id="1"),  # type: ignore[misc]
    ]

    wanted_messages = [
        {"role": "user", "content": "foo"},
        {
            "role": "assistant",
            "content": [
                {"type": "text", "text": "thought"},
                {
                    "type": "tool_use",
                    "name": "bar",
                    "id": "1",
                    "input": {"baz": "buzz"},
                },
            ],
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "tool_result",
                    "content": "blurb",
                    "tool_use_id": "1",
                    "is_error": False,
                },
            ],
        },
    ]
    wanted = ("fuzz", wanted_messages)

    formatted = _format_messages(conversation)
    assert wanted == formatted


def test__format_messages_with_list_content_and_tool_calls() -> None:
    """List content plus tool_calls yields the text block followed by tool_use."""
    assistant_turn = AIMessage(  # type: ignore[misc]
        [{"type": "text", "text": "thought"}],
        tool_calls=[{"name": "bar", "id": "1", "args": {"baz": "buzz"}}],
    )
    tool_reply = ToolMessage(  # type: ignore[misc]
        "blurb",
        tool_call_id="1",
    )
    conversation = [
        SystemMessage("fuzz"),  # type: ignore[misc]
        HumanMessage("foo"),  # type: ignore[misc]
        assistant_turn,
        tool_reply,
    ]

    wanted_messages = [
        {"role": "user", "content": "foo"},
        {
            "role": "assistant",
            "content": [
                {"type": "text", "text": "thought"},
                {
                    "type": "tool_use",
                    "name": "bar",
                    "id": "1",
                    "input": {"baz": "buzz"},
                },
            ],
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "tool_result",
                    "content": "blurb",
                    "tool_use_id": "1",
                    "is_error": False,
                },
            ],
        },
    ]
    wanted = ("fuzz", wanted_messages)

    formatted = _format_messages(conversation)
    assert wanted == formatted


def test__format_messages_with_tool_use_blocks_and_tool_calls() -> None:
    """Show that tool_calls win over tool_use blocks that share the same id."""
    # NOTE: the tool_use block in the content and the entry in tool_calls
    # deliberately carry different arguments for the same id.
    assistant_turn = AIMessage(  # type: ignore[misc]
        [
            {"type": "text", "text": "thought"},
            {
                "type": "tool_use",
                "name": "bar",
                "id": "1",
                "input": {"baz": "NOT_BUZZ"},
            },
        ],
        tool_calls=[{"name": "bar", "id": "1", "args": {"baz": "BUZZ"}}],
    )
    conversation = [
        SystemMessage("fuzz"),  # type: ignore[misc]
        HumanMessage("foo"),  # type: ignore[misc]
        assistant_turn,
        ToolMessage("blurb", tool_call_id="1"),  # type: ignore[misc]
    ]

    wanted_messages = [
        {"role": "user", "content": "foo"},
        {
            "role": "assistant",
            "content": [
                {"type": "text", "text": "thought"},
                {
                    "type": "tool_use",
                    "name": "bar",
                    "id": "1",
                    # The value from tool_calls is the one expected here.
                    "input": {"baz": "BUZZ"},
                },
            ],
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "tool_result",
                    "content": "blurb",
                    "tool_use_id": "1",
                    "is_error": False,
                },
            ],
        },
    ]
    wanted = ("fuzz", wanted_messages)

    formatted = _format_messages(conversation)
    assert wanted == formatted


def test__format_messages_with_cache_control() -> None:
    """Check `cache_control` pass-through and file-input conversion in formatting."""
    # Text blocks: cache_control on system and human content is carried through.
    system_prompt, formatted = _format_messages(
        [
            SystemMessage(
                [
                    {
                        "type": "text",
                        "text": "foo",
                        "cache_control": {"type": "ephemeral"},
                    },
                ],
            ),
            HumanMessage(
                [
                    {
                        "type": "text",
                        "text": "foo",
                        "cache_control": {"type": "ephemeral"},
                    },
                    {"type": "text", "text": "foo"},
                ],
            ),
        ],
    )
    assert system_prompt == [
        {"type": "text", "text": "foo", "cache_control": {"type": "ephemeral"}},
    ]
    assert formatted == [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "foo", "cache_control": {"type": "ephemeral"}},
                {"type": "text", "text": "foo"},
            ],
        },
    ]

    # Base64 PDFs: every standard multi-modal spelling yields the same document
    # block, with cache_control lifted to the top level of that block.
    pdf_variants = [
        # v0
        {
            "type": "file",
            "source_type": "base64",
            "mime_type": "application/pdf",
            "data": "<base64 data>",
            "cache_control": {"type": "ephemeral"},
        },
        # v1
        {
            "type": "file",
            "mime_type": "application/pdf",
            "base64": "<base64 data>",
            "extras": {"cache_control": {"type": "ephemeral"}},
        },
        # v1, unpacked extras
        {
            "type": "file",
            "mime_type": "application/pdf",
            "base64": "<base64 data>",
            "cache_control": {"type": "ephemeral"},
        },
    ]
    for pdf_block in pdf_variants:
        system_prompt, formatted = _format_messages(
            [
                HumanMessage(
                    [{"type": "text", "text": "Summarize this document:"}, pdf_block],
                ),
            ],
        )
        assert system_prompt is None
        assert formatted == [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Summarize this document:"},
                    {
                        "type": "document",
                        "source": {
                            "type": "base64",
                            "media_type": "application/pdf",
                            "data": "<base64 data>",
                        },
                        "cache_control": {"type": "ephemeral"},
                    },
                ],
            },
        ]

    # File-id inputs: images first, then documents, each in v1 and v0 spelling.
    file_id_cases = [
        (
            "Summarize this image:",
            "image",
            [
                {"type": "image", "file_id": "abc123"},  # v1
                {"type": "image", "source_type": "id", "id": "abc123"},  # v0
            ],
        ),
        (
            "Summarize this document:",
            "document",
            [
                {"type": "file", "file_id": "abc123"},  # v1
                {"type": "file", "source_type": "id", "id": "abc123"},  # v0
            ],
        ),
    ]
    for prompt, anthropic_type, variants in file_id_cases:
        for file_block in variants:
            system_prompt, formatted = _format_messages(
                [HumanMessage([{"type": "text", "text": prompt}, file_block])],
            )
            assert system_prompt is None
            assert formatted == [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": anthropic_type,
                            "source": {"type": "file", "file_id": "abc123"},
                        },
                    ],
                },
            ]


def test__format_messages_with_citations() -> None:
    """Check that a plain-text file block keeps its `citations` setting."""
    question = {"type": "text", "text": "What color is the grass and sky?"}
    cited_file = {
        "type": "file",
        "source_type": "text",
        "text": "The grass is green. The sky is blue.",
        "mime_type": "text/plain",
        "citations": {"enabled": True},
    }

    system_prompt, formatted = _format_messages(
        [HumanMessage(content=[cited_file, question])],
    )

    assert system_prompt is None
    assert formatted == [
        {
            "role": "user",
            "content": [
                {
                    "type": "document",
                    "source": {
                        "type": "text",
                        "media_type": "text/plain",
                        "data": "The grass is green. The sky is blue.",
                    },
                    "citations": {"enabled": True},
                },
                {"type": "text", "text": "What color is the grass and sky?"},
            ],
        },
    ]


def test__format_messages_openai_image_format() -> None:
    """Check conversion of OpenAI-style `image_url` blocks (data URI and URL)."""
    prompt = "Can you highlight the differences between these two images?"
    content_blocks = [
        {"type": "text", "text": prompt},
        {
            "type": "image_url",
            "image_url": {"url": "data:image/jpeg;base64,<base64 data>"},
        },
        {"type": "image_url", "image_url": {"url": "https://<image url>"}},
    ]

    system_prompt, formatted = _format_messages([HumanMessage(content=content_blocks)])

    assert system_prompt is None
    # The data URI becomes a base64 source; the plain link becomes a url source.
    base64_image = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/jpeg",
            "data": "<base64 data>",
        },
    }
    url_image = {
        "type": "image",
        "source": {"type": "url", "url": "https://<image url>"},
    }
    assert formatted == [
        {
            "role": "user",
            "content": [{"type": "text", "text": prompt}, base64_image, url_image],
        },
    ]


def test__format_messages_with_multiple_system() -> None:
    """Check that several system messages are merged into one list of text blocks."""
    cached_block = {
        "type": "text",
        "text": "foo",
        "cache_control": {"type": "ephemeral"},
    }
    conversation = [
        HumanMessage("baz"),
        SystemMessage("bar"),
        SystemMessage("baz"),
        SystemMessage([cached_block]),
    ]

    system_prompt, formatted = _format_messages(conversation)

    # String system messages are wrapped as text blocks, in their original order.
    assert system_prompt == [
        {"type": "text", "text": "bar"},
        {"type": "text", "text": "baz"},
        {"type": "text", "text": "foo", "cache_control": {"type": "ephemeral"}},
    ]
    assert formatted == [{"role": "user", "content": "baz"}]


def test_anthropic_api_key_is_secret_string() -> None:
    """Check that a key passed as a plain string is held as a `SecretStr`."""
    stored_key = ChatAnthropic(  # type: ignore[call-arg, call-arg]
        model=MODEL_NAME,
        anthropic_api_key="secret-api-key",
    ).anthropic_api_key
    assert isinstance(stored_key, SecretStr)


def test_anthropic_api_key_masked_when_passed_from_env(
    monkeypatch: MonkeyPatch,
    capsys: CaptureFixture,
) -> None:
    """Test that the API key is masked when passed from an environment variable.

    Args:
        monkeypatch: pytest fixture used to set `ANTHROPIC_API_KEY` for this test.
        capsys: pytest fixture used to capture what `print` writes to stdout.
    """
    # The name must be exactly "ANTHROPIC_API_KEY" (no surrounding whitespace);
    # otherwise a different variable is set and the model silently picks up the
    # module-level default key instead of the value under test.
    monkeypatch.setenv("ANTHROPIC_API_KEY", "secret-api-key")
    chat_model = ChatAnthropic(  # type: ignore[call-arg]
        model=MODEL_NAME,
    )
    # Printing the key should emit only the masked form.
    print(chat_model.anthropic_api_key, end="")  # noqa: T201
    captured = capsys.readouterr()

    assert captured.out == "**********"


def test_anthropic_api_key_masked_when_passed_via_constructor(
    capsys: CaptureFixture,
) -> None:
    """Check that printing a constructor-supplied API key shows only a mask."""
    llm = ChatAnthropic(  # type: ignore[call-arg, call-arg]
        model=MODEL_NAME,
        anthropic_api_key="secret-api-key",
    )

    print(llm.anthropic_api_key, end="")  # noqa: T201

    printed = capsys.readouterr().out
    assert printed == "**********"


def test_anthropic_uses_actual_secret_value_from_secretstr() -> None:
    """Check that `get_secret_value()` returns the key originally supplied."""
    llm = ChatAnthropic(  # type: ignore[call-arg, call-arg]
        model=MODEL_NAME,
        anthropic_api_key="secret-api-key",
    )
    stored_key = cast("SecretStr", llm.anthropic_api_key)
    assert stored_key.get_secret_value() == "secret-api-key"


class GetWeather(BaseModel):
    """Get the current weather in a given location."""

    # NOTE(review): this model is bound as a tool in the tests below. The class
    # docstring and the field description presumably end up in the generated
    # tool schema, so treat both strings as test data rather than free-form docs.
    location: str = Field(..., description="The city and state, e.g. San Francisco, CA")


def test_anthropic_bind_tools_tool_choice() -> None:
    """Check how each accepted `tool_choice` spelling is stored on the binding."""
    llm = ChatAnthropic(  # type: ignore[call-arg, call-arg]
        model=MODEL_NAME,
        anthropic_api_key="secret-api-key",
    )
    # (value passed to bind_tools, tool_choice expected in the bound kwargs)
    cases: list[tuple[dict[str, str] | str, dict[str, str]]] = [
        (
            {"type": "tool", "name": "GetWeather"},
            {"type": "tool", "name": "GetWeather"},
        ),
        ("GetWeather", {"type": "tool", "name": "GetWeather"}),
        ("auto", {"type": "auto"}),
        ("any", {"type": "any"}),
    ]
    for requested, expected in cases:
        bound = llm.bind_tools([GetWeather], tool_choice=requested)
        assert cast("RunnableBinding", bound).kwargs["tool_choice"] == expected


def test_fine_grained_tool_streaming_beta() -> None:
    """Check the fine-grained tool streaming beta flag reaches request payloads."""
    beta = "fine-grained-tool-streaming-2025-05-14"

    def get_weather(city: str) -> str:
        """Get the weather for a city."""
        return f"Weather in {city}"

    # Beta supplied on its own when the model is constructed.
    bound = ChatAnthropic(model=MODEL_NAME, betas=[beta]).bind_tools([get_weather])
    request = bound._get_request_payload(  # type: ignore[attr-defined]
        "What's the weather in SF?",
        stream=True,
        **bound.kwargs,  # type: ignore[attr-defined]
    )
    assert beta in request["betas"]
    assert request["stream"] is True

    # Beta supplied alongside another beta: both must be present.
    bound = ChatAnthropic(
        model=MODEL_NAME,
        betas=["context-1m-2025-08-07", beta],
    ).bind_tools([get_weather])
    request = bound._get_request_payload(  # type: ignore[attr-defined]
        "What's the weather?",
        stream=True,
        **bound.kwargs,  # type: ignore[attr-defined]
    )
    assert set(request["betas"]) == {"context-1m-2025-08-07", beta}

    # With betas in the payload, `_create` must go through the beta client.
    llm = ChatAnthropic(model=MODEL_NAME, betas=[beta])
    raw_payload = {"betas": [beta], "stream": True}
    with patch.object(llm._client.beta.messages, "create") as beta_create:
        llm._create(raw_payload)
        beta_create.assert_called_once_with(**raw_payload)


def test_optional_description() -> None:
    """Smoke test: structured output accepts a schema from an undocumented model."""

    class SampleModel(BaseModel):
        sample_field: str

    schema = SampleModel.model_json_schema()
    # Only construction is exercised; the resulting runnable is discarded.
    ChatAnthropic(model=MODEL_NAME).with_structured_output(schema)


def test_get_num_tokens_from_messages_passes_kwargs() -> None:
    """Check which arguments `get_num_tokens_from_messages` hands to the client."""
    prompt = [HumanMessage("foo")]

    # Extra keyword arguments are forwarded to `messages.count_tokens`.
    plain_llm = ChatAnthropic(model=MODEL_NAME)
    with patch.object(anthropic, "Client") as client_cls:
        plain_llm.get_num_tokens_from_messages(prompt, foo="bar")
    forwarded = client_cls.return_value.messages.count_tokens.call_args.kwargs
    assert forwarded["foo"] == "bar"

    # With betas configured, the beta endpoint is used and receives both the
    # betas and the context_management settings.
    edits = {"edits": [{"type": "clear_tool_uses_20250919"}]}
    beta_llm = ChatAnthropic(
        model=MODEL_NAME,
        betas=["context-management-2025-06-27"],
        context_management=edits,
    )
    with patch.object(anthropic, "Client") as client_cls:
        beta_llm.get_num_tokens_from_messages(prompt)
    forwarded = client_cls.return_value.beta.messages.count_tokens.call_args.kwargs
    assert forwarded["betas"] == ["context-management-2025-06-27"]
    assert forwarded["context_management"] == {
        "edits": [{"type": "clear_tool_uses_20250919"}]
    }


def test_usage_metadata_standardization() -> None:
    """Check `_create_usage_metadata` totals for full, null and absent counters."""

    class FullUsage(BaseModel):
        input_tokens: int = 10
        output_tokens: int = 5
        cache_read_input_tokens: int = 3
        cache_creation_input_tokens: int = 2

    class NullUsage(BaseModel):
        input_tokens: int | None = None
        output_tokens: int | None = None
        cache_read_input_tokens: int | None = None
        cache_creation_input_tokens: int | None = None

    class EmptyUsage(BaseModel):
        pass

    # All counters present: cache reads and writes are added to the input total.
    metadata = _create_usage_metadata(FullUsage())
    assert metadata["input_tokens"] == 15  # 10 + 3 + 2
    assert metadata["output_tokens"] == 5
    assert metadata["total_tokens"] == 20
    assert metadata.get("input_token_details") == {
        "cache_read": 3,
        "cache_creation": 2,
    }

    # Counters that are None, or missing altogether, are reported as zero.
    for blank_usage in (NullUsage(), EmptyUsage()):
        metadata = _create_usage_metadata(blank_usage)
        assert metadata["input_tokens"] == 0
        assert metadata["output_tokens"] == 0
        assert metadata["total_tokens"] == 0


def test_usage_metadata_cache_creation_ttl() -> None:
    """Exercise `_create_usage_metadata` when usage carries per-TTL cache writes."""

    # --- Case 1: nested model with both ephemeral counters populated ---------
    class BothTtls(BaseModel):
        ephemeral_5m_input_tokens: int = 100
        ephemeral_1h_input_tokens: int = 50

    class UsageBothTtls(BaseModel):
        input_tokens: int = 200
        output_tokens: int = 30
        cache_read_input_tokens: int = 10
        cache_creation_input_tokens: int = 150
        cache_creation: BothTtls = BothTtls()

    metadata = _create_usage_metadata(UsageBothTtls())
    # 200 base + 10 cache read + (100 + 50) per-TTL cache writes
    assert metadata["input_tokens"] == 360
    assert metadata["output_tokens"] == 30
    assert metadata["total_tokens"] == 390
    breakdown = dict(metadata.get("input_token_details") or {})
    assert breakdown["cache_read"] == 10
    # The generic counter is zeroed so the writes are not counted twice.
    assert breakdown["cache_creation"] == 0
    assert breakdown["ephemeral_5m_input_tokens"] == 100
    assert breakdown["ephemeral_1h_input_tokens"] == 50

    # --- Case 2: the per-TTL counters arrive as a plain dict -----------------
    class UsageDictTtls(BaseModel):
        input_tokens: int = 200
        output_tokens: int = 30
        cache_read_input_tokens: int = 10
        cache_creation_input_tokens: int = 150
        cache_creation: dict = {
            "ephemeral_5m_input_tokens": 80,
            "ephemeral_1h_input_tokens": 70,
        }

    metadata = _create_usage_metadata(UsageDictTtls())
    assert metadata["input_tokens"] == 360  # 200 + 10 + 80 + 70
    breakdown = dict(metadata.get("input_token_details") or {})
    assert breakdown["cache_creation"] == 0
    assert breakdown["ephemeral_5m_input_tokens"] == 80
    assert breakdown["ephemeral_1h_input_tokens"] == 70

    # --- Case 3: per-TTL counters present but all zero -----------------------
    class ZeroTtls(BaseModel):
        ephemeral_5m_input_tokens: int = 0
        ephemeral_1h_input_tokens: int = 0

    class UsageZeroTtls(BaseModel):
        input_tokens: int = 200
        output_tokens: int = 30
        cache_read_input_tokens: int = 10
        cache_creation_input_tokens: int = 50
        cache_creation: ZeroTtls = ZeroTtls()

    metadata = _create_usage_metadata(UsageZeroTtls())
    # Nothing specific to count, so the generic counter is used: 200 + 10 + 50.
    assert metadata["input_tokens"] == 260
    assert metadata["output_tokens"] == 30
    assert metadata["total_tokens"] == 290
    breakdown = dict(metadata.get("input_token_details") or {})
    assert breakdown["cache_read"] == 10
    assert breakdown["cache_creation"] == 50

    # --- Case 4: nested object exists but defines no per-TTL counters --------
    class NoTtls(BaseModel):
        pass

    class UsageNoTtls(BaseModel):
        input_tokens: int = 100
        output_tokens: int = 20
        cache_read_input_tokens: int = 5
        cache_creation_input_tokens: int = 15
        cache_creation: NoTtls = NoTtls()

    metadata = _create_usage_metadata(UsageNoTtls())
    # Again the generic counter is used: 100 + 5 + 15.
    assert metadata["input_tokens"] == 120
    assert metadata["output_tokens"] == 20
    assert metadata["total_tokens"] == 140
    breakdown = dict(metadata.get("input_token_details") or {})
    assert breakdown["cache_creation"] == 15

    # --- Case 5: only the 1h counter is non-zero -----------------------------
    class OneHourOnly(BaseModel):
        ephemeral_5m_input_tokens: int = 0
        ephemeral_1h_input_tokens: int = 75

    class UsageOneHourOnly(BaseModel):
        input_tokens: int = 100
        output_tokens: int = 10
        cache_read_input_tokens: int = 0
        cache_creation_input_tokens: int = 75
        cache_creation: OneHourOnly = OneHourOnly()

    metadata = _create_usage_metadata(UsageOneHourOnly())
    # A positive per-TTL total (75) suppresses the generic counter: 100 + 0 + 75.
    assert metadata["input_tokens"] == 175
    assert metadata["output_tokens"] == 10
    assert metadata["total_tokens"] == 185
    breakdown = dict(metadata.get("input_token_details") or {})
    assert breakdown["cache_creation"] == 0
    assert breakdown["ephemeral_1h_input_tokens"] == 75
    # A zero counter is still reported; only None would be left out.
    assert breakdown["ephemeral_5m_input_tokens"] == 0

    # --- Case 6: usage has no nested cache_creation attribute at all ---------
    class UsageGenericOnly(BaseModel):
        input_tokens: int = 50
        output_tokens: int = 25
        cache_read_input_tokens: int = 5
        cache_creation_input_tokens: int = 10

    metadata = _create_usage_metadata(UsageGenericOnly())
    assert metadata["input_tokens"] == 65  # 50 + 5 + 10
    assert metadata["output_tokens"] == 25
    assert metadata["total_tokens"] == 90
    breakdown = dict(metadata.get("input_token_details") or {})
    assert breakdown["cache_read"] == 5
    assert breakdown["cache_creation"] == 10


class FakeTracer(BaseTracer):
    """Tracer stub that records every `on_chat_model_start` invocation."""

    def __init__(self) -> None:
        super().__init__()
        # One {"args": ..., "kwargs": ...} entry per call, in call order.
        self.chat_model_start_inputs: list = []

    def _persist_run(self, run: Run) -> None:
        """Do nothing; runs are not stored anywhere."""

    def on_chat_model_start(self, *args: Any, **kwargs: Any) -> Run:
        """Record the call's arguments, then defer to `BaseTracer`."""
        call_record = {"args": args, "kwargs": kwargs}
        self.chat_model_start_inputs.append(call_record)
        return super().on_chat_model_start(*args, **kwargs)


def test_mcp_tracing() -> None:
    """Check the MCP authorization token is sent in requests but never traced."""
    llm = ChatAnthropic(
        model=MODEL_NAME,
        betas=["mcp-client-2025-04-04"],
        mcp_servers=[
            {
                "type": "url",
                "url": "https://mcp.deepwiki.com/mcp",
                "name": "deepwiki",
                "authorization_token": "PLACEHOLDER",
            },
        ],
    )

    def fake_create(*args: Any, **kwargs: Any) -> Message:
        # Canned response; a fresh Message is built on every call.
        return Message(
            id="foo",
            content=[TextBlock(type="text", text="bar")],
            model="baz",
            role="assistant",
            stop_reason=None,
            stop_sequence=None,
            usage=Usage(input_tokens=2, output_tokens=1),
            type="message",
        )

    stub_client = MagicMock()
    stub_client.messages.create = fake_create
    recorder = FakeTracer()
    query = HumanMessage("Test query")

    with patch.object(llm, "_client", stub_client):
        _ = llm.invoke([query], config={"callbacks": [recorder]})

    # Exactly one chat-model start was traced, and it does not contain the token.
    assert len(recorder.chat_model_start_inputs) == 1
    assert "PLACEHOLDER" not in str(recorder.chat_model_start_inputs)

    # The token is still present in the payload built for the API request.
    request = llm._get_request_payload([query])
    assert request["mcp_servers"][0]["authorization_token"] == "PLACEHOLDER"  # noqa: S105


def test_cache_control_kwarg() -> None:
    """Check the `cache_control` kwarg is a top-level payload field only."""
    llm = ChatAnthropic(model=MODEL_NAME)
    conversation = [HumanMessage("foo"), AIMessage("bar"), HumanMessage("baz")]

    # Without the kwarg, the payload carries no cache_control key.
    without_kwarg = llm._get_request_payload(conversation)
    assert "cache_control" not in without_kwarg

    # With the kwarg, it is set at the top level and the messages stay untouched.
    with_kwarg = llm._get_request_payload(
        conversation,
        cache_control={"type": "ephemeral"},
    )
    assert with_kwarg["cache_control"] == {"type": "ephemeral"}
    expected_turns = [("user", "foo"), ("assistant", "bar"), ("user", "baz")]
    assert with_kwarg["messages"] == [
        {"role": role, "content": text} for role, text in expected_turns
    ]


def test_context_management_in_payload() -> None:
    """Check `context_management` set on the model appears in the payload."""
    llm = ChatAnthropic(
        model=MODEL_NAME,  # type: ignore[call-arg]
        betas=["context-management-2025-06-27"],
        context_management={"edits": [{"type": "clear_tool_uses_20250919"}]},
    )
    web_search_tool = {"type": "web_search_20250305", "name": "web_search"}
    bound = llm.bind_tools([web_search_tool])

    request = bound._get_request_payload(  # type: ignore[attr-defined]
        [HumanMessage("Search for recent developments in AI")],
    )

    assert request["context_management"] == {
        "edits": [{"type": "clear_tool_uses_20250919"}]
    }


def test_inference_geo_in_payload() -> None:
    """Check `inference_geo` set on the model appears in the payload."""
    request = ChatAnthropic(
        model=MODEL_NAME,
        inference_geo="us",
    )._get_request_payload([HumanMessage("Hello, world!")])
    assert request["inference_geo"] == "us"


def test_anthropic_model_params() -> None:
    """Check the tracing parameters reported by `_get_ls_params`."""
    llm = ChatAnthropic(model=MODEL_NAME)

    # Defaults: the full parameter dict is pinned.
    defaults = llm._get_ls_params()
    assert defaults == {
        "ls_provider": "anthropic",
        "ls_model_type": "chat",
        "ls_model_name": MODEL_NAME,
        "ls_max_tokens": 64000,
        "ls_temperature": None,
    }

    # An explicit `model` kwarg is reflected in the reported model name.
    with_model_kwarg = llm._get_ls_params(model=MODEL_NAME)
    assert with_model_kwarg.get("ls_model_name") == MODEL_NAME


def test_streaming_cache_token_reporting() -> None:
    """Test that cache tokens are properly reported in streaming events.

    A mocked `message_start` event and a mocked `message_delta` event are each
    converted to a chunk. Only the `message_delta` chunk is expected to carry
    usage metadata, and that metadata must include the cache token counts.
    """
    # `MagicMock` comes from the module-level import; only the Anthropic type
    # used solely by this test is imported locally.
    from anthropic.types import MessageDeltaUsage

    # Create a mock message_start event
    mock_message = MagicMock()
    mock_message.model = MODEL_NAME
    mock_message.usage.input_tokens = 100
    mock_message.usage.output_tokens = 0
    mock_message.usage.cache_read_input_tokens = 25
    mock_message.usage.cache_creation_input_tokens = 10

    message_start_event = MagicMock()
    message_start_event.type = "message_start"
    message_start_event.message = mock_message

    # Create a mock message_delta event with complete usage info
    mock_delta_usage = MessageDeltaUsage(
        output_tokens=50,
        input_tokens=100,
        cache_read_input_tokens=25,
        cache_creation_input_tokens=10,
    )

    mock_delta = MagicMock()
    mock_delta.stop_reason = "end_turn"
    mock_delta.stop_sequence = None

    message_delta_event = MagicMock()
    message_delta_event.type = "message_delta"
    message_delta_event.usage = mock_delta_usage
    message_delta_event.delta = mock_delta

    llm = ChatAnthropic(model=MODEL_NAME)  # type: ignore[call-arg]

    # Convert the message_start event
    start_chunk, _ = llm._make_message_chunk_from_anthropic_event(
        message_start_event,
        stream_usage=True,
        coerce_content_to_string=True,
        block_start_event=None,
    )

    # Convert the message_delta event - should contain complete usage metadata
    # (w/ cache)
    delta_chunk, _ = llm._make_message_chunk_from_anthropic_event(
        message_delta_event,
        stream_usage=True,
        coerce_content_to_string=True,
        block_start_event=None,
    )

    # message_start yields a chunk, but without usage metadata
    assert start_chunk is not None, "message_start should produce a chunk"
    assert getattr(start_chunk, "usage_metadata", None) is None, (
        "message_start should not have usage_metadata"
    )

    # message_delta yields a chunk with usage metadata including cache tokens
    assert delta_chunk is not None, "message_delta should produce a chunk"
    assert delta_chunk.usage_metadata is not None, (
        "message_delta should have usage_metadata"
    )
    assert "input_token_details" in delta_chunk.usage_metadata
    input_details = delta_chunk.usage_metadata["input_token_details"]
    assert input_details.get("cache_read") == 25
    assert input_details.get("cache_creation") == 10

    # Verify totals are correct: 100 base + 25 cache_read + 10 cache_creation = 135
    assert delta_chunk.usage_metadata["input_tokens"] == 135
    assert delta_chunk.usage_metadata["output_tokens"] == 50
    assert delta_chunk.usage_metadata["total_tokens"] == 185


def test_strict_tool_use() -> None:
    """Check `bind_tools(..., strict=True)` marks the bound tool as strict."""

    def get_weather(location: str, unit: Literal["C", "F"]) -> str:
        """Get the weather at a location."""
        return "75 degrees Fahrenheit."

    llm = ChatAnthropic(
        model=MODEL_NAME,  # type: ignore[call-arg]
    )
    bound = llm.bind_tools([get_weather], strict=True)

    bound_tools = bound.kwargs["tools"]  # type: ignore[attr-defined]
    assert bound_tools[0]["strict"] is True


def test_response_format_with_output_config() -> None:
    """Check `response_format` is translated into `output_config["format"]`."""

    class Person(BaseModel):
        """Person data."""

        name: str
        age: int

    # With response_format: a json_schema format entry carrying a schema.
    with_format = ChatAnthropic(model=MODEL_NAME)._get_request_payload(
        "Test query",
        response_format=Person.model_json_schema(),
    )
    assert "output_config" in with_format
    output_config = with_format["output_config"]
    assert "format" in output_config
    assert output_config["format"]["type"] == "json_schema"
    assert "schema" in output_config["format"]

    # Without response_format: output_config, if present at all, has no format.
    without_format = ChatAnthropic(model=MODEL_NAME)._get_request_payload(
        "Test query",
    )
    if "output_config" in without_format:
        assert "format" not in without_format["output_config"]


def test_strict_tool_use_payload() -> None:
    """Check the `strict` flag given to `bind_tools` lands in the request payload."""

    def get_weather(location: str) -> str:
        """Get the weather at a location."""
        return "Sunny"

    llm = ChatAnthropic(model=MODEL_NAME)  # type: ignore[call-arg]

    # strict=True: the key must exist on the first tool and be exactly True.
    strict_bound = llm.bind_tools([get_weather], strict=True)
    strict_request = strict_bound._get_request_payload(  # type: ignore[attr-defined]
        "What's the weather?",
        **strict_bound.kwargs,  # type: ignore[attr-defined]
    )
    first_tool = strict_request["tools"][0]
    assert first_tool["strict"] is True

    # strict=False: the flag is passed through as an explicit False.
    lenient_bound = llm.bind_tools([get_weather], strict=False)
    lenient_request = lenient_bound._get_request_payload(  # type: ignore[attr-defined]
        "What's the weather?",
        **lenient_bound.kwargs,  # type: ignore[attr-defined]
    )
    first_tool = lenient_request["tools"][0]
    assert first_tool.get("strict") is False


def test_auto_append_betas_for_tool_types() -> None:
    """Check that built-in tool types add the beta flags they depend on."""

    def betas_for(tool_specs: list[dict[str, Any]], **model_kwargs: Any) -> Any:
        """Bind `tool_specs` to a new model and return the payload's `betas`."""
        llm = ChatAnthropic(model=MODEL_NAME, **model_kwargs)
        bound = llm.bind_tools(tool_specs)
        request = bound._get_request_payload(  # type: ignore[attr-defined]
            "test",
            **bound.kwargs,  # type: ignore[attr-defined]
        )
        return request["betas"]

    # web_fetch_20250910 auto-appends web-fetch-2025-09-10
    web_fetch_limited = {
        "type": "web_fetch_20250910",
        "name": "web_fetch",
        "max_uses": 3,
    }
    assert betas_for([web_fetch_limited]) == ["web-fetch-2025-09-10"]

    # code_execution_20250522 auto-appends code-execution-2025-05-22
    code_execution = {"type": "code_execution_20250522", "name": "code_execution"}
    assert betas_for([code_execution]) == ["code-execution-2025-05-22"]

    # memory_20250818 auto-appends context-management-2025-06-27
    memory = {"type": "memory_20250818", "name": "memory"}
    assert betas_for([memory]) == ["context-management-2025-06-27"]

    # Betas already configured on the model come first; the tool's beta follows.
    web_fetch = {"type": "web_fetch_20250910", "name": "web_fetch"}
    assert betas_for([web_fetch], betas=["mcp-client-2025-04-04"]) == [
        "mcp-client-2025-04-04",
        "web-fetch-2025-09-10",
    ]

    # A beta the model already lists is not added a second time.
    web_fetch = {"type": "web_fetch_20250910", "name": "web_fetch"}
    assert betas_for([web_fetch], betas=["web-fetch-2025-09-10"]) == [
        "web-fetch-2025-09-10",
    ]

    # Tools with different requirements each contribute their own beta; the
    # relative order of the two is not asserted.
    combined = betas_for(
        [
            {"type": "web_fetch_20250910", "name": "web_fetch"},
            {"type": "code_execution_20250522", "name": "code_execution"},
        ]
    )
    assert set(combined) == {
        "web-fetch-2025-09-10",
        "code-execution-2025-05-22",
    }


def test_tool_search_is_builtin_tool() -> None:
    """Check that tool search specs are classified as built-in tools."""
    # The regex and BM25 search variants are both treated as built-in.
    search_variants = (
        {
            "type": "tool_search_tool_regex_20251119",
            "name": "tool_search_tool_regex",
        },
        {
            "type": "tool_search_tool_bm25_20251119",
            "name": "tool_search_tool_bm25",
        },
    )
    for search_spec in search_variants:
        assert _is_builtin_tool(search_spec)

    # A tool definition with name/description/input_schema is not built-in.
    user_defined = {
        "name": "get_weather",
        "description": "Get weather",
        "input_schema": {"type": "object", "properties": {}},
    }
    assert not _is_builtin_tool(user_defined)


def test_tool_search_beta_headers() -> None:
    """Check that tool search tools add the `advanced-tool-use-2025-11-20` beta."""

    def betas_for(search_tool: dict[str, str], **model_kwargs: Any) -> Any:
        """Bind `search_tool` to a new model and return the payload's `betas`."""
        llm = ChatAnthropic(model=MODEL_NAME, **model_kwargs)
        bound = llm.bind_tools([search_tool])
        request = bound._get_request_payload(  # type: ignore[attr-defined]
            "test",
            **bound.kwargs,  # type: ignore[attr-defined]
        )
        return request["betas"]

    regex_tool = {
        "type": "tool_search_tool_regex_20251119",
        "name": "tool_search_tool_regex",
    }
    bm25_tool = {
        "type": "tool_search_tool_bm25_20251119",
        "name": "tool_search_tool_bm25",
    }

    # Either search variant on its own yields exactly the one beta.
    assert betas_for(regex_tool) == ["advanced-tool-use-2025-11-20"]
    assert betas_for(bm25_tool) == ["advanced-tool-use-2025-11-20"]

    # Betas already configured on the model stay first in the list.
    assert betas_for(regex_tool, betas=["mcp-client-2025-04-04"]) == [
        "mcp-client-2025-04-04",
        "advanced-tool-use-2025-11-20",
    ]


def test_tool_search_with_deferred_tools() -> None:
    """Check that `defer_loading` on a tool survives into the request payload."""
    llm = ChatAnthropic(
        model="claude-opus-4-5-20251101",  # type: ignore[call-arg]
    )

    # One tool search tool plus a regular tool flagged with defer_loading.
    tools = [
        {
            "type": "tool_search_tool_bm25_20251119",
            "name": "tool_search_tool_bm25",
        },
        {
            "name": "calculator",
            "description": "Perform mathematical calculations",
            "input_schema": {
                "type": "object",
                "properties": {
                    "expression": {
                        "type": "string",
                        "description": "Mathematical expression",
                    },
                },
                "required": ["expression"],
            },
            "defer_loading": True,
        },
    ]

    bound = llm.bind_tools(tools)  # type: ignore[arg-type]
    request = bound._get_request_payload(  # type: ignore[attr-defined]
        "test",
        **bound.kwargs,  # type: ignore[attr-defined]
    )

    # Pick the calculator entry out of the serialized tool list.
    calculator_tool = next(
        (
            entry
            for entry in request["tools"]
            if isinstance(entry, dict) and entry.get("name") == "calculator"
        ),
        None,
    )
    assert calculator_tool is not None
    assert calculator_tool.get("defer_loading") is True


def test_tool_search_result_formatting() -> None:
    """Check that `tool_result` blocks holding `tool_reference` items are kept."""
    # An assistant turn made of a server-side search call and its result, where
    # the result content is a list of tool_reference blocks.
    conversation = [
        HumanMessage("What tools can help with weather?"),  # type: ignore[misc]
        AIMessage(  # type: ignore[misc]
            [
                {
                    "type": "server_tool_use",
                    "id": "srvtoolu_123",
                    "name": "tool_search_tool_regex",
                    "input": {"query": "weather"},
                },
                {
                    "type": "tool_result",
                    "tool_use_id": "srvtoolu_123",
                    "content": [
                        {"type": "tool_reference", "tool_name": "get_weather"},
                        {"type": "tool_reference", "tool_name": "weather_forecast"},
                    ],
                },
            ],
        ),
    ]

    _, formatted = _format_messages(conversation)

    assistant_turn = formatted[1]
    assert assistant_turn["role"] == "assistant"

    # Locate the first tool_result block in the formatted assistant content.
    result_block = next(
        (
            blk
            for blk in assistant_turn["content"]
            if isinstance(blk, dict) and blk.get("type") == "tool_result"
        ),
        None,
    )
    assert result_block is not None
    assert result_block["tool_use_id"] == "srvtoolu_123"

    # Both references must come through, in their original order.
    references = result_block["content"]
    assert isinstance(references, list)
    assert len(references) == 2
    assert [(ref["type"], ref["tool_name"]) for ref in references] == [
        ("tool_reference", "get_weather"),
        ("tool_reference", "weather_forecast"),
    ]


def test_auto_append_betas_for_mcp_servers() -> None:
    """Check that `mcp_servers` adds the `mcp-client-2025-11-20` beta."""
    mcp_beta = "mcp-client-2025-11-20"
    context_beta = "context-management-2025-06-27"
    mcp_servers = [
        {
            "type": "url",
            "url": "https://mcp.example.com/mcp",
            "name": "example",
        }
    ]

    # Servers passed at call time: the beta is added and the servers forwarded.
    llm = ChatAnthropic(model=MODEL_NAME)  # type: ignore[call-arg]
    request = llm._get_request_payload(
        "Test query",
        mcp_servers=mcp_servers,  # type: ignore[arg-type]
    )
    assert request["betas"] == [mcp_beta]
    assert request["mcp_servers"] == mcp_servers

    # Betas already on the model are kept ahead of the MCP beta.
    llm = ChatAnthropic(
        model=MODEL_NAME,
        betas=[context_beta],
    )
    request = llm._get_request_payload(
        "Test query",
        mcp_servers=mcp_servers,  # type: ignore[arg-type]
    )
    assert request["betas"] == [context_beta, mcp_beta]

    # The MCP beta is not duplicated when the model already lists it.
    llm = ChatAnthropic(
        model=MODEL_NAME,
        betas=[mcp_beta],
    )
    request = llm._get_request_payload(
        "Test query",
        mcp_servers=mcp_servers,  # type: ignore[arg-type]
    )
    assert request["betas"] == [mcp_beta]

    # Servers configured at model initialization behave the same way.
    llm = ChatAnthropic(
        model=MODEL_NAME,
        mcp_servers=mcp_servers,  # type: ignore[arg-type]
    )
    request = llm._get_request_payload("Test query")
    assert request["betas"] == [mcp_beta]
    assert request["mcp_servers"] == mcp_servers

    # Initialization-time betas and servers combine, existing betas first.
    llm = ChatAnthropic(
        model=MODEL_NAME,
        betas=[context_beta],
        mcp_servers=mcp_servers,  # type: ignore[arg-type]
    )
    request = llm._get_request_payload("Test query")
    assert request["betas"] == [context_beta, mcp_beta]

    # With no servers at all, betas is either absent from the payload or None.
    request = ChatAnthropic(model=MODEL_NAME)._get_request_payload("Test query")
    assert request.get("betas") is None

    # MCP servers combined with a tool type that needs its own beta; the
    # relative order of the two betas is not asserted.
    web_fetch = {"type": "web_fetch_20250910", "name": "web_fetch"}
    bound = ChatAnthropic(model=MODEL_NAME).bind_tools([web_fetch])
    request = bound._get_request_payload(  # type: ignore[attr-defined]
        "Test query",
        mcp_servers=mcp_servers,
        **bound.kwargs,  # type: ignore[attr-defined]
    )
    assert set(request["betas"]) == {"web-fetch-2025-09-10", mcp_beta}


def test_profile() -> None:
    """Check default profiles, in-place overrides and user-supplied profiles."""
    sonnet_4 = ChatAnthropic(model="claude-sonnet-4-20250514")
    assert sonnet_4.profile
    assert not sonnet_4.profile["structured_output"]

    sonnet_45 = ChatAnthropic(model="claude-sonnet-4-5")
    assert sonnet_45.profile
    assert sonnet_45.profile["structured_output"]
    assert sonnet_45.profile["tool_calling"]

    # A field on one instance's profile can be overwritten in place...
    sonnet_45.profile["tool_calling"] = False
    assert not sonnet_45.profile["tool_calling"]

    # ...and a newly built model of the same name is unaffected by that write.
    fresh = ChatAnthropic(model="claude-sonnet-4-5")
    assert fresh.profile
    assert fresh.profile["tool_calling"]

    # A profile passed to the constructor is used as given.
    custom = ChatAnthropic(
        model="claude-sonnet-4-5",
        profile={"tool_calling": False},
    )
    assert custom.profile == {"tool_calling": False}


def test_profile_1m_context_beta() -> None:
    """Check `max_input_tokens` with and without the `context-1m-2025-08-07` beta."""
    # No betas: the profile reports a 200k input limit.
    default_model = ChatAnthropic(model="claude-sonnet-4-5")
    assert default_model.profile
    assert default_model.profile["max_input_tokens"] == 200000

    # With the context-1m beta the reported limit is one million tokens.
    long_context = ChatAnthropic(
        model="claude-sonnet-4-5",
        betas=["context-1m-2025-08-07"],
    )
    assert long_context.profile
    assert long_context.profile["max_input_tokens"] == 1000000

    # A different beta leaves the limit at 200k.
    other_beta = ChatAnthropic(
        model="claude-sonnet-4-5",
        betas=["token-efficient-tools-2025-02-19"],
    )
    assert other_beta.profile
    assert other_beta.profile["max_input_tokens"] == 200000


async def test_model_profile_not_blocking() -> None:
    """Check that building a model and reading its profile works under blockbuster.

    NOTE(review): `blockbuster_ctx` presumably makes blocking calls raise while
    the context is active; confirm against the blockbuster documentation.
    """
    with blockbuster_ctx():
        llm = ChatAnthropic(model="claude-sonnet-4-5")
        # Only the attribute access matters here; the value is discarded.
        _ = llm.profile


def test_effort_parameter_validation() -> None:
    """Check which `effort` values the model constructor accepts.

    The effort parameter is generally available on Claude Opus 4.6 and Opus 4.5.
    """
    opus_4_5 = "claude-opus-4-5-20251101"

    # Each accepted level is stored on the model unchanged.
    assert ChatAnthropic(model=opus_4_5, effort="high").effort == "high"
    assert ChatAnthropic(model=opus_4_5, effort="medium").effort == "medium"
    assert ChatAnthropic(model=opus_4_5, effort="low").effort == "low"
    assert ChatAnthropic(model="claude-opus-4-6", effort="max").effort == "max"

    # An unknown level is rejected when the model is constructed.
    with pytest.raises(ValidationError, match="Input should be"):
        ChatAnthropic(model=opus_4_5, effort="invalid")  # type: ignore[arg-type]


def test_effort_in_output_config_payload() -> None:
    """Check that the top-level `effort` field lands in the payload's `output_config`."""
    llm = ChatAnthropic(model="claude-opus-4-5-20251101", effort="medium")
    assert llm.effort == "medium"

    # The request payload carries the effort under output_config.
    sent_config = llm._get_request_payload("Test query")["output_config"]
    assert sent_config["effort"] == "medium"


def test_effort_in_output_config() -> None:
    """Check that `effort` may be supplied through the `output_config` field."""
    llm = ChatAnthropic(
        model="claude-opus-4-5-20251101",
        output_config={"effort": "low"},
    )

    # The field is stored as given and its effort is sent in the payload.
    assert llm.output_config == {"effort": "low"}
    sent_config = llm._get_request_payload("Test query")["output_config"]
    assert sent_config["effort"] == "low"


def test_effort_priority() -> None:
    """Check that top-level `effort` wins over the one in `output_config`."""
    # Two conflicting effort values are configured on the same model.
    llm = ChatAnthropic(
        model="claude-opus-4-5-20251101",
        effort="high",
        output_config={"effort": "low"},
    )

    # The payload must carry the top-level value.
    sent_config = llm._get_request_payload("Test query")["output_config"]
    assert sent_config["effort"] == "high"


def test_output_config_without_effort() -> None:
    """Check that an `output_config` with no `effort` key is sent as given."""
    # The key used here is a placeholder standing in for some other field.
    llm = ChatAnthropic(
        model=MODEL_NAME,
        output_config={"some_future_param": "value"},
    )
    request = llm._get_request_payload("Test query")
    assert request["output_config"] == {"some_future_param": "value"}


def test_extras_with_defer_loading() -> None:
    """Check that `defer_loading` from a tool's extras reaches its definition."""

    @tool(extras={"defer_loading": True})
    def get_weather(location: str) -> str:
        """Get weather for a location."""
        return f"Weather in {location}"

    llm = ChatAnthropic(model=MODEL_NAME)  # type: ignore[call-arg]
    bound = llm.bind_tools([get_weather])
    request = bound._get_request_payload(  # type: ignore[attr-defined]
        "test",
        **bound.kwargs,  # type: ignore[attr-defined]
    )

    # Pick the get_weather entry out of the serialized tool list.
    weather_tool = next(
        (
            entry
            for entry in request["tools"]
            if isinstance(entry, dict) and entry.get("name") == "get_weather"
        ),
        None,
    )
    assert weather_tool is not None
    assert weather_tool.get("defer_loading") is True


def test_extras_with_cache_control() -> None:
    """Check that `cache_control` from a tool's extras reaches its definition."""

    @tool(extras={"cache_control": {"type": "ephemeral"}})
    def search_files(query: str) -> str:
        """Search files."""
        return f"Results for {query}"

    llm = ChatAnthropic(model=MODEL_NAME)  # type: ignore[call-arg]
    bound = llm.bind_tools([search_files])
    request = bound._get_request_payload(  # type: ignore[attr-defined]
        "test",
        **bound.kwargs,  # type: ignore[attr-defined]
    )

    # Pick the search_files entry out of the serialized tool list.
    search_tool = next(
        (
            entry
            for entry in request["tools"]
            if isinstance(entry, dict) and entry.get("name") == "search_files"
        ),
        None,
    )
    assert search_tool is not None
    assert search_tool.get("cache_control") == {"type": "ephemeral"}


def test_extras_with_fine_grained_streaming() -> None:
    """Check that `eager_input_streaming` from extras reaches the tool definition."""

    @tool(extras={"eager_input_streaming": True})
    def tell_story(story: str) -> None:
        """Tell a story."""

    llm = ChatAnthropic(model=MODEL_NAME)  # type: ignore[call-arg]
    bound = llm.bind_tools([tell_story])
    request = bound._get_request_payload(  # type: ignore[attr-defined]
        "test",
        **bound.kwargs,  # type: ignore[attr-defined]
    )

    # Pick the tell_story entry out of the serialized tool list.
    tell_story_tool = next(
        (
            entry
            for entry in request["tools"]
            if isinstance(entry, dict) and entry.get("name") == "tell_story"
        ),
        None,
    )
    assert tell_story_tool is not None
    assert tell_story_tool.get("eager_input_streaming") is True


def test_extras_with_input_examples() -> None:
    """Check that `input_examples` from extras reach the tool and add the beta."""

    @tool(
        extras={
            "input_examples": [
                {"location": "San Francisco, CA", "unit": "fahrenheit"},
                {"location": "Tokyo, Japan", "unit": "celsius"},
            ]
        }
    )
    def get_weather(location: str, unit: str = "fahrenheit") -> str:
        """Get weather for a location."""
        return f"Weather in {location}"

    llm = ChatAnthropic(model=MODEL_NAME)  # type: ignore[call-arg]
    bound = llm.bind_tools([get_weather])
    request = bound._get_request_payload(  # type: ignore[attr-defined]
        "test",
        **bound.kwargs,  # type: ignore[attr-defined]
    )

    # Pick the get_weather entry out of the serialized tool list.
    weather_tool = next(
        (
            entry
            for entry in request["tools"]
            if isinstance(entry, dict) and entry.get("name") == "get_weather"
        ),
        None,
    )
    assert weather_tool is not None

    # Both examples are present and the first one is unchanged.
    assert "input_examples" in weather_tool
    examples = weather_tool["input_examples"]
    assert len(examples) == 2
    assert examples[0] == {
        "location": "San Francisco, CA",
        "unit": "fahrenheit",
    }

    # The payload must also list the advanced-tool-use beta.
    assert "betas" in request
    assert "advanced-tool-use-2025-11-20" in request["betas"]


def test_extras_with_multiple_fields() -> None:
    """Check that several extras given together all reach the tool definition."""

    @tool(
        extras={
            "defer_loading": True,
            "cache_control": {"type": "ephemeral"},
            "input_examples": [{"query": "python files"}],
        }
    )
    def search_code(query: str) -> str:
        """Search code."""
        return f"Code for {query}"

    llm = ChatAnthropic(model=MODEL_NAME)  # type: ignore[call-arg]
    bound = llm.bind_tools([search_code])
    request = bound._get_request_payload(  # type: ignore[attr-defined]
        "test",
        **bound.kwargs,  # type: ignore[attr-defined]
    )

    # Pick the search_code entry out of the serialized tool list.
    search_code_tool = next(
        (
            entry
            for entry in request["tools"]
            if isinstance(entry, dict) and entry.get("name") == "search_code"
        ),
        None,
    )
    assert search_code_tool is not None
    assert search_code_tool.get("defer_loading") is True
    assert search_code_tool.get("cache_control") == {"type": "ephemeral"}
    assert "input_examples" in search_code_tool


@pytest.mark.parametrize("block_type", ["reasoning", "function_call"])
def test__format_messages_filters_non_anthropic_blocks(block_type: str) -> None:
    """Check that reasoning/function_call blocks are kept only for Anthropic."""
    foreign_block = {"type": block_type, "other": "foo"}
    question = HumanMessage("hi")  # type: ignore[misc]

    def reply_from(provider: str) -> AIMessage:
        """Build an AI reply holding the block plus text, tagged with `provider`."""
        return AIMessage(  # type: ignore[misc]
            content=[foreign_block, {"type": "text", "text": "hello"}],
            response_metadata={"model_provider": provider},
        )

    # A reply tagged with another provider keeps only its text block.
    _, formatted = _format_messages([question, reply_from("openai")])
    assert formatted[1]["content"] == [{"type": "text", "text": "hello"}]

    # A reply tagged as Anthropic's still contains a block of that type.
    _, formatted = _format_messages([question, reply_from("anthropic")])
    assert any(blk["type"] == block_type for blk in formatted[1]["content"])


def test__format_messages_trailing_whitespace() -> None:
    """Check that only a final assistant message has trailing whitespace removed."""
    question = HumanMessage("foo")  # type: ignore[misc]

    # String content in the last position is trimmed.
    string_reply = AIMessage("thought ")  # type: ignore[misc]
    _, formatted = _format_messages([question, string_reply])
    assert formatted[-1]["content"] == "thought"

    # The text of a list-content block in the last position is trimmed too.
    list_reply = AIMessage([{"type": "text", "text": "thought "}])  # type: ignore[misc]
    _, formatted = _format_messages([question, list_reply])
    assert formatted[-1]["content"][0]["text"] == "thought"  # type: ignore[index]

    # An assistant message followed by another turn keeps its whitespace.
    middle_reply = AIMessage("thought ")  # type: ignore[misc]
    _, formatted = _format_messages([question, middle_reply, question])
    assert formatted[1]["content"] == "thought "


# Shared fixture for the context overflow tests in this module: a
# `BadRequestError` whose message and body both carry a "prompt is too long"
# text. The tests install it as the `side_effect` of the mocked
# `messages.create`. `response` is a bare `MagicMock` that only sets
# `status_code=400`.
_CONTEXT_OVERFLOW_BAD_REQUEST_ERROR = anthropic.BadRequestError(
    message="prompt is too long: 209752 tokens > 200000 maximum",
    response=MagicMock(status_code=400),
    body={
        "type": "error",
        "error": {
            "type": "invalid_request_error",
            "message": "prompt is too long: 209752 tokens > 200000 maximum",
        },
    },
)


def test_context_overflow_error_invoke_sync() -> None:
    """Test context overflow error on invoke (sync).

    The patched `messages.create` raises the shared "prompt is too long"
    `BadRequestError`; `invoke` must raise `ContextOverflowError` whose text
    still contains that message.
    """
    llm = ChatAnthropic(model=MODEL_NAME)

    # The failure is configured on the patch itself so that the
    # `pytest.raises` block holds only the call expected to raise.
    with (
        patch.object(
            llm._client.messages,
            "create",
            side_effect=_CONTEXT_OVERFLOW_BAD_REQUEST_ERROR,
        ),
        pytest.raises(ContextOverflowError) as exc_info,
    ):
        llm.invoke([HumanMessage(content="test")])

    assert "prompt is too long" in str(exc_info.value)


async def test_context_overflow_error_invoke_async() -> None:
    """Test context overflow error on invoke (async).

    The patched async `messages.create` raises the shared "prompt is too long"
    `BadRequestError`; `ainvoke` must raise `ContextOverflowError` whose text
    still contains that message.
    """
    llm = ChatAnthropic(model=MODEL_NAME)

    # The failure is configured on the patch itself so that the
    # `pytest.raises` block holds only the call expected to raise.
    with (
        patch.object(
            llm._async_client.messages,
            "create",
            side_effect=_CONTEXT_OVERFLOW_BAD_REQUEST_ERROR,
        ),
        pytest.raises(ContextOverflowError) as exc_info,
    ):
        await llm.ainvoke([HumanMessage(content="test")])

    assert "prompt is too long" in str(exc_info.value)


def test_context_overflow_error_stream_sync() -> None:
    """Test context overflow error on stream (sync).

    The patched `messages.create` raises the shared "prompt is too long"
    `BadRequestError`; consuming `stream` must raise `ContextOverflowError`
    whose text still contains that message.
    """
    llm = ChatAnthropic(model=MODEL_NAME)

    # The failure is configured on the patch itself so that the
    # `pytest.raises` block holds only the call expected to raise.
    with (
        patch.object(
            llm._client.messages,
            "create",
            side_effect=_CONTEXT_OVERFLOW_BAD_REQUEST_ERROR,
        ),
        pytest.raises(ContextOverflowError) as exc_info,
    ):
        # The stream has to be consumed for the patched call to happen.
        list(llm.stream([HumanMessage(content="test")]))

    assert "prompt is too long" in str(exc_info.value)


async def test_context_overflow_error_stream_async() -> None:
    """Test context overflow error on stream (async).

    The patched async `messages.create` raises the shared "prompt is too long"
    `BadRequestError`; consuming `astream` must raise `ContextOverflowError`
    whose text still contains that message.
    """
    llm = ChatAnthropic(model=MODEL_NAME)

    # The failure is configured on the patch itself, leaving only the stream
    # consumption inside the `pytest.raises` block. PT012 stays suppressed
    # because draining an async iterator needs a compound `async for`.
    with (  # noqa: PT012
        patch.object(
            llm._async_client.messages,
            "create",
            side_effect=_CONTEXT_OVERFLOW_BAD_REQUEST_ERROR,
        ),
        pytest.raises(ContextOverflowError) as exc_info,
    ):
        async for _ in llm.astream([HumanMessage(content="test")]):
            pass

    assert "prompt is too long" in str(exc_info.value)


def test_context_overflow_error_backwards_compatibility() -> None:
    """Test that ContextOverflowError can be caught as BadRequestError.

    Code that catches `anthropic.BadRequestError` must keep working when the
    raised exception is the context overflow error.
    """
    llm = ChatAnthropic(model=MODEL_NAME)

    # The failure is configured on the patch itself so that the
    # `pytest.raises` block holds only the call expected to raise.
    with (
        patch.object(
            llm._client.messages,
            "create",
            side_effect=_CONTEXT_OVERFLOW_BAD_REQUEST_ERROR,
        ),
        pytest.raises(anthropic.BadRequestError) as exc_info,
    ):
        llm.invoke([HumanMessage(content="test")])

    # Verify it's both types (multiple inheritance)
    assert isinstance(exc_info.value, anthropic.BadRequestError)
    assert isinstance(exc_info.value, ContextOverflowError)


def test_bind_tools_drops_forced_tool_choice_when_thinking_enabled() -> None:
    """Regression test for https://github.com/langchain-ai/langchain/issues/35539.

    Anthropic API rejects forced tool_choice when thinking is enabled:
    "Thinking may not be enabled when tool_choice forces tool use."
    bind_tools should drop forced tool_choice and warn.
    """
    chat_model = ChatAnthropic(
        model=MODEL_NAME,
        anthropic_api_key="secret-api-key",
        thinking={"type": "enabled", "budget_tokens": 5000},
    )

    def bind_recording_warnings(
        tool_choice: Any,
    ) -> tuple[Any, list[warnings.WarningMessage]]:
        """Bind `GetWeather` with `tool_choice`; return bound kwargs and warnings."""
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            bound = chat_model.bind_tools([GetWeather], tool_choice=tool_choice)
        return cast("RunnableBinding", bound).kwargs, caught

    # "any" forces tool use: dropped, with a warning naming the reason.
    bound_kwargs, caught = bind_recording_warnings("any")
    assert "tool_choice" not in bound_kwargs
    assert len(caught) == 1
    assert "thinking is enabled" in str(caught[0].message)

    # "auto" does not force tool use: kept, and no warning is emitted.
    bound_kwargs, caught = bind_recording_warnings("auto")
    assert bound_kwargs["tool_choice"] == {"type": "auto"}
    assert len(caught) == 0

    # A specific tool name forces that tool: dropped with a warning.
    bound_kwargs, caught = bind_recording_warnings("GetWeather")
    assert "tool_choice" not in bound_kwargs
    assert len(caught) == 1

    # The dict form naming a tool is dropped with a warning as well.
    bound_kwargs, caught = bind_recording_warnings(
        {"type": "tool", "name": "GetWeather"},
    )
    assert "tool_choice" not in bound_kwargs
    assert len(caught) == 1

    # So is the dict form of "any".
    bound_kwargs, caught = bind_recording_warnings({"type": "any"})
    assert "tool_choice" not in bound_kwargs
    assert len(caught) == 1


def test_bind_tools_drops_forced_tool_choice_when_adaptive_thinking() -> None:
    """Check that adaptive thinking drops forced tool_choice like enabled does."""
    chat_model = ChatAnthropic(
        model=MODEL_NAME,
        anthropic_api_key="secret-api-key",
        thinking={"type": "adaptive"},
    )

    def bind_recording_warnings(
        tool_choice: Any,
    ) -> tuple[Any, list[warnings.WarningMessage]]:
        """Bind `GetWeather` with `tool_choice`; return bound kwargs and warnings."""
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            bound = chat_model.bind_tools([GetWeather], tool_choice=tool_choice)
        return cast("RunnableBinding", bound).kwargs, caught

    # "any" forces tool use: dropped, with a warning naming the reason.
    bound_kwargs, caught = bind_recording_warnings("any")
    assert "tool_choice" not in bound_kwargs
    assert len(caught) == 1
    assert "thinking is enabled" in str(caught[0].message)

    # "auto" does not force tool use: kept, and no warning is emitted.
    bound_kwargs, caught = bind_recording_warnings("auto")
    assert bound_kwargs["tool_choice"] == {"type": "auto"}
    assert len(caught) == 0

    # A specific tool name forces that tool: dropped with a warning.
    bound_kwargs, caught = bind_recording_warnings("GetWeather")
    assert "tool_choice" not in bound_kwargs
    assert len(caught) == 1

    # The dict form naming a tool is dropped with a warning as well.
    bound_kwargs, caught = bind_recording_warnings(
        {"type": "tool", "name": "GetWeather"},
    )
    assert "tool_choice" not in bound_kwargs
    assert len(caught) == 1

    # So is the dict form of "any".
    bound_kwargs, caught = bind_recording_warnings({"type": "any"})
    assert "tool_choice" not in bound_kwargs
    assert len(caught) == 1


def test_bind_tools_keeps_forced_tool_choice_when_thinking_disabled() -> None:
    """Check that forced tool_choice passes through when thinking is not active."""
    # Extra constructor arguments for each "thinking is off" configuration.
    thinking_settings: list[dict[str, Any]] = [
        {},  # thinking not passed at all
        {"thinking": None},  # thinking explicitly None
        {"thinking": {"type": "disabled"}},  # thinking explicitly disabled
    ]

    for extra_kwargs in thinking_settings:
        chat_model = ChatAnthropic(
            model=MODEL_NAME,
            anthropic_api_key="secret-api-key",
            **extra_kwargs,
        )
        bound = chat_model.bind_tools([GetWeather], tool_choice="any")
        # tool_choice="any" must be kept, in its dict form.
        assert cast("RunnableBinding", bound).kwargs["tool_choice"] == {"type": "any"}


def test_thinking_in_params_recognizes_adaptive() -> None:
    """Check that `_thinking_in_params` is true for enabled and adaptive only."""
    thinking_on: tuple[dict[str, Any], ...] = (
        {"thinking": {"type": "enabled", "budget_tokens": 5000}},
        {"thinking": {"type": "adaptive"}},
    )
    thinking_off: tuple[dict[str, Any], ...] = (
        {"thinking": {"type": "disabled"}},
        {"thinking": {}},
        {},
    )

    for params in thinking_on:
        assert _thinking_in_params(params)
    for params in thinking_off:
        assert not _thinking_in_params(params)


def test_effort_xhigh() -> None:
    """The `xhigh` effort level is accepted and forwarded via `output_config`."""
    llm = ChatAnthropic(model="claude-opus-4-6", effort="xhigh")
    assert llm.effort == "xhigh"

    sent_output_config = llm._get_request_payload("Test query")["output_config"]
    assert sent_output_config["effort"] == "xhigh"


def test_output_config_top_level_field() -> None:
    """`output_config` is stored on its own field rather than in `model_kwargs`."""
    expected_budget = {"type": "tokens", "total": 50000}
    expected_config = {"effort": "low", "task_budget": expected_budget}

    llm = ChatAnthropic(
        model=MODEL_NAME,
        output_config={
            "effort": "low",
            "task_budget": {"type": "tokens", "total": 50000},
        },
    )

    # The value is kept verbatim on the dedicated field...
    assert llm.output_config == expected_config
    # ...and is not routed into the generic model_kwargs bucket.
    assert "output_config" not in llm.model_kwargs

    # The request payload carries both entries through unchanged.
    sent_output_config = llm._get_request_payload("Test query")["output_config"]
    assert sent_output_config["effort"] == "low"
    assert sent_output_config["task_budget"] == expected_budget


def test_output_config_merged_with_kwargs() -> None:
    """Call-time `output_config` takes precedence over the field-level value."""
    llm = ChatAnthropic(model=MODEL_NAME, output_config={"effort": "low"})

    request = llm._get_request_payload(
        "Test query",
        output_config={
            "effort": "high",
            "task_budget": {"type": "tokens", "total": 50000},
        },
    )
    sent_output_config = request["output_config"]

    # The field-level "low" effort is replaced by the call-time "high".
    assert sent_output_config["effort"] == "high"
    # The call-time task budget is present in the payload as given.
    assert sent_output_config["task_budget"] == {"type": "tokens", "total": 50000}


def test_task_budget_auto_appends_beta() -> None:
    """A `task_budget` in `output_config` adds the task-budget beta to the payload."""
    task_budget_beta = "task-budgets-2026-03-13"
    llm = ChatAnthropic(
        model=MODEL_NAME,
        output_config={"task_budget": {"type": "tokens", "total": 128000}},
    )

    request = llm._get_request_payload("Test query")

    assert "betas" in request
    assert task_budget_beta in request["betas"]


def test_task_budget_beta_not_duplicated() -> None:
    """An already-configured task-budget beta appears only once in the payload."""
    task_budget_beta = "task-budgets-2026-03-13"
    llm = ChatAnthropic(
        model=MODEL_NAME,
        betas=["task-budgets-2026-03-13"],
        output_config={"task_budget": {"type": "tokens", "total": 128000}},
    )

    sent_betas = llm._get_request_payload("Test query")["betas"]

    assert sent_betas.count(task_budget_beta) == 1


def test_no_task_budget_no_beta() -> None:
    """Without a `task_budget`, the task-budget beta must not be requested."""
    llm = ChatAnthropic(model=MODEL_NAME, output_config={"effort": "high"})
    request = llm._get_request_payload("Test query")

    # A missing or empty `betas` entry trivially satisfies the check.
    assert "task-budgets-2026-03-13" not in (request.get("betas") or [])


def test_anthropic_stream_v2_lifecycle() -> None:
    """Check `stream_v2` lifecycle events for a thinking + text + tool_use stream.

    A mocked `_create` replays a realistic raw Anthropic event sequence:
    `message_start`, then one `content_block_start` / `content_block_delta` /
    `content_block_stop` group per block (each carrying an integer `index`),
    then `message_delta` and `message_stop`. The events produced by
    `stream_v2` must pass `assert_valid_event_stream`, finish the blocks in
    order with wire indices 0, 1, 2, carry the fully accumulated content on
    each `content-block-finish`, and end with a `message-finish` event whose
    metadata holds the `tool_use` stop reason.
    """
    from anthropic.types import (
        InputJSONDelta,
        RawContentBlockDeltaEvent,
        RawContentBlockStartEvent,
        RawContentBlockStopEvent,
        RawMessageDeltaEvent,
        RawMessageStartEvent,
        RawMessageStopEvent,
        TextDelta,
        ThinkingBlock,
        ThinkingDelta,
        ToolUseBlock,
    )
    from anthropic.types.raw_message_delta_event import Delta as RawMessageDelta
    from anthropic.types.raw_message_delta_event import (
        MessageDeltaUsage as RawMessageDeltaUsage,
    )
    from langchain_tests.utils.stream_lifecycle import assert_valid_event_stream

    # Small builders for the three per-block raw event kinds; they only fill
    # in the constant `type` tag so the sequence below stays readable.
    def block_start(index: int, content_block: Any) -> RawContentBlockStartEvent:
        return RawContentBlockStartEvent(
            content_block=content_block,
            index=index,
            type="content_block_start",
        )

    def block_delta(index: int, delta: Any) -> RawContentBlockDeltaEvent:
        return RawContentBlockDeltaEvent(
            delta=delta,
            index=index,
            type="content_block_delta",
        )

    def block_stop(index: int) -> RawContentBlockStopEvent:
        return RawContentBlockStopEvent(index=index, type="content_block_stop")

    initial_message = Message(
        id="msg_1",
        content=[],
        model=MODEL_NAME,
        role="assistant",
        stop_reason=None,
        stop_sequence=None,
        usage=Usage(input_tokens=10, output_tokens=0),
        type="message",
    )

    # Block 0: thinking.
    thinking_events = [
        block_start(0, ThinkingBlock(signature="", thinking="", type="thinking")),
        block_delta(0, ThinkingDelta(thinking="Let me ", type="thinking_delta")),
        block_delta(0, ThinkingDelta(thinking="think.", type="thinking_delta")),
        block_stop(0),
    ]
    # Block 1: text.
    text_events = [
        block_start(1, TextBlock(text="", type="text")),
        block_delta(1, TextDelta(text="The answer ", type="text_delta")),
        block_delta(1, TextDelta(text="is 42.", type="text_delta")),
        block_stop(1),
    ]
    # Block 2: tool_use, with its JSON input split across two deltas.
    tool_use_events = [
        block_start(
            2,
            ToolUseBlock(id="toolu_1", input={}, name="search", type="tool_use"),
        ),
        block_delta(
            2,
            InputJSONDelta(partial_json='{"q":', type="input_json_delta"),
        ),
        block_delta(
            2,
            InputJSONDelta(partial_json=' "weather"}', type="input_json_delta"),
        ),
        block_stop(2),
    ]

    raw_events: list[Any] = [
        RawMessageStartEvent(message=initial_message, type="message_start"),
        *thinking_events,
        *text_events,
        *tool_use_events,
        # Closing message_delta carries the final usage and the stop_reason.
        RawMessageDeltaEvent(
            delta=RawMessageDelta(stop_reason="tool_use", stop_sequence=None),
            type="message_delta",
            usage=RawMessageDeltaUsage(
                output_tokens=50,
                input_tokens=10,
                cache_read_input_tokens=0,
                cache_creation_input_tokens=0,
            ),
        ),
        RawMessageStopEvent(type="message_stop"),
    ]

    # Thinking is switched on so that `_stream` runs with
    # `coerce_content_to_string=False`, giving every content block an integer
    # `index` field: the structured path the protocol bridge exercises. The
    # default (no tools / thinking / documents) coerces text to a plain
    # string, which strips indices; that separate path is not covered here.
    llm = ChatAnthropic(
        model=MODEL_NAME,
        thinking={"type": "enabled", "budget_tokens": 1024},
    )

    def fake_create(_payload: Any) -> list:
        return raw_events

    with patch.object(llm, "_create", fake_create):
        stream_events = list(llm.stream_v2("Test query"))

    assert_valid_event_stream(stream_events)

    finished = [ev for ev in stream_events if ev["event"] == "content-block-finish"]
    finished_types = [ev["content_block"]["type"] for ev in finished]
    assert finished_types == ["reasoning", "text", "tool_call"]
    assert [ev["index"] for ev in finished] == [0, 1, 2]

    # The accumulated content must reach content-block-finish intact.
    reasoning, text, tool_call = (
        cast("dict[str, Any]", ev["content_block"]) for ev in finished
    )
    assert reasoning["reasoning"] == "Let me think."
    assert text["text"] == "The answer is 42."
    assert tool_call["args"] == {"q": "weather"}
    assert tool_call["name"] == "search"

    # message-finish exposes the tool_use stop reason inside `metadata`
    # (protocol 0.0.9 moved the finish reason off the top-level event and
    # into `metadata`, where the bridge deposits the provider's raw
    # `stop_reason` alongside other response metadata).
    last_event = stream_events[-1]
    assert last_event["event"] == "message-finish"
    assert last_event["metadata"]["stop_reason"] == "tool_use"