# langchain/libs/partners/anthropic/langchain_anthropic/chat_models.py

"""Anthropic chat models."""

from __future__ import annotations

import copy
import json
import re
import warnings
from collections.abc import AsyncIterator, Callable, Iterator, Mapping, Sequence
from functools import cached_property
from operator import itemgetter
from typing import Any, Final, Literal, cast

import anthropic
from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.exceptions import OutputParserException
from langchain_core.language_models import (
    LanguageModelInput,
    ModelProfile,
    ModelProfileRegistry,
)
from langchain_core.language_models.chat_models import BaseChatModel, LangSmithParams
from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    HumanMessage,
    SystemMessage,
    ToolCall,
    ToolMessage,
    is_data_content_block,
)
from langchain_core.messages import content as types
from langchain_core.messages.ai import InputTokenDetails, UsageMetadata
from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk
from langchain_core.output_parsers import (
    JsonOutputKeyToolsParser,
    JsonOutputParser,
    PydanticOutputParser,
    PydanticToolsParser,
)
from langchain_core.output_parsers.base import OutputParserLike
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough
from langchain_core.tools import BaseTool
from langchain_core.utils import from_env, get_pydantic_field_names, secret_from_env
from langchain_core.utils.function_calling import (
    convert_to_json_schema,
    convert_to_openai_tool,
)
from langchain_core.utils.pydantic import is_basemodel_subclass
from langchain_core.utils.utils import _build_model_kwargs
from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
from typing_extensions import NotRequired, Self, TypedDict

from langchain_anthropic._client_utils import (
    _get_default_async_httpx_client,
    _get_default_httpx_client,
)
from langchain_anthropic._compat import _convert_from_v1_to_anthropic
from langchain_anthropic.data._profiles import _PROFILES
from langchain_anthropic.output_parsers import extract_tool_calls

# Maps a LangChain message `type` string to the role name placed in the
# formatted Anthropic message (`_format_messages` indexes this with
# `message.type`, so a type missing from this table raises `KeyError` there).
_message_type_lookups = {
    "human": "user",
    "ai": "assistant",
    "AIMessageChunk": "assistant",
    "HumanMessageChunk": "user",
}

# The packaged profile data, re-typed for the type checker only (`cast` performs
# no runtime conversion). Looked up by model name in
# `_get_default_model_profile`.
_MODEL_PROFILES = cast(ModelProfileRegistry, _PROFILES)


def _get_default_model_profile(model_name: str) -> ModelProfile:
    """Look up the packaged default profile for `model_name`.

    Args:
        model_name: The model identifier used as the registry key.

    Returns:
        A shallow copy of the registered profile, so callers can modify the
        result without touching the shared registry entry. An empty dict is
        returned when the model has no (or an empty) registered profile.
    """
    registered = _MODEL_PROFILES.get(model_name)
    return registered.copy() if registered else {}


# Default max output tokens per model family. Keys are model names with any
# trailing version/date/"latest" segment removed.
_MODEL_DEFAULT_MAX_OUTPUT_TOKENS: Final[dict[str, int]] = {
    # Listed old to new
    "claude-3-haiku": 4096,  # Claude Haiku 3
    "claude-3-5-haiku": 8192,  # Claude Haiku 3.5
    "claude-3-7-sonnet": 64000,  # Claude Sonnet 3.7
    "claude-sonnet-4": 64000,  # Claude Sonnet 4
    "claude-opus-4": 32000,  # Claude Opus 4
    "claude-opus-4-1": 32000,  # Claude Opus 4.1
    "claude-sonnet-4-5": 64000,  # Claude Sonnet 4.5
    "claude-haiku-4-5": 64000,  # Claude Haiku 4.5
}
# Used when the model is unset or does not map to a known family.
_FALLBACK_MAX_OUTPUT_TOKENS: Final[int] = 4096


def _default_max_tokens_for(model: str | None) -> int:
    """Return the default max output tokens for an Anthropic model (with fallback).

    Resolution order:

    1. The model name itself, if it is a key of the family table. This covers
       aliases passed without a suffix (e.g. `'claude-haiku-4-5'`), which would
       otherwise lose their last version segment and miss the table.
    2. The model name with its final `-`-separated segment removed (e.g.
       `'claude-sonnet-4-5-20250929'` -> `'claude-sonnet-4-5'`,
       `'claude-3-5-haiku-latest'` -> `'claude-3-5-haiku'`).
    3. The fallback value.

    See the Claude docs for [Max Tokens limits](https://platform.claude.com/docs/en/about-claude/models/overview#model-comparison-table).

    Args:
        model: The model identifier, or `None`/empty when not set.

    Returns:
        The default max output token count for the model's family, or the
        fallback when the model is unset or unrecognized.
    """
    if not model:
        return _FALLBACK_MAX_OUTPUT_TOKENS

    exact = _MODEL_DEFAULT_MAX_OUTPUT_TOKENS.get(model)
    if exact is not None:
        return exact

    # Drop the trailing segment (date stamp, "latest", ...). A name without any
    # "-" yields an empty family here, which simply misses the table.
    family = model.rpartition("-")[0]
    return _MODEL_DEFAULT_MAX_OUTPUT_TOKENS.get(family, _FALLBACK_MAX_OUTPUT_TOKENS)


class AnthropicTool(TypedDict):
    """Anthropic tool definition.

    `name` and `input_schema` are required keys; the keys declared with
    `NotRequired` may be omitted.
    """

    # Name of the tool.
    name: str

    # Schema describing the tool's input, as a plain dict.
    # NOTE(review): presumably a JSON Schema object - confirm against the API docs.
    input_schema: dict[str, Any]

    # Optional description of the tool.
    description: NotRequired[str]

    # Optional flag. NOTE(review): presumably toggles Anthropic's strict tool use
    # for this tool - confirm against `bind_tools`.
    strict: NotRequired[bool]

    # Optional cache-control settings, as a string-to-string dict.
    cache_control: NotRequired[dict[str, str]]


# Some tool types require specific beta headers to be enabled.
# Maps a tool `type` string to the beta header value that tool type requires.
# Note that several tool types may share one beta value (both tool-search
# variants below do).
# NOTE(review): the code that reads this mapping is outside this section of the
# file; whether keys are matched exactly or as patterns should be confirmed there.
_TOOL_TYPE_TO_BETA: dict[str, str] = {
    "web_fetch_20250910": "web-fetch-2025-09-10",
    "code_execution_20250522": "code-execution-2025-05-22",
    "code_execution_20250825": "code-execution-2025-08-25",
    "memory_20250818": "context-management-2025-06-27",
    "computer_20250124": "computer-use-2025-01-24",
    "computer_20251124": "computer-use-2025-11-24",
    "tool_search_tool_regex_20251119": "advanced-tool-use-2025-11-20",
    "tool_search_tool_bm25_20251119": "advanced-tool-use-2025-11-20",
}


def _is_builtin_tool(tool: Any) -> bool:
    """Report whether `tool` looks like one of Anthropic's built-in tools.

    A tool qualifies when it is a dict whose `'type'` value is a non-empty
    string starting with one of the known built-in tool prefixes. Anything
    else (non-dict input, missing/empty/non-string type, unknown prefix) is
    not considered built-in.

    [Claude docs for built-in tools](https://platform.claude.com/docs/en/agents-and-tools/tool-use/overview)

    Args:
        tool: The candidate tool definition; may be any object.

    Returns:
        `True` if `tool` matches a built-in tool type prefix, else `False`.
    """
    builtin_prefixes = (
        "text_editor_",
        "computer_",
        "bash_",
        "web_search_",
        "web_fetch_",
        "code_execution_",
        "memory_",
        "tool_search_",
    )
    declared_type = tool.get("type") if isinstance(tool, dict) else None
    # An empty string matches no prefix, so it needs no separate check.
    return isinstance(declared_type, str) and declared_type.startswith(
        builtin_prefixes
    )


def _format_image(url: str) -> dict:
    """Convert an OpenAI-style `image_url` string into an Anthropic image source.

    Two input shapes are recognized:

    * A base64 data URI (`data:image/<subtype>;base64,<payload>`), returned as
      `{"type": "base64", "media_type": "image/jpeg", "data": "/9j/4AAQSkZJRg..."}`.
    * An `http://` or `https://` URL, returned as
      `{"type": "url", "url": "https://example.com/image.jpg"}`.

    Args:
        url: The value of `part["image_url"]["url"]`.

    Returns:
        The Anthropic `source` dict for the image.

    Raises:
        ValueError: If `url` is neither an image data URI nor an HTTP(S) URL.
    """
    data_uri_pattern = r"^data:(?P<media_type>image/.+);base64,(?P<data>.+)$"
    if (data_uri := re.match(data_uri_pattern, url)) is not None:
        media_type, payload = data_uri.group("media_type", "data")
        return {"type": "base64", "media_type": media_type, "data": payload}

    if re.match(r"^https?://.*$", url) is not None:
        return {"type": "url", "url": url}

    msg = (
        "Malformed url parameter."
        " Must be either an image URL (https://example.com/image.jpg)"
        " or base64 encoded string (data:image/png;base64,'/9j/4AAQSk'...)"
    )
    raise ValueError(msg)


def _merge_messages(
    messages: Sequence[BaseMessage],
) -> list[SystemMessage | AIMessage | HumanMessage]:
    """Collapse adjacent system/system and human/human messages into one message.

    Tool messages are first rewritten as human messages: content that is already
    a non-empty list of `tool_result` blocks is used as-is, anything else is
    wrapped in a single `tool_result` block. A message is then merged into its
    predecessor when both are system messages or both are human messages; the
    merged message is a copy of the later one whose content is the list of
    blocks from both (string content becomes a `text` block).

    Args:
        messages: The messages to merge, in conversation order.

    Returns:
        A new list of messages with adjacent same-kind runs merged.
    """
    result: list = []
    for message in messages:
        current: BaseMessage = message
        if isinstance(message, ToolMessage):
            tool_content = message.content
            already_tool_results = (
                isinstance(tool_content, list)
                and bool(tool_content)
                and all(
                    isinstance(item, dict) and item.get("type") == "tool_result"
                    for item in tool_content
                )
            )
            if already_tool_results:
                current = HumanMessage(tool_content)
            else:
                wrapped_result = {
                    "type": "tool_result",
                    "content": tool_content,
                    "tool_use_id": message.tool_call_id,
                    "is_error": message.status == "error",
                }
                current = HumanMessage([wrapped_result])

        previous = result[-1] if result else None
        mergeable = any(
            isinstance(current, kind) and isinstance(previous, kind)
            for kind in (SystemMessage, HumanMessage)
        )
        if not mergeable:
            result.append(current)
            continue

        # Start from the predecessor's content as a list of blocks (copied so
        # the predecessor's own list is left untouched), then add the current
        # message's content after it.
        earlier_content = cast("BaseMessage", previous).content
        combined: list = (
            [{"type": "text", "text": earlier_content}]
            if isinstance(earlier_content, str)
            else copy.copy(cast("list", earlier_content))
        )
        if isinstance(current.content, str):
            combined.append({"type": "text", "text": current.content})
        else:
            combined.extend(current.content)
        result[-1] = current.model_copy(update={"content": combined})
    return result


def _format_data_content_block(block: dict) -> dict:
    """Translate a standard data content block into Anthropic's block format.

    `image` blocks become Anthropic `image` blocks; `file` and `text-plain`
    blocks become `document` blocks. The block's `source` is derived from the
    first matching key, in this order: `url`, base64 data (`base64` key or
    `source_type == "base64"`), inline text (`file` blocks with
    `source_type == "text"` only), `file_id`, then `source_type == "id"`.

    The optional keys `cache_control`, `citations`, `title` and `context` are
    carried over, taken from the block itself, else from its `extras`, else from
    its `metadata` (kept for backward compatibility).

    Args:
        block: The data content block; must contain a `type` key.

    Returns:
        The Anthropic-formatted block.

    Raises:
        ValueError: If the block type is unsupported, or an `image`/`file` block
            has none of the recognized source keys.
    """
    kind = block["type"]

    if kind == "image":
        if "url" in block:
            image_url = block["url"]
            # A data URI is parsed into a base64 source; anything else is
            # passed through as a plain URL source.
            source = (
                _format_image(image_url)
                if image_url.startswith("data:")
                else {"type": "url", "url": image_url}
            )
        elif "base64" in block or block.get("source_type") == "base64":
            source = {
                "type": "base64",
                "media_type": block["mime_type"],
                "data": block.get("base64") or block.get("data", ""),
            }
        elif "file_id" in block:
            source = {"type": "file", "file_id": block["file_id"]}
        elif block.get("source_type") == "id":
            source = {"type": "file", "file_id": block["id"]}
        else:
            msg = (
                "Anthropic only supports 'url', 'base64', or 'id' keys for image "
                "content blocks."
            )
            raise ValueError(msg)
        formatted = {"type": "image", "source": source}

    elif kind == "file":
        if "url" in block:
            source = {"type": "url", "url": block["url"]}
        elif "base64" in block or block.get("source_type") == "base64":
            source = {
                "type": "base64",
                "media_type": block.get("mime_type") or "application/pdf",
                "data": block.get("base64") or block.get("data", ""),
            }
        elif block.get("source_type") == "text":
            source = {
                "type": "text",
                "media_type": block.get("mime_type") or "text/plain",
                "data": block["text"],
            }
        elif "file_id" in block:
            source = {"type": "file", "file_id": block["file_id"]}
        elif block.get("source_type") == "id":
            source = {"type": "file", "file_id": block["id"]}
        else:
            msg = (
                "Anthropic only supports 'url', 'base64', or 'id' keys for file "
                "content blocks."
            )
            raise ValueError(msg)
        formatted = {"type": "document", "source": source}

    elif kind == "text-plain":
        formatted = {
            "type": "document",
            "source": {
                "type": "text",
                "media_type": block.get("mime_type") or "text/plain",
                "data": block["text"],
            },
        }

    else:
        msg = f"Block of type {block['type']} is not supported."
        raise ValueError(msg)

    # Carry over optional keys, preferring the block itself, then `extras`,
    # then the legacy `metadata` location.
    lookup_order = (block, block.get("extras"), block.get("metadata"))
    for key in ("cache_control", "citations", "title", "context"):
        for candidate in lookup_order:
            if candidate and key in candidate:
                formatted[key] = candidate[key]
                break

    return formatted


def _format_messages(
    messages: Sequence[BaseMessage],
) -> tuple[str | list[dict] | None, list[dict]]:
    """Format messages for Anthropic's API.

    Messages are first passed through `_merge_messages`. The system message (if
    any) is pulled out separately, and every other message becomes a
    `{"role": ..., "content": ...}` dict whose content is either the original
    string or a list of content blocks converted block-by-block.

    Args:
        messages: The conversation to format.

    Returns:
        A `(system, formatted_messages)` tuple. `system` is the system message
        content (a string, a list of block dicts, or `None` when there is no
        system message); `formatted_messages` is the list of role/content dicts
        for the remaining messages.

    Raises:
        ValueError: If a second system message is encountered after merging, if
            message content is neither a string nor a list, if a dict content
            block has no `type` key, or if a content block is neither a string
            nor a dict.
    """
    system: str | list[dict] | None = None
    formatted_messages: list[dict] = []
    merged_messages = _merge_messages(messages)
    # NOTE: despite its leading underscore, `_i` is used near the end of the loop
    # to detect whether the current message is the last one.
    for _i, message in enumerate(merged_messages):
        if message.type == "system":
            # Adjacent system messages were already combined by
            # `_merge_messages`, so seeing a second one here means they were
            # separated by other messages.
            if system is not None:
                msg = "Received multiple non-consecutive system messages."
                raise ValueError(msg)
            if isinstance(message.content, list):
                # Normalize bare strings in the list into text blocks.
                system = [
                    (
                        block
                        if isinstance(block, dict)
                        else {"type": "text", "text": block}
                    )
                    for block in message.content
                ]
            else:
                system = message.content
            continue

        # Raises KeyError for message types missing from `_message_type_lookups`.
        role = _message_type_lookups[message.type]
        content: str | list

        if not isinstance(message.content, str):
            # parse as dict
            if not isinstance(message.content, list):
                msg = "Anthropic message content must be str or list of dicts"
                raise ValueError(
                    msg,
                )

            # populate content
            content = []
            for block in message.content:
                if isinstance(block, str):
                    content.append({"type": "text", "text": block})
                elif isinstance(block, dict):
                    if "type" not in block:
                        msg = "Dict content block must have a type key"
                        raise ValueError(msg)
                    if block["type"] == "image_url":
                        # convert format
                        source = _format_image(block["image_url"]["url"])
                        content.append({"type": "image", "source": source})
                    elif is_data_content_block(block):
                        content.append(_format_data_content_block(block))
                    elif block["type"] == "tool_use":
                        # If a tool_call with the same id as a tool_use content block
                        # exists, the tool_call is preferred.
                        if isinstance(message, AIMessage) and block["id"] in [
                            tc["id"] for tc in message.tool_calls
                        ]:
                            overlapping = [
                                tc
                                for tc in message.tool_calls
                                if tc["id"] == block["id"]
                            ]
                            content.extend(
                                _lc_tool_calls_to_anthropic_tool_use_blocks(
                                    overlapping,
                                ),
                            )
                        else:
                            # A truthy `input` wins. A missing or falsy one
                            # (e.g. `{}`) falls back to parsing `partial_json`,
                            # and to `{}` when that is absent or not valid JSON.
                            if tool_input := block.get("input"):
                                args = tool_input
                            elif "partial_json" in block:
                                try:
                                    args = json.loads(block["partial_json"] or "{}")
                                except json.JSONDecodeError:
                                    args = {}
                            else:
                                args = {}
                            content.append(
                                _AnthropicToolUse(
                                    type="tool_use",
                                    name=block["name"],
                                    input=args,
                                    id=block["id"],
                                )
                            )
                    elif block["type"] in ("server_tool_use", "mcp_tool_use"):
                        # Keep only the listed keys; everything else on the
                        # block (e.g. `partial_json`) is dropped.
                        formatted_block = {
                            k: v
                            for k, v in block.items()
                            if k
                            in (
                                "type",
                                "id",
                                "input",
                                "name",
                                "server_name",  # for mcp_tool_use
                                "cache_control",
                            )
                        }
                        # Attempt to parse streamed output
                        if block.get("input") == {} and "partial_json" in block:
                            try:
                                input_ = json.loads(block["partial_json"])
                                if input_:
                                    formatted_block["input"] = input_
                            except json.JSONDecodeError:
                                # Best effort: keep the empty `input` as-is.
                                pass
                        content.append(formatted_block)
                    elif block["type"] == "text":
                        text = block.get("text", "")
                        # Only add non-empty strings for now as empty ones are not
                        # accepted.
                        # https://github.com/anthropics/anthropic-sdk-python/issues/461
                        if text.strip():
                            formatted_block = {
                                k: v
                                for k, v in block.items()
                                if k in ("type", "text", "cache_control", "citations")
                            }
                            # Clean up citations to remove null file_id fields
                            if formatted_block.get("citations"):
                                cleaned_citations = []
                                for citation in formatted_block["citations"]:
                                    cleaned_citation = {
                                        k: v
                                        for k, v in citation.items()
                                        if not (k == "file_id" and v is None)
                                    }
                                    cleaned_citations.append(cleaned_citation)
                                formatted_block["citations"] = cleaned_citations
                            content.append(formatted_block)
                    elif block["type"] == "thinking":
                        content.append(
                            {
                                k: v
                                for k, v in block.items()
                                if k
                                in ("type", "thinking", "cache_control", "signature")
                            },
                        )
                    elif block["type"] == "redacted_thinking":
                        content.append(
                            {
                                k: v
                                for k, v in block.items()
                                if k in ("type", "cache_control", "data")
                            },
                        )
                    elif (
                        block["type"] == "tool_result"
                        and isinstance(block.get("content"), list)
                        and any(
                            isinstance(item, dict)
                            and item.get("type") == "tool_reference"
                            for item in block["content"]
                        )
                    ):
                        # Tool search results with tool_reference blocks
                        # (their content is passed through without reformatting)
                        content.append(
                            {
                                k: v
                                for k, v in block.items()
                                if k
                                in (
                                    "type",
                                    "content",
                                    "tool_use_id",
                                    "cache_control",
                                )
                            },
                        )
                    elif block["type"] == "tool_result":
                        # Regular tool results that need content formatting
                        # The nested content is formatted by recursing on a
                        # temporary HumanMessage and taking the content of the
                        # single message that comes back.
                        tool_content = _format_messages(
                            [HumanMessage(block["content"])],
                        )[1][0]["content"]
                        content.append({**block, "content": tool_content})
                    elif block["type"] in (
                        "code_execution_tool_result",
                        "bash_code_execution_tool_result",
                        "text_editor_code_execution_tool_result",
                        "mcp_tool_result",
                        "web_search_tool_result",
                        "web_fetch_tool_result",
                    ):
                        content.append(
                            {
                                k: v
                                for k, v in block.items()
                                if k
                                in (
                                    "type",
                                    "content",
                                    "tool_use_id",
                                    "is_error",  # for mcp_tool_result
                                    "cache_control",
                                    "retrieved_at",  # for web_fetch_tool_result
                                )
                            },
                        )
                    else:
                        # Unrecognized block types are forwarded unchanged.
                        content.append(block)
                else:
                    msg = (
                        f"Content blocks must be str or dict, instead was: "
                        f"{type(block)}"
                    )
                    raise ValueError(
                        msg,
                    )
        else:
            content = message.content

        # Ensure all tool_calls have a tool_use content block
        if isinstance(message, AIMessage) and message.tool_calls:
            # Normalize content to a list: empty content becomes `[]`, and a
            # non-empty string becomes a single text block.
            content = content or []
            content = (
                [{"type": "text", "text": message.content}]
                if isinstance(content, str) and content
                else content
            )
            tool_use_ids = [
                cast("dict", block)["id"]
                for block in content
                if cast("dict", block)["type"] == "tool_use"
            ]
            # Append a tool_use block for every tool call not already present.
            missing_tool_calls = [
                tc for tc in message.tool_calls if tc["id"] not in tool_use_ids
            ]
            cast("list", content).extend(
                _lc_tool_calls_to_anthropic_tool_use_blocks(missing_tool_calls),
            )

        # Drop empty assistant messages unless they are the final message.
        if not content and role == "assistant" and _i < len(merged_messages) - 1:
            # anthropic.BadRequestError: Error code: 400: all messages must have
            # non-empty content except for the optional final assistant message
            continue
        formatted_messages.append({"role": role, "content": content})
    return system, formatted_messages


def _handle_anthropic_bad_request(e: anthropic.BadRequestError) -> None:
    """Re-raise an Anthropic `BadRequestError`, warning first for a known case.

    When the error message indicates that no messages were sent, a warning
    is emitted before the error is re-raised.

    Args:
        e: The error to handle.

    Raises:
        anthropic.BadRequestError: Always; `e` itself is re-raised.
    """
    if "messages: at least one message is required" in e.message:
        message = "Received only system message(s). "
        # `stacklevel=2` attributes the warning to the caller of this helper.
        warnings.warn(message, stacklevel=2)
    # Raise `e` explicitly rather than with a bare `raise`: a bare `raise` only
    # works while the caller is inside an `except` block and otherwise fails
    # with "RuntimeError: No active exception to re-raise".
    raise e


class ChatAnthropic(BaseChatModel):
    """Anthropic (Claude) chat models.

    See the [Claude Platform docs](https://platform.claude.com/docs/en/about-claude/models/overview)
    for a list of the latest models, their capabilities, and pricing.

    Setup:
        Install `langchain-anthropic` and set environment variable `ANTHROPIC_API_KEY`.

        ```bash
        pip install -U langchain-anthropic
        export ANTHROPIC_API_KEY="your-api-key"
        ```

    Key init args:
        **Completion params:**

        * [`model`][langchain_anthropic.chat_models.ChatAnthropic.model]: Name of
            Anthropic model to use. e.g. `'claude-sonnet-4-5-20250929'`.
        * [`temperature`][langchain_anthropic.chat_models.ChatAnthropic.temperature]:
            Sampling temperature. Ranges from `0.0` to `1.0`.
        * [`max_tokens`][langchain_anthropic.chat_models.ChatAnthropic.max_tokens]: Max
            number of tokens to generate.

        **Client params:**

        * [`timeout`][langchain_anthropic.chat_models.ChatAnthropic.default_request_timeout]:
            Timeout for requests.
        * [`anthropic_proxy`][langchain_anthropic.chat_models.ChatAnthropic.anthropic_proxy]:
            Proxy to use for the Anthropic clients, will be used for every API call.
            If not passed in will be read from env var `ANTHROPIC_PROXY`.
        * [`max_retries`][langchain_anthropic.chat_models.ChatAnthropic.max_retries]:
            Max number of retries if a request fails.
        * [`api_key`][langchain_anthropic.chat_models.ChatAnthropic.anthropic_api_key]:
            Anthropic API key. If not passed in will be read from env var
            `ANTHROPIC_API_KEY`.
        * [`base_url`][langchain_anthropic.chat_models.ChatAnthropic.anthropic_api_url]:
            Base URL for API requests. Only specify if using a proxy or service emulator.

        See full list of supported init args and their descriptions below.

    ???+ example "Instantiate"

        ```python
        from langchain_anthropic import ChatAnthropic

        model = ChatAnthropic(
            model="claude-sonnet-4-5-20250929",
            temperature=0,
            max_tokens=1024,
            timeout=None,
            max_retries=2,
            # api_key="...",
            # base_url="...",
            # other params...
        )
        ```

    ???+ note

        Any param which is not explicitly supported will be passed directly to
        `Anthropic.messages.create(...)` each time the model is invoked.

        !!! example

            ```python
            from langchain_anthropic import ChatAnthropic
            import anthropic

            ChatAnthropic(..., extra_headers={}).invoke(...)

            # Results in underlying API call of:

            anthropic.Anthropic(..).messages.create(..., extra_headers={})

            # ... which is also equivalent to:

            ChatAnthropic(...).invoke(..., extra_headers={})
            ```

    ???+ example "Invoke"

        ```python
        messages = [
            (
                "system",
                "You are a helpful translator. Translate the user sentence to French.",
            ),
            (
                "human",
                "I love programming.",
            ),
        ]
        model.invoke(messages)
        ```

        ```python
        AIMessage(
            content="J'aime la programmation.",
            response_metadata={
                "id": "msg_01Trik66aiQ9Z1higrD5XFx3",
                "model": "claude-sonnet-4-5-20250929",
                "stop_reason": "end_turn",
                "stop_sequence": None,
                "usage": {"input_tokens": 25, "output_tokens": 11},
            },
            id="run-5886ac5f-3c2e-49f5-8a44-b1e92808c929-0",
            usage_metadata={
                "input_tokens": 25,
                "output_tokens": 11,
                "total_tokens": 36,
            },
        )
        ```

    ???+ example "Stream"

        ```python
        for chunk in model.stream(messages):
            print(chunk.text, end="")
        ```

        ```python
        AIMessageChunk(content="J", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        AIMessageChunk(content="'", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        AIMessageChunk(content="a", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        AIMessageChunk(content="ime", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        AIMessageChunk(content=" la", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        AIMessageChunk(content=" programm", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        AIMessageChunk(content="ation", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        AIMessageChunk(content=".", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        ```

        To aggregate the full message from the stream:

        ```python
        stream = model.stream(messages)
        full = next(stream)
        for chunk in stream:
            full += chunk
        full
        ```

        ```python
        AIMessageChunk(content="J'aime la programmation.", id="run-b34faef0-882f-4869-a19c-ed2b856e6361")
        ```

    ???+ example "Async invocation"

        ```python
        await model.ainvoke(messages)

        # stream (`astream` returns an async iterator; do not await it):
        # async for chunk in model.astream(messages): ...

        # batch:
        # await model.abatch([messages])
        ```

        ```python
        AIMessage(
            content="J'aime la programmation.",
            response_metadata={
                "id": "msg_01Trik66aiQ9Z1higrD5XFx3",
                "model": "claude-sonnet-4-5-20250929",
                "stop_reason": "end_turn",
                "stop_sequence": None,
                "usage": {"input_tokens": 25, "output_tokens": 11},
            },
            id="run-5886ac5f-3c2e-49f5-8a44-b1e92808c929-0",
            usage_metadata={
                "input_tokens": 25,
                "output_tokens": 11,
                "total_tokens": 36,
            },
        )
        ```

    ???+ example "Tool calling"

        ```python hl_lines="16"
        from pydantic import BaseModel, Field


        class GetWeather(BaseModel):
            '''Get the current weather in a given location'''

            location: str = Field(..., description="The city and state, e.g. San Francisco, CA")


        class GetPopulation(BaseModel):
            '''Get the current population in a given location'''

            location: str = Field(..., description="The city and state, e.g. San Francisco, CA")


        model_with_tools = model.bind_tools([GetWeather, GetPopulation])
        ai_msg = model_with_tools.invoke("Which city is hotter today and which is bigger: LA or NY?")
        ai_msg.tool_calls
        ```

        ```python
        [
            {
                "name": "GetWeather",
                "args": {"location": "Los Angeles, CA"},
                "id": "toolu_01KzpPEAgzura7hpBqwHbWdo",
            },
            {
                "name": "GetWeather",
                "args": {"location": "New York, NY"},
                "id": "toolu_01JtgbVGVJbiSwtZk3Uycezx",
            },
            {
                "name": "GetPopulation",
                "args": {"location": "Los Angeles, CA"},
                "id": "toolu_01429aygngesudV9nTbCKGuw",
            },
            {
                "name": "GetPopulation",
                "args": {"location": "New York, NY"},
                "id": "toolu_01JPktyd44tVMeBcPPnFSEJG",
            },
        ]
        ```

        See [`ChatAnthropic.bind_tools()`][langchain_anthropic.chat_models.ChatAnthropic.bind_tools]
        for more info.

        !!! note "Strict tool use"

            Anthropic supports a strict tool use feature that guarantees tool names
            and arguments are validated and correctly typed.

            See [`ChatAnthropic.bind_tools()`][langchain_anthropic.chat_models.ChatAnthropic.bind_tools]
            for more info.

    ???+ example "Token-efficient tool use (beta)"

        See LangChain [docs](https://docs.langchain.com/oss/python/integrations/chat/anthropic#token-efficient-tool-use)
        for more detail.

        ```python hl_lines="9"
        from langchain_anthropic import ChatAnthropic
        from langchain_core.tools import tool

        model = ChatAnthropic(
            model="claude-sonnet-4-5-20250929",
            temperature=0,
            model_kwargs={
                "extra_headers": {
                    "anthropic-beta": "token-efficient-tools-2025-02-19"
                }
            }
        )

        @tool
        def get_weather(location: str) -> str:
            \"\"\"Get the weather at a location.\"\"\"
            return "It's sunny."

        model_with_tools = model.bind_tools([get_weather])
        response = model_with_tools.invoke(
            "What's the weather in San Francisco?"
        )
        print(response.tool_calls)
        print(f'Total tokens: {response.usage_metadata["total_tokens"]}')
        ```

        ```txt
        [{'name': 'get_weather', 'args': {'location': 'San Francisco'}, 'id': 'toolu_01HLjQMSb1nWmgevQUtEyz17', 'type': 'tool_call'}]
        Total tokens: 408
        ```

    ???+ example "Image input"

        See the [multimodal guide](https://docs.langchain.com/oss/python/langchain/models#multimodal)
        for more detail.

        ```python
        import base64

        import httpx
        from langchain_anthropic import ChatAnthropic
        from langchain_core.messages import HumanMessage

        image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
        image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")

        model = ChatAnthropic(model="claude-sonnet-4-5-20250929")
        message = HumanMessage(
            content=[
                {
                    "type": "text",
                    "text": "Can you highlight the differences between these two images?",
                },
                {
                    "type": "image",
                    "base64": image_data,
                    "mime_type": "image/jpeg",
                },
                {
                    "type": "image",
                    "url": image_url,
                },
            ],
        )
        ai_msg = model.invoke([message])
        ai_msg.content
        ```

        ```python
        "After examining both images carefully, I can see that they are actually identical."
        ```

        ??? example "Upload with Files API"

            You can also pass in files that are managed through Anthropic's
            [Files API](https://platform.claude.com/docs/en/build-with-claude/files):

            ```python
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(
                model="claude-sonnet-4-5-20250929",
                betas=["files-api-2025-04-14"],
            )
            input_message = {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this document.",
                    },
                    {
                        "type": "image",
                        "id": "file_abc123...",
                    },
                ],
            }
            model.invoke([input_message])
            ```

    ???+ example "PDF input"

        See the [multimodal guide](https://docs.langchain.com/oss/python/langchain/models#multimodal)
        for more detail.

        ```python
        from base64 import b64encode
        from langchain_anthropic import ChatAnthropic
        from langchain_core.messages import HumanMessage
        import requests

        url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
        data = b64encode(requests.get(url).content).decode()

        model = ChatAnthropic(model="claude-sonnet-4-5-20250929")
        ai_msg = model.invoke(
            [
                HumanMessage(
                    [
                        "Summarize this document.",
                        {
                            "type": "file",
                            "mime_type": "application/pdf",
                            "base64": data,
                        },
                    ]
                )
            ]
        )
        ai_msg.content
        ```

        ```python
        "This appears to be a simple document..."
        ```

        ??? example "Upload with Files API"

            You can also pass in files that are managed through Anthropic's
            [Files API](https://platform.claude.com/docs/en/build-with-claude/files):

            ```python
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(
                model="claude-sonnet-4-5-20250929",
                betas=["files-api-2025-04-14"],
            )
            input_message = {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this document.",
                    },
                    {
                        "type": "file",
                        "id": "file_abc123...",
                    },
                ],
            }
            model.invoke([input_message])
            ```

    ???+ example "Extended thinking"

        Certain [Claude models](https://platform.claude.com/docs/en/build-with-claude/extended-thinking#supported-models)
        support an [extended thinking](https://platform.claude.com/docs/en/build-with-claude/extended-thinking)
        feature, which will output the step-by-step reasoning process that led to its
        final answer.

        To use it, specify the `thinking` parameter when initializing `ChatAnthropic`.

        It can also be passed in as a kwarg during invocation.

        **You will need to specify a token budget** to use this feature.

        !!! example

            ```python hl_lines="5-6"
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(
                model="claude-sonnet-4-5-20250929",
                max_tokens=5000,
                thinking={"type": "enabled", "budget_tokens": 2000},
            )

            response = model.invoke("What is the cube root of 50.653?")
            response.content
            ```

            ```python
            [
                {
                    "signature": "...",
                    "thinking": "To find the cube root of 50.653...",
                    "type": "thinking",
                },
                {"text": "The cube root of 50.653 is ...", "type": "text"},
            ]
            ```

        !!! warning "Differences in thinking across model versions"

            The Claude Messages API handles thinking differently across Claude Sonnet
            3.7 and Claude 4 models.

            Refer to the [Claude docs](https://platform.claude.com/docs/en/build-with-claude/extended-thinking#differences-in-thinking-across-model-versions)
            for more info.

    ???+ example "Effort"

        Certain Claude models support an [effort](https://platform.claude.com/docs/en/build-with-claude/effort)
        feature, which will control how many tokens Claude uses when responding.

        !!! example

            ```python hl_lines="6"
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(
                model="claude-opus-4-5-20251101",
                max_tokens=4096,
                effort="medium",  # Options: "high", "medium", "low"
            )

            response = model.invoke("Analyze the trade-offs between microservices and monolithic architectures")
            print(response.content)
            ```

        See the [Claude docs](https://platform.claude.com/docs/en/build-with-claude/effort)
        for more detail on when to use different effort levels.

    ???+ example "Prompt caching"

        Prompt caching reduces processing time and costs for repetitive tasks or prompts
        with consistent elements.

        !!! note
            Only certain models support prompt caching.
            See the [Claude documentation](https://platform.claude.com/docs/en/build-with-claude/prompt-caching#supported-models)
            for a full list.

        ```python hl_lines="16"
        from langchain_anthropic import ChatAnthropic

        model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

        messages = [
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": "Below is some long context:",
                    },
                    {
                        "type": "text",
                        "text": f"{long_text}",
                        "cache_control": {"type": "ephemeral"},
                    },
                ],
            },
            {
                "role": "user",
                "content": "What's that about?",
            },
        ]

        response = model.invoke(messages)
        response.usage_metadata["input_token_details"]
        ```

        ```python
        {"cache_read": 0, "cache_creation": 1458}
        ```

        Alternatively, you may enable prompt caching at invocation time. You may want to
        conditionally cache based on runtime conditions, such as the length of the
        context. This is useful for app-level decisions about what to
        cache.

        ```python hl_lines="3"
        response = model.invoke(
            messages,
            cache_control={"type": "ephemeral"},
        )
        ```

        ??? example "Extended caching"

            The cache lifetime is 5 minutes by default. If this is too short, you can
            apply one hour caching by setting `ttl` to `'1h'`.

            ```python hl_lines="12"
            model = ChatAnthropic(
                model="claude-sonnet-4-5-20250929",
            )

            messages = [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"{long_text}",
                            "cache_control": {"type": "ephemeral", "ttl": "1h"},
                        },
                    ],
                }
            ]

            response = model.invoke(messages)
            ```

            Details of cached token counts will be included on the `InputTokenDetails`
            of the response's `usage_metadata`:

            ```python
            response = model.invoke(messages)
            response.usage_metadata
            ```

            ```python
            {
                "input_tokens": 1500,
                "output_tokens": 200,
                "total_tokens": 1700,
                "input_token_details": {
                    "cache_read": 0,
                    "cache_creation": 1000,
                    "ephemeral_1h_input_tokens": 750,
                    "ephemeral_5m_input_tokens": 250,
                },
            }
            ```

            See [Claude documentation](https://platform.claude.com/docs/en/build-with-claude/prompt-caching#1-hour-cache-duration-beta)
            for detail.

    ???+ example "Token usage metadata"

        ```python
        ai_msg = model.invoke(messages)
        ai_msg.usage_metadata
        ```

        ```python
        {"input_tokens": 25, "output_tokens": 11, "total_tokens": 36}
        ```

        Message chunks containing token usage will be included during streaming by
        default:

        ```python
        stream = model.stream(messages)
        full = next(stream)
        for chunk in stream:
            full += chunk
        full.usage_metadata
        ```

        ```python
        {"input_tokens": 25, "output_tokens": 11, "total_tokens": 36}
        ```

        These can be disabled by setting [`stream_usage=False`][langchain_anthropic.chat_models.ChatAnthropic.stream_usage]
        in the stream method or when initializing `ChatAnthropic`.

    ???+ example "Citations"

        Anthropic supports a [citations](https://platform.claude.com/docs/en/build-with-claude/citations)
        feature that lets Claude attach context to its answers based on source
        documents supplied by the user.

        When passing a [Claude document content block](https://platform.claude.com/docs/en/build-with-claude/citations#document-types)
        with `#!python "citations": {"enabled": True}` included in the query, Claude may
        generate citations in its response.

        ```python hl_lines="9-19"
        from langchain_anthropic import ChatAnthropic

        model = ChatAnthropic(model="claude-3-5-haiku-20241022")

        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "document",
                        "source": {
                            "type": "text",
                            "media_type": "text/plain",
                            "data": "The grass is green. The sky is blue.",
                        },
                        "title": "My Document",
                        "context": "This is a trustworthy document.",
                        "citations": {"enabled": True},
                    },
                    {"type": "text", "text": "What color is the grass and sky?"},
                ],
            }
        ]
        response = model.invoke(messages)
        response.content
        ```

        ```python hl_lines="6-15 21-30"
        [
            {"text": "Based on the document, ", "type": "text"},
            {
                "text": "the grass is green",
                "type": "text",
                "citations": [
                    {
                        "type": "char_location",
                        "cited_text": "The grass is green. ",
                        "document_index": 0,
                        "document_title": "My Document",
                        "start_char_index": 0,
                        "end_char_index": 20,
                    }
                ],
            },
            {"text": ", and ", "type": "text"},
            {
                "text": "the sky is blue",
                "type": "text",
                "citations": [
                    {
                        "type": "char_location",
                        "cited_text": "The sky is blue.",
                        "document_index": 0,
                        "document_title": "My Document",
                        "start_char_index": 20,
                        "end_char_index": 36,
                    }
                ],
            },
            {"text": ".", "type": "text"},
        ]
        ```

    ???+ example "Context management"

        Anthropic supports a context editing feature that will automatically manage the
        model's context window (e.g., by clearing tool results).

        See [Anthropic documentation](https://platform.claude.com/docs/en/build-with-claude/context-editing)
        for details and configuration options.

        ```python hl_lines="5-6"
        from langchain_anthropic import ChatAnthropic

        model = ChatAnthropic(
            model="claude-sonnet-4-5-20250929",
            betas=["context-management-2025-06-27"],
            context_management={"edits": [{"type": "clear_tool_uses_20250919"}]},
        )
        model_with_tools = model.bind_tools([{"type": "web_search_20250305", "name": "web_search"}])
        response = model_with_tools.invoke("Search for recent developments in AI")
        ```

    ???+ example "Response metadata"

        ```python
        ai_msg = model.invoke(messages)
        ai_msg.response_metadata
        ```

        ```python
        {
            "id": "msg_013xU6FHEGEq76aP4RgFerVT",
            "model": "claude-sonnet-4-5-20250929",
            "stop_reason": "end_turn",
            "stop_sequence": None,
            "usage": {"input_tokens": 25, "output_tokens": 11},
        }
        ```

    ???+ example "Extended context windows (beta)"

        Claude Sonnet 4 supports a 1-million token context window, available in beta for
        organizations in usage tier 4 and organizations with custom rate limits.

        ```python hl_lines="6"
        from langchain_anthropic import ChatAnthropic
        from langchain_core.messages import HumanMessage

        model = ChatAnthropic(
            model="claude-sonnet-4-5-20250929",
            betas=["context-1m-2025-08-07"],  # Enable 1M context beta
        )

        long_document = \"\"\"
        This is a very long document that would benefit from the extended 1M
        context window...
        [imagine this continues for hundreds of thousands of tokens]
        \"\"\"

        messages = [
            HumanMessage(f\"\"\"
        Please analyze this document and provide a summary:

        {long_document}

        What are the key themes and main conclusions?
        \"\"\")
        ]

        response = model.invoke(messages)
        ```

        See [Claude documentation](https://platform.claude.com/docs/en/build-with-claude/context-windows#1m-token-context-window)
        for detail.

    ???+ example "Structured output"

        ```python hl_lines="13"
        from typing import Optional
        from pydantic import BaseModel, Field


        class Joke(BaseModel):
            '''Joke to tell user.'''

            setup: str = Field(description="The setup of the joke")
            punchline: str = Field(description="The punchline to the joke")
            rating: int | None = Field(description="How funny the joke is, from 1 to 10")


        structured_model = model.with_structured_output(Joke)
        structured_model.invoke("Tell me a joke about cats")
        ```

        ```python
        Joke(
            setup="Why was the cat sitting on the computer?",
            punchline="To keep an eye on the mouse!",
            rating=None,
        )
        ```

        See [`ChatAnthropic.with_structured_output()`][langchain_anthropic.chat_models.ChatAnthropic.with_structured_output]
        for more info.

        !!! note "Native structured output"

            Anthropic supports a native structured output feature that guarantees
            responses adhere to a given schema.

            See [`ChatAnthropic.with_structured_output()`][langchain_anthropic.chat_models.ChatAnthropic.with_structured_output]
            for more info.

    ???+ example "Built-in tools"

        See LangChain [docs](https://docs.langchain.com/oss/python/integrations/chat/anthropic#built-in-tools)
        for more detail.

        ??? example "Web search"

            ```python hl_lines="5-9"
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(model="claude-3-5-haiku-20241022")

            tool = {
                "type": "web_search_20250305",
                "name": "web_search",
                "max_uses": 3,
            }
            model_with_tools = model.bind_tools([tool])

            response = model_with_tools.invoke("How do I update a web app to TypeScript 5.5?")
            ```

            See the [Claude docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/web-search-tool)
            for more info.

        ??? example "Web fetch (beta)"

            ```python hl_lines="7-11"
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(
                model="claude-3-5-haiku-20241022",
            )

            tool = {
                "type": "web_fetch_20250910",
                "name": "web_fetch",
                "max_uses": 3,
            }
            model_with_tools = model.bind_tools([tool])

            response = model_with_tools.invoke("Please analyze the content at https://example.com/article")
            ```

            !!! note "Automatic beta header"

                The required `web-fetch-2025-09-10` beta header is automatically
                appended to the request when using the `web_fetch_20250910` tool type.
                You don't need to manually specify it in the `betas` parameter.

            See the [Claude docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/web-fetch-tool)
            for more info.

        ??? example "Code execution"

            ```python hl_lines="3-6"
            model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

            tool = {
                "type": "code_execution_20250522",
                "name": "code_execution",
            }
            model_with_tools = model.bind_tools([tool])

            response = model_with_tools.invoke(
                "Calculate the mean and standard deviation of [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
            )
            ```

            !!! note "Automatic beta header"

                The required `code-execution-2025-05-22` beta header is automatically
                appended to the request when using the `code_execution_20250522` tool
                type. You don't need to manually specify it in the `betas` parameter.

            See the [Claude docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/code-execution-tool)
            for more info.

        ??? example "Memory tool"

            ```python hl_lines="5-8"
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

            tool = {
                "type": "memory_20250818",
                "name": "memory",
            }
            model_with_tools = model.bind_tools([tool])

            response = model_with_tools.invoke("What are my interests?")
            ```

            !!! note "Automatic beta header"

                The required `context-management-2025-06-27` beta header is automatically
                appended to the request when using the `memory_20250818` tool type.
                You don't need to manually specify it in the `betas` parameter.

            See the [Claude docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/memory-tool)
            for more info.

        ??? example "Remote MCP"

            ```python hl_lines="3-14 18-19"
            from langchain_anthropic import ChatAnthropic

            mcp_servers = [
                {
                    "type": "url",
                    "url": "https://mcp.deepwiki.com/mcp",
                    "name": "deepwiki",
                    "tool_configuration": {  # optional configuration
                        "enabled": True,
                        "allowed_tools": ["ask_question"],
                    },
                    "authorization_token": "PLACEHOLDER",  # optional authorization
                }
            ]

            model = ChatAnthropic(
                model="claude-sonnet-4-5-20250929",
                betas=["mcp-client-2025-04-04"],  # Enable MCP client beta
                mcp_servers=mcp_servers,  # Pass in MCP server configurations
            )

            response = model.invoke(
                "What transport protocols does the 2025-03-26 version of the MCP "
                "spec (modelcontextprotocol/modelcontextprotocol) support?"
            )
            ```

            See the [Claude docs](https://platform.claude.com/docs/en/agents-and-tools/mcp-connector)
            for more info.

        ??? example "Text editor"

            ```python hl_lines="5-8"
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

            tool = {
                "type": "text_editor_20250124",
                "name": "str_replace_editor",
            }
            model_with_tools = model.bind_tools([tool])

            response = model_with_tools.invoke(
                "There's a syntax error in my primes.py file. Can you help me fix it?"
            )
            print(response.text)
            response.tool_calls
            ```

            ```txt
            I'd be happy to help you fix the syntax error in your primes.py file. First, let's look at the current content of the file to identify the error.
            ```

            ```txt
            [{'name': 'str_replace_editor',
            'args': {'command': 'view', 'path': '/repo/primes.py'},
            'id': 'toolu_01VdNgt1YV7kGfj9LFLm6HyQ',
            'type': 'tool_call'}]
            ```

            See the [Claude docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/text-editor-tool)
            for more info.

        ??? example "Tool search"

            Tool search enables Claude to dynamically discover and load tools on-demand
            instead of loading all tool definitions upfront. See the
            [LangChain docs](https://docs.langchain.com/oss/python/integrations/chat/anthropic#tool-search)
            for more detail.

            ```python hl_lines="8-11 26 38"
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(
                model="claude-sonnet-4-5-20250929",
            )

            tools = [
                {
                    "type": "tool_search_tool_regex_20251119",
                    "name": "tool_search_tool_regex",
                },
                {
                    "name": "get_weather",
                    "description": "Get the current weather for a location",
                    "input_schema": {
                        "type": "object",
                        "properties": {
                            "location": {"type": "string", "description": "City name"},
                            "unit": {
                                "type": "string",
                                "enum": ["celsius", "fahrenheit"],
                            },
                        },
                        "required": ["location"],
                    },
                    "defer_loading": True,  # Tool is loaded on-demand
                },
                {
                    "name": "search_files",
                    "description": "Search through files in the workspace",
                    "input_schema": {
                        "type": "object",
                        "properties": {
                            "query": {"type": "string"},
                        },
                        "required": ["query"],
                    },
                    "defer_loading": True,  # Tool is loaded on-demand
                },
                ...,
            ]

            model_with_tools = model.bind_tools(tools)
            response = model_with_tools.invoke("What's the weather in San Francisco?")
            ```

            !!! note "Automatic beta header"

                The required `advanced-tool-use-2025-11-20` beta header is automatically
                appended to the request when using tool search tools.

            !!! tip "Best practices"

                - Tools with `defer_loading: True` are only loaded when Claude discovers them via search
                - Keep your 3-5 most frequently used tools as non-deferred for optimal performance
                - Both tool search variants (regex and BM25) search tool names, descriptions, argument names, and argument descriptions

            See the [Claude docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/tool-search-tool)
            for more info.
    """  # noqa: E501

    # Pydantic model settings. `populate_by_name=True` lets the aliased fields
    # below be populated by either their field name or their alias.
    model_config = ConfigDict(
        populate_by_name=True,
    )

    # Required (no default). Also accepted under the alias `model_name`.
    model: str = Field(alias="model_name")
    """Model name to use."""

    # Also accepted under the alias `max_tokens_to_sample`.
    max_tokens: int | None = Field(default=None, alias="max_tokens_to_sample")
    """Denotes the number of tokens to predict per generation."""

    temperature: float | None = None
    """A non-negative float that tunes the degree of randomness in generation."""

    top_k: int | None = None
    """Number of most likely tokens to consider at each step."""

    top_p: float | None = None
    """Total probability mass of tokens to consider at each step."""

    # Also accepted under the alias `timeout`.
    default_request_timeout: float | None = Field(None, alias="timeout")
    """Timeout for requests to Claude API."""

    # sdk default = 2: https://github.com/anthropics/anthropic-sdk-python?tab=readme-ov-file#retries
    max_retries: int = 2
    """Number of retries allowed for requests sent to the Claude API."""

    # Also accepted under the alias `stop`.
    stop_sequences: list[str] | None = Field(None, alias="stop")
    """Default stop sequences."""

    # Also accepted under the alias `base_url`. When no value is passed, the
    # default factory reads the listed environment variables and otherwise
    # uses the `default` URL given here.
    anthropic_api_url: str | None = Field(
        alias="base_url",
        default_factory=from_env(
            ["ANTHROPIC_API_URL", "ANTHROPIC_BASE_URL"],
            default="https://api.anthropic.com",
        ),
    )
    """Base URL for API requests. Only specify if using a proxy or service emulator.

    If a value isn't passed in, will attempt to read the value first from
    `ANTHROPIC_API_URL` and if that is not set, `ANTHROPIC_BASE_URL`.
    """

    # Also accepted under the alias `api_key`. Typed as `SecretStr`; the default
    # factory is given `default=""` for the case where the env var is unset.
    anthropic_api_key: SecretStr = Field(
        alias="api_key",
        default_factory=secret_from_env("ANTHROPIC_API_KEY", default=""),
    )
    """Automatically read from env var `ANTHROPIC_API_KEY` if not provided."""

    # No alias; the default factory is given `default=None` for the unset case.
    anthropic_proxy: str | None = Field(
        default_factory=from_env("ANTHROPIC_PROXY", default=None)
    )
    """Proxy to use for the Anthropic clients, will be used for every API call.

    If not provided, will attempt to read from the `ANTHROPIC_PROXY` environment
    variable.
    """

    default_headers: Mapping[str, str] | None = None
    """Headers to pass to the Anthropic clients, will be used for every API call."""

    betas: list[str] | None = None
    """List of beta features to enable. If specified, invocations will be routed
    through `client.beta.messages.create`.

    Example: `#!python betas=["mcp-client-2025-04-04"]`
    """
    # Can also be passed in w/ model_kwargs, but having it as a param makes better devx
    #
    # Precedence order:
    # 1. Call-time kwargs (e.g., llm.invoke(..., betas=[...]))
    # 2. model_kwargs (e.g., ChatAnthropic(model_kwargs={"betas": [...]}))
    # 3. Direct parameter (e.g., ChatAnthropic(betas=[...]))

    # Extra parameters that have no dedicated field (for example `betas`, per
    # the precedence note above); each instance starts with its own empty dict.
    model_kwargs: dict[str, Any] = Field(default_factory=dict)

    streaming: bool = False
    """Whether to use streaming or not."""

    stream_usage: bool = True
    """Whether to include usage metadata in streaming output.

    If `True`, additional message chunks will be generated during the stream including
    usage metadata.
    """

    thinking: dict[str, Any] | None = Field(default=None)
    """Parameters for Claude reasoning,

    e.g., `#!python {"type": "enabled", "budget_tokens": 10_000}`
    """

    # Restricted by the `Literal` annotation to the three listed levels, or None.
    effort: Literal["high", "medium", "low"] | None = None
    """Control how many tokens Claude uses when responding.

    This parameter will be merged into the `output_config` parameter when making
    API calls.

    Example: `effort="medium"`

    !!! note

        Setting `effort` to `'high'` produces exactly the same behavior as omitting the
        parameter altogether.

    !!! note "Model Support"

        This feature is currently only supported by Claude Opus 4.5.

    !!! note "Automatic beta header"

        The required `effort-2025-11-24` beta header is
        automatically appended to the request when using `effort`, so you
        don't need to manually specify it in the `betas` parameter.
    """

    mcp_servers: list[dict[str, Any]] | None = None
    """List of MCP servers to use for the request.

    Example: `#!python mcp_servers=[{"type": "url", "url": "https://mcp.example.com/mcp",
    "name": "example-mcp"}]`
    """

    context_management: dict[str, Any] | None = None
    """Configuration for
    [context management](https://platform.claude.com/docs/en/build-with-claude/context-editing).
    """

    @property
    def _llm_type(self) -> str:
        """Return type of chat model."""
        return "anthropic-chat"

    @property
    def lc_secrets(self) -> dict[str, str]:
        """Map secret-bearing attribute names to environment variable names.

        Returns:
            A new dict with one entry per secret attribute; the value is the name of
            the environment variable associated with that attribute.
        """
        return dict(
            anthropic_api_key="ANTHROPIC_API_KEY",
            mcp_servers="ANTHROPIC_MCP_SERVERS",
        )

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Report whether this class can be serialized by LangChain.

        Returns:
            Always `True`.
        """
        serializable = True
        return serializable

    @classmethod
    def get_lc_namespace(cls) -> list[str]:
        """Return the namespace path of this LangChain object.

        Returns:
            `["langchain", "chat_models", "anthropic"]`
        """
        namespace = ["langchain", "chat_models", "anthropic"]
        return namespace

    @property
    def _identifying_params(self) -> dict[str, Any]:
        """Get the identifying parameters."""
        return {
            "model": self.model,
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "top_k": self.top_k,
            "top_p": self.top_p,
            "model_kwargs": self.model_kwargs,
            "streaming": self.streaming,
            "max_retries": self.max_retries,
            "default_request_timeout": self.default_request_timeout,
            "thinking": self.thinking,
        }

    def _get_ls_params(
        self,
        stop: list[str] | None = None,
        **kwargs: Any,
    ) -> LangSmithParams:
        """Assemble the standard tracing parameters for this invocation.

        Args:
            stop: Optional stop sequences supplied by the caller.
            **kwargs: Extra invocation arguments forwarded to
                `_get_invocation_params`.

        Returns:
            A `LangSmithParams` mapping. Model name and temperature come from the
            invocation params, falling back to the instance attributes.
            `ls_max_tokens` and `ls_stop` are only included when truthy.
        """
        invocation = self._get_invocation_params(stop=stop, **kwargs)
        tracing = LangSmithParams(
            ls_provider="anthropic",
            ls_model_name=invocation.get("model", self.model),
            ls_model_type="chat",
            ls_temperature=invocation.get("temperature", self.temperature),
        )

        max_tokens = invocation.get("max_tokens", self.max_tokens)
        if max_tokens:
            tracing["ls_max_tokens"] = max_tokens

        # An explicit `stop` argument wins over one found in the invocation params.
        stop_sequences = stop or invocation.get("stop", None)
        if stop_sequences:
            tracing["ls_stop"] = stop_sequences

        return tracing

    @model_validator(mode="before")
    @classmethod
    def set_default_max_tokens(cls, values: dict[str, Any]) -> Any:
        """Fill in `max_tokens` when the caller did not provide one.

        Args:
            values: Raw field values passed to the model constructor.

        Returns:
            The same `values` mapping. When `max_tokens` was missing or `None` it
            is set to whatever `_default_max_tokens_for` returns for the model
            name (read from `model`, else `model_name`).
        """
        if values.get("max_tokens") is not None:
            return values

        model_id = values.get("model") or values.get("model_name")
        values["max_tokens"] = _default_max_tokens_for(model_id)
        return values

    @model_validator(mode="before")
    @classmethod
    def build_extra(cls, values: dict) -> Any:
        """Build `model_kwargs` from the raw constructor values.

        Args:
            values: Raw field values passed to the model constructor.

        Returns:
            The result of `_build_model_kwargs` called with `values` and the
            field names declared on this class.
        """
        declared_field_names = get_pydantic_field_names(cls)
        processed = _build_model_kwargs(values, declared_field_names)
        return processed

    @model_validator(mode="after")
    def _set_model_profile(self) -> Self:
        """Populate `profile` with the model's default unless one was given.

        Returns:
            This instance, with `profile` set from `_get_default_model_profile`
            when it was previously `None`.
        """
        if self.profile is not None:
            # An explicitly supplied profile is left untouched.
            return self

        self.profile = _get_default_model_profile(self.model)
        return self

    @cached_property
    def _client_params(self) -> dict[str, Any]:
        client_params: dict[str, Any] = {
            "api_key": self.anthropic_api_key.get_secret_value(),
            "base_url": self.anthropic_api_url,
            "max_retries": self.max_retries,
            "default_headers": (self.default_headers or None),
        }
        # value <= 0 indicates the param should be ignored. None is a meaningful value
        # for Anthropic client and treated differently than not specifying the param at
        # all.
        if self.default_request_timeout is None or self.default_request_timeout > 0:
            client_params["timeout"] = self.default_request_timeout

        return client_params

    @cached_property
    def _client(self) -> anthropic.Client:
        """Create the synchronous Anthropic client.

        Returns:
            An `anthropic.Client` built from `_client_params` plus an HTTP client
            obtained from `_get_default_httpx_client`.
        """
        shared_params = self._client_params

        # The HTTP client only receives the base URL, the timeout (when one is
        # configured) and the proxy (when one is set).
        httpx_kwargs = {"base_url": shared_params["base_url"]}
        if "timeout" in shared_params:
            httpx_kwargs["timeout"] = shared_params["timeout"]
        if self.anthropic_proxy:
            httpx_kwargs["anthropic_proxy"] = self.anthropic_proxy

        client_kwargs = dict(shared_params)
        client_kwargs["http_client"] = _get_default_httpx_client(**httpx_kwargs)
        return anthropic.Client(**client_kwargs)

    @cached_property
    def _async_client(self) -> anthropic.AsyncClient:
        """Create the asynchronous Anthropic client.

        Returns:
            An `anthropic.AsyncClient` built from `_client_params` plus an HTTP
            client obtained from `_get_default_async_httpx_client`.
        """
        shared_params = self._client_params

        # The HTTP client only receives the base URL, the timeout (when one is
        # configured) and the proxy (when one is set).
        httpx_kwargs = {"base_url": shared_params["base_url"]}
        if "timeout" in shared_params:
            httpx_kwargs["timeout"] = shared_params["timeout"]
        if self.anthropic_proxy:
            httpx_kwargs["anthropic_proxy"] = self.anthropic_proxy

        client_kwargs = dict(shared_params)
        client_kwargs["http_client"] = _get_default_async_httpx_client(**httpx_kwargs)
        return anthropic.AsyncClient(**client_kwargs)

    def _get_request_payload(
        self,
        input_: LanguageModelInput,
        *,
        stop: list[str] | None = None,
        **kwargs: dict,
    ) -> dict:
        """Get the request payload for the Anthropic API.

        The payload is assembled from the instance fields, then overlaid with
        `model_kwargs`, then with call-time `kwargs` (later sources win). Beta
        identifiers required by the features in use (effort, native structured
        output, strict tools, known built-in tool types, MCP servers) are appended
        to `betas` when not already present.

        Args:
            input_: The model input; converted to messages via `_convert_input`.
            stop: Stop sequences for this call. Falls back to
                `self.stop_sequences` when falsy.
            **kwargs: Extra request parameters. `cache_control` is applied to the
                last content block of the last message rather than sent as a
                top-level parameter; `response_format` is converted to
                `output_format`.

        Returns:
            The keyword arguments for the Messages API call, with every
            `None`-valued entry removed.
        """
        messages = self._convert_input(input_).to_messages()

        for idx, message in enumerate(messages):
            # Translate v1 content
            if (
                isinstance(message, AIMessage)
                and message.response_metadata.get("output_version") == "v1"
            ):
                tcs: list[types.ToolCall] = [
                    {
                        "type": "tool_call",
                        "name": tool_call["name"],
                        "args": tool_call["args"],
                        "id": tool_call.get("id"),
                    }
                    for tool_call in message.tool_calls
                ]
                messages[idx] = message.model_copy(
                    update={
                        "content": _convert_from_v1_to_anthropic(
                            cast(list[types.ContentBlock], message.content),
                            tcs,
                            message.response_metadata.get("model_provider"),
                        )
                    }
                )

        system, formatted_messages = _format_messages(messages)

        # `cache_control` must never reach the API as a top-level parameter:
        # Anthropic expects it nested within a content block. Always remove it from
        # kwargs, and attach it to the last block of the last message when possible.
        if "cache_control" in kwargs:
            cache_control = kwargs.pop("cache_control")
            if formatted_messages:
                last_message = formatted_messages[-1]
                last_content = last_message["content"]
                if isinstance(last_content, str):
                    last_message["content"] = [
                        {
                            "type": "text",
                            "text": last_content,
                            "cache_control": cache_control,
                        }
                    ]
                elif (
                    isinstance(last_content, list)
                    and last_content
                    and isinstance(last_content[-1], dict)
                ):
                    # Build a new list and a new final block instead of assigning
                    # into the existing ones. NOTE(review): the formatted blocks may
                    # be the same objects as the caller's message content, in which
                    # case in-place assignment would leak `cache_control` into the
                    # caller's messages -- confirm against `_format_messages`.
                    last_message["content"] = [
                        *last_content[:-1],
                        {**last_content[-1], "cache_control": cache_control},
                    ]
                # Empty lists and any other content shape are left untouched; there
                # is no block to attach the cache marker to.

        payload = {
            "model": self.model,
            "max_tokens": self.max_tokens,
            "messages": formatted_messages,
            "temperature": self.temperature,
            "top_k": self.top_k,
            "top_p": self.top_p,
            "stop_sequences": stop or self.stop_sequences,
            "betas": self.betas,
            "context_management": self.context_management,
            "mcp_servers": self.mcp_servers,
            "system": system,
            **self.model_kwargs,
            **kwargs,
        }
        if self.thinking is not None:
            payload["thinking"] = self.thinking

        def _require_beta(beta: str) -> None:
            """Ensure `beta` is listed in `payload["betas"]`.

            Always rebinds to a new list so that `self.betas` (or a list passed
            by the caller) is never mutated.
            """
            current = payload["betas"]
            if not current:
                payload["betas"] = [beta]
            elif beta not in current:
                payload["betas"] = [*current, beta]

        # Handle output_config and effort parameter
        # Priority: self.effort > payload output_config
        output_config = payload.get("output_config", {})
        output_config = output_config.copy() if isinstance(output_config, dict) else {}

        if self.effort:
            output_config["effort"] = self.effort

        if output_config:
            payload["output_config"] = output_config

            # Auto-append required beta for effort
            if "effort" in output_config:
                _require_beta("effort-2025-11-24")

        if "response_format" in payload:
            # response_format present when using agents.create_agent's ProviderStrategy
            # ---
            # ProviderStrategy converts to OpenAI-style format, which passes kwargs to
            # ChatAnthropic, ending up in our payload
            response_format = payload.pop("response_format")
            if (
                isinstance(response_format, dict)
                and response_format.get("type") == "json_schema"
                and "schema" in response_format.get("json_schema", {})
            ):
                response_format = cast(dict, response_format["json_schema"]["schema"])
            # Convert OpenAI-style response_format to Anthropic's output_format
            payload["output_format"] = _convert_to_anthropic_output_format(
                response_format
            )

        structured_outputs_beta = "structured-outputs-2025-11-13"

        if "output_format" in payload:
            # Native structured output requires the structured outputs beta
            _require_beta(structured_outputs_beta)

        tools = payload.get("tools")
        if isinstance(tools, list):
            # Strict tool use requires the structured outputs beta
            if any(
                isinstance(tool, dict) and tool.get("strict") is True for tool in tools
            ):
                _require_beta(structured_outputs_beta)

            # Auto-append required betas for specific tool types
            for tool in tools:
                if isinstance(tool, dict) and "type" in tool:
                    tool_type = tool["type"]
                    if tool_type in _TOOL_TYPE_TO_BETA:
                        _require_beta(_TOOL_TYPE_TO_BETA[tool_type])

        # Auto-append required beta for mcp_servers
        if payload.get("mcp_servers"):
            _require_beta("mcp-client-2025-11-20")

        # Unset parameters are omitted entirely rather than sent as null.
        return {k: v for k, v in payload.items() if v is not None}

    def _create(self, payload: dict) -> Any:
        if "betas" in payload:
            return self._client.beta.messages.create(**payload)
        return self._client.messages.create(**payload)

    async def _acreate(self, payload: dict) -> Any:
        if "betas" in payload:
            return await self._async_client.beta.messages.create(**payload)
        return await self._async_client.messages.create(**payload)

    def _stream(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        *,
        stream_usage: bool | None = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        """Stream a response as generation chunks.

        Args:
            messages: Conversation to send.
            stop: Optional stop sequences.
            run_manager: Optional callback manager; notified of each chunk whose
                content is a string.
            stream_usage: Whether to include usage metadata. Defaults to
                `self.stream_usage` when `None`.
            **kwargs: Extra request parameters forwarded to
                `_get_request_payload`.

        Yields:
            One `ChatGenerationChunk` per stream event that produces a message.
        """
        emit_usage = self.stream_usage if stream_usage is None else stream_usage
        kwargs["stream"] = True
        request = self._get_request_payload(messages, stop=stop, **kwargs)
        try:
            events = self._create(request)
            # Content is flattened to a plain string only when the request uses
            # neither tools, documents nor thinking.
            as_plain_text = not (
                _tools_in_params(request)
                or _documents_in_params(request)
                or _thinking_in_params(request)
            )
            open_block = None
            for event in events:
                msg_chunk, open_block = _make_message_chunk_from_anthropic_event(
                    event,
                    stream_usage=emit_usage,
                    coerce_content_to_string=as_plain_text,
                    block_start_event=open_block,
                )
                if msg_chunk is None:
                    continue
                generation = ChatGenerationChunk(message=msg_chunk)
                if run_manager and isinstance(msg_chunk.content, str):
                    run_manager.on_llm_new_token(msg_chunk.content, chunk=generation)
                yield generation
        except anthropic.BadRequestError as exc:
            _handle_anthropic_bad_request(exc)

    async def _astream(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: AsyncCallbackManagerForLLMRun | None = None,
        *,
        stream_usage: bool | None = None,
        **kwargs: Any,
    ) -> AsyncIterator[ChatGenerationChunk]:
        """Asynchronously stream a response as generation chunks.

        Args:
            messages: Conversation to send.
            stop: Optional stop sequences.
            run_manager: Optional async callback manager; notified of each chunk
                whose content is a string.
            stream_usage: Whether to include usage metadata. Defaults to
                `self.stream_usage` when `None`.
            **kwargs: Extra request parameters forwarded to
                `_get_request_payload`.

        Yields:
            One `ChatGenerationChunk` per stream event that produces a message.
        """
        emit_usage = self.stream_usage if stream_usage is None else stream_usage
        kwargs["stream"] = True
        request = self._get_request_payload(messages, stop=stop, **kwargs)
        try:
            events = await self._acreate(request)
            # Content is flattened to a plain string only when the request uses
            # neither tools, documents nor thinking.
            as_plain_text = not (
                _tools_in_params(request)
                or _documents_in_params(request)
                or _thinking_in_params(request)
            )
            open_block = None
            async for event in events:
                msg_chunk, open_block = _make_message_chunk_from_anthropic_event(
                    event,
                    stream_usage=emit_usage,
                    coerce_content_to_string=as_plain_text,
                    block_start_event=open_block,
                )
                if msg_chunk is None:
                    continue
                generation = ChatGenerationChunk(message=msg_chunk)
                if run_manager and isinstance(msg_chunk.content, str):
                    await run_manager.on_llm_new_token(
                        msg_chunk.content, chunk=generation
                    )
                yield generation
        except anthropic.BadRequestError as exc:
            _handle_anthropic_bad_request(exc)

    def _format_output(self, data: Any, **kwargs: Any) -> ChatResult:
        """Convert an Anthropic API response into a LangChain `ChatResult`.

        Args:
            data: The API response object; must provide `model_dump()` and a
                `usage` attribute.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            A `ChatResult` with a single generation. Every dumped response field
            other than `content`, `role` and `type` is placed in `llm_output`.
        """
        dumped = data.model_dump()
        blocks = dumped["content"]

        for block in blocks:
            if not isinstance(block, dict):
                continue
            # Remove citations if they are None - introduced in anthropic sdk 0.45
            if "citations" in block and block["citations"] is None:
                del block["citations"]
            # Likewise drop a `None` text entry from thinking blocks.
            if (
                block.get("type") == "thinking"
                and "text" in block
                and block["text"] is None
            ):
                del block["text"]

        excluded_keys = ("content", "role", "type")
        llm_output = {
            key: value for key, value in dumped.items() if key not in excluded_keys
        }
        if "model" in llm_output and "model_name" not in llm_output:
            llm_output["model_name"] = llm_output["model"]

        metadata = {"model_provider": "anthropic"}

        # A lone text block without citations collapses to plain string content.
        is_plain_text = (
            len(blocks) == 1
            and blocks[0]["type"] == "text"
            and not blocks[0].get("citations")
        )
        if is_plain_text:
            msg = AIMessage(content=blocks[0]["text"], response_metadata=metadata)
        elif any(block["type"] == "tool_use" for block in blocks):
            msg = AIMessage(
                content=blocks,
                tool_calls=extract_tool_calls(blocks),
                response_metadata=metadata,
            )
        else:
            msg = AIMessage(content=blocks, response_metadata=metadata)

        msg.usage_metadata = _create_usage_metadata(data.usage)
        return ChatResult(
            generations=[ChatGeneration(message=msg)],
            llm_output=llm_output,
        )

    def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Run a single non-streaming completion.

        Args:
            messages: Conversation to send.
            stop: Optional stop sequences.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Extra request parameters, forwarded to both
                `_get_request_payload` and `_format_output`.

        Returns:
            The formatted `ChatResult` for the API response.
        """
        request = self._get_request_payload(messages, stop=stop, **kwargs)
        try:
            response = self._create(request)
        except anthropic.BadRequestError as exc:
            # Bad-request errors are routed through the shared handler.
            _handle_anthropic_bad_request(exc)
        return self._format_output(response, **kwargs)

    async def _agenerate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: AsyncCallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Run a single non-streaming completion asynchronously.

        Args:
            messages: Conversation to send.
            stop: Optional stop sequences.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Extra request parameters, forwarded to both
                `_get_request_payload` and `_format_output`.

        Returns:
            The formatted `ChatResult` for the API response.
        """
        request = self._get_request_payload(messages, stop=stop, **kwargs)
        try:
            response = await self._acreate(request)
        except anthropic.BadRequestError as exc:
            # Bad-request errors are routed through the shared handler.
            _handle_anthropic_bad_request(exc)
        return self._format_output(response, **kwargs)

    def _get_llm_for_structured_output_when_thinking_is_enabled(
        self,
        schema: dict | type,
        formatted_tool: AnthropicTool,
    ) -> Runnable[LanguageModelInput, BaseMessage]:
        """Build a structured-output runnable for use while `thinking` is enabled.

        Emits a warning, binds `schema` as a tool without forcing a tool choice,
        and pipes the model into a check that rejects responses lacking tool calls.

        Args:
            schema: The output schema to bind as a tool.
            formatted_tool: The Anthropic-format tool, recorded under
                `ls_structured_output_format`.

        Returns:
            A runnable whose output message is guaranteed to contain tool calls;
            otherwise `OutputParserException` is raised at invocation time.
        """
        thinking_admonition = (
            "Anthropic structured output relies on forced tool calling, "
            "which is not supported when `thinking` is enabled. This method will raise "
            "langchain_core.exceptions.OutputParserException if tool calls are not "
            "generated. Consider disabling `thinking` or adjust your prompt to ensure "
            "the tool is called."
        )
        warnings.warn(thinking_admonition, stacklevel=2)

        def _raise_if_no_tool_calls(message: AIMessage) -> AIMessage:
            if message.tool_calls:
                return message
            raise OutputParserException(thinking_admonition)

        structured_output_format = {
            "kwargs": {"method": "function_calling"},
            "schema": formatted_tool,
        }
        tool_bound_llm = self.bind_tools(
            [schema],
            ls_structured_output_format=structured_output_format,
        )
        return tool_bound_llm | _raise_if_no_tool_calls

    def bind_tools(
        self,
        tools: Sequence[dict[str, Any] | type | Callable | BaseTool],
        *,
        tool_choice: dict[str, str] | str | None = None,
        parallel_tool_calls: bool | None = None,
        strict: bool | None = None,
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, AIMessage]:
        r"""Bind tool-like objects to this chat model.

        Args:
            tools: A list of tool definitions to bind to this chat model.

                Supports Anthropic format tool schemas and any tool definition handled
                by `langchain_core.utils.function_calling.convert_to_openai_tool`.
            tool_choice: Which tool to require the model to call. Options are:

                - Name of the tool as a string or as dict `{"type": "tool", "name": "<<tool_name>>"}`: calls corresponding tool
                - `'auto'`, `{"type: "auto"}`, or `None`: automatically selects a tool (including no tool)
                - `'any'` or `{"type: "any"}`: force at least one tool to be called
            parallel_tool_calls: Set to `False` to disable parallel tool use.

                Defaults to `None` (no specification, which allows parallel tool use).

                !!! version-added "Added in `langchain-anthropic` 0.3.2"
            strict: If `True`, Claude's schema adherence is applied to tool calls.

                See the [Claude docs](https://platform.claude.com/docs/en/build-with-claude/structured-outputs#when-to-use-json-outputs-vs-strict-tool-use).
            kwargs: Any additional parameters are passed directly to `bind`.

        ???+ example
            ```python
            from langchain_anthropic import ChatAnthropic
            from pydantic import BaseModel, Field


            class GetWeather(BaseModel):
                '''Get the current weather in a given location'''

                location: str = Field(..., description="The city and state, e.g. San Francisco, CA")


            class GetPrice(BaseModel):
                '''Get the price of a specific product.'''

                product: str = Field(..., description="The product to look up.")


            model = ChatAnthropic(model="claude-sonnet-4-5-20250929", temperature=0)
            model_with_tools = model.bind_tools([GetWeather, GetPrice])
            model_with_tools.invoke(
                "What is the weather like in San Francisco",
            )
            # -> AIMessage(
            #     content=[
            #         {'text': '<thinking>\nBased on the user\'s question, the relevant function to call is GetWeather, which requires the "location" parameter.\n\nThe user has directly specified the location as "San Francisco". Since San Francisco is a well known city, I can reasonably infer they mean San Francisco, CA without needing the state specified.\n\nAll the required parameters are provided, so I can proceed with the API call.\n</thinking>', 'type': 'text'},
            #         {'text': None, 'type': 'tool_use', 'id': 'toolu_01SCgExKzQ7eqSkMHfygvYuu', 'name': 'GetWeather', 'input': {'location': 'San Francisco, CA'}}
            #     ],
            #     response_metadata={'id': 'msg_01GM3zQtoFv8jGQMW7abLnhi', 'model': 'claude-sonnet-4-5-20250929', 'stop_reason': 'tool_use', 'stop_sequence': None, 'usage': {'input_tokens': 487, 'output_tokens': 145}},
            #     id='run-87b1331e-9251-4a68-acef-f0a018b639cc-0'
            # )
            ```

        ??? example "Force tool call with tool_choice `'any'`"

            ```python
            from langchain_anthropic import ChatAnthropic
            from pydantic import BaseModel, Field


            class GetWeather(BaseModel):
                '''Get the current weather in a given location'''

                location: str = Field(..., description="The city and state, e.g. San Francisco, CA")


            class GetPrice(BaseModel):
                '''Get the price of a specific product.'''

                product: str = Field(..., description="The product to look up.")


            model = ChatAnthropic(model="claude-sonnet-4-5-20250929", temperature=0)
            model_with_tools = model.bind_tools([GetWeather, GetPrice], tool_choice="any")
            model_with_tools.invoke(
                "what is the weather like in San Francisco",
            )
            ```

        ??? example "Force specific tool call with `tool_choice` `'<name_of_tool>'`"

            ```python
            from langchain_anthropic import ChatAnthropic
            from pydantic import BaseModel, Field


            class GetWeather(BaseModel):
                '''Get the current weather in a given location'''

                location: str = Field(..., description="The city and state, e.g. San Francisco, CA")


            class GetPrice(BaseModel):
                '''Get the price of a specific product.'''

                product: str = Field(..., description="The product to look up.")


            model = ChatAnthropic(model="claude-sonnet-4-5-20250929", temperature=0)
            model_with_tools = model.bind_tools([GetWeather, GetPrice], tool_choice="GetWeather")
            model_with_tools.invoke("What is the weather like in San Francisco")
            ```

        ??? example "Cache specific tools"

            ```python
            from langchain_anthropic import ChatAnthropic, convert_to_anthropic_tool
            from pydantic import BaseModel, Field


            class GetWeather(BaseModel):
                '''Get the current weather in a given location'''

                location: str = Field(..., description="The city and state, e.g. San Francisco, CA")


            class GetPrice(BaseModel):
                '''Get the price of a specific product.'''

                product: str = Field(..., description="The product to look up.")


            # We'll convert our pydantic class to the anthropic tool format
            # before passing to bind_tools so that we can set the 'cache_control'
            # field on our tool.
            cached_price_tool = convert_to_anthropic_tool(GetPrice)

            # Currently the only supported "cache_control" value is {"type": "ephemeral"}
            cached_price_tool["cache_control"] = {"type": "ephemeral"}

            # Need to pass in extra headers to enable use of the beta cache control API.
            model = ChatAnthropic(
                model="claude-sonnet-4-5-20250929",
                temperature=0,
            )
            model_with_tools = model.bind_tools([GetWeather, cached_price_tool])
            model_with_tools.invoke("What is the weather like in San Francisco")
            ```

            This outputs:

            ```python
            AIMessage(
                content=[
                    {
                        "text": "Certainly! I can help you find out the current weather in San Francisco. To get this information, I'll use the GetWeather function. Let me fetch that data for you right away.",
                        "type": "text",
                    },
                    {
                        "id": "toolu_01TS5h8LNo7p5imcG7yRiaUM",
                        "input": {"location": "San Francisco, CA"},
                        "name": "GetWeather",
                        "type": "tool_use",
                    },
                ],
                response_metadata={
                    "id": "msg_01Xg7Wr5inFWgBxE5jH9rpRo",
                    "model": "claude-sonnet-4-5-20250929",
                    "stop_reason": "tool_use",
                    "stop_sequence": None,
                    "usage": {
                        "input_tokens": 171,
                        "output_tokens": 96,
                        "cache_creation_input_tokens": 1470,
                        "cache_read_input_tokens": 0,
                    },
                },
                id="run-b36a5b54-5d69-470e-a1b0-b932d00b089e-0",
                tool_calls=[
                    {
                        "name": "GetWeather",
                        "args": {"location": "San Francisco, CA"},
                        "id": "toolu_01TS5h8LNo7p5imcG7yRiaUM",
                        "type": "tool_call",
                    }
                ],
                usage_metadata={
                    "input_tokens": 171,
                    "output_tokens": 96,
                    "total_tokens": 267,
                },
            )
            ```

            If we invoke the tool again, we can see that the "usage" information in the `AIMessage.response_metadata` shows that we had a cache hit:

            ```python hl_lines="23"
            AIMessage(
                content=[
                    {
                        "text": "To get the current weather in San Francisco, I can use the GetWeather function. Let me check that for you.",
                        "type": "text",
                    },
                    {
                        "id": "toolu_01HtVtY1qhMFdPprx42qU2eA",
                        "input": {"location": "San Francisco, CA"},
                        "name": "GetWeather",
                        "type": "tool_use",
                    },
                ],
                response_metadata={
                    "id": "msg_016RfWHrRvW6DAGCdwB6Ac64",
                    "model": "claude-sonnet-4-5-20250929",
                    "stop_reason": "tool_use",
                    "stop_sequence": None,
                    "usage": {
                        "input_tokens": 171,
                        "output_tokens": 82,
                        "cache_creation_input_tokens": 0,
                        "cache_read_input_tokens": 1470,
                    },
                },
                id="run-88b1f825-dcb7-4277-ac27-53df55d22001-0",
                tool_calls=[
                    {
                        "name": "GetWeather",
                        "args": {"location": "San Francisco, CA"},
                        "id": "toolu_01HtVtY1qhMFdPprx42qU2eA",
                        "type": "tool_call",
                    }
                ],
                usage_metadata={
                    "input_tokens": 171,
                    "output_tokens": 82,
                    "total_tokens": 253,
                },
            )
            ```

        ??? example "Computer use tool"

            Claude supports computer use capabilities, allowing it to interact with
            desktop environments through screenshots, mouse control, and keyboard input.

            !!! warning "Execution environment required"

                LangChain handles the API integration, but **you must provide**:

                - A sandboxed computing environment (Docker, VM, etc.)
                - A virtual display (e.g., Xvfb)
                - Code to execute tool calls (screenshot, clicks, typing)
                - An agent loop to pass results back to Claude

                Anthropic provides a [reference implementation](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo).

            !!! note

                Computer use requires:

                - Claude Opus 4.5, Claude 4, or Claude Sonnet 3.7
                - A sandboxed computing environment with virtual display

            See the [Claude docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/computer-use-tool)
            for setup instructions, model capability, and best practices.

            ```python
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

            # LangChain handles the API call and tool binding
            computer_tool = {
                "type": "computer_20250124",
                "name": "computer",
                "display_width_px": 1024,
                "display_height_px": 768,
                "display_number": 1,
            }

            model_with_computer = model.bind_tools([computer_tool])
            response = model_with_computer.invoke("Take a screenshot to see what's on the screen")

            # response.tool_calls contains the action Claude wants to perform
            # You must execute this action in your environment and pass the result back
            ```

            !!! note "Automatic beta header"

                The required beta header is automatically appended based on the tool
                version. For `computer_20250124` and `computer_20251124`, the respective
                `computer-use-2025-01-24` and `computer-use-2025-11-24` beta header is
                added automatically.

        ??? example "Strict tool use"

            Strict tool use guarantees that tool names and arguments are validated
            and correctly typed.

            !!! note

                Strict tool use requires:

                - Claude Sonnet 4.5 or Opus 4.1
                - `langchain-anthropic>=1.1.0`

            To enable strict tool use, specify `strict=True` when calling `bind_tools`.

            ```python hl_lines="11"
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(
                model="claude-sonnet-4-5",
            )

            def get_weather(location: str) -> str:
                \"\"\"Get the weather at a location.\"\"\"
                return "It's sunny."

            model_with_tools = model.bind_tools([get_weather], strict=True)
            ```

            !!! note "Automatic beta header"

                The required `structured-outputs-2025-11-13` beta header is
                automatically appended to the request when using `strict=True`, so you
                don't need to manually specify it in the `betas` parameter.

            See LangChain [docs](https://docs.langchain.com/oss/python/integrations/chat/anthropic#strict-tool-use)
            for more detail.
        """  # noqa: E501
        formatted_tools = [
            tool
            if _is_builtin_tool(tool)
            else convert_to_anthropic_tool(tool, strict=strict)
            for tool in tools
        ]
        if not tool_choice:
            pass
        elif isinstance(tool_choice, dict):
            kwargs["tool_choice"] = tool_choice
        elif isinstance(tool_choice, str) and tool_choice in ("any", "auto"):
            kwargs["tool_choice"] = {"type": tool_choice}
        elif isinstance(tool_choice, str):
            kwargs["tool_choice"] = {"type": "tool", "name": tool_choice}
        else:
            msg = (
                f"Unrecognized 'tool_choice' type {tool_choice=}. Expected dict, "
                f"str, or None."
            )
            raise ValueError(
                msg,
            )

        if parallel_tool_calls is not None:
            disable_parallel_tool_use = not parallel_tool_calls
            if "tool_choice" in kwargs:
                kwargs["tool_choice"]["disable_parallel_tool_use"] = (
                    disable_parallel_tool_use
                )
            else:
                kwargs["tool_choice"] = {
                    "type": "auto",
                    "disable_parallel_tool_use": disable_parallel_tool_use,
                }

        return self.bind(tools=formatted_tools, **kwargs)

    def with_structured_output(
        self,
        schema: dict | type,
        *,
        include_raw: bool = False,
        method: Literal["function_calling", "json_schema"] = "function_calling",
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, dict | BaseModel]:
        """Model wrapper that returns outputs formatted to match the given schema.

        Args:
            schema: The output schema. Can be passed in as:

                - An Anthropic tool schema,
                - An OpenAI function/tool schema,
                - A JSON Schema,
                - A `TypedDict` class,
                - Or a Pydantic class.

                If `schema` is a Pydantic class then the model output will be a
                Pydantic instance of that class, and the model-generated fields will be
                validated by the Pydantic class. Otherwise the model output will be a
                dict and will not be validated.

                See `langchain_core.utils.function_calling.convert_to_openai_tool` for
                more on how to properly specify types and descriptions of schema fields
                when specifying a Pydantic or `TypedDict` class.
            include_raw:
                If `False` then only the parsed structured output is returned.

                If an error occurs during model output parsing it will be raised.

                If `True` then both the raw model response (a `BaseMessage`) and the
                parsed model response will be returned.

                If an error occurs during output parsing it will be caught and returned
                as well.

                The final output is always a `dict` with keys `'raw'`, `'parsed'`, and
                `'parsing_error'`.
            method: The structured output method to use. Options are:

                - `'function_calling'` (default): Use forced tool calling to get
                    structured output.
                - `'json_schema'`: Use Claude's dedicated
                    [structured output](https://platform.claude.com/docs/en/build-with-claude/structured-outputs)
                    feature.

            kwargs: Additional keyword arguments are ignored.

        Returns:
            A `Runnable` that takes same inputs as a
                `langchain_core.language_models.chat_models.BaseChatModel`.

                If `include_raw` is `False` and `schema` is a Pydantic class, `Runnable`
                outputs an instance of `schema` (i.e., a Pydantic object). Otherwise, if
                `include_raw` is `False` then `Runnable` outputs a `dict`.

                If `include_raw` is `True`, then `Runnable` outputs a `dict` with keys:

                - `'raw'`: `BaseMessage`
                - `'parsed'`: `None` if there was a parsing error, otherwise the type
                    depends on the `schema` as described above.
                - `'parsing_error'`: `BaseException | None`

        Raises:
            ValueError: If `method` is not one of the supported methods.

        ??? example "Pydantic schema (`include_raw=False`)"

            ```python
            from langchain_anthropic import ChatAnthropic
            from pydantic import BaseModel


            class AnswerWithJustification(BaseModel):
                '''An answer to the user question along with justification for the answer.'''

                answer: str
                justification: str


            model = ChatAnthropic(model="claude-sonnet-4-5-20250929", temperature=0)
            structured_model = model.with_structured_output(AnswerWithJustification)

            structured_model.invoke("What weighs more a pound of bricks or a pound of feathers")
            # -> AnswerWithJustification(
            #     answer='They weigh the same',
            #     justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
            # )
            ```

        ??? example "Pydantic schema (`include_raw=True`)"

            ```python
            from langchain_anthropic import ChatAnthropic
            from pydantic import BaseModel


            class AnswerWithJustification(BaseModel):
                '''An answer to the user question along with justification for the answer.'''

                answer: str
                justification: str


            model = ChatAnthropic(model="claude-sonnet-4-5-20250929", temperature=0)
            structured_model = model.with_structured_output(AnswerWithJustification, include_raw=True)

            structured_model.invoke("What weighs more a pound of bricks or a pound of feathers")
            # -> {
            #     'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
            #     'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
            #     'parsing_error': None
            # }
            ```

        ??? example "Dictionary schema (`include_raw=False`)"

            ```python
            from langchain_anthropic import ChatAnthropic

            schema = {
                "name": "AnswerWithJustification",
                "description": "An answer to the user question along with justification for the answer.",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "answer": {"type": "string"},
                        "justification": {"type": "string"},
                    },
                    "required": ["answer", "justification"],
                },
            }
            model = ChatAnthropic(model="claude-sonnet-4-5-20250929", temperature=0)
            structured_model = model.with_structured_output(schema)

            structured_model.invoke("What weighs more a pound of bricks or a pound of feathers")
            # -> {
            #     'answer': 'They weigh the same',
            #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
            # }
            ```

        ??? example "Native structured output with `method='json_schema'`"

            Anthropic supports a native structured output feature that guarantees
            responses adhere to a given schema.

            !!! note

                Native structured output requires:

                - Claude Sonnet 4.5 or Opus 4.1
                - `langchain-anthropic>=1.1.0`

            To enable native structured output, specify `method="json_schema"` when
            calling `with_structured_output`. (Under the hood, LangChain will
            append the required `structured-outputs-2025-11-13` beta header)

            ```python hl_lines="13"
            from langchain_anthropic import ChatAnthropic
            from pydantic import BaseModel, Field

            model = ChatAnthropic(model="claude-sonnet-4-5")

            class Movie(BaseModel):
                \"\"\"A movie with details.\"\"\"
                title: str = Field(..., description="The title of the movie")
                year: int = Field(..., description="The year the movie was released")
                director: str = Field(..., description="The director of the movie")
                rating: float = Field(..., description="The movie's rating out of 10")

            model_with_structure = model.with_structured_output(Movie, method="json_schema")
            response = model_with_structure.invoke("Provide details about the movie Inception")
            print(response)
            # -> Movie(title="Inception", year=2010, director="Christopher Nolan", rating=8.8)
            ```
        """  # noqa: E501
        if method == "json_mode":
            # 'json_mode' is not supported here; warn (attributed to the caller via
            # `stacklevel=2`) and continue with the native JSON schema method.
            warnings.warn(
                "Unrecognized structured output method 'json_mode'. Defaulting to "
                "'json_schema' method.",
                stacklevel=2,
            )
            method = "json_schema"

        # Reject anything else up front so the branches below only ever see one of
        # the two supported methods.
        if method not in ("function_calling", "json_schema"):
            error_message = (
                f"Unrecognized structured output method '{method}'. "
                f"Expected 'function_calling' or 'json_schema'."
            )
            raise ValueError(error_message)

        output_parser: OutputParserLike
        if method == "function_calling":
            anthropic_tool = convert_to_anthropic_tool(schema)
            forced_tool_name = anthropic_tool["name"]
            thinking_enabled = (
                self.thinking is not None and self.thinking.get("type") == "enabled"
            )
            if thinking_enabled:
                # Delegate to the dedicated helper when thinking is enabled rather
                # than binding the tool with a forced `tool_choice`.
                llm = self._get_llm_for_structured_output_when_thinking_is_enabled(
                    schema,
                    anthropic_tool,
                )
            else:
                llm = self.bind_tools(
                    [schema],
                    tool_choice=forced_tool_name,
                    ls_structured_output_format={
                        "kwargs": {"method": "function_calling"},
                        "schema": anthropic_tool,
                    },
                )

            if isinstance(schema, type) and is_basemodel_subclass(schema):
                output_parser = PydanticToolsParser(tools=[schema], first_tool_only=True)
            else:
                output_parser = JsonOutputKeyToolsParser(
                    key_name=forced_tool_name,
                    first_tool_only=True,
                )
        else:
            # method == "json_schema": use the native structured output feature.
            llm = self.bind(
                output_format=_convert_to_anthropic_output_format(schema),
                ls_structured_output_format={
                    "kwargs": {"method": "json_schema"},
                    "schema": convert_to_openai_tool(schema),
                },
            )
            if isinstance(schema, type) and is_basemodel_subclass(schema):
                output_parser = PydanticOutputParser(pydantic_object=schema)
            else:
                output_parser = JsonOutputParser()

        if not include_raw:
            return llm | output_parser

        # With `include_raw`, parsing failures are captured under `parsing_error`
        # (and `parsed` is set to `None`) instead of being raised.
        parse_ok = RunnablePassthrough.assign(
            parsed=itemgetter("raw") | output_parser,
            parsing_error=lambda _: None,
        )
        parse_failed = RunnablePassthrough.assign(parsed=lambda _: None)
        guarded_parser = parse_ok.with_fallbacks(
            [parse_failed],
            exception_key="parsing_error",
        )
        return RunnableMap(raw=llm) | guarded_parser

    def get_num_tokens_from_messages(
        self,
        messages: list[BaseMessage],
        tools: Sequence[dict[str, Any] | type | Callable | BaseTool] | None = None,
        **kwargs: Any,
    ) -> int:
        """Count tokens in a sequence of input messages.

        Args:
            messages: The message inputs to tokenize.
            tools: If provided, sequence of `dict`, `BaseModel`, function, or `BaseTool`
                objects to be converted to tool schemas. Built-in Anthropic tool
                definitions are forwarded unchanged, mirroring `bind_tools`.
            kwargs: Additional keyword arguments are passed to the Anthropic
                `messages.count_tokens` method.

        Returns:
            The number of input tokens reported by Anthropic's token counting API.

        ???+ example "Basic usage"

            ```python
            from langchain_anthropic import ChatAnthropic
            from langchain_core.messages import HumanMessage, SystemMessage

            model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

            messages = [
                SystemMessage(content="You are a scientist"),
                HumanMessage(content="Hello, Claude"),
            ]
            model.get_num_tokens_from_messages(messages)
            ```

            ```txt
            14
            ```

        ??? example "Pass tool schemas"

            ```python
            from langchain_anthropic import ChatAnthropic
            from langchain_core.messages import HumanMessage
            from langchain_core.tools import tool

            model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

            @tool(parse_docstring=True)
            def get_weather(location: str) -> str:
                \"\"\"Get the current weather in a given location

                Args:
                    location: The city and state, e.g. San Francisco, CA
                \"\"\"
                return "Sunny"

            messages = [
                HumanMessage(content="What's the weather like in San Francisco?"),
            ]
            model.get_num_tokens_from_messages(messages, tools=[get_weather])
            ```

            ```txt
            403
            ```

        !!! warning "Behavior changed in `langchain-anthropic` 0.3.0"

            Uses Anthropic's [token counting API](https://platform.claude.com/docs/en/build-with-claude/token-counting) to count tokens in messages.

        """  # noqa: D214,E501
        formatted_system, formatted_messages = _format_messages(messages)
        # Forward the system prompt whenever one was produced. Anthropic's token
        # counting endpoint accepts it either as a string or as a list of text
        # blocks, so a block-formatted system prompt must not be left out of the
        # count. NOTE(review): assumes `_format_messages` only yields `str`, a list
        # of content blocks, or `None` for the system prompt - confirm.
        if formatted_system is not None:
            kwargs["system"] = formatted_system
        if tools:
            # Same rule as `bind_tools`: built-in tool dicts are already in API
            # format and cannot be run through the generic converter.
            kwargs["tools"] = [
                tool if _is_builtin_tool(tool) else convert_to_anthropic_tool(tool)
                for tool in tools
            ]
        if self.context_management is not None:
            kwargs["context_management"] = self.context_management

        # Configured betas have to go through the beta client so they are sent
        # along with the counting request.
        if self.betas is not None:
            beta_response = self._client.beta.messages.count_tokens(
                betas=self.betas,
                model=self.model,
                messages=formatted_messages,  # type: ignore[arg-type]
                **kwargs,
            )
            return beta_response.input_tokens
        response = self._client.messages.count_tokens(
            model=self.model,
            messages=formatted_messages,  # type: ignore[arg-type]
            **kwargs,
        )
        return response.input_tokens


def convert_to_anthropic_tool(
    tool: dict[str, Any] | type | Callable | BaseTool,
    *,
    strict: bool | None = None,
) -> AnthropicTool:
    """Convert a tool-like object to an Anthropic tool definition.

    A `dict` that already carries `name`, `description` and `input_schema` is
    treated as an Anthropic tool and only re-wrapped. Anything else is first
    converted with `convert_to_openai_tool` and then mapped onto the Anthropic
    field names.

    Args:
        tool: A tool-like object to convert. Can be an Anthropic tool dict,
            a Pydantic model, a function, or a `BaseTool`.
        strict: If `True`, enables strict schema adherence for the tool. Only
            applied on the conversion path, not to dicts that are already in
            Anthropic format.

            !!! note

                Requires Claude Sonnet 4.5 or Opus 4.1.

    Returns:
        An Anthropic tool definition dict.
    """
    anthropic_keys = ("name", "description", "input_schema")
    if isinstance(tool, dict) and all(key in tool for key in anthropic_keys):
        # Already in Anthropic tool format.
        return AnthropicTool(tool)  # type: ignore[misc]

    function_spec = convert_to_openai_tool(tool, strict=strict)["function"]
    converted = AnthropicTool(
        name=function_spec["name"],
        input_schema=function_spec["parameters"],
    )
    if "description" in function_spec:
        converted["description"] = function_spec["description"]
    # Carry `strict` over only when the caller explicitly passed a boolean.
    if isinstance(strict, bool) and "strict" in function_spec:
        converted["strict"] = function_spec["strict"]
    return converted


def _tools_in_params(params: dict) -> bool:
    return (
        "tools" in params
        or ("extra_body" in params and params["extra_body"].get("tools"))
        or "mcp_servers" in params
    )


def _thinking_in_params(params: dict) -> bool:
    return params.get("thinking", {}).get("type") == "enabled"


def _documents_in_params(params: dict) -> bool:
    for message in params.get("messages", []):
        if isinstance(message.get("content"), list):
            for block in message["content"]:
                if (
                    isinstance(block, dict)
                    and block.get("type") == "document"
                    and block.get("citations", {}).get("enabled")
                ):
                    return True
    return False


class _AnthropicToolUse(TypedDict):
    """Typed shape of a `tool_use` content block.

    Instances are built from LangChain tool calls by
    `_lc_tool_calls_to_anthropic_tool_use_blocks`.
    """

    # Block discriminator; always the literal "tool_use".
    type: Literal["tool_use"]
    # Name of the tool being called.
    name: str
    # Arguments for the call (filled from the tool call's `args`).
    input: dict
    # Identifier of the tool call.
    id: str


def _lc_tool_calls_to_anthropic_tool_use_blocks(
    tool_calls: list[ToolCall],
) -> list[_AnthropicToolUse]:
    """Map LangChain tool calls onto `tool_use` content blocks.

    Args:
        tool_calls: Tool calls to convert. Each must provide `name`, `args` and
            `id`.

    Returns:
        One `tool_use` block per tool call, in the same order.
    """
    # Dict literals are equivalent at runtime to calling the `TypedDict`.
    return [
        {
            "type": "tool_use",
            "name": call["name"],
            "input": call["args"],
            "id": cast("str", call["id"]),
        }
        for call in tool_calls
    ]


def _convert_to_anthropic_output_format(schema: dict | type) -> dict[str, Any]:
    """Build the Claude `output_format` payload for a schema.

    Accepts a JSON schema `dict`, a Pydantic model class, or a `TypedDict` class.

    See Claude docs on [structured outputs](https://platform.claude.com/docs/en/build-with-claude/structured-outputs).
    """
    from anthropic import transform_schema

    passes_through = isinstance(schema, dict) or (
        isinstance(schema, type) and is_basemodel_subclass(schema)
    )
    # Dicts and Pydantic classes go to `transform_schema` as-is; anything else
    # (i.e. a `TypedDict`) is converted to JSON schema first.
    schema_source = schema if passes_through else convert_to_json_schema(schema)
    return {"type": "json_schema", "schema": transform_schema(schema_source)}


def _make_message_chunk_from_anthropic_event(
    event: anthropic.types.RawMessageStreamEvent,
    *,
    stream_usage: bool = True,
    coerce_content_to_string: bool,
    block_start_event: anthropic.types.RawMessageStreamEvent | None = None,
) -> tuple[AIMessageChunk | None, anthropic.types.RawMessageStreamEvent | None]:
    """Convert Anthropic streaming event to `AIMessageChunk`.

    Args:
        event: Raw streaming event from Anthropic SDK
        stream_usage: Whether to include usage metadata in the output chunks.

            When `False`, `message_start` and `message_delta` events produce no
            chunk at all.
        coerce_content_to_string: Whether to convert structured content to plain
            text strings.

            When `True`, only text content is preserved; when `False`, structured
            content like tool calls and citations are maintained.
        block_start_event: Previous content block start event, used for tracking
            tool use blocks and maintaining context across related events.

    Returns:
        Tuple with
            - `AIMessageChunk`: Converted message chunk with appropriate content and
                metadata, or `None` if the event doesn't produce a chunk
            - `RawMessageStreamEvent`: Updated `block_start_event` for tracking content
                blocks across sequential events, or `None` if not applicable

                The incoming `block_start_event` is returned unchanged unless this
                event is a matching `content_block_start`.

    Note:
        Not all Anthropic events result in message chunks. Events like internal
        state changes return `None` for the message chunk while potentially
        updating the `block_start_event` for context tracking.
    """
    message_chunk: AIMessageChunk | None = None
    # Reference: Anthropic SDK streaming implementation
    # https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/lib/streaming/_messages.py  # noqa: E501
    if event.type == "message_start" and stream_usage:
        # Capture model name, but don't include usage_metadata yet
        # as it will be properly reported in message_delta with complete info
        if hasattr(event.message, "model"):
            response_metadata: dict[str, Any] = {"model_name": event.message.model}
        else:
            response_metadata = {}

        message_chunk = AIMessageChunk(
            content="" if coerce_content_to_string else [],
            response_metadata=response_metadata,
        )

    # Start of a structured (non-text) content block. These are substring checks,
    # not equality checks, so any block type containing one of the markers takes
    # this branch.
    elif (
        event.type == "content_block_start"
        and event.content_block is not None
        and (
            "tool_result" in event.content_block.type
            or "tool_use" in event.content_block.type
            or "document" in event.content_block.type
            or "redacted_thinking" in event.content_block.type
        )
    ):
        # Structured blocks cannot be represented when content is coerced to a
        # plain string; warn but still emit the block as list content below.
        if coerce_content_to_string:
            warnings.warn("Received unexpected tool content block.", stacklevel=2)

        content_block = event.content_block.model_dump()
        content_block["index"] = event.index
        # Only an exact `tool_use` block opens a tool call chunk (carrying id and
        # name, with empty args); the other matched block types are content only.
        if event.content_block.type == "tool_use":
            tool_call_chunk = create_tool_call_chunk(
                index=event.index,
                id=event.content_block.id,
                name=event.content_block.name,
                args="",
            )
            tool_call_chunks = [tool_call_chunk]
        else:
            tool_call_chunks = []
        message_chunk = AIMessageChunk(
            content=[content_block],
            tool_call_chunks=tool_call_chunks,
        )
        # Remember this start event so later `input_json_delta` events can tell
        # which kind of block they belong to.
        block_start_event = event

    # Process incremental content updates
    elif event.type == "content_block_delta":
        # Text and citation deltas (incremental text content)
        if event.delta.type in ("text_delta", "citations_delta"):
            # String coercion applies only to deltas that carry `text`; other
            # deltas in this group fall through to the structured form.
            if coerce_content_to_string and hasattr(event.delta, "text"):
                text = getattr(event.delta, "text", "")
                message_chunk = AIMessageChunk(content=text)
            else:
                content_block = event.delta.model_dump()
                content_block["index"] = event.index

                # All citation deltas are part of a text block
                content_block["type"] = "text"
                if "citation" in content_block:
                    # Assign citations to a list if present
                    content_block["citations"] = [content_block.pop("citation")]
                message_chunk = AIMessageChunk(content=[content_block])

        # Reasoning
        elif event.delta.type in {"thinking_delta", "signature_delta"}:
            content_block = event.delta.model_dump()
            content_block["index"] = event.index
            # Both delta kinds are relabelled as a `thinking` content block.
            content_block["type"] = "thinking"
            message_chunk = AIMessageChunk(content=[content_block])

        # Tool input JSON (streaming tool arguments)
        elif event.delta.type == "input_json_delta":
            content_block = event.delta.model_dump()
            content_block["index"] = event.index
            start_event_block = (
                getattr(block_start_event, "content_block", None)
                if block_start_event
                else None
            )
            # Partial JSON becomes a tool call chunk only when the tracked start
            # block is exactly `tool_use`; otherwise it is kept as content only.
            if (
                start_event_block is not None
                and getattr(start_event_block, "type", None) == "tool_use"
            ):
                tool_call_chunk = create_tool_call_chunk(
                    index=event.index,
                    id=None,
                    name=None,
                    args=event.delta.partial_json,
                )
                tool_call_chunks = [tool_call_chunk]
            else:
                tool_call_chunks = []
            message_chunk = AIMessageChunk(
                content=[content_block],
                tool_call_chunks=tool_call_chunks,
            )

    # Process final usage metadata and completion info
    elif event.type == "message_delta" and stream_usage:
        usage_metadata = _create_usage_metadata(event.usage)
        response_metadata = {
            "stop_reason": event.delta.stop_reason,
            "stop_sequence": event.delta.stop_sequence,
        }
        # `context_management` is read defensively and only recorded when present
        # and truthy.
        if context_management := getattr(event, "context_management", None):
            response_metadata["context_management"] = context_management.model_dump()
        message_chunk = AIMessageChunk(
            content="" if coerce_content_to_string else [],
            usage_metadata=usage_metadata,
            response_metadata=response_metadata,
        )
        if message_chunk.response_metadata.get("stop_reason"):
            # Mark final Anthropic stream chunk
            message_chunk.chunk_position = "last"
    # Unhandled event types (e.g., `content_block_stop`, `ping` events)
    # https://platform.claude.com/docs/en/build-with-claude/streaming#other-events
    # `message_start` / `message_delta` also land here when `stream_usage` is off.
    else:
        pass

    # Tag every emitted chunk with the provider name.
    if message_chunk:
        message_chunk.response_metadata["model_provider"] = "anthropic"
    return message_chunk, block_start_event


def _create_usage_metadata(anthropic_usage: BaseModel) -> UsageMetadata:
    """Translate an Anthropic usage object into LangChain `UsageMetadata`.

    Every field is read with `getattr`, so attributes missing from
    `anthropic_usage` are treated as absent instead of raising.

    Note:
        Anthropic's `input_tokens` excludes cached tokens, so the cache read and
        cache creation counts are added on top of it to report the true total.
    """
    cache_read = getattr(anthropic_usage, "cache_read_input_tokens", None)
    cache_write = getattr(anthropic_usage, "cache_creation_input_tokens", None)
    details: dict = {"cache_read": cache_read, "cache_creation": cache_write}

    # Optional per-TTL breakdown of cache creation (5-minute and 1-hour ephemeral
    # cache). It may arrive as a Pydantic model or as a plain mapping.
    ttl_breakdown = getattr(anthropic_usage, "cache_creation", None)
    if ttl_breakdown:
        if isinstance(ttl_breakdown, BaseModel):
            ttl_breakdown = ttl_breakdown.model_dump()
        # Only these two keys are copied over; extend the tuple if more TTLs are
        # added in the future.
        for ttl_key in ("ephemeral_5m_input_tokens", "ephemeral_1h_input_tokens"):
            details[ttl_key] = ttl_breakdown.get(ttl_key)

    # Missing or `None` counts are treated as zero when summing.
    uncached_input = getattr(anthropic_usage, "input_tokens", 0) or 0
    total_input = uncached_input + (cache_read or 0) + (cache_write or 0)
    total_output = getattr(anthropic_usage, "output_tokens", 0) or 0

    # Leave out details that were not reported at all.
    reported_details = {
        key: value for key, value in details.items() if value is not None
    }
    return UsageMetadata(
        input_tokens=total_input,
        output_tokens=total_output,
        total_tokens=total_input + total_output,
        input_token_details=InputTokenDetails(**reported_details),
    )