langchain/libs/partners/anthropic/langchain_anthropic/chat_models.py

"""Anthropic chat models."""

from __future__ import annotations

import copy
import json
import re
import warnings
from collections.abc import AsyncIterator, Callable, Iterator, Mapping, Sequence
from functools import cached_property
from operator import itemgetter
from typing import Any, Final, Literal, cast

import anthropic
from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.exceptions import OutputParserException
from langchain_core.language_models import (
    LanguageModelInput,
    ModelProfile,
    ModelProfileRegistry,
)
from langchain_core.language_models.chat_models import BaseChatModel, LangSmithParams
from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    HumanMessage,
    SystemMessage,
    ToolCall,
    ToolMessage,
    is_data_content_block,
)
from langchain_core.messages import content as types
from langchain_core.messages.ai import InputTokenDetails, UsageMetadata
from langchain_core.messages.tool import tool_call_chunk as create_tool_call_chunk
from langchain_core.output_parsers import (
    JsonOutputKeyToolsParser,
    JsonOutputParser,
    PydanticOutputParser,
    PydanticToolsParser,
)
from langchain_core.output_parsers.base import OutputParserLike
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough
from langchain_core.tools import BaseTool
from langchain_core.utils import from_env, get_pydantic_field_names, secret_from_env
from langchain_core.utils.function_calling import (
    convert_to_json_schema,
    convert_to_openai_tool,
)
from langchain_core.utils.pydantic import is_basemodel_subclass
from langchain_core.utils.utils import _build_model_kwargs
from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
from typing_extensions import NotRequired, Self, TypedDict

from langchain_anthropic._client_utils import (
    _get_default_async_httpx_client,
    _get_default_httpx_client,
)
from langchain_anthropic._compat import _convert_from_v1_to_anthropic
from langchain_anthropic.data._profiles import _PROFILES
from langchain_anthropic.output_parsers import extract_tool_calls

_message_type_lookups = {
    "human": "user",
    "ai": "assistant",
    "AIMessageChunk": "assistant",
    "HumanMessageChunk": "user",
}

_MODEL_PROFILES = cast(ModelProfileRegistry, _PROFILES)


def _get_default_model_profile(model_name: str) -> ModelProfile:
    default = _MODEL_PROFILES.get(model_name) or {}
    return default.copy()


_MODEL_DEFAULT_MAX_OUTPUT_TOKENS: Final[dict[str, int]] = {
    # Listed old to new
    "claude-3-haiku": 4096,  # Claude Haiku 3
    "claude-3-5-haiku": 8192,  # Claude Haiku 3.5
    "claude-3-7-sonnet": 64000,  # Claude Sonnet 3.7
    "claude-sonnet-4": 64000,  # Claude Sonnet 4
    "claude-opus-4": 32000,  # Claude Opus 4
    "claude-opus-4-1": 32000,  # Claude Opus 4.1
    "claude-sonnet-4-5": 64000,  # Claude Sonnet 4.5
    "claude-haiku-4-5": 64000,  # Claude Haiku 4.5
}
_FALLBACK_MAX_OUTPUT_TOKENS: Final[int] = 4096


def _default_max_tokens_for(model: str | None) -> int:
    """Return the default max output tokens for an Anthropic model (with fallback).

    See the Claude docs for [Max Tokens limits](https://platform.claude.com/docs/en/about-claude/models/overview#model-comparison-table).
    """
    if not model:
        return _FALLBACK_MAX_OUTPUT_TOKENS

    parts = model.split("-")
    family = "-".join(parts[:-1]) if len(parts) > 1 else model

    return _MODEL_DEFAULT_MAX_OUTPUT_TOKENS.get(family, _FALLBACK_MAX_OUTPUT_TOKENS)


class AnthropicTool(TypedDict):
    """Anthropic tool definition."""

    name: str

    input_schema: dict[str, Any]

    description: NotRequired[str]

    strict: NotRequired[bool]

    cache_control: NotRequired[dict[str, str]]


# Some tool types require specific beta headers to be enabled
# Mapping of tool type patterns to required beta headers
_TOOL_TYPE_TO_BETA: dict[str, str] = {
    "web_fetch_20250910": "web-fetch-2025-09-10",
    "code_execution_20250522": "code-execution-2025-05-22",
    "code_execution_20250825": "code-execution-2025-08-25",
    "memory_20250818": "context-management-2025-06-27",
}


def _is_builtin_tool(tool: Any) -> bool:
    """Check if a tool is a built-in Anthropic tool.

    [Claude docs for built-in tools](https://platform.claude.com/docs/en/agents-and-tools/tool-use/overview)
    """
    if not isinstance(tool, dict):
        return False

    tool_type = tool.get("type")
    if not tool_type or not isinstance(tool_type, str):
        return False

    _builtin_tool_prefixes = [
        "text_editor_",
        "computer_",
        "bash_",
        "web_search_",
        "web_fetch_",
        "code_execution_",
        "memory_",
    ]
    return any(tool_type.startswith(prefix) for prefix in _builtin_tool_prefixes)


def _format_image(url: str) -> dict:
    """Convert part["image_url"]["url"] strings (OpenAI format) to Anthropic format.

    {
        "type": "base64",
        "media_type": "image/jpeg",
        "data": "/9j/4AAQSkZJRg...",
    }

    Or

    {
        "type": "url",
        "url": "https://example.com/image.jpg",
    }
    """
    # Base64 encoded image
    base64_regex = r"^data:(?P<media_type>image/.+);base64,(?P<data>.+)$"
    base64_match = re.match(base64_regex, url)

    if base64_match:
        return {
            "type": "base64",
            "media_type": base64_match.group("media_type"),
            "data": base64_match.group("data"),
        }

    # Url
    url_regex = r"^https?://.*$"
    url_match = re.match(url_regex, url)

    if url_match:
        return {
            "type": "url",
            "url": url,
        }

    msg = (
        "Malformed url parameter."
        " Must be either an image URL (https://example.com/image.jpg)"
        " or base64 encoded string (data:image/png;base64,'/9j/4AAQSk'...)"
    )
    raise ValueError(
        msg,
    )


def _merge_messages(
    messages: Sequence[BaseMessage],
) -> list[SystemMessage | AIMessage | HumanMessage]:
    """Merge runs of human/tool messages into single human messages with content blocks."""  # noqa: E501
    merged: list = []
    for curr in messages:
        if isinstance(curr, ToolMessage):
            if (
                isinstance(curr.content, list)
                and curr.content
                and all(
                    isinstance(block, dict) and block.get("type") == "tool_result"
                    for block in curr.content
                )
            ):
                curr = HumanMessage(curr.content)  # type: ignore[misc]
            else:
                curr = HumanMessage(  # type: ignore[misc]
                    [
                        {
                            "type": "tool_result",
                            "content": curr.content,
                            "tool_use_id": curr.tool_call_id,
                            "is_error": curr.status == "error",
                        },
                    ],
                )
        last = merged[-1] if merged else None
        if any(
            all(isinstance(m, c) for m in (curr, last))
            for c in (SystemMessage, HumanMessage)
        ):
            if isinstance(cast("BaseMessage", last).content, str):
                new_content: list = [
                    {"type": "text", "text": cast("BaseMessage", last).content},
                ]
            else:
                new_content = copy.copy(cast("list", cast("BaseMessage", last).content))
            if isinstance(curr.content, str):
                new_content.append({"type": "text", "text": curr.content})
            else:
                new_content.extend(curr.content)
            merged[-1] = curr.model_copy(update={"content": new_content})
        else:
            merged.append(curr)
    return merged


def _format_data_content_block(block: dict) -> dict:
    """Format standard data content block to format expected by Anthropic."""
    if block["type"] == "image":
        if "url" in block:
            if block["url"].startswith("data:"):
                # Data URI
                formatted_block = {
                    "type": "image",
                    "source": _format_image(block["url"]),
                }
            else:
                formatted_block = {
                    "type": "image",
                    "source": {"type": "url", "url": block["url"]},
                }
        elif "base64" in block or block.get("source_type") == "base64":
            formatted_block = {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": block["mime_type"],
                    "data": block.get("base64") or block.get("data", ""),
                },
            }
        elif "file_id" in block:
            formatted_block = {
                "type": "image",
                "source": {
                    "type": "file",
                    "file_id": block["file_id"],
                },
            }
        elif block.get("source_type") == "id":
            formatted_block = {
                "type": "image",
                "source": {
                    "type": "file",
                    "file_id": block["id"],
                },
            }
        else:
            msg = (
                "Anthropic only supports 'url', 'base64', or 'id' keys for image "
                "content blocks."
            )
            raise ValueError(
                msg,
            )

    elif block["type"] == "file":
        if "url" in block:
            formatted_block = {
                "type": "document",
                "source": {
                    "type": "url",
                    "url": block["url"],
                },
            }
        elif "base64" in block or block.get("source_type") == "base64":
            formatted_block = {
                "type": "document",
                "source": {
                    "type": "base64",
                    "media_type": block.get("mime_type") or "application/pdf",
                    "data": block.get("base64") or block.get("data", ""),
                },
            }
        elif block.get("source_type") == "text":
            formatted_block = {
                "type": "document",
                "source": {
                    "type": "text",
                    "media_type": block.get("mime_type") or "text/plain",
                    "data": block["text"],
                },
            }
        elif "file_id" in block:
            formatted_block = {
                "type": "document",
                "source": {
                    "type": "file",
                    "file_id": block["file_id"],
                },
            }
        elif block.get("source_type") == "id":
            formatted_block = {
                "type": "document",
                "source": {
                    "type": "file",
                    "file_id": block["id"],
                },
            }
        else:
            msg = (
                "Anthropic only supports 'url', 'base64', or 'id' keys for file "
                "content blocks."
            )
            raise ValueError(msg)

    elif block["type"] == "text-plain":
        formatted_block = {
            "type": "document",
            "source": {
                "type": "text",
                "media_type": block.get("mime_type") or "text/plain",
                "data": block["text"],
            },
        }

    else:
        msg = f"Block of type {block['type']} is not supported."
        raise ValueError(msg)

    if formatted_block:
        for key in ["cache_control", "citations", "title", "context"]:
            if key in block:
                formatted_block[key] = block[key]
            elif (metadata := block.get("extras")) and key in metadata:
                formatted_block[key] = metadata[key]
            elif (metadata := block.get("metadata")) and key in metadata:
                # Backward compat
                formatted_block[key] = metadata[key]

    return formatted_block


def _format_messages(
    messages: Sequence[BaseMessage],
) -> tuple[str | list[dict] | None, list[dict]]:
    """Format messages for Anthropic's API."""
    system: str | list[dict] | None = None
    formatted_messages: list[dict] = []
    merged_messages = _merge_messages(messages)
    for _i, message in enumerate(merged_messages):
        if message.type == "system":
            if system is not None:
                msg = "Received multiple non-consecutive system messages."
                raise ValueError(msg)
            if isinstance(message.content, list):
                system = [
                    (
                        block
                        if isinstance(block, dict)
                        else {"type": "text", "text": block}
                    )
                    for block in message.content
                ]
            else:
                system = message.content
            continue

        role = _message_type_lookups[message.type]
        content: str | list

        if not isinstance(message.content, str):
            # parse as dict
            if not isinstance(message.content, list):
                msg = "Anthropic message content must be str or list of dicts"
                raise ValueError(
                    msg,
                )

            # populate content
            content = []
            for block in message.content:
                if isinstance(block, str):
                    content.append({"type": "text", "text": block})
                elif isinstance(block, dict):
                    if "type" not in block:
                        msg = "Dict content block must have a type key"
                        raise ValueError(msg)
                    if block["type"] == "image_url":
                        # convert format
                        source = _format_image(block["image_url"]["url"])
                        content.append({"type": "image", "source": source})
                    elif is_data_content_block(block):
                        content.append(_format_data_content_block(block))
                    elif block["type"] == "tool_use":
                        # If a tool_call with the same id as a tool_use content block
                        # exists, the tool_call is preferred.
                        if isinstance(message, AIMessage) and block["id"] in [
                            tc["id"] for tc in message.tool_calls
                        ]:
                            overlapping = [
                                tc
                                for tc in message.tool_calls
                                if tc["id"] == block["id"]
                            ]
                            content.extend(
                                _lc_tool_calls_to_anthropic_tool_use_blocks(
                                    overlapping,
                                ),
                            )
                        else:
                            if tool_input := block.get("input"):
                                args = tool_input
                            elif "partial_json" in block:
                                try:
                                    args = json.loads(block["partial_json"] or "{}")
                                except json.JSONDecodeError:
                                    args = {}
                            else:
                                args = {}
                            content.append(
                                _AnthropicToolUse(
                                    type="tool_use",
                                    name=block["name"],
                                    input=args,
                                    id=block["id"],
                                )
                            )
                    elif block["type"] in ("server_tool_use", "mcp_tool_use"):
                        formatted_block = {
                            k: v
                            for k, v in block.items()
                            if k
                            in (
                                "type",
                                "id",
                                "input",
                                "name",
                                "server_name",  # for mcp_tool_use
                                "cache_control",
                            )
                        }
                        # Attempt to parse streamed output
                        if block.get("input") == {} and "partial_json" in block:
                            try:
                                input_ = json.loads(block["partial_json"])
                                if input_:
                                    formatted_block["input"] = input_
                            except json.JSONDecodeError:
                                pass
                        content.append(formatted_block)
                    elif block["type"] == "text":
                        text = block.get("text", "")
                        # Only add non-empty strings for now as empty ones are not
                        # accepted.
                        # https://github.com/anthropics/anthropic-sdk-python/issues/461
                        if text.strip():
                            formatted_block = {
                                k: v
                                for k, v in block.items()
                                if k in ("type", "text", "cache_control", "citations")
                            }
                            # Clean up citations to remove null file_id fields
                            if formatted_block.get("citations"):
                                cleaned_citations = []
                                for citation in formatted_block["citations"]:
                                    cleaned_citation = {
                                        k: v
                                        for k, v in citation.items()
                                        if not (k == "file_id" and v is None)
                                    }
                                    cleaned_citations.append(cleaned_citation)
                                formatted_block["citations"] = cleaned_citations
                            content.append(formatted_block)
                    elif block["type"] == "thinking":
                        content.append(
                            {
                                k: v
                                for k, v in block.items()
                                if k
                                in ("type", "thinking", "cache_control", "signature")
                            },
                        )
                    elif block["type"] == "redacted_thinking":
                        content.append(
                            {
                                k: v
                                for k, v in block.items()
                                if k in ("type", "cache_control", "data")
                            },
                        )
                    elif block["type"] == "tool_result":
                        tool_content = _format_messages(
                            [HumanMessage(block["content"])],
                        )[1][0]["content"]
                        content.append({**block, "content": tool_content})
                    elif block["type"] in (
                        "code_execution_tool_result",
                        "bash_code_execution_tool_result",
                        "text_editor_code_execution_tool_result",
                        "mcp_tool_result",
                        "web_search_tool_result",
                        "web_fetch_tool_result",
                    ):
                        content.append(
                            {
                                k: v
                                for k, v in block.items()
                                if k
                                in (
                                    "type",
                                    "content",
                                    "tool_use_id",
                                    "is_error",  # for mcp_tool_result
                                    "cache_control",
                                    "retrieved_at",  # for web_fetch_tool_result
                                )
                            },
                        )
                    else:
                        content.append(block)
                else:
                    msg = (
                        f"Content blocks must be str or dict, instead was: "
                        f"{type(block)}"
                    )
                    raise ValueError(
                        msg,
                    )
        else:
            content = message.content

        # Ensure all tool_calls have a tool_use content block
        if isinstance(message, AIMessage) and message.tool_calls:
            content = content or []
            content = (
                [{"type": "text", "text": message.content}]
                if isinstance(content, str) and content
                else content
            )
            tool_use_ids = [
                cast("dict", block)["id"]
                for block in content
                if cast("dict", block)["type"] == "tool_use"
            ]
            missing_tool_calls = [
                tc for tc in message.tool_calls if tc["id"] not in tool_use_ids
            ]
            cast("list", content).extend(
                _lc_tool_calls_to_anthropic_tool_use_blocks(missing_tool_calls),
            )

        if not content and role == "assistant" and _i < len(merged_messages) - 1:
            # anthropic.BadRequestError: Error code: 400: all messages must have
            # non-empty content except for the optional final assistant message
            continue
        formatted_messages.append({"role": role, "content": content})
    return system, formatted_messages


def _handle_anthropic_bad_request(e: anthropic.BadRequestError) -> None:
    """Handle Anthropic BadRequestError."""
    if ("messages: at least one message is required") in e.message:
        message = "Received only system message(s). "
        warnings.warn(message, stacklevel=2)
        raise e
    raise


class ChatAnthropic(BaseChatModel):
    """Anthropic (Claude) chat models.

    See the [Claude Platform docs](https://platform.claude.com/docs/en/about-claude/models/overview)
    for a list of the latest models, their capabilities, and pricing.

    Setup:
        Install `langchain-anthropic` and set environment variable `ANTHROPIC_API_KEY`.

        ```bash
        pip install -U langchain-anthropic
        export ANTHROPIC_API_KEY="your-api-key"
        ```

    Key init args:
        **Completion params:**

        * [`model`][langchain_anthropic.chat_models.ChatAnthropic.model]: Name of
            Anthropic model to use. e.g. `'claude-sonnet-4-5-20250929'`.
        * [`temperature`][langchain_anthropic.chat_models.ChatAnthropic.temperature]:
            Sampling temperature. Ranges from `0.0` to `1.0`.
        * [`max_tokens`][langchain_anthropic.chat_models.ChatAnthropic.max_tokens]: Max
            number of tokens to generate.

        **Client params:**

        * [`timeout`][langchain_anthropic.chat_models.ChatAnthropic.default_request_timeout]:
            Timeout for requests.
        * [`anthropic_proxy`][langchain_anthropic.chat_models.ChatAnthropic.anthropic_proxy]:
            Proxy to use for the Anthropic clients, will be used for every API call.
            If not passed in will be read from env var `ANTHROPIC_PROXY`.
        * [`max_retries`][langchain_anthropic.chat_models.ChatAnthropic.max_retries]:
            Max number of retries if a request fails.
        * [`api_key`][langchain_anthropic.chat_models.ChatAnthropic.anthropic_api_key]:
            Anthropic API key. If not passed in will be read from env var
            `ANTHROPIC_API_KEY`.
        * [`base_url`][langchain_anthropic.chat_models.ChatAnthropic.anthropic_api_url]:
            Base URL for API requests. Only specify if using a proxy or service emulator.

        See full list of supported init args and their descriptions below.

    ???+ example "Instantiate"

        ```python
        from langchain_anthropic import ChatAnthropic

        model = ChatAnthropic(
            model="claude-sonnet-4-5-20250929",
            temperature=0,
            max_tokens=1024,
            timeout=None,
            max_retries=2,
            # api_key="...",
            # base_url="...",
            # other params...
        )
        ```

    ???+ note

        Any param which is not explicitly supported will be passed directly to
        `Anthropic.messages.create(...)` each time to the model is invoked.

        !!! example

            ```python
            from langchain_anthropic import ChatAnthropic
            import anthropic

            ChatAnthropic(..., extra_headers={}).invoke(...)

            # Results in underlying API call of:

            anthropic.Anthropic(..).messages.create(..., extra_headers={})

            # ... which is also equivalent to:

            ChatAnthropic(...).invoke(..., extra_headers={})
            ```

    ???+ example "Invoke"

        ```python
        messages = [
            (
                "system",
                "You are a helpful translator. Translate the user sentence to French.",
            ),
            (
                "human",
                "I love programming.",
            ),
        ]
        model.invoke(messages)
        ```

        ```python
        AIMessage(
            content="J'aime la programmation.",
            response_metadata={
                "id": "msg_01Trik66aiQ9Z1higrD5XFx3",
                "model": "claude-sonnet-4-5-20250929",
                "stop_reason": "end_turn",
                "stop_sequence": None,
                "usage": {"input_tokens": 25, "output_tokens": 11},
            },
            id="run-5886ac5f-3c2e-49f5-8a44-b1e92808c929-0",
            usage_metadata={
                "input_tokens": 25,
                "output_tokens": 11,
                "total_tokens": 36,
            },
        )
        ```

    ???+ example "Stream"

        ```python
        for chunk in model.stream(messages):
            print(chunk.text, end="")
        ```

        ```python
        AIMessageChunk(content="J", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        AIMessageChunk(content="'", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        AIMessageChunk(content="a", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        AIMessageChunk(content="ime", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        AIMessageChunk(content=" la", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        AIMessageChunk(content=" programm", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        AIMessageChunk(content="ation", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        AIMessageChunk(content=".", id="run-272ff5f9-8485-402c-b90d-eac8babc5b25")
        ```

        To aggregate the full message from the stream:

        ```python
        stream = model.stream(messages)
        full = next(stream)
        for chunk in stream:
            full += chunk
        full
        ```

        ```python
        AIMessageChunk(content="J'aime la programmation.", id="run-b34faef0-882f-4869-a19c-ed2b856e6361")
        ```

    ???+ example "Async invocation"

        ```python
        await model.ainvoke(messages)

        # stream:
        # async for chunk in (await model.astream(messages))

        # batch:
        # await model.abatch([messages])
        ```

        ```python
        AIMessage(
            content="J'aime la programmation.",
            response_metadata={
                "id": "msg_01Trik66aiQ9Z1higrD5XFx3",
                "model": "claude-sonnet-4-5-20250929",
                "stop_reason": "end_turn",
                "stop_sequence": None,
                "usage": {"input_tokens": 25, "output_tokens": 11},
            },
            id="run-5886ac5f-3c2e-49f5-8a44-b1e92808c929-0",
            usage_metadata={
                "input_tokens": 25,
                "output_tokens": 11,
                "total_tokens": 36,
            },
        )
        ```

    ???+ example "Tool calling"

        ```python hl_lines="16"
        from pydantic import BaseModel, Field


        class GetWeather(BaseModel):
            '''Get the current weather in a given location'''

            location: str = Field(..., description="The city and state, e.g. San Francisco, CA")


        class GetPopulation(BaseModel):
            '''Get the current population in a given location'''

            location: str = Field(..., description="The city and state, e.g. San Francisco, CA")


        model_with_tools = model.bind_tools([GetWeather, GetPopulation])
        ai_msg = model_with_tools.invoke("Which city is hotter today and which is bigger: LA or NY?")
        ai_msg.tool_calls
        ```

        ```python
        [
            {
                "name": "GetWeather",
                "args": {"location": "Los Angeles, CA"},
                "id": "toolu_01KzpPEAgzura7hpBqwHbWdo",
            },
            {
                "name": "GetWeather",
                "args": {"location": "New York, NY"},
                "id": "toolu_01JtgbVGVJbiSwtZk3Uycezx",
            },
            {
                "name": "GetPopulation",
                "args": {"location": "Los Angeles, CA"},
                "id": "toolu_01429aygngesudV9nTbCKGuw",
            },
            {
                "name": "GetPopulation",
                "args": {"location": "New York, NY"},
                "id": "toolu_01JPktyd44tVMeBcPPnFSEJG",
            },
        ]
        ```

        See [`ChatAnthropic.bind_tools()`][langchain_anthropic.chat_models.ChatAnthropic.bind_tools]
        for more info.

        !!! note "Strict tool use"

            Anthropic supports a strict tool use feature that guarantees tool names
            and arguments are validated and correctly typed.

            See [`ChatAnthropic.bind_tools()`][langchain_anthropic.chat_models.ChatAnthropic.bind_tools]
            for more info.

    ???+ example "Token-efficient tool use (beta)"

        See LangChain [docs](https://docs.langchain.com/oss/python/integrations/chat/anthropic#token-efficient-tool-use)
        for more detail.

        ```python hl_lines="9"
        from langchain_anthropic import ChatAnthropic
        from langchain_core.tools import tool

        model = ChatAnthropic(
            model="claude-sonnet-4-5-20250929",
            temperature=0,
            model_kwargs={
                "extra_headers": {
                    "anthropic-beta": "token-efficient-tools-2025-02-19"
                }
            }
        )

        @tool
        def get_weather(location: str) -> str:
            \"\"\"Get the weather at a location.\"\"\"
            return "It's sunny."

        model_with_tools = model.bind_tools([get_weather])
        response = model_with_tools.invoke(
            "What's the weather in San Francisco?"
        )
        print(response.tool_calls)
        print(f'Total tokens: {response.usage_metadata["total_tokens"]}')
        ```

        ```txt
        [{'name': 'get_weather', 'args': {'location': 'San Francisco'}, 'id': 'toolu_01HLjQMSb1nWmgevQUtEyz17', 'type': 'tool_call'}]
        Total tokens: 408
        ```

    ???+ example "Image input"

        See the [multimodal guide](https://docs.langchain.com/oss/python/langchain/models#multimodal)
        for more detail.

        ```python
        import base64

        import httpx
        from langchain_anthropic import ChatAnthropic
        from langchain_core.messages import HumanMessage

        image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
        image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")

        model = ChatAnthropic(model="claude-sonnet-4-5-20250929")
        message = HumanMessage(
            content=[
                {
                    "type": "text",
                    "text": "Can you highlight the differences between these two images?",
                },
                {
                    "type": "image",
                    "base64": image_data,
                    "mime_type": "image/jpeg",
                },
                {
                    "type": "image",
                    "url": image_url,
                },
            ],
        )
        ai_msg = model.invoke([message])
        ai_msg.content
        ```

        ```python
        "After examining both images carefully, I can see that they are actually identical."
        ```

        ??? example "Upload with Files API"

            You can also pass in files that are managed through Anthropic's
            [Files API](https://platform.claude.com/docs/en/build-with-claude/files):

            ```python
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(
                model="claude-sonnet-4-5-20250929",
                betas=["files-api-2025-04-14"],
            )
            input_message = {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this document.",
                    },
                    {
                        "type": "image",
                        "id": "file_abc123...",
                    },
                ],
            }
            model.invoke([input_message])
            ```

    ???+ example "PDF input"

        See the [multimodal guide](https://docs.langchain.com/oss/python/langchain/models#multimodal)
        for more detail.

        ```python
        from base64 import b64encode
        from langchain_anthropic import ChatAnthropic
        from langchain_core.messages import HumanMessage
        import requests

        url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
        data = b64encode(requests.get(url).content).decode()

        model = ChatAnthropic(model="claude-sonnet-4-5-20250929")
        ai_msg = model.invoke(
            [
                HumanMessage(
                    [
                        "Summarize this document.",
                        {
                            "type": "file",
                            "mime_type": "application/pdf",
                            "base64": data,
                        },
                    ]
                )
            ]
        )
        ai_msg.content
        ```

        ```python
        "This appears to be a simple document..."
        ```

        ??? example "Upload with Files API"

            You can also pass in files that are managed through Anthropic's
            [Files API](https://platform.claude.com/docs/en/build-with-claude/files):

            ```python
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(
                model="claude-sonnet-4-5-20250929",
                betas=["files-api-2025-04-14"],
            )
            input_message = {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this document.",
                    },
                    {
                        "type": "file",
                        "id": "file_abc123...",
                    },
                ],
            }
            model.invoke([input_message])
            ```

    ???+ example "Extended thinking"

        Certain [Claude models](https://platform.claude.com/docs/en/build-with-claude/extended-thinking#supported-models)
        support an [extended thinking](https://platform.claude.com/docs/en/build-with-claude/extended-thinking)
        feature, which will output the step-by-step reasoning process that led to its
        final answer.

        To use it, specify the `thinking` parameter when initializing `ChatAnthropic`.

        It can also be passed in as a kwarg during invocation.

        **You will need to specify a token budget** to use this feature.

        !!! example

            ```python hl_lines="5-6"
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(
                model="claude-sonnet-4-5-20250929",
                max_tokens=5000,
                thinking={"type": "enabled", "budget_tokens": 2000},
            )

            response = model.invoke("What is the cube root of 50.653?")
            response.content
            ```

            ```python
            [
                {
                    "signature": "...",
                    "thinking": "To find the cube root of 50.653...",
                    "type": "thinking",
                },
                {"text": "The cube root of 50.653 is ...", "type": "text"},
            ]
            ```

        !!! warning "Differences in thinking across model versions"

            The Claude Messages API handles thinking differently across Claude Sonnet
            3.7 and Claude 4 models.

            Refer to the [Claude docs](https://platform.claude.com/docs/en/build-with-claude/extended-thinking#differences-in-thinking-across-model-versions)
            for more info.

    ???+ example "Prompt caching"

        Prompt caching reduces processing time and costs for repetitive tasks or prompts
        with consistent elements

        !!! note
            Only certain models support prompt caching.
            See the [Claude documentation](https://platform.claude.com/docs/en/build-with-claude/prompt-caching#supported-models)
            for a full list.

        ```python hl_lines="16"
        from langchain_anthropic import ChatAnthropic

        model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

        messages = [
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": "Below is some long context:",
                    },
                    {
                        "type": "text",
                        "text": f"{long_text}",
                        "cache_control": {"type": "ephemeral"},
                    },
                ],
            },
            {
                "role": "user",
                "content": "What's that about?",
            },
        ]

        response = model.invoke(messages)
        response.usage_metadata["input_token_details"]
        ```

        ```python
        {"cache_read": 0, "cache_creation": 1458}
        ```

        Alternatively, you may enable prompt caching at invocation time. You may want to
        conditionally cache based on runtime conditions, such as the length of the
        context. This is useful for app-level decisions about what to
        cache.

        ```python hl_lines="3"
        response = model.invoke(
            messages,
            cache_control={"type": "ephemeral"},
        )
        ```

        ??? example "Extended caching"

            The cache lifetime is 5 minutes by default. If this is too short, you can
            apply one hour caching by setting `ttl` to `'1h'`.

            ```python hl_lines="12"
            model = ChatAnthropic(
                model="claude-sonnet-4-5-20250929",
            )

            messages = [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"{long_text}",
                            "cache_control": {"type": "ephemeral", "ttl": "1h"},
                        },
                    ],
                }
            ]

            response = model.invoke(messages)
            ```

            Details of cached token counts will be included on the `InputTokenDetails`
            of response's `usage_metadata`:

            ```python
            response = model.invoke(messages)
            response.usage_metadata
            ```

            ```python
            {
                "input_tokens": 1500,
                "output_tokens": 200,
                "total_tokens": 1700,
                "input_token_details": {
                    "cache_read": 0,
                    "cache_creation": 1000,
                    "ephemeral_1h_input_tokens": 750,
                    "ephemeral_5m_input_tokens": 250,
                },
            }
            ```

            See [Claude documentation](https://platform.claude.com/docs/en/build-with-claude/prompt-caching#1-hour-cache-duration-beta)
            for detail.

    ???+ example "Token usage metadata"

        ```python
        ai_msg = model.invoke(messages)
        ai_msg.usage_metadata
        ```

        ```python
        {"input_tokens": 25, "output_tokens": 11, "total_tokens": 36}
        ```

        Message chunks containing token usage will be included during streaming by
        default:

        ```python
        stream = model.stream(messages)
        full = next(stream)
        for chunk in stream:
            full += chunk
        full.usage_metadata
        ```

        ```python
        {"input_tokens": 25, "output_tokens": 11, "total_tokens": 36}
        ```

        These can be disabled by setting [`stream_usage=False`][langchain_anthropic.chat_models.ChatAnthropic.stream_usage]
        in the stream method or when initializing `ChatAnthropic`.

    ???+ example "Citations"

        Anthropic supports a [citations](https://platform.claude.com/docs/en/build-with-claude/citations)
        feature that lets Claude attach context to its answers based on source
        documents supplied by the user.

        When passing a [Claude document content block](https://platform.claude.com/docs/en/build-with-claude/citations#document-types)
        with `#!json "citations": {"enabled": True}` included in the query, Claude may
        generate citations in its response.

        ```python hl_lines="9-19"
        from langchain_anthropic import ChatAnthropic

        model = ChatAnthropic(model="claude-3-5-haiku-20241022")

        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "document",
                        "source": {
                            "type": "text",
                            "media_type": "text/plain",
                            "data": "The grass is green. The sky is blue.",
                        },
                        "title": "My Document",
                        "context": "This is a trustworthy document.",
                        "citations": {"enabled": True},
                    },
                    {"type": "text", "text": "What color is the grass and sky?"},
                ],
            }
        ]
        response = model.invoke(messages)
        response.content
        ```

        ```python hl_lines="6-15 21-30"
        [
            {"text": "Based on the document, ", "type": "text"},
            {
                "text": "the grass is green",
                "type": "text",
                "citations": [
                    {
                        "type": "char_location",
                        "cited_text": "The grass is green. ",
                        "document_index": 0,
                        "document_title": "My Document",
                        "start_char_index": 0,
                        "end_char_index": 20,
                    }
                ],
            },
            {"text": ", and ", "type": "text"},
            {
                "text": "the sky is blue",
                "type": "text",
                "citations": [
                    {
                        "type": "char_location",
                        "cited_text": "The sky is blue.",
                        "document_index": 0,
                        "document_title": "My Document",
                        "start_char_index": 20,
                        "end_char_index": 36,
                    }
                ],
            },
            {"text": ".", "type": "text"},
        ]
        ```

    ???+ example "Context management"

        Anthropic supports a context editing feature that will automatically manage the
        model's context window (e.g., by clearing tool results).

        See [Anthropic documentation](https://platform.claude.com/docs/en/build-with-claude/context-editing)
        for details and configuration options.

        ```python hl_lines="5-6"
        from langchain_anthropic import ChatAnthropic

        model = ChatAnthropic(
            model="claude-sonnet-4-5-20250929",
            betas=["context-management-2025-06-27"],
            context_management={"edits": [{"type": "clear_tool_uses_20250919"}]},
        )
        model_with_tools = model.bind_tools([{"type": "web_search_20250305", "name": "web_search"}])
        response = model_with_tools.invoke("Search for recent developments in AI")
        ```

    ???+ example "Response metadata"

        ```python
        ai_msg = model.invoke(messages)
        ai_msg.response_metadata
        ```

        ```python
        {
            "id": "msg_013xU6FHEGEq76aP4RgFerVT",
            "model": "claude-sonnet-4-5-20250929",
            "stop_reason": "end_turn",
            "stop_sequence": None,
            "usage": {"input_tokens": 25, "output_tokens": 11},
        }
        ```

    ???+ example "Extended context windows (beta)"

        Claude Sonnet 4 supports a 1-million token context window, available in beta for
        organizations in usage tier 4 and organizations with custom rate limits.

        ```python hl_lines="5"
        from langchain_anthropic import ChatAnthropic

        model = ChatAnthropic(
            model="claude-sonnet-4-5-20250929",
            betas=["context-1m-2025-08-07"],  # Enable 1M context beta
        )

        long_document = \"\"\"
        This is a very long document that would benefit from the extended 1M
        context window...
        [imagine this continues for hundreds of thousands of tokens]
        \"\"\"

        messages = [
            HumanMessage(f\"\"\"
        Please analyze this document and provide a summary:

        {long_document}

        What are the key themes and main conclusions?
        \"\"\")
        ]

        response = model.invoke(messages)
        ```

        See [Claude documentation](https://platform.claude.com/docs/en/build-with-claude/context-windows#1m-token-context-window)
        for detail.

    ???+ example "Structured output"

        ```python hl_lines="13"
        from typing import Optional
        from pydantic import BaseModel, Field


        class Joke(BaseModel):
            '''Joke to tell user.'''

            setup: str = Field(description="The setup of the joke")
            punchline: str = Field(description="The punchline to the joke")
            rating: int | None = Field(description="How funny the joke is, from 1 to 10")


        structured_model = model.with_structured_output(Joke)
        structured_model.invoke("Tell me a joke about cats")
        ```

        ```python
        Joke(
            setup="Why was the cat sitting on the computer?",
            punchline="To keep an eye on the mouse!",
            rating=None,
        )
        ```

        See [`ChatAnthropic.with_structured_output()`][langchain_anthropic.chat_models.ChatAnthropic.with_structured_output]
        for more info.

        !!! note "Native structured output"

            Anthropic supports a native structured output feature that guarantees
            responses adhere to a given schema.

            See [`ChatAnthropic.with_structured_output()`][langchain_anthropic.chat_models.ChatAnthropic.with_structured_output]
            for more info.

    ???+ example "Built-in tools"

        See LangChain [docs](https://docs.langchain.com/oss/python/integrations/chat/anthropic#built-in-tools)
        for more detail.

        ??? example "Web search"

            ```python hl_lines="5-9"
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(model="claude-3-5-haiku-20241022")

            tool = {
                "type": "web_search_20250305",
                "name": "web_search",
                "max_uses": 3,
            }
            model_with_tools = model.bind_tools([tool])

            response = model_with_tools.invoke("How do I update a web app to TypeScript 5.5?")
            ```

            See the [Claude docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/web-search-tool)
            for more info.

        ??? example "Web fetch (beta)"

            ```python hl_lines="7-11"
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(
                model="claude-3-5-haiku-20241022",
            )

            tool = {
                "type": "web_fetch_20250910",
                "name": "web_fetch",
                "max_uses": 3,
            }
            model_with_tools = model.bind_tools([tool])

            response = model_with_tools.invoke("Please analyze the content at https://example.com/article")
            ```

            !!! note "Automatic beta header"

                The required `web-fetch-2025-09-10` beta header is automatically
                appended to the request when using the `web_fetch_20250910` tool type.
                You don't need to manually specify it in the `betas` parameter.

            See the [Claude docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/web-fetch-tool)
            for more info.

        ??? example "Code execution"

            ```python hl_lines="3-6"
            model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

            tool = {
                "type": "code_execution_20250522",
                "name": "code_execution",
            }
            model_with_tools = model.bind_tools([tool])

            response = model_with_tools.invoke(
                "Calculate the mean and standard deviation of [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]"
            )
            ```

            !!! note "Automatic beta header"

                The required `code-execution-2025-05-22` beta header is automatically
                appended to the request when using the `code_execution_20250522` tool
                type. You don't need to manually specify it in the `betas` parameter.

            See the [Claude docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/code-execution-tool)
            for more info.

        ??? example "Memory tool"

            ```python hl_lines="5-8"
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

            tool = {
                "type": "memory_20250818",
                "name": "memory",
            }
            model_with_tools = model.bind_tools([tool])

            response = model_with_tools.invoke("What are my interests?")
            ```

            !!! note "Automatic beta header"

                The required `context-management-2025-06-27` beta header is automatically
                appended to the request when using the `memory_20250818` tool type.
                You don't need to manually specify it in the `betas` parameter.

            See the [Claude docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/memory-tool)
            for more info.

        ??? example "Remote MCP"

            ```python hl_lines="3-14 18-19"
            from langchain_anthropic import ChatAnthropic

            mcp_servers = [
                {
                    "type": "url",
                    "url": "https://mcp.deepwiki.com/mcp",
                    "name": "deepwiki",
                    "tool_configuration": {  # optional configuration
                        "enabled": True,
                        "allowed_tools": ["ask_question"],
                    },
                    "authorization_token": "PLACEHOLDER",  # optional authorization
                }
            ]

            model = ChatAnthropic(
                model="claude-sonnet-4-5-20250929",
                betas=["mcp-client-2025-04-04"],  # Enable MCP client beta
                mcp_servers=mcp_servers,  # Pass in MCP server configurations
            )

            response = model.invoke(
                "What transport protocols does the 2025-03-26 version of the MCP "
                "spec (modelcontextprotocol/modelcontextprotocol) support?"
            )
            ```

            See the [Claude docs](https://platform.claude.com/docs/en/agents-and-tools/mcp-connector)
            for more info.

        ??? example "Text editor"

            ```python hl_lines="5-8"
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

            tool = {
                "type": "text_editor_20250124",
                "name": "str_replace_editor",
            }
            model_with_tools = model.bind_tools([tool])

            response = model_with_tools.invoke(
                "There's a syntax error in my primes.py file. Can you help me fix it?"
            )
            print(response.text)
            response.tool_calls
            ```

            ```txt
            I'd be happy to help you fix the syntax error in your primes.py file. First, let's look at the current content of the file to identify the error.
            ```

            ```txt
            [{'name': 'str_replace_editor',
            'args': {'command': 'view', 'path': '/repo/primes.py'},
            'id': 'toolu_01VdNgt1YV7kGfj9LFLm6HyQ',
            'type': 'tool_call'}]
            ```

            See the [Claude docs](https://platform.claude.com/docs/en/agents-and-tools/tool-use/text-editor-tool)
            for more info.
    """  # noqa: E501

    model_config = ConfigDict(
        populate_by_name=True,
    )

    model: str = Field(alias="model_name")
    """Model name to use."""

    max_tokens: int | None = Field(default=None, alias="max_tokens_to_sample")
    """Denotes the number of tokens to predict per generation."""

    temperature: float | None = None
    """A non-negative float that tunes the degree of randomness in generation."""

    top_k: int | None = None
    """Number of most likely tokens to consider at each step."""

    top_p: float | None = None
    """Total probability mass of tokens to consider at each step."""

    default_request_timeout: float | None = Field(None, alias="timeout")
    """Timeout for requests to Claude API."""

    # sdk default = 2: https://github.com/anthropics/anthropic-sdk-python?tab=readme-ov-file#retries
    max_retries: int = 2
    """Number of retries allowed for requests sent to the Claude API."""

    stop_sequences: list[str] | None = Field(None, alias="stop")
    """Default stop sequences."""

    anthropic_api_url: str | None = Field(
        alias="base_url",
        default_factory=from_env(
            ["ANTHROPIC_API_URL", "ANTHROPIC_BASE_URL"],
            default="https://api.anthropic.com",
        ),
    )
    """Base URL for API requests. Only specify if using a proxy or service emulator.

    If a value isn't passed in, will attempt to read the value first from
    `ANTHROPIC_API_URL` and if that is not set, `ANTHROPIC_BASE_URL`.
    """

    anthropic_api_key: SecretStr = Field(
        alias="api_key",
        default_factory=secret_from_env("ANTHROPIC_API_KEY", default=""),
    )
    """Automatically read from env var `ANTHROPIC_API_KEY` if not provided."""

    anthropic_proxy: str | None = Field(
        default_factory=from_env("ANTHROPIC_PROXY", default=None)
    )
    """Proxy to use for the Anthropic clients, will be used for every API call.

    If not provided, will attempt to read from the `ANTHROPIC_PROXY` environment
    variable.
    """

    default_headers: Mapping[str, str] | None = None
    """Headers to pass to the Anthropic clients, will be used for every API call."""

    betas: list[str] | None = None
    """List of beta features to enable. If specified, invocations will be routed
    through `client.beta.messages.create`.

    Example: `#!python betas=["mcp-client-2025-04-04"]`
    """
    # Can also be passed in w/ model_kwargs, but having it as a param makes better devx
    #
    # Precedence order:
    # 1. Call-time kwargs (e.g., llm.invoke(..., betas=[...]))
    # 2. model_kwargs (e.g., ChatAnthropic(model_kwargs={"betas": [...]}))
    # 3. Direct parameter (e.g., ChatAnthropic(betas=[...]))

    model_kwargs: dict[str, Any] = Field(default_factory=dict)

    streaming: bool = False
    """Whether to use streaming or not."""

    stream_usage: bool = True
    """Whether to include usage metadata in streaming output.

    If `True`, additional message chunks will be generated during the stream including
    usage metadata.
    """

    thinking: dict[str, Any] | None = Field(default=None)
    """Parameters for Claude reasoning,

    e.g., `#!python {"type": "enabled", "budget_tokens": 10_000}`
    """

    mcp_servers: list[dict[str, Any]] | None = None
    """List of MCP servers to use for the request.

    Example: `#!python mcp_servers=[{"type": "url", "url": "https://mcp.example.com/mcp",
    "name": "example-mcp"}]`

    !!! note

        This feature requires the beta header `'mcp-client-2025-11-20'` to be set in
        [`betas`][langchain_anthropic.chat_models.ChatAnthropic.betas].
    """

    context_management: dict[str, Any] | None = None
    """Configuration for
    [context management](https://platform.claude.com/docs/en/build-with-claude/context-editing).

    !!! note

        This feature requires the beta header `'context-management-2025-06-27'` to be
        set in [`betas`][langchain_anthropic.chat_models.ChatAnthropic.betas].
    """

    @property
    def _llm_type(self) -> str:
        """Return type of chat model."""
        return "anthropic-chat"

    @property
    def lc_secrets(self) -> dict[str, str]:
        """Return a mapping of secret keys to environment variables."""
        return {
            "anthropic_api_key": "ANTHROPIC_API_KEY",
            "mcp_servers": "ANTHROPIC_MCP_SERVERS",
        }

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Whether the class is serializable in langchain."""
        return True

    @classmethod
    def get_lc_namespace(cls) -> list[str]:
        """Get the namespace of the LangChain object.

        Returns:
            `["langchain", "chat_models", "anthropic"]`
        """
        return ["langchain", "chat_models", "anthropic"]

    @property
    def _identifying_params(self) -> dict[str, Any]:
        """Get the identifying parameters."""
        return {
            "model": self.model,
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "top_k": self.top_k,
            "top_p": self.top_p,
            "model_kwargs": self.model_kwargs,
            "streaming": self.streaming,
            "max_retries": self.max_retries,
            "default_request_timeout": self.default_request_timeout,
            "thinking": self.thinking,
        }

    def _get_ls_params(
        self,
        stop: list[str] | None = None,
        **kwargs: Any,
    ) -> LangSmithParams:
        """Get standard params for tracing."""
        params = self._get_invocation_params(stop=stop, **kwargs)
        ls_params = LangSmithParams(
            ls_provider="anthropic",
            ls_model_name=params.get("model", self.model),
            ls_model_type="chat",
            ls_temperature=params.get("temperature", self.temperature),
        )
        if ls_max_tokens := params.get("max_tokens", self.max_tokens):
            ls_params["ls_max_tokens"] = ls_max_tokens
        if ls_stop := stop or params.get("stop", None):
            ls_params["ls_stop"] = ls_stop
        return ls_params

    @model_validator(mode="before")
    @classmethod
    def set_default_max_tokens(cls, values: dict[str, Any]) -> Any:
        """Set default `max_tokens`."""
        if values.get("max_tokens") is None:
            model = values.get("model") or values.get("model_name")
            values["max_tokens"] = _default_max_tokens_for(model)
        return values

    @model_validator(mode="before")
    @classmethod
    def build_extra(cls, values: dict) -> Any:
        """Build model kwargs."""
        all_required_field_names = get_pydantic_field_names(cls)
        return _build_model_kwargs(values, all_required_field_names)

    @model_validator(mode="after")
    def _set_model_profile(self) -> Self:
        """Set model profile if not overridden."""
        if self.profile is None:
            self.profile = _get_default_model_profile(self.model)
        return self

    @cached_property
    def _client_params(self) -> dict[str, Any]:
        client_params: dict[str, Any] = {
            "api_key": self.anthropic_api_key.get_secret_value(),
            "base_url": self.anthropic_api_url,
            "max_retries": self.max_retries,
            "default_headers": (self.default_headers or None),
        }
        # value <= 0 indicates the param should be ignored. None is a meaningful value
        # for Anthropic client and treated differently than not specifying the param at
        # all.
        if self.default_request_timeout is None or self.default_request_timeout > 0:
            client_params["timeout"] = self.default_request_timeout

        return client_params

    @cached_property
    def _client(self) -> anthropic.Client:
        client_params = self._client_params
        http_client_params = {"base_url": client_params["base_url"]}
        if "timeout" in client_params:
            http_client_params["timeout"] = client_params["timeout"]
        if self.anthropic_proxy:
            http_client_params["anthropic_proxy"] = self.anthropic_proxy
        http_client = _get_default_httpx_client(**http_client_params)
        params = {
            **client_params,
            "http_client": http_client,
        }
        return anthropic.Client(**params)

    @cached_property
    def _async_client(self) -> anthropic.AsyncClient:
        client_params = self._client_params
        http_client_params = {"base_url": client_params["base_url"]}
        if "timeout" in client_params:
            http_client_params["timeout"] = client_params["timeout"]
        if self.anthropic_proxy:
            http_client_params["anthropic_proxy"] = self.anthropic_proxy
        http_client = _get_default_async_httpx_client(**http_client_params)
        params = {
            **client_params,
            "http_client": http_client,
        }
        return anthropic.AsyncClient(**params)

    def _get_request_payload(
        self,
        input_: LanguageModelInput,
        *,
        stop: list[str] | None = None,
        **kwargs: dict,
    ) -> dict:
        """Get the request payload for the Anthropic API."""
        messages = self._convert_input(input_).to_messages()

        for idx, message in enumerate(messages):
            # Translate v1 content
            if (
                isinstance(message, AIMessage)
                and message.response_metadata.get("output_version") == "v1"
            ):
                tcs: list[types.ToolCall] = [
                    {
                        "type": "tool_call",
                        "name": tool_call["name"],
                        "args": tool_call["args"],
                        "id": tool_call.get("id"),
                    }
                    for tool_call in message.tool_calls
                ]
                messages[idx] = message.model_copy(
                    update={
                        "content": _convert_from_v1_to_anthropic(
                            cast(list[types.ContentBlock], message.content),
                            tcs,
                            message.response_metadata.get("model_provider"),
                        )
                    }
                )

        system, formatted_messages = _format_messages(messages)

        # If cache_control is provided in kwargs, add it to last message
        # and content block.
        if "cache_control" in kwargs and formatted_messages:
            if isinstance(formatted_messages[-1]["content"], list):
                formatted_messages[-1]["content"][-1]["cache_control"] = kwargs.pop(
                    "cache_control"
                )
            elif isinstance(formatted_messages[-1]["content"], str):
                formatted_messages[-1]["content"] = [
                    {
                        "type": "text",
                        "text": formatted_messages[-1]["content"],
                        "cache_control": kwargs.pop("cache_control"),
                    }
                ]
            else:
                pass

        # If cache_control remains in kwargs, it would be passed as a top-level param
        # to the API, but Anthropic expects it nested within a message
        _ = kwargs.pop("cache_control", None)

        payload = {
            "model": self.model,
            "max_tokens": self.max_tokens,
            "messages": formatted_messages,
            "temperature": self.temperature,
            "top_k": self.top_k,
            "top_p": self.top_p,
            "stop_sequences": stop or self.stop_sequences,
            "betas": self.betas,
            "context_management": self.context_management,
            "mcp_servers": self.mcp_servers,
            "system": system,
            **self.model_kwargs,
            **kwargs,
        }
        if self.thinking is not None:
            payload["thinking"] = self.thinking

        if "response_format" in payload:
            # response_format present when using agents.create_agent's ProviderStrategy
            # ---
            # ProviderStrategy converts to OpenAI-style format, which passes kwargs to
            # ChatAnthropic, ending up in our payload
            response_format = payload.pop("response_format")
            if (
                isinstance(response_format, dict)
                and response_format.get("type") == "json_schema"
                and "schema" in response_format.get("json_schema", {})
            ):
                response_format = cast(dict, response_format["json_schema"]["schema"])
            # Convert OpenAI-style response_format to Anthropic's output_format
            payload["output_format"] = _convert_to_anthropic_output_format(
                response_format
            )

        if "output_format" in payload:
            # Native structured output requires the structured outputs beta
            if payload["betas"]:
                if "structured-outputs-2025-11-13" not in payload["betas"]:
                    # Merge with existing betas
                    payload["betas"] = [
                        *payload["betas"],
                        "structured-outputs-2025-11-13",
                    ]
            else:
                payload["betas"] = ["structured-outputs-2025-11-13"]

        # Check if any tools have strict mode enabled
        if "tools" in payload and isinstance(payload["tools"], list):
            has_strict_tool = any(
                isinstance(tool, dict) and tool.get("strict") is True
                for tool in payload["tools"]
            )
            if has_strict_tool:
                # Strict tool use requires the structured outputs beta
                if payload["betas"]:
                    if "structured-outputs-2025-11-13" not in payload["betas"]:
                        # Merge with existing betas
                        payload["betas"] = [
                            *payload["betas"],
                            "structured-outputs-2025-11-13",
                        ]
                else:
                    payload["betas"] = ["structured-outputs-2025-11-13"]

            # Auto-append required betas for specific tool types
            for tool in payload["tools"]:
                if isinstance(tool, dict) and "type" in tool:
                    tool_type = tool["type"]
                    if tool_type in _TOOL_TYPE_TO_BETA:
                        required_beta = _TOOL_TYPE_TO_BETA[tool_type]
                        if payload["betas"]:
                            # Append to existing betas if not already present
                            if required_beta not in payload["betas"]:
                                payload["betas"] = [*payload["betas"], required_beta]
                        else:
                            payload["betas"] = [required_beta]

        # Auto-append required beta for mcp_servers
        if payload.get("mcp_servers"):
            required_beta = "mcp-client-2025-11-20"
            if payload["betas"]:
                # Append to existing betas if not already present
                if required_beta not in payload["betas"]:
                    payload["betas"] = [*payload["betas"], required_beta]
            else:
                payload["betas"] = [required_beta]

        return {k: v for k, v in payload.items() if v is not None}

    def _create(self, payload: dict) -> Any:
        if "betas" in payload:
            return self._client.beta.messages.create(**payload)
        return self._client.messages.create(**payload)

    async def _acreate(self, payload: dict) -> Any:
        if "betas" in payload:
            return await self._async_client.beta.messages.create(**payload)
        return await self._async_client.messages.create(**payload)

    def _stream(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        *,
        stream_usage: bool | None = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        if stream_usage is None:
            stream_usage = self.stream_usage
        kwargs["stream"] = True
        payload = self._get_request_payload(messages, stop=stop, **kwargs)
        try:
            stream = self._create(payload)
            coerce_content_to_string = (
                not _tools_in_params(payload)
                and not _documents_in_params(payload)
                and not _thinking_in_params(payload)
            )
            block_start_event = None
            for event in stream:
                msg, block_start_event = _make_message_chunk_from_anthropic_event(
                    event,
                    stream_usage=stream_usage,
                    coerce_content_to_string=coerce_content_to_string,
                    block_start_event=block_start_event,
                )
                if msg is not None:
                    chunk = ChatGenerationChunk(message=msg)
                    if run_manager and isinstance(msg.content, str):
                        run_manager.on_llm_new_token(msg.content, chunk=chunk)
                    yield chunk
        except anthropic.BadRequestError as e:
            _handle_anthropic_bad_request(e)

    async def _astream(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: AsyncCallbackManagerForLLMRun | None = None,
        *,
        stream_usage: bool | None = None,
        **kwargs: Any,
    ) -> AsyncIterator[ChatGenerationChunk]:
        if stream_usage is None:
            stream_usage = self.stream_usage
        kwargs["stream"] = True
        payload = self._get_request_payload(messages, stop=stop, **kwargs)
        try:
            stream = await self._acreate(payload)
            coerce_content_to_string = (
                not _tools_in_params(payload)
                and not _documents_in_params(payload)
                and not _thinking_in_params(payload)
            )
            block_start_event = None
            async for event in stream:
                msg, block_start_event = _make_message_chunk_from_anthropic_event(
                    event,
                    stream_usage=stream_usage,
                    coerce_content_to_string=coerce_content_to_string,
                    block_start_event=block_start_event,
                )
                if msg is not None:
                    chunk = ChatGenerationChunk(message=msg)
                    if run_manager and isinstance(msg.content, str):
                        await run_manager.on_llm_new_token(msg.content, chunk=chunk)
                    yield chunk
        except anthropic.BadRequestError as e:
            _handle_anthropic_bad_request(e)

    def _format_output(self, data: Any, **kwargs: Any) -> ChatResult:
        """Format the output from the Anthropic API to LC."""
        data_dict = data.model_dump()
        content = data_dict["content"]

        # Remove citations if they are None - introduced in anthropic sdk 0.45
        for block in content:
            if (
                isinstance(block, dict)
                and "citations" in block
                and block["citations"] is None
            ):
                block.pop("citations")
            if (
                isinstance(block, dict)
                and block.get("type") == "thinking"
                and "text" in block
                and block["text"] is None
            ):
                block.pop("text")

        llm_output = {
            k: v for k, v in data_dict.items() if k not in ("content", "role", "type")
        }
        response_metadata = {"model_provider": "anthropic"}
        if "model" in llm_output and "model_name" not in llm_output:
            llm_output["model_name"] = llm_output["model"]
        if (
            len(content) == 1
            and content[0]["type"] == "text"
            and not content[0].get("citations")
        ):
            msg = AIMessage(
                content=content[0]["text"], response_metadata=response_metadata
            )
        elif any(block["type"] == "tool_use" for block in content):
            tool_calls = extract_tool_calls(content)
            msg = AIMessage(
                content=content,
                tool_calls=tool_calls,
                response_metadata=response_metadata,
            )
        else:
            msg = AIMessage(content=content, response_metadata=response_metadata)
        msg.usage_metadata = _create_usage_metadata(data.usage)
        return ChatResult(
            generations=[ChatGeneration(message=msg)],
            llm_output=llm_output,
        )

    def _generate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: CallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        payload = self._get_request_payload(messages, stop=stop, **kwargs)
        try:
            data = self._create(payload)
        except anthropic.BadRequestError as e:
            _handle_anthropic_bad_request(e)
        return self._format_output(data, **kwargs)

    async def _agenerate(
        self,
        messages: list[BaseMessage],
        stop: list[str] | None = None,
        run_manager: AsyncCallbackManagerForLLMRun | None = None,
        **kwargs: Any,
    ) -> ChatResult:
        payload = self._get_request_payload(messages, stop=stop, **kwargs)
        try:
            data = await self._acreate(payload)
        except anthropic.BadRequestError as e:
            _handle_anthropic_bad_request(e)
        return self._format_output(data, **kwargs)

    def _get_llm_for_structured_output_when_thinking_is_enabled(
        self,
        schema: dict | type,
        formatted_tool: AnthropicTool,
    ) -> Runnable[LanguageModelInput, BaseMessage]:
        thinking_admonition = (
            "Anthropic structured output relies on forced tool calling, "
            "which is not supported when `thinking` is enabled. This method will raise "
            "langchain_core.exceptions.OutputParserException if tool calls are not "
            "generated. Consider disabling `thinking` or adjust your prompt to ensure "
            "the tool is called."
        )
        warnings.warn(thinking_admonition, stacklevel=2)
        llm = self.bind_tools(
            [schema],
            ls_structured_output_format={
                "kwargs": {"method": "function_calling"},
                "schema": formatted_tool,
            },
        )

        def _raise_if_no_tool_calls(message: AIMessage) -> AIMessage:
            if not message.tool_calls:
                raise OutputParserException(thinking_admonition)
            return message

        return llm | _raise_if_no_tool_calls

    def bind_tools(
        self,
        tools: Sequence[dict[str, Any] | type | Callable | BaseTool],
        *,
        tool_choice: dict[str, str] | str | None = None,
        parallel_tool_calls: bool | None = None,
        strict: bool | None = None,
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, AIMessage]:
        r"""Bind tool-like objects to this chat model.

        Args:
            tools: A list of tool definitions to bind to this chat model.

                Supports Anthropic format tool schemas and any tool definition handled
                by `langchain_core.utils.function_calling.convert_to_openai_tool`.
            tool_choice: Which tool to require the model to call. Options are:

                - Name of the tool as a string or as dict `{"type": "tool", "name": "<<tool_name>>"}`: calls corresponding tool
                - `'auto'`, `{"type: "auto"}`, or `None`: automatically selects a tool (including no tool)
                - `'any'` or `{"type: "any"}`: force at least one tool to be called
            parallel_tool_calls: Set to `False` to disable parallel tool use.

                Defaults to `None` (no specification, which allows parallel tool use).

                !!! version-added "Added in `langchain-anthropic` 0.3.2"
            strict: If `True`, Claude's schema adherence is applied to tool calls.

                See the [Claude docs](https://platform.claude.com/docs/en/build-with-claude/structured-outputs#when-to-use-json-outputs-vs-strict-tool-use).
            kwargs: Any additional parameters are passed directly to `bind`.

        ???+ example
            ```python
            from langchain_anthropic import ChatAnthropic
            from pydantic import BaseModel, Field


            class GetWeather(BaseModel):
                '''Get the current weather in a given location'''

                location: str = Field(..., description="The city and state, e.g. San Francisco, CA")


            class GetPrice(BaseModel):
                '''Get the price of a specific product.'''

                product: str = Field(..., description="The product to look up.")


            model = ChatAnthropic(model="claude-sonnet-4-5-20250929", temperature=0)
            model_with_tools = model.bind_tools([GetWeather, GetPrice])
            model_with_tools.invoke(
                "What is the weather like in San Francisco",
            )
            # -> AIMessage(
            #     content=[
            #         {'text': '<thinking>\nBased on the user\'s question, the relevant function to call is GetWeather, which requires the "location" parameter.\n\nThe user has directly specified the location as "San Francisco". Since San Francisco is a well known city, I can reasonably infer they mean San Francisco, CA without needing the state specified.\n\nAll the required parameters are provided, so I can proceed with the API call.\n</thinking>', 'type': 'text'},
            #         {'text': None, 'type': 'tool_use', 'id': 'toolu_01SCgExKzQ7eqSkMHfygvYuu', 'name': 'GetWeather', 'input': {'location': 'San Francisco, CA'}}
            #     ],
            #     response_metadata={'id': 'msg_01GM3zQtoFv8jGQMW7abLnhi', 'model': 'claude-sonnet-4-5-20250929', 'stop_reason': 'tool_use', 'stop_sequence': None, 'usage': {'input_tokens': 487, 'output_tokens': 145}},
            #     id='run-87b1331e-9251-4a68-acef-f0a018b639cc-0'
            # )
            ```

        ??? example "Force tool call with tool_choice `'any'`"

            ```python
            from langchain_anthropic import ChatAnthropic
            from pydantic import BaseModel, Field


            class GetWeather(BaseModel):
                '''Get the current weather in a given location'''

                location: str = Field(..., description="The city and state, e.g. San Francisco, CA")


            class GetPrice(BaseModel):
                '''Get the price of a specific product.'''

                product: str = Field(..., description="The product to look up.")


            model = ChatAnthropic(model="claude-sonnet-4-5-20250929", temperature=0)
            model_with_tools = model.bind_tools([GetWeather, GetPrice], tool_choice="any")
            model_with_tools.invoke(
                "what is the weather like in San Francisco",
            )
            ```

        ??? example "Force specific tool call with `tool_choice` `'<name_of_tool>'`"

            ```python
            from langchain_anthropic import ChatAnthropic
            from pydantic import BaseModel, Field


            class GetWeather(BaseModel):
                '''Get the current weather in a given location'''

                location: str = Field(..., description="The city and state, e.g. San Francisco, CA")


            class GetPrice(BaseModel):
                '''Get the price of a specific product.'''

                product: str = Field(..., description="The product to look up.")


            model = ChatAnthropic(model="claude-sonnet-4-5-20250929", temperature=0)
            model_with_tools = model.bind_tools([GetWeather, GetPrice], tool_choice="GetWeather")
            model_with_tools.invoke("What is the weather like in San Francisco")
            ```

        ??? example "Cache specific tools"

            ```python
            from langchain_anthropic import ChatAnthropic, convert_to_anthropic_tool
            from pydantic import BaseModel, Field


            class GetWeather(BaseModel):
                '''Get the current weather in a given location'''

                location: str = Field(..., description="The city and state, e.g. San Francisco, CA")


            class GetPrice(BaseModel):
                '''Get the price of a specific product.'''

                product: str = Field(..., description="The product to look up.")


            # We'll convert our pydantic class to the anthropic tool format
            # before passing to bind_tools so that we can set the 'cache_control'
            # field on our tool.
            cached_price_tool = convert_to_anthropic_tool(GetPrice)

            # Currently the only supported "cache_control" value is {"type": "ephemeral"}
            cached_price_tool["cache_control"] = {"type": "ephemeral"}

            # Need to pass in extra headers to enable use of the beta cache control API.
            model = ChatAnthropic(
                model="claude-sonnet-4-5-20250929",
                temperature=0,
            )
            model_with_tools = model.bind_tools([GetWeather, cached_price_tool])
            model_with_tools.invoke("What is the weather like in San Francisco")
            ```

            This outputs:

            ```python
            AIMessage(
                content=[
                    {
                        "text": "Certainly! I can help you find out the current weather in San Francisco. To get this information, I'll use the GetWeather function. Let me fetch that data for you right away.",
                        "type": "text",
                    },
                    {
                        "id": "toolu_01TS5h8LNo7p5imcG7yRiaUM",
                        "input": {"location": "San Francisco, CA"},
                        "name": "GetWeather",
                        "type": "tool_use",
                    },
                ],
                response_metadata={
                    "id": "msg_01Xg7Wr5inFWgBxE5jH9rpRo",
                    "model": "claude-sonnet-4-5-20250929",
                    "stop_reason": "tool_use",
                    "stop_sequence": None,
                    "usage": {
                        "input_tokens": 171,
                        "output_tokens": 96,
                        "cache_creation_input_tokens": 1470,
                        "cache_read_input_tokens": 0,
                    },
                },
                id="run-b36a5b54-5d69-470e-a1b0-b932d00b089e-0",
                tool_calls=[
                    {
                        "name": "GetWeather",
                        "args": {"location": "San Francisco, CA"},
                        "id": "toolu_01TS5h8LNo7p5imcG7yRiaUM",
                        "type": "tool_call",
                    }
                ],
                usage_metadata={
                    "input_tokens": 171,
                    "output_tokens": 96,
                    "total_tokens": 267,
                },
            )
            ```

            If we invoke the tool again, we can see that the "usage" information in the `AIMessage.response_metadata` shows that we had a cache hit:

            ```python hl_lines="23"
            AIMessage(
                content=[
                    {
                        "text": "To get the current weather in San Francisco, I can use the GetWeather function. Let me check that for you.",
                        "type": "text",
                    },
                    {
                        "id": "toolu_01HtVtY1qhMFdPprx42qU2eA",
                        "input": {"location": "San Francisco, CA"},
                        "name": "GetWeather",
                        "type": "tool_use",
                    },
                ],
                response_metadata={
                    "id": "msg_016RfWHrRvW6DAGCdwB6Ac64",
                    "model": "claude-sonnet-4-5-20250929",
                    "stop_reason": "tool_use",
                    "stop_sequence": None,
                    "usage": {
                        "input_tokens": 171,
                        "output_tokens": 82,
                        "cache_creation_input_tokens": 0,
                        "cache_read_input_tokens": 1470,
                    },
                },
                id="run-88b1f825-dcb7-4277-ac27-53df55d22001-0",
                tool_calls=[
                    {
                        "name": "GetWeather",
                        "args": {"location": "San Francisco, CA"},
                        "id": "toolu_01HtVtY1qhMFdPprx42qU2eA",
                        "type": "tool_call",
                    }
                ],
                usage_metadata={
                    "input_tokens": 171,
                    "output_tokens": 82,
                    "total_tokens": 253,
                },
            )
            ```

        ??? example "Strict tool use"

            Strict tool use guarantees that tool names and arguments are validated
            and correctly typed.

            !!! note

                Strict tool use requires:

                - Claude Sonnet 4.5 or Opus 4.1
                - `langchain-anthropic>=1.1.0`

            To enable strict tool use, specify `strict=True` when calling `bind_tools`.

            ```python hl_lines="11"
            from langchain_anthropic import ChatAnthropic

            model = ChatAnthropic(
                model="claude-sonnet-4-5",
            )

            def get_weather(location: str) -> str:
                \"\"\"Get the weather at a location.\"\"\"
                return "It's sunny."

            model_with_tools = model.bind_tools([get_weather], strict=True)
            ```

            !!! note "Automatic beta header"

                The required `structured-outputs-2025-11-13` beta header is
                automatically appended to the request when using `strict=True`, so you
                don't need to manually specify it in the `betas` parameter.

            See LangChain [docs](https://docs.langchain.com/oss/python/integrations/chat/anthropic#strict-tool-use)
            for more detail.
        """  # noqa: E501
        formatted_tools = [
            tool
            if _is_builtin_tool(tool)
            else convert_to_anthropic_tool(tool, strict=strict)
            for tool in tools
        ]
        if not tool_choice:
            pass
        elif isinstance(tool_choice, dict):
            kwargs["tool_choice"] = tool_choice
        elif isinstance(tool_choice, str) and tool_choice in ("any", "auto"):
            kwargs["tool_choice"] = {"type": tool_choice}
        elif isinstance(tool_choice, str):
            kwargs["tool_choice"] = {"type": "tool", "name": tool_choice}
        else:
            msg = (
                f"Unrecognized 'tool_choice' type {tool_choice=}. Expected dict, "
                f"str, or None."
            )
            raise ValueError(
                msg,
            )

        if parallel_tool_calls is not None:
            disable_parallel_tool_use = not parallel_tool_calls
            if "tool_choice" in kwargs:
                kwargs["tool_choice"]["disable_parallel_tool_use"] = (
                    disable_parallel_tool_use
                )
            else:
                kwargs["tool_choice"] = {
                    "type": "auto",
                    "disable_parallel_tool_use": disable_parallel_tool_use,
                }

        return self.bind(tools=formatted_tools, **kwargs)

    def with_structured_output(
        self,
        schema: dict | type,
        *,
        include_raw: bool = False,
        method: Literal["function_calling", "json_schema"] = "function_calling",
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, dict | BaseModel]:
        """Model wrapper that returns outputs formatted to match the given schema.

        Args:
            schema: The output schema. Can be passed in as:

                - An Anthropic tool schema,
                - An OpenAI function/tool schema,
                - A JSON Schema,
                - A `TypedDict` class,
                - Or a Pydantic class.

                If `schema` is a Pydantic class then the model output will be a
                Pydantic instance of that class, and the model-generated fields will be
                validated by the Pydantic class. Otherwise the model output will be a
                dict and will not be validated.

                See `langchain_core.utils.function_calling.convert_to_openai_tool` for
                more on how to properly specify types and descriptions of schema fields
                when specifying a Pydantic or `TypedDict` class.
            include_raw:
                If `False` then only the parsed structured output is returned.

                If an error occurs during model output parsing it will be raised.

                If `True` then both the raw model response (a `BaseMessage`) and the
                parsed model response will be returned.

                If an error occurs during output parsing it will be caught and returned
                as well.

                The final output is always a `dict` with keys `'raw'`, `'parsed'`, and
                `'parsing_error'`.
            method: The structured output method to use. Options are:

                - `'function_calling'` (default): Use forced tool calling to get
                    structured output.
                - `'json_schema'`: Use Claude's dedicated
                    [structured output](https://platform.claude.com/docs/en/build-with-claude/structured-outputs)
                    feature.

            kwargs: Additional keyword arguments are ignored.

        Returns:
            A `Runnable` that takes same inputs as a
                `langchain_core.language_models.chat.BaseChatModel`.

                If `include_raw` is `False` and `schema` is a Pydantic class, `Runnable`
                outputs an instance of `schema` (i.e., a Pydantic object). Otherwise, if
                `include_raw` is `False` then `Runnable` outputs a `dict`.

                If `include_raw` is `True`, then `Runnable` outputs a `dict` with keys:

                - `'raw'`: `BaseMessage`
                - `'parsed'`: `None` if there was a parsing error, otherwise the type
                    depends on the `schema` as described above.
                - `'parsing_error'`: `BaseException | None`

        ??? example "Pydantic schema (`include_raw=False`)"

            ```python
            from langchain_anthropic import ChatAnthropic
            from pydantic import BaseModel


            class AnswerWithJustification(BaseModel):
                '''An answer to the user question along with justification for the answer.'''

                answer: str
                justification: str


            model = ChatAnthropic(model="claude-sonnet-4-5-20250929", temperature=0)
            structured_model = model.with_structured_output(AnswerWithJustification)

            structured_model.invoke("What weighs more a pound of bricks or a pound of feathers")
            # -> AnswerWithJustification(
            #     answer='They weigh the same',
            #     justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
            # )
            ```

        ??? example "Pydantic schema (`include_raw=True`)"

            ```python
            from langchain_anthropic import ChatAnthropic
            from pydantic import BaseModel


            class AnswerWithJustification(BaseModel):
                '''An answer to the user question along with justification for the answer.'''

                answer: str
                justification: str


            model = ChatAnthropic(model="claude-sonnet-4-5-20250929", temperature=0)
            structured_model = model.with_structured_output(AnswerWithJustification, include_raw=True)

            structured_model.invoke("What weighs more a pound of bricks or a pound of feathers")
            # -> {
            #     'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
            #     'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
            #     'parsing_error': None
            # }
            ```

        ??? example "Dictionary schema (`include_raw=False`)"

            ```python
            from langchain_anthropic import ChatAnthropic

            schema = {
                "name": "AnswerWithJustification",
                "description": "An answer to the user question along with justification for the answer.",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "answer": {"type": "string"},
                        "justification": {"type": "string"},
                    },
                    "required": ["answer", "justification"],
                },
            }
            model = ChatAnthropic(model="claude-sonnet-4-5-20250929", temperature=0)
            structured_model = model.with_structured_output(schema)

            structured_model.invoke("What weighs more a pound of bricks or a pound of feathers")
            # -> {
            #     'answer': 'They weigh the same',
            #     'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
            # }
            ```

        ??? example "Native structured output with `method='json_schema'`"

            Anthropic supports a native structured output feature that guarantees
            responses adhere to a given schema.

            !!! note

                Native structured output requires:

                - Claude Sonnet 4.5 or Opus 4.1
                - `langchain-anthropic>=1.1.0`

            To enable native structured output, specify `method="json_schema"` when
            calling `with_structured_output`. (Under the hood, LangChain will
            append the required `structured-outputs-2025-11-13` beta header)

            ```python hl_lines="13"
            from langchain_anthropic import ChatAnthropic
            from pydantic import BaseModel, Field

            model = ChatAnthropic(model="claude-sonnet-4-5")

            class Movie(BaseModel):
                \"\"\"A movie with details.\"\"\"
                title: str = Field(..., description="The title of the movie")
                year: int = Field(..., description="The year the movie was released")
                director: str = Field(..., description="The director of the movie")
                rating: float = Field(..., description="The movie's rating out of 10")

            model_with_structure = model.with_structured_output(Movie, method="json_schema")
            response = model_with_structure.invoke("Provide details about the movie Inception")
            print(response)
            # -> Movie(title="Inception", year=2010, director="Christopher Nolan", rating=8.8)
            ```
        """  # noqa: E501
        if method == "json_mode":
            warning_message = (
                "Unrecognized structured output method 'json_mode'. Defaulting to "
                "'json_schema' method."
            )
            warnings.warn(warning_message, stacklevel=2)
            method = "json_schema"

        if method == "function_calling":
            formatted_tool = convert_to_anthropic_tool(schema)
            tool_name = formatted_tool["name"]
            if self.thinking is not None and self.thinking.get("type") == "enabled":
                llm = self._get_llm_for_structured_output_when_thinking_is_enabled(
                    schema,
                    formatted_tool,
                )
            else:
                llm = self.bind_tools(
                    [schema],
                    tool_choice=tool_name,
                    ls_structured_output_format={
                        "kwargs": {"method": "function_calling"},
                        "schema": formatted_tool,
                    },
                )

            if isinstance(schema, type) and is_basemodel_subclass(schema):
                output_parser: OutputParserLike = PydanticToolsParser(
                    tools=[schema],
                    first_tool_only=True,
                )
            else:
                output_parser = JsonOutputKeyToolsParser(
                    key_name=tool_name,
                    first_tool_only=True,
                )
        elif method == "json_schema":
            llm = self.bind(
                output_format=_convert_to_anthropic_output_format(schema),
                ls_structured_output_format={
                    "kwargs": {"method": "json_schema"},
                    "schema": convert_to_openai_tool(schema),
                },
            )
            if isinstance(schema, type) and is_basemodel_subclass(schema):
                output_parser = PydanticOutputParser(pydantic_object=schema)
            else:
                output_parser = JsonOutputParser()
        else:
            error_message = (
                f"Unrecognized structured output method '{method}'. "
                f"Expected 'function_calling' or 'json_schema'."
            )
            raise ValueError(error_message)

        if include_raw:
            parser_assign = RunnablePassthrough.assign(
                parsed=itemgetter("raw") | output_parser,
                parsing_error=lambda _: None,
            )
            parser_none = RunnablePassthrough.assign(parsed=lambda _: None)
            parser_with_fallback = parser_assign.with_fallbacks(
                [parser_none],
                exception_key="parsing_error",
            )
            return RunnableMap(raw=llm) | parser_with_fallback
        return llm | output_parser

    def get_num_tokens_from_messages(
        self,
        messages: list[BaseMessage],
        tools: Sequence[dict[str, Any] | type | Callable | BaseTool] | None = None,
        **kwargs: Any,
    ) -> int:
        """Count tokens in a sequence of input messages.

        Args:
            messages: The message inputs to tokenize.
            tools: If provided, sequence of `dict`, `BaseModel`, function, or `BaseTool`
                objects to be converted to tool schemas.
            kwargs: Additional keyword arguments are passed to the Anthropic
                `messages.count_tokens` method.

        ???+ example "Basic usage"

            ```python
            from langchain_anthropic import ChatAnthropic
            from langchain_core.messages import HumanMessage, SystemMessage

            model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

            messages = [
                SystemMessage(content="You are a scientist"),
                HumanMessage(content="Hello, Claude"),
            ]
            model.get_num_tokens_from_messages(messages)
            ```

            ```txt
            14
            ```

        ??? example "Pass tool schemas"

            ```python
            from langchain_anthropic import ChatAnthropic
            from langchain_core.messages import HumanMessage
            from langchain_core.tools import tool

            model = ChatAnthropic(model="claude-sonnet-4-5-20250929")

            @tool(parse_docstring=True)
            def get_weather(location: str) -> str:
                \"\"\"Get the current weather in a given location

                Args:
                    location: The city and state, e.g. San Francisco, CA
                \"\"\"
                return "Sunny"

            messages = [
                HumanMessage(content="What's the weather like in San Francisco?"),
            ]
            model.get_num_tokens_from_messages(messages, tools=[get_weather])
            ```

            ```txt
            403
            ```

        !!! warning "Behavior changed in `langchain-anthropic` 0.3.0"

            Uses Anthropic's [token counting API](https://platform.claude.com/docs/en/build-with-claude/token-counting) to count tokens in messages.

        """  # noqa: D214,E501
        formatted_system, formatted_messages = _format_messages(messages)
        if isinstance(formatted_system, str):
            kwargs["system"] = formatted_system
        if tools:
            kwargs["tools"] = [convert_to_anthropic_tool(tool) for tool in tools]
        if self.context_management is not None:
            kwargs["context_management"] = self.context_management

        if self.betas is not None:
            beta_response = self._client.beta.messages.count_tokens(
                betas=self.betas,
                model=self.model,
                messages=formatted_messages,  # type: ignore[arg-type]
                **kwargs,
            )
            return beta_response.input_tokens
        response = self._client.messages.count_tokens(
            model=self.model,
            messages=formatted_messages,  # type: ignore[arg-type]
            **kwargs,
        )
        return response.input_tokens


def convert_to_anthropic_tool(
    tool: dict[str, Any] | type | Callable | BaseTool,
    *,
    strict: bool | None = None,
) -> AnthropicTool:
    """Convert a tool-like object to an Anthropic tool definition.

    Args:
        tool: A tool-like object to convert. Can be an Anthropic tool dict,
            a Pydantic model, a function, or a `BaseTool`.
        strict: If `True`, enables strict schema adherence for the tool.

            !!! note

                Requires Claude Sonnet 4.5 or Opus 4.1.

    Returns:
        An Anthropic tool definition dict.
    """
    # already in Anthropic tool format
    if isinstance(tool, dict) and all(
        k in tool for k in ("name", "description", "input_schema")
    ):
        anthropic_formatted = AnthropicTool(tool)  # type: ignore[misc]
    else:
        oai_formatted = convert_to_openai_tool(tool, strict=strict)["function"]
        anthropic_formatted = AnthropicTool(
            name=oai_formatted["name"],
            input_schema=oai_formatted["parameters"],
        )
        if "description" in oai_formatted:
            anthropic_formatted["description"] = oai_formatted["description"]
        if "strict" in oai_formatted and isinstance(strict, bool):
            anthropic_formatted["strict"] = oai_formatted["strict"]
    return anthropic_formatted


def _tools_in_params(params: dict) -> bool:
    return (
        "tools" in params
        or ("extra_body" in params and params["extra_body"].get("tools"))
        or "mcp_servers" in params
    )


def _thinking_in_params(params: dict) -> bool:
    return params.get("thinking", {}).get("type") == "enabled"


def _documents_in_params(params: dict) -> bool:
    for message in params.get("messages", []):
        if isinstance(message.get("content"), list):
            for block in message["content"]:
                if (
                    isinstance(block, dict)
                    and block.get("type") == "document"
                    and block.get("citations", {}).get("enabled")
                ):
                    return True
    return False


class _AnthropicToolUse(TypedDict):
    type: Literal["tool_use"]
    name: str
    input: dict
    id: str


def _lc_tool_calls_to_anthropic_tool_use_blocks(
    tool_calls: list[ToolCall],
) -> list[_AnthropicToolUse]:
    return [
        _AnthropicToolUse(
            type="tool_use",
            name=tool_call["name"],
            input=tool_call["args"],
            id=cast("str", tool_call["id"]),
        )
        for tool_call in tool_calls
    ]


def _convert_to_anthropic_output_format(schema: dict | type) -> dict[str, Any]:
    """Convert JSON schema, Pydantic model, or `TypedDict` into Claude `output_format`.

    See Claude docs on [structured outputs](https://platform.claude.com/docs/en/build-with-claude/structured-outputs).
    """
    from anthropic import transform_schema

    is_pydantic_class = isinstance(schema, type) and is_basemodel_subclass(schema)
    if is_pydantic_class or isinstance(schema, dict):
        json_schema = transform_schema(schema)
    else:
        # TypedDict
        json_schema = transform_schema(convert_to_json_schema(schema))
    return {"type": "json_schema", "schema": json_schema}


def _make_message_chunk_from_anthropic_event(
    event: anthropic.types.RawMessageStreamEvent,
    *,
    stream_usage: bool = True,
    coerce_content_to_string: bool,
    block_start_event: anthropic.types.RawMessageStreamEvent | None = None,
) -> tuple[AIMessageChunk | None, anthropic.types.RawMessageStreamEvent | None]:
    """Convert Anthropic streaming event to `AIMessageChunk`.

    Args:
        event: Raw streaming event from Anthropic SDK
        stream_usage: Whether to include usage metadata in the output chunks.
        coerce_content_to_string: Whether to convert structured content to plain
            text strings.

            When `True`, only text content is preserved; when `False`, structured
            content like tool calls and citations are maintained.
        block_start_event: Previous content block start event, used for tracking
            tool use blocks and maintaining context across related events.

    Returns:
        Tuple with
            - `AIMessageChunk`: Converted message chunk with appropriate content and
                metadata, or `None` if the event doesn't produce a chunk
            - `RawMessageStreamEvent`: Updated `block_start_event` for tracking content
                blocks across sequential events, or `None` if not applicable

    Note:
        Not all Anthropic events result in message chunks. Events like internal
        state changes return `None` for the message chunk while potentially
        updating the `block_start_event` for context tracking.
    """
    message_chunk: AIMessageChunk | None = None
    # Reference: Anthropic SDK streaming implementation
    # https://github.com/anthropics/anthropic-sdk-python/blob/main/src/anthropic/lib/streaming/_messages.py  # noqa: E501
    if event.type == "message_start" and stream_usage:
        # Capture model name, but don't include usage_metadata yet
        # as it will be properly reported in message_delta with complete info
        if hasattr(event.message, "model"):
            response_metadata: dict[str, Any] = {"model_name": event.message.model}
        else:
            response_metadata = {}

        message_chunk = AIMessageChunk(
            content="" if coerce_content_to_string else [],
            response_metadata=response_metadata,
        )

    elif (
        event.type == "content_block_start"
        and event.content_block is not None
        and (
            "tool_result" in event.content_block.type
            or "tool_use" in event.content_block.type
            or "document" in event.content_block.type
            or "redacted_thinking" in event.content_block.type
        )
    ):
        if coerce_content_to_string:
            warnings.warn("Received unexpected tool content block.", stacklevel=2)

        content_block = event.content_block.model_dump()
        content_block["index"] = event.index
        if event.content_block.type == "tool_use":
            tool_call_chunk = create_tool_call_chunk(
                index=event.index,
                id=event.content_block.id,
                name=event.content_block.name,
                args="",
            )
            tool_call_chunks = [tool_call_chunk]
        else:
            tool_call_chunks = []
        message_chunk = AIMessageChunk(
            content=[content_block],
            tool_call_chunks=tool_call_chunks,
        )
        block_start_event = event

    # Process incremental content updates
    elif event.type == "content_block_delta":
        # Text and citation deltas (incremental text content)
        if event.delta.type in ("text_delta", "citations_delta"):
            if coerce_content_to_string and hasattr(event.delta, "text"):
                text = getattr(event.delta, "text", "")
                message_chunk = AIMessageChunk(content=text)
            else:
                content_block = event.delta.model_dump()
                content_block["index"] = event.index

                # All citation deltas are part of a text block
                content_block["type"] = "text"
                if "citation" in content_block:
                    # Assign citations to a list if present
                    content_block["citations"] = [content_block.pop("citation")]
                message_chunk = AIMessageChunk(content=[content_block])

        # Reasoning
        elif event.delta.type in {"thinking_delta", "signature_delta"}:
            content_block = event.delta.model_dump()
            content_block["index"] = event.index
            content_block["type"] = "thinking"
            message_chunk = AIMessageChunk(content=[content_block])

        # Tool input JSON (streaming tool arguments)
        elif event.delta.type == "input_json_delta":
            content_block = event.delta.model_dump()
            content_block["index"] = event.index
            start_event_block = (
                getattr(block_start_event, "content_block", None)
                if block_start_event
                else None
            )
            if (
                start_event_block is not None
                and getattr(start_event_block, "type", None) == "tool_use"
            ):
                tool_call_chunk = create_tool_call_chunk(
                    index=event.index,
                    id=None,
                    name=None,
                    args=event.delta.partial_json,
                )
                tool_call_chunks = [tool_call_chunk]
            else:
                tool_call_chunks = []
            message_chunk = AIMessageChunk(
                content=[content_block],
                tool_call_chunks=tool_call_chunks,
            )

    # Process final usage metadata and completion info
    elif event.type == "message_delta" and stream_usage:
        usage_metadata = _create_usage_metadata(event.usage)
        response_metadata = {
            "stop_reason": event.delta.stop_reason,
            "stop_sequence": event.delta.stop_sequence,
        }
        if context_management := getattr(event, "context_management", None):
            response_metadata["context_management"] = context_management.model_dump()
        message_chunk = AIMessageChunk(
            content="" if coerce_content_to_string else [],
            usage_metadata=usage_metadata,
            response_metadata=response_metadata,
        )
        if message_chunk.response_metadata.get("stop_reason"):
            # Mark final Anthropic stream chunk
            message_chunk.chunk_position = "last"
    # Unhandled event types (e.g., `content_block_stop`, `ping` events)
    # https://platform.claude.com/docs/en/build-with-claude/streaming#other-events
    else:
        pass

    if message_chunk:
        message_chunk.response_metadata["model_provider"] = "anthropic"
    return message_chunk, block_start_event


def _create_usage_metadata(anthropic_usage: BaseModel) -> UsageMetadata:
    """Create LangChain `UsageMetadata` from Anthropic `Usage` data.

    Note:
        Anthropic's `input_tokens` excludes cached tokens, so we manually add
        `cache_read` and `cache_creation` tokens to get the true total.
    """
    input_token_details: dict = {
        "cache_read": getattr(anthropic_usage, "cache_read_input_tokens", None),
        "cache_creation": getattr(anthropic_usage, "cache_creation_input_tokens", None),
    }

    # Add cache TTL information if provided (5-minute and 1-hour ephemeral cache)
    cache_creation = getattr(anthropic_usage, "cache_creation", None)

    # Currently just copying over the 5m and 1h keys, but if more are added in the
    # future we'll need to expand this tuple
    cache_creation_keys = ("ephemeral_5m_input_tokens", "ephemeral_1h_input_tokens")
    if cache_creation:
        if isinstance(cache_creation, BaseModel):
            cache_creation = cache_creation.model_dump()
        for k in cache_creation_keys:
            input_token_details[k] = cache_creation.get(k)

    # Calculate total input tokens: Anthropic's `input_tokens` excludes cached tokens,
    # so we need to add them back to get the true total input token count
    input_tokens = (
        (getattr(anthropic_usage, "input_tokens", 0) or 0)  # Base input tokens
        + (input_token_details["cache_read"] or 0)  # Tokens read from cache
        + (input_token_details["cache_creation"] or 0)  # Tokens used to create cache
    )
    output_tokens = getattr(anthropic_usage, "output_tokens", 0) or 0

    return UsageMetadata(
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=input_tokens + output_tokens,
        input_token_details=InputTokenDetails(
            **{k: v for k, v in input_token_details.items() if v is not None},
        ),
    )