"""Integration tests for v1 chat models.
This module provides comprehensive integration tests for the new messages and standard
content block system introduced in ``langchain_core.v1.messages`` and
``langchain_core.messages.content_blocks``.
"""
import base64
import json
from typing import Annotated, Any, Literal, Optional, TypedDict, Union, cast
from unittest.mock import MagicMock
import httpx
import langchain_core.messages.content_blocks as types
import pytest
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.language_models.fake_chat_models import GenericFakeChatModel
from langchain_core.messages.content_blocks import (
AudioContentBlock,
Citation,
CodeInterpreterCall,
CodeInterpreterOutput,
CodeInterpreterResult,
FileContentBlock,
ImageContentBlock,
InvalidToolCall,
NonStandardContentBlock,
PlainTextContentBlock,
ReasoningContentBlock,
TextContentBlock,
ToolCall,
ToolCallChunk,
VideoContentBlock,
WebSearchCall,
WebSearchResult,
create_audio_block,
create_file_block,
create_image_block,
create_non_standard_block,
create_plaintext_block,
create_text_block,
create_tool_call,
is_reasoning_block,
is_text_block,
is_tool_call_block,
)
from langchain_core.output_parsers.string import StrOutputParser
from langchain_core.prompts.chat import ChatPromptTemplate
from langchain_core.tools import tool
from langchain_core.tools.base import BaseTool
from langchain_core.utils.function_calling import (
convert_to_json_schema,
tool_example_to_messages,
)
from langchain_core.v1.chat_models import BaseChatModel
from langchain_core.v1.messages import (
AIMessage,
AIMessageChunk,
HumanMessage,
SystemMessage,
ToolMessage,
)
from pydantic import BaseModel, Field
from pytest_benchmark.fixture import BenchmarkFixture # type: ignore[import-untyped]
from vcr.cassette import Cassette
from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1Tests
# Content block type definitions for testing
ContentBlock = Union[
TextContentBlock,
ImageContentBlock,
VideoContentBlock,
AudioContentBlock,
PlainTextContentBlock,
FileContentBlock,
ReasoningContentBlock,
NonStandardContentBlock,
ToolCall,
InvalidToolCall,
ToolCallChunk,
WebSearchCall,
WebSearchResult,
Citation,
CodeInterpreterCall,
CodeInterpreterOutput,
CodeInterpreterResult,
]
def _get_joke_class(
schema_type: Literal["pydantic", "typeddict", "json_schema"],
) -> Any:
""":private:"""
class Joke(BaseModel):
"""Joke to tell user."""
setup: str = Field(description="question to set up a joke")
punchline: str = Field(description="answer to resolve the joke")
def validate_joke(result: Any) -> bool:
return isinstance(result, Joke)
class JokeDict(TypedDict):
"""Joke to tell user."""
setup: Annotated[str, ..., "question to set up a joke"]
punchline: Annotated[str, ..., "answer to resolve the joke"]
def validate_joke_dict(result: Any) -> bool:
return all(key in ["setup", "punchline"] for key in result)
if schema_type == "pydantic":
return Joke, validate_joke
if schema_type == "typeddict":
return JokeDict, validate_joke_dict
if schema_type == "json_schema":
return Joke.model_json_schema(), validate_joke_dict
msg = "Invalid schema type"
raise ValueError(msg)
class _TestCallbackHandler(BaseCallbackHandler):
options: list[Optional[dict]]
def __init__(self) -> None:
super().__init__()
self.options = []
def on_chat_model_start(
self,
serialized: Any,
messages: Any,
*,
options: Optional[dict[str, Any]] = None,
**kwargs: Any,
) -> None:
self.options.append(options)
class _MagicFunctionSchema(BaseModel):
input: int = Field(..., gt=-1000, lt=1000)
@tool(args_schema=_MagicFunctionSchema)
def magic_function(_input: int) -> int:
"""Applies a magic function to an input."""
return _input + 2
@tool
def magic_function_no_args() -> int:
"""Calculates a magic function."""
return 5
def _validate_tool_call_message(message: AIMessage) -> None:
"""Validate that a message contains tool calls in content blocks format."""
if isinstance(message.content, list):
tool_call_blocks = [
block
for block in message.content
if isinstance(block, dict) and is_tool_call_block(block)
]
assert len(tool_call_blocks) >= 1
for tool_call in tool_call_blocks:
# Ensure each tool call has the required fields
assert "name" in tool_call
assert "args" in tool_call
assert "id" in tool_call
# (No fallback needed: the ``tool_calls`` attribute performs the same search as
# the list comprehension above.)
def _validate_tool_call_message_no_args(message: AIMessage) -> None:
"""Validate that a message contains a single tool call with no arguments.
Used for testing tool calls without arguments, such as
``magic_function_no_args``.
"""
assert len(message.tool_calls) == 1
tool_call = message.tool_calls[0]
assert tool_call["name"] == "magic_function_no_args"
assert tool_call["args"] == {}
assert tool_call["id"] is not None
@tool
def unicode_customer(customer_name: str, description: str) -> str:
"""Tool for creating a customer with a name containing Unicode characters.
Args:
customer_name: The customer's name in their native language.
description: Description of the customer.
Returns:
A confirmation message about the customer creation.
"""
return f"Created customer: {customer_name} - {description}"
class ChatModelV1IntegrationTests(ChatModelV1Tests):
"""Base class for v1 chat model integration tests.
TODO: verify this entire docstring!
Test subclasses must implement the ``chat_model_class`` and
``chat_model_params`` properties to specify what model to test and its
initialization parameters.
Example:
.. code-block:: python
from typing import Type
from langchain_tests.integration_tests import ChatModelV1IntegrationTests
from my_package.chat_models import MyChatModel
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def chat_model_class(self) -> Type[MyChatModel]:
# Return the chat model class to test here
return MyChatModel
@property
def chat_model_params(self) -> dict:
# Return initialization parameters for the v1 model.
return {"model": "model-001", "temperature": 0}
.. note::
API references for individual test methods include troubleshooting tips.
Test subclasses **must** implement the following two properties:
chat_model_class
The chat model class to test, e.g., ``ChatParrotLinkV1``.
Example:
.. code-block:: python
@property
def chat_model_class(self) -> Type[ChatParrotLinkV1]:
return ChatParrotLinkV1
chat_model_params
Initialization parameters for the chat model.
Example:
.. code-block:: python
@property
def chat_model_params(self) -> dict:
return {"model": "bird-brain-001", "temperature": 0}
In addition, test subclasses can control what features are tested (such as tool
calling or multi-modality) by selectively overriding the following properties.
Expand to see details:
.. dropdown:: has_tool_calling
TODO
.. dropdown:: tool_choice_value
TODO
.. dropdown:: has_tool_choice
TODO
.. dropdown:: has_structured_output
TODO
.. dropdown:: structured_output_kwargs
TODO
.. dropdown:: supports_json_mode
TODO
.. dropdown:: returns_usage_metadata
TODO
.. dropdown:: supports_anthropic_inputs
TODO
.. dropdown:: supports_image_tool_message
TODO
.. dropdown:: supported_usage_metadata_details
TODO
.. dropdown:: enable_vcr_tests
Property controlling whether to enable select tests that rely on
`VCR <https://vcrpy.readthedocs.io/en/latest/>`_ caching of HTTP calls, such
as benchmarking tests.
To enable these tests, follow these steps:
1. Override the ``enable_vcr_tests`` property to return ``True``:
.. code-block:: python
@property
def enable_vcr_tests(self) -> bool:
return True
2. Configure VCR to exclude sensitive headers and other information from cassettes.
.. important::
VCR will by default record authentication headers and other sensitive
information in cassettes. Read below for how to configure what
information is recorded in cassettes.
To add configuration to VCR, add a ``conftest.py`` file to the ``tests/``
directory and implement the ``vcr_config`` fixture there.
``langchain-tests`` excludes the headers ``'authorization'``,
``'x-api-key'``, and ``'api-key'`` from VCR cassettes. To pick up this
configuration, you will need to add ``conftest.py`` as shown below. You can
also exclude additional headers, override the default exclusions, or apply
other customizations to the VCR configuration. See example below:
.. code-block:: python
:caption: tests/conftest.py
import pytest
from langchain_tests.conftest import _base_vcr_config as _base_vcr_config
_EXTRA_HEADERS = [
# Specify additional headers to redact
("user-agent", "PLACEHOLDER"),
]
def remove_response_headers(response: dict) -> dict:
# If desired, remove or modify headers in the response.
response["headers"] = {}
return response
@pytest.fixture(scope="session")
def vcr_config(_base_vcr_config: dict) -> dict: # noqa: F811
\"\"\"Extend the default configuration from langchain_tests.\"\"\"
config = _base_vcr_config.copy()
config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)
config["before_record_response"] = remove_response_headers
return config
.. dropdown:: Compressing cassettes
``langchain-tests`` includes a custom VCR serializer that compresses
cassettes using gzip. To use it, register the ``yaml.gz`` serializer and
persister with VCR and enable the serializer in the config. See the
example below:
.. code-block:: python
:caption: tests/conftest.py
import pytest
from langchain_tests.conftest import CustomPersister, CustomSerializer
from langchain_tests.conftest import _base_vcr_config as _base_vcr_config
from vcr import VCR
_EXTRA_HEADERS = [
# Specify additional headers to redact
("user-agent", "PLACEHOLDER"),
]
def remove_response_headers(response: dict) -> dict:
# If desired, remove or modify headers in the response.
response["headers"] = {}
return response
@pytest.fixture(scope="session")
def vcr_config(_base_vcr_config: dict) -> dict: # noqa: F811
\"\"\"Extend the default configuration from langchain_tests.\"\"\"
config = _base_vcr_config.copy()
config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS)
config["before_record_response"] = remove_response_headers
# New: enable serializer and set file extension
config["serializer"] = "yaml.gz"
config["path_transformer"] = VCR.ensure_suffix(".yaml.gz")
return config
def pytest_recording_configure(config: dict, vcr: VCR) -> None:
vcr.register_persister(CustomPersister())
vcr.register_serializer("yaml.gz", CustomSerializer())
You can inspect the contents of the compressed cassettes (e.g., to
ensure no sensitive information is recorded) using
.. code-block:: bash
gunzip -k /path/to/tests/cassettes/TestClass_test.yaml.gz
or by using the serializer:
.. code-block:: python
from langchain_tests.conftest import CustomPersister, CustomSerializer
cassette_path = "/path/to/tests/cassettes/TestClass_test.yaml.gz"
requests, responses = CustomPersister().load_cassette(cassette_path, CustomSerializer())
3. Run tests to generate VCR cassettes.
Example:
.. code-block:: bash
uv run python -m pytest tests/integration_tests/test_chat_models.py::TestMyModel::test_stream_time
This will generate a VCR cassette for the test in
``tests/integration_tests/cassettes/``.
.. important::
You should inspect the generated cassette to ensure that it does not
contain sensitive information. If it does, you can modify the
``vcr_config`` fixture to exclude headers or modify the response
before it is recorded.
You can then commit the cassette to your repository. Subsequent test runs
will use the cassette instead of making HTTP calls.
""" # noqa: E501
@property
def standard_chat_model_params(self) -> dict:
""":private:"""
return {}
def test_invoke(self, model: BaseChatModel) -> None:
"""Test to verify that ``model.invoke(simple_message)`` works.
A model should be able to produce a non-empty ``AIMessage`` in response to
``"Hello"``. The message should at least contain a ``TextContentBlock`` with
text populated.
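For reference, a minimal passing response could resemble the following (an
illustrative sketch; real providers attach additional metadata and may return
more content blocks):
.. code-block:: python
AIMessage(
[
{"type": "text", "text": "Hello! How can I help you?"},
]
)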
.. important::
This should pass for all integrations!
.. dropdown:: Troubleshooting
TODO
"""
result = model.invoke("Hello")
assert result is not None
assert isinstance(result, AIMessage)
assert result.text
async def test_ainvoke(self, model: BaseChatModel) -> None:
"""Test to verify that ``await model.ainvoke(simple_message)`` works.
A model should be able to produce a non-empty ``AIMessage`` in response to
``"Hello"``. The message should at least contain a ``TextContentBlock`` with
text populated.
.. important::
This should pass for all integrations!
Passing this test does not indicate a "natively async" implementation, but
rather that the model can be used in an async context.
.. dropdown:: Troubleshooting
First, debug
:meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke`
because ``ainvoke`` has a default implementation that calls ``invoke`` in an
async context.
"""
result = await model.ainvoke("Hello")
assert result is not None
assert isinstance(result, AIMessage)
assert result.text
def test_stream(self, model: BaseChatModel) -> None:
"""Test to verify that ``model.stream(simple_message)`` works.
.. important::
This should pass for all integrations!
Passing this test does not indicate a "streaming" implementation, but rather
that the model can be used in a streaming context; it only needs to yield at
least one chunk in response to ``"Hello"``.
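When consuming a stream, the chunks are typically aggregated into a single
message, as in this sketch (mirroring the pattern used throughout these tests):
.. code-block:: python
full = None
for chunk in model.stream("Hello"):
full = chunk if full is None else full + chunk
# ``full`` is now an ``AIMessageChunk`` combining all streamed content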
.. dropdown:: Troubleshooting
First, debug
:meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke`
because ``stream`` has a default implementation that calls ``invoke`` and
yields the result as a single chunk.
"""
num_chunks = 0
for chunk in model.stream("Hello"):
assert chunk is not None
assert isinstance(chunk, AIMessageChunk)
assert isinstance(chunk.content, list)
num_chunks += 1
assert num_chunks > 0
async def test_astream(self, model: BaseChatModel) -> None:
"""Test to verify that ``await model.astream(simple_message)`` works.
.. important::
This should pass for all integrations!
Passing this test does not indicate a "natively async" or "streaming"
implementation, but rather that the model can be used in an async streaming
context.
.. dropdown:: Troubleshooting
First, debug
:meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_stream`
and
:meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_ainvoke`
because ``astream`` has a default implementation that calls ``_stream`` in
an async context if it is implemented, or ``ainvoke`` and yields the result
as a single ``AIMessageChunk`` chunk if not.
"""
num_chunks = 0
async for chunk in model.astream("Hello"):
assert chunk is not None
assert isinstance(chunk, AIMessageChunk)
assert isinstance(chunk.content, list)
num_chunks += 1
assert num_chunks > 0
def test_batch(self, model: BaseChatModel) -> None:
"""Test to verify that ``model.batch([messages])`` works.
.. important::
This should pass for all integrations!
Tests the model's ability to process multiple prompts in a single batch. We
expect that the ``TextContentBlock`` of each response is populated with text.
Passing this test does not indicate a "natively batching" or "batching"
implementation, but rather that the model can be used in a batching context. For
instance, your model may internally call ``invoke`` for each message in the
batch, even if the model provider does not support batching natively.
.. dropdown:: Troubleshooting
First, debug
:meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke`
because ``batch`` has a default implementation that calls ``invoke`` for
each message in the batch.
If that test passes but not this one, you should make sure your ``batch``
method does not raise any exceptions, and that it returns a list of valid
:class:`~langchain_core.v1.messages.AIMessage` objects.
"""
batch_results = model.batch(["Hello", "Hey"])
assert batch_results is not None
assert isinstance(batch_results, list)
assert len(batch_results) == 2
for result in batch_results:
assert result is not None
assert isinstance(result, AIMessage)
assert result.text
async def test_abatch(self, model: BaseChatModel) -> None:
"""Test to verify that ``await model.abatch([messages])`` works.
.. important::
This should pass for all integrations!
Tests the model's ability to process multiple prompts in a single batch
asynchronously. We expect that the ``TextContentBlock`` of each response is
populated with text.
Passing this test does not indicate a "natively batching" or "batching"
implementation, but rather that the model can be used in a batching context. For
instance, your model may internally call ``ainvoke`` for each message in the
batch, even if the model provider does not support batching natively.
.. dropdown:: Troubleshooting
First, debug
:meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_batch`
and
:meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_ainvoke`
because ``abatch`` has a default implementation that calls ``ainvoke`` for
each message in the batch.
If those tests pass but not this one, you should make sure your ``abatch``
method does not raise any exceptions, and that it returns a list of valid
:class:`~langchain_core.v1.messages.AIMessage` objects.
"""
batch_results = await model.abatch(["Hello", "Hey"])
assert batch_results is not None
assert isinstance(batch_results, list)
assert len(batch_results) == 2
for result in batch_results:
assert result is not None
assert isinstance(result, AIMessage)
assert result.text
def test_conversation(self, model: BaseChatModel) -> None:
"""Test to verify that the model can handle multi-turn conversations.
.. important::
This should pass for all integrations!
Tests the model's ability to process a sequence of alternating human and AI
messages as context for generating the next response. We expect that the
``TextContentBlock`` of each response is populated with text.
.. dropdown:: Troubleshooting
First, debug
:meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke`
because this test also uses ``model.invoke()``.
If that test passes but not this one, you should verify that:
1. Your model correctly processes the message history
2. The model maintains appropriate context from previous messages
3. The response is a valid :class:`~langchain_core.v1.messages.AIMessage`
"""
messages = [
HumanMessage("hello"),
AIMessage("hello"),
HumanMessage("how are you"),
]
result = model.invoke(messages) # type: ignore[arg-type]
assert result is not None
assert isinstance(result, AIMessage)
assert result.text
def test_double_messages_conversation(self, model: BaseChatModel) -> None:
"""Test to verify that the model can handle double-message conversations.
.. important::
This should pass for all integrations!
Tests the model's ability to process a sequence of double-system, double-human,
and double-ai messages as context for generating the next response. We expect
that the ``TextContentBlock`` of each response is populated with text.
.. dropdown:: Troubleshooting
First, debug
:meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke`
because this test also uses ``model.invoke()``.
Second, debug
:meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_conversation`
because this test is the "basic case" without double messages.
If those tests pass but not this one, you should verify that:
1. Your model API can handle double messages, or the integration should merge messages before sending them to the API.
2. The response is a valid :class:`~langchain_core.v1.messages.AIMessage`
""" # noqa: E501
messages = [
SystemMessage("hello"),
SystemMessage("hello"),
HumanMessage("hello"),
HumanMessage("hello"),
AIMessage("hello"),
AIMessage("hello"),
HumanMessage("how are you"),
]
result = model.invoke(messages) # type: ignore[arg-type]
assert result is not None
assert isinstance(result, AIMessage)
assert result.text
def test_usage_metadata(self, model: BaseChatModel) -> None:
"""Test to verify that the model returns correct usage metadata.
This test is optional and should be skipped if the model does not return
usage metadata (see Configuration below).
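A passing response carries usage metadata along these lines (illustrative
values; the keys shown are the ones asserted by this test):
.. code-block:: python
result = model.invoke("Hello")
result.usage_metadata
# {"input_tokens": 8, "output_tokens": 12, "total_tokens": 20}
result.response_metadata["model_name"]
# "model-001"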
.. versionchanged:: 0.3.17
Additionally check for the presence of ``model_name`` in the response
metadata, which is needed for usage tracking in callback handlers.
.. dropdown:: Configuration
By default, this test is run.
To disable this feature, set the ``returns_usage_metadata`` property to
``False`` in your test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def returns_usage_metadata(self) -> bool:
return False
This test can also check the format of specific kinds of usage metadata
based on the ``supported_usage_metadata_details`` property. This property
should be configured as follows with the types of tokens that the model
supports tracking:
TODO: check this!
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def supported_usage_metadata_details(self) -> dict:
return {
"invoke": [
"audio_input",
"audio_output",
"reasoning_output",
"cache_read_input",
"cache_creation_input",
],
"stream": [
"audio_input",
"audio_output",
"reasoning_output",
"cache_read_input",
"cache_creation_input",
],
}
.. dropdown:: Troubleshooting
TODO
"""
if not self.returns_usage_metadata:
pytest.skip("Not implemented.")
result = model.invoke("Hello")
assert result is not None
assert isinstance(result, AIMessage)
assert result.usage_metadata is not None
assert isinstance(result.usage_metadata["input_tokens"], int)
assert isinstance(result.usage_metadata["output_tokens"], int)
assert isinstance(result.usage_metadata["total_tokens"], int)
# Check model_name is in response_metadata
# (Needed for langchain_core.callbacks.usage)
model_name = result.response_metadata.get("model_name")
assert isinstance(model_name, str)
assert model_name != "", "model_name is empty"
# TODO: check these
# `input_tokens` is the total, possibly including other unclassified or
# system-level tokens.
if "audio_input" in self.supported_usage_metadata_details["invoke"]:
# Checks if the specific chat model integration being tested has declared
# that it supports reporting token counts specifically for `audio_input`
msg = self.invoke_with_audio_input() # To be implemented in test subclass
assert (usage_metadata := msg.usage_metadata) is not None
assert (
input_token_details := usage_metadata.get("input_token_details")
) is not None
assert isinstance(input_token_details.get("audio"), int)
# Asserts that total input tokens are at least the sum of the token counts
total_detailed_tokens = sum(
v for v in input_token_details.values() if isinstance(v, int)
)
assert usage_metadata.get("input_tokens", 0) >= total_detailed_tokens
if "audio_output" in self.supported_usage_metadata_details["invoke"]:
msg = self.invoke_with_audio_output()
assert (usage_metadata := msg.usage_metadata) is not None
assert (
output_token_details := usage_metadata.get("output_token_details")
) is not None
assert isinstance(output_token_details.get("audio"), int)
# Asserts that total output tokens are at least the sum of the token counts
total_detailed_tokens = sum(
v for v in output_token_details.values() if isinstance(v, int)
)
assert usage_metadata.get("output_tokens", 0) >= total_detailed_tokens
if "reasoning_output" in self.supported_usage_metadata_details["invoke"]:
msg = self.invoke_with_reasoning_output()
assert (usage_metadata := msg.usage_metadata) is not None
assert (
output_token_details := usage_metadata.get("output_token_details")
) is not None
assert isinstance(output_token_details.get("reasoning"), int)
# Asserts that total output tokens are at least the sum of the token counts
total_detailed_tokens = sum(
v for v in output_token_details.values() if isinstance(v, int)
)
assert usage_metadata.get("output_tokens", 0) >= total_detailed_tokens
if "cache_read_input" in self.supported_usage_metadata_details["invoke"]:
msg = self.invoke_with_cache_read_input()
assert (usage_metadata := msg.usage_metadata) is not None
assert (
input_token_details := usage_metadata.get("input_token_details")
) is not None
assert isinstance(input_token_details.get("cache_read"), int)
# Asserts that total input tokens are at least the sum of the token counts
total_detailed_tokens = sum(
v for v in input_token_details.values() if isinstance(v, int)
)
assert usage_metadata.get("input_tokens", 0) >= total_detailed_tokens
if "cache_creation_input" in self.supported_usage_metadata_details["invoke"]:
msg = self.invoke_with_cache_creation_input()
assert (usage_metadata := msg.usage_metadata) is not None
assert (
input_token_details := usage_metadata.get("input_token_details")
) is not None
assert isinstance(input_token_details.get("cache_creation"), int)
# Asserts that total input tokens are at least the sum of the token counts
total_detailed_tokens = sum(
v for v in input_token_details.values() if isinstance(v, int)
)
assert usage_metadata.get("input_tokens", 0) >= total_detailed_tokens
def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:
"""Test usage metadata in streaming mode.
Test to verify that the model returns correct usage metadata in streaming mode.
.. versionchanged:: 0.3.17
Additionally check for the presence of ``model_name`` in the response
metadata, which is needed for usage tracking in callback handlers.
.. dropdown:: Configuration
By default, this test is run.
To disable this feature, set ``returns_usage_metadata`` to ``False`` in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def returns_usage_metadata(self) -> bool:
return False
This test can also check the format of specific kinds of usage metadata
based on the ``supported_usage_metadata_details`` property. This property
should be configured as follows with the types of tokens that the model
supports tracking:
TODO: check this!
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def supported_usage_metadata_details(self) -> dict:
return {
"invoke": [
"audio_input",
"audio_output",
"reasoning_output",
"cache_read_input",
"cache_creation_input",
],
"stream": [
"audio_input",
"audio_output",
"reasoning_output",
"cache_read_input",
"cache_creation_input",
],
}
.. dropdown:: Troubleshooting
TODO
"""
if not self.returns_usage_metadata:
pytest.skip("Not implemented.")
full: Optional[AIMessageChunk] = None
for chunk in model.stream("Write me 2 haikus. Only include the haikus."):
assert isinstance(chunk, AIMessageChunk)
# Only one chunk is allowed to set usage_metadata.input_tokens
# if multiple do, it's likely a bug that will result in overcounting
# input tokens (since the total number of input tokens applies to the full
# generation, not individual chunks)
if full and full.usage_metadata and full.usage_metadata["input_tokens"]:
assert (
not chunk.usage_metadata or not chunk.usage_metadata["input_tokens"]
), (
"Only one chunk should set input_tokens,"
" the rest should be 0 or None"
)
full = chunk if full is None else cast("AIMessageChunk", full + chunk)
assert isinstance(full, AIMessageChunk)
assert full.usage_metadata is not None
assert isinstance(full.usage_metadata["input_tokens"], int)
assert isinstance(full.usage_metadata["output_tokens"], int)
assert isinstance(full.usage_metadata["total_tokens"], int)
# Check model_name is in response_metadata
# (Needed for langchain_core.callbacks.usage)
model_name = full.response_metadata.get("model_name")
assert isinstance(model_name, str)
assert model_name != "", "model_name is empty"
# TODO: check these
if "audio_input" in self.supported_usage_metadata_details["stream"]:
msg = self.invoke_with_audio_input(stream=True)
assert msg.usage_metadata is not None
assert isinstance(
msg.usage_metadata.get("input_token_details", {}).get("audio"), int
)
if "audio_output" in self.supported_usage_metadata_details["stream"]:
msg = self.invoke_with_audio_output(stream=True)
assert msg.usage_metadata is not None
assert isinstance(
msg.usage_metadata.get("output_token_details", {}).get("audio"), int
)
if "reasoning_output" in self.supported_usage_metadata_details["stream"]:
msg = self.invoke_with_reasoning_output(stream=True)
assert msg.usage_metadata is not None
assert isinstance(
msg.usage_metadata.get("output_token_details", {}).get("reasoning"), int
)
if "cache_read_input" in self.supported_usage_metadata_details["stream"]:
msg = self.invoke_with_cache_read_input(stream=True)
assert msg.usage_metadata is not None
assert isinstance(
msg.usage_metadata.get("input_token_details", {}).get("cache_read"), int
)
if "cache_creation_input" in self.supported_usage_metadata_details["stream"]:
msg = self.invoke_with_cache_creation_input(stream=True)
assert msg.usage_metadata is not None
assert isinstance(
msg.usage_metadata.get("input_token_details", {}).get("cache_creation"),
int,
)
def test_stop_sequence(self, model: BaseChatModel) -> None:
"""Test that model does not fail when invoked with the ``stop`` parameter,
which is a standard parameter for stopping generation at a certain token.
`More on standard parameters <https://python.langchain.com/docs/concepts/chat_models/#standard-parameters>`__
.. important::
This should pass for all integrations!
.. dropdown:: Troubleshooting
TODO
"""
result = model.invoke("hi", stop=["you"])
assert isinstance(result, AIMessage)
custom_model = self.chat_model_class(
**{
**self.chat_model_params,
"stop": ["you"],
}
)
result = custom_model.invoke("hi")
assert isinstance(result, AIMessage)
def test_tool_calling(self, model: BaseChatModel) -> None:
"""Test that the model generates tool calls. This test is skipped if the
``has_tool_calling`` property on the test class is set to False.
This test is optional and should be skipped if the model does not support
tool calling (see Configuration below).
.. dropdown:: Configuration
To disable tool calling tests, set ``has_tool_calling`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def has_tool_calling(self) -> bool:
return False
.. dropdown:: Troubleshooting
If this test fails, check that ``bind_tools`` is implemented to correctly
translate LangChain tool objects into the appropriate schema for your
chat model.
This test may fail if the chat model does not support a ``tool_choice``
parameter. This parameter can be used to force a tool call. If
``tool_choice`` is not supported and the model consistently fails this
test, you can ``xfail`` the test:
.. code-block:: python
@pytest.mark.xfail(reason=("Does not support tool_choice."))
def test_tool_calling(self, model: BaseChatModelV1) -> None:
super().test_tool_calling(model)
Otherwise, in the case that only one tool is bound, ensure that
``tool_choice`` supports the string ``'any'`` to force calling that tool.
"""
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
tool_choice_value = None if not self.has_tool_choice else "any"
model_with_tools = model.bind_tools(
[magic_function], tool_choice=tool_choice_value
)
query = "What is the value of magic_function(3)? Use the tool."
result = model_with_tools.invoke(query)
_validate_tool_call_message(result)
# Test stream()
full: Optional[AIMessageChunk] = None
for chunk in model_with_tools.stream(query):
full = chunk if full is None else full + chunk # type: ignore[assignment]
assert isinstance(full, AIMessage)
_validate_tool_call_message(full)
async def test_tool_calling_async(self, model: BaseChatModel) -> None:
"""Test that the model generates tool calls. This test is skipped if the
``has_tool_calling`` property on the test class is set to False.
This test is optional and should be skipped if the model does not support
tool calling (see Configuration below).
.. dropdown:: Configuration
To disable tool calling tests, set ``has_tool_calling`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def has_tool_calling(self) -> bool:
return False
.. dropdown:: Troubleshooting
If this test fails, check that ``bind_tools`` is implemented to correctly
translate LangChain tool objects into the appropriate schema for your
chat model.
This test may fail if the chat model does not support a ``tool_choice``
parameter. This parameter can be used to force a tool call. If
``tool_choice`` is not supported and the model consistently fails this
test, you can ``xfail`` the test:
.. code-block:: python
@pytest.mark.xfail(reason=("Does not support tool_choice."))
async def test_tool_calling_async(self, model: BaseChatModelV1) -> None:
await super().test_tool_calling_async(model)
Otherwise, in the case that only one tool is bound, ensure that
``tool_choice`` supports the string ``'any'`` to force calling that tool.
"""
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
tool_choice_value = None if not self.has_tool_choice else "any"
model_with_tools = model.bind_tools(
[magic_function], tool_choice=tool_choice_value
)
query = "What is the value of magic_function(3)? Use the tool."
result = await model_with_tools.ainvoke(query)
_validate_tool_call_message(result)
# Test astream()
full: Optional[AIMessageChunk] = None
async for chunk in model_with_tools.astream(query):
full = chunk if full is None else full + chunk # type: ignore[assignment]
assert isinstance(full, AIMessage)
_validate_tool_call_message(full)
def test_bind_runnables_as_tools(self, model: BaseChatModel) -> None:
"""Test that the model generates tool calls for tools that are derived from
LangChain runnables. This test is skipped if the ``has_tool_calling`` property
on the test class is set to False.
This test is optional and should be skipped if the model does not support
tool calling (see Configuration below).
.. dropdown:: Configuration
To disable tool calling tests, set ``has_tool_calling`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def has_tool_calling(self) -> bool:
return False
.. dropdown:: Troubleshooting
If this test fails, check that ``bind_tools`` is implemented to correctly
translate LangChain tool objects into the appropriate schema for your
chat model.
This test may fail if the chat model does not support a ``tool_choice``
parameter. This parameter can be used to force a tool call. If
``tool_choice`` is not supported and the model consistently fails this
test, you can ``xfail`` the test:
.. code-block:: python
@pytest.mark.xfail(reason=("Does not support tool_choice."))
def test_bind_runnables_as_tools(self, model: BaseChatModelV1) -> None:
super().test_bind_runnables_as_tools(model)
Otherwise, ensure that the ``tool_choice_value`` property is correctly
specified on the test class.
"""
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
prompt = ChatPromptTemplate.from_messages(
[("human", "Hello. Please respond in the style of {answer_style}.")]
)
llm = GenericFakeChatModel(messages=iter(["hello matey"]))
chain = prompt | llm | StrOutputParser()
tool_ = chain.as_tool(
name="greeting_generator",
description="Generate a greeting in a particular style of speaking.",
)
if self.has_tool_choice:
tool_choice: Optional[str] = "any"
else:
tool_choice = None
model_with_tools = model.bind_tools([tool_], tool_choice=tool_choice)
query = "Using the tool, generate a Pirate greeting."
result = model_with_tools.invoke(query)
assert isinstance(result, AIMessage)
assert result.tool_calls
tool_call = result.tool_calls[0]
assert tool_call["args"].get(
"answer_style"
) # TODO: do we need to handle if args is str? # noqa: E501
assert is_tool_call_block(tool_call)
def test_tool_message_histories_string_content(
self, model: BaseChatModel, my_adder_tool: BaseTool
) -> None:
"""Test that message histories are compatible with string tool contents
(e.g. OpenAI format). If a model passes this test, it should be compatible
with messages generated from providers following OpenAI format.
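The history exercised here mirrors the one constructed in the test body (a
sketch; note the JSON string content on the ``ToolMessage``):
.. code-block:: python
[
HumanMessage("What is 1 + 2"),
create_tool_call("my_adder_tool", {"a": "1", "b": "2"}, id="abc123"),
ToolMessage('{"result": 3}', tool_call_id="abc123", status="success"),
]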
This test should be skipped if the model does not support tool calling
(see Configuration below).
.. dropdown:: Configuration
To disable tool calling tests, set ``has_tool_calling`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def has_tool_calling(self) -> bool:
return False
.. dropdown:: Troubleshooting
TODO: verify this!
If this test fails, check that:
1. The model can correctly handle message histories that include ``AIMessage`` objects whose ``TextContentBlock`` content is an empty string (``""``).
2. The ``tool_calls`` attribute on ``AIMessage`` objects is correctly handled and passed to the model in an appropriate format.
3. The model can correctly handle ``ToolMessage`` objects with string content and arbitrary string values for ``tool_call_id``.
You can ``xfail`` the test if tool calling is implemented but this format
is not supported.
.. code-block:: python
@pytest.mark.xfail(reason=("Not implemented."))
def test_tool_message_histories_string_content(self, *args: Any) -> None:
super().test_tool_message_histories_string_content(*args)
""" # noqa: E501
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
model_with_tools = model.bind_tools([my_adder_tool])
function_name = "my_adder_tool"
function_args = {"a": "1", "b": "2"}
messages_string_content = [
HumanMessage("What is 1 + 2"),
# String content (e.g. OpenAI)
create_tool_call(function_name, function_args, id="abc123"),
ToolMessage(
json.dumps({"result": 3}), tool_call_id="abc123", status="success"
),
]
result_string_content = model_with_tools.invoke(
messages_string_content # type: ignore[arg-type]
) # TODO
assert isinstance(result_string_content, AIMessage)
def test_tool_message_histories_list_content(
self,
model: BaseChatModel,
my_adder_tool: BaseTool,
) -> None:
"""Test that message histories are compatible with list tool contents
(e.g. Anthropic format).
These message histories will include ``AIMessage`` objects with both text and
"tool use" content blocks, e.g.,
.. code-block:: python
[
{"type": "text", "text": "Hmm let me think about that"},
{
"type": "tool_use",
"input": {"fav_color": "green"},
"id": "foo",
"name": "color_picker",
},
]
This test should be skipped if the model does not support tool calling
(see Configuration below).
.. dropdown:: Configuration
To disable tool calling tests, set ``has_tool_calling`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def has_tool_calling(self) -> bool:
return False
.. dropdown:: Troubleshooting
If this test fails, check that:
1. The model can correctly handle message histories that include ``AIMessage`` objects with list content.
2. The ``tool_calls`` attribute on ``AIMessage`` objects is correctly handled and passed to the model in an appropriate format.
3. The model can correctly handle ``ToolMessage`` objects with string content and arbitrary string values for ``tool_call_id``.
You can ``xfail`` the test if tool calling is implemented but this format
is not supported.
.. code-block:: python
@pytest.mark.xfail(reason=("Not implemented."))
def test_tool_message_histories_list_content(self, *args: Any) -> None:
super().test_tool_message_histories_list_content(*args)
""" # noqa: E501
pytest.xfail("Test not implemented yet.")
# TODO
# if not self.has_tool_calling:
# pytest.skip("Test requires tool calling.")
# model_with_tools = model.bind_tools([my_adder_tool])
# function_name = "my_adder_tool"
# function_args = {"a": 1, "b": 2}
# messages_list_content = [
# HumanMessage("What is 1 + 2"),
# # List content (e.g., Anthropic)
# AIMessage(
# [
# {"type": "text", "text": "some text"},
# {
# "type": "tool_use",
# "id": "abc123",
# "name": function_name,
# "input": function_args,
# },
# ],
# tool_calls=[
# {
# "name": function_name,
# "args": function_args,
# "id": "abc123",
# "type": "tool_call",
# },
# ],
# ),
# ToolMessage(
# json.dumps({"result": 3}),
# name=function_name,
# tool_call_id="abc123",
# ),
# ]
# result_list_content = model_with_tools.invoke(messages_list_content)
# assert isinstance(result_list_content, AIMessage)
def test_tool_choice(self, model: BaseChatModel) -> None:
"""Test that the model can force tool calling via the ``tool_choice``
parameter. This test is skipped if the ``has_tool_choice`` property on the
test class is set to False.
This test is optional and should be skipped if the model does not support
tool calling (see Configuration below).
.. dropdown:: Configuration
To disable tool calling tests, set ``has_tool_choice`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def has_tool_choice(self) -> bool:
return False
.. dropdown:: Troubleshooting
If this test fails, check whether the ``test_tool_calling`` test is passing.
If it is not, refer to the troubleshooting steps in that test first.
If ``test_tool_calling`` is passing, check that the underlying model
supports forced tool calling. If it does, ``bind_tools`` should accept a
``tool_choice`` parameter that can be used to force a tool call.
It should accept the following (see the example below):
1. The string ``'any'`` to force calling the bound tool, and,
2. The string name of the tool to force calling that tool.
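For example (an illustrative sketch of what this test does):
.. code-block:: python
model_with_tools = model.bind_tools(
[magic_function, get_weather], tool_choice="any"
)
# or force a specific tool by name:
model_with_tools = model.bind_tools(
[magic_function, get_weather], tool_choice="magic_function"
)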
"""
if not self.has_tool_choice or not self.has_tool_calling:
pytest.skip("Test requires tool choice.")
@tool
def get_weather(location: str) -> str:
"""Get weather at a location."""
return "It's sunny."
for tool_choice in ["any", "magic_function"]:
model_with_tools = model.bind_tools(
[magic_function, get_weather], tool_choice=tool_choice
)
result = model_with_tools.invoke("Hello!")
assert isinstance(result, AIMessage)
assert result.tool_calls
if tool_choice == "magic_function":
assert result.tool_calls[0]["name"] == "magic_function"
def test_tool_calling_with_no_arguments(self, model: BaseChatModel) -> None:
"""Test that the model generates tool calls for tools with no arguments.
This test is skipped if the ``has_tool_calling`` property on the test class
is set to False.
This test is optional and should be skipped if the model does not support
tool calling (see Configuration below).
.. dropdown:: Configuration
To disable tool calling tests, set ``has_tool_calling`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def has_tool_calling(self) -> bool:
return False
.. dropdown:: Troubleshooting
If this test fails, check that ``bind_tools`` is implemented to correctly
translate LangChain tool objects into the appropriate schema for your
chat model. It should correctly handle the case where a tool has no
arguments.
This test may fail if the chat model does not support a ``tool_choice``
parameter. This parameter can be used to force a tool call. It may also
fail if a provider does not support this form of tool. In these cases,
you can ``xfail`` the test:
.. code-block:: python
@pytest.mark.xfail(reason=("Does not support tool_choice."))
def test_tool_calling_with_no_arguments(self, model: BaseChatModelV1) -> None:
super().test_tool_calling_with_no_arguments(model)
Otherwise, in the case that only one tool is bound, ensure that
``tool_choice`` supports the string ``'any'`` to force calling that tool.
""" # noqa: E501
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
tool_choice_value = None if not self.has_tool_choice else "any"
model_with_tools = model.bind_tools(
[magic_function_no_args], tool_choice=tool_choice_value
)
query = "What is the value of magic_function_no_args()? You must use the tool."
# Invoke
result = model_with_tools.invoke(query)
_validate_tool_call_message_no_args(result)
# Stream
full: Optional[AIMessageChunk] = None
for chunk in model_with_tools.stream(query):
full = chunk if full is None else full + chunk # type: ignore[assignment]
assert isinstance(full, AIMessage)
_validate_tool_call_message_no_args(full)
def test_tool_message_error_status(
self, model: BaseChatModel, my_adder_tool: BaseTool
) -> None:
"""Test that ``ToolMessage`` with ``status="error"`` can be handled.
These messages may take the form:
.. code-block:: python
ToolMessage(
content="Error: Missing required argument 'b'.",
tool_call_id="abc123",
status="error",
)
If possible, the ``status`` field should be parsed and passed appropriately
to the model.
This test is optional and should be skipped if the model does not support
tool calling (see Configuration below).
.. dropdown:: Configuration
To disable tool calling tests, set ``has_tool_calling`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def has_tool_calling(self) -> bool:
return False
.. dropdown:: Troubleshooting
If this test fails, check that the ``status`` field on ``ToolMessage``
objects is either ignored or passed to the model appropriately.
"""
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
model_with_tools = model.bind_tools([my_adder_tool])
messages = [
HumanMessage("What is 1 + 2?"),
create_tool_call(
"my_adder_tool", {"a": 1}, id="abc123"
), # Missing required argument 'b'
ToolMessage(
"Error: Missing required argument 'b'.",
tool_call_id="abc123",
status="error",
),
]
result = model_with_tools.invoke(messages) # type: ignore[arg-type]
assert isinstance(result, AIMessage)
def test_structured_few_shot_examples(
self, model: BaseChatModel, my_adder_tool: BaseTool
) -> None:
"""Test that the model can process few-shot examples with tool calls.
These are represented as a sequence of messages of the following form:
- ``HumanMessage`` with ``TextContentBlock`` content;
- ``AIMessage`` with the ``tool_calls`` attribute populated;
- ``ToolMessage`` with string content;
- ``ToolMessage`` with content block content;
- ``AIMessage`` with ``TextContentBlock`` content (an answer);
- ``HumanMessage`` with ``TextContentBlock`` content (a follow-up question).
This test should be skipped if the model does not support tool calling
(see Configuration below).
.. dropdown:: Configuration
To disable tool calling tests, set ``has_tool_calling`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def has_tool_calling(self) -> bool:
return False
.. dropdown:: Troubleshooting
This test uses `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.tool_example_to_messages.html>`__
in ``langchain_core`` to generate a sequence of messages representing
"few-shot" examples.
If this test fails, check that the model can correctly handle this
sequence of messages.
You can ``xfail`` the test if tool calling is implemented but this format
is not supported.
.. code-block:: python
@pytest.mark.xfail(reason=("Not implemented."))
def test_structured_few_shot_examples(self, *args: Any) -> None:
super().test_structured_few_shot_examples(*args)
"""
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
model_with_tools = model.bind_tools([my_adder_tool], tool_choice="any")
function_result = json.dumps({"result": 3})
tool_schema = my_adder_tool.args_schema
assert isinstance(tool_schema, type)
assert issubclass(tool_schema, BaseModel)
# TODO verify this is correct
few_shot_messages = tool_example_to_messages(
"What is 1 + 2",
[tool_schema(a=1, b=2)],
tool_outputs=[function_result],
ai_response=function_result,
)
messages = [*few_shot_messages, HumanMessage("What is 3 + 4")]
result = model_with_tools.invoke(messages) # type: ignore[arg-type]
assert isinstance(result, AIMessage)
@pytest.mark.parametrize("schema_type", ["pydantic", "typeddict", "json_schema"])
def test_structured_output(self, model: BaseChatModel, schema_type: str) -> None:
"""Test to verify structured output is generated both on ``invoke()`` and ``stream()``.
This test is optional and should be skipped if the model does not support
structured output (see Configuration below).
.. dropdown:: Configuration
To disable structured output tests, set ``has_structured_output`` to False
in your test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def has_structured_output(self) -> bool:
return False
By default, ``has_structured_output`` is True if a model overrides the
``with_structured_output`` or ``bind_tools`` methods.
.. dropdown:: Troubleshooting
If this test fails, ensure that the model's ``bind_tools`` method
properly handles both JSON Schema and Pydantic V2 models.
``langchain_core`` implements `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html>`__
that will accommodate most formats.
See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
of ``with_structured_output``.
""" # noqa: E501
if not self.has_structured_output:
pytest.skip("Test requires structured output.")
schema, validation_function = _get_joke_class(schema_type) # type: ignore[arg-type]
chat = model.with_structured_output(schema, **self.structured_output_kwargs)
mock_callback = MagicMock()
mock_callback.on_chat_model_start = MagicMock()
invoke_callback = _TestCallbackHandler()
result = chat.invoke(
"Tell me a joke about cats.", config={"callbacks": [invoke_callback]}
)
validation_function(result)
assert len(invoke_callback.options) == 1, (
"Expected on_chat_model_start to be called once"
)
assert isinstance(invoke_callback.options[0], dict)
assert isinstance(
invoke_callback.options[0]["ls_structured_output_format"]["schema"], dict
)
assert invoke_callback.options[0]["ls_structured_output_format"][
"schema"
] == convert_to_json_schema(schema)
stream_callback = _TestCallbackHandler()
for chunk in chat.stream(
"Tell me a joke about cats.", config={"callbacks": [stream_callback]}
):
validation_function(chunk)
assert chunk
assert len(stream_callback.options) == 1, (
"Expected on_chat_model_start to be called once"
)
assert isinstance(stream_callback.options[0], dict)
assert isinstance(
stream_callback.options[0]["ls_structured_output_format"]["schema"], dict
)
assert stream_callback.options[0]["ls_structured_output_format"][
"schema"
] == convert_to_json_schema(schema)
@pytest.mark.parametrize("schema_type", ["pydantic", "typeddict", "json_schema"])
async def test_structured_output_async(
self, model: BaseChatModel, schema_type: str
) -> None:
"""Test to verify structured output is generated both on ``invoke()`` and ``stream()``.
This test is optional and should be skipped if the model does not support
structured output (see Configuration below).
.. dropdown:: Configuration
To disable structured output tests, set ``has_structured_output`` to False
in your test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def has_structured_output(self) -> bool:
return False
By default, ``has_structured_output`` is True if a model overrides the
``with_structured_output`` or ``bind_tools`` methods.
.. dropdown:: Troubleshooting
If this test fails, ensure that the model's ``bind_tools`` method
properly handles both JSON Schema and Pydantic V2 models.
``langchain_core`` implements `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html>`__
that will accommodate most formats.
See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
of ``with_structured_output``.
""" # noqa: E501
if not self.has_structured_output:
pytest.skip("Test requires structured output.")
schema, validation_function = _get_joke_class(schema_type) # type: ignore[arg-type]
chat = model.with_structured_output(schema, **self.structured_output_kwargs)
ainvoke_callback = _TestCallbackHandler()
result = await chat.ainvoke(
"Tell me a joke about cats.", config={"callbacks": [ainvoke_callback]}
)
validation_function(result)
assert len(ainvoke_callback.options) == 1, (
"Expected on_chat_model_start to be called once"
)
assert isinstance(ainvoke_callback.options[0], dict)
assert isinstance(
ainvoke_callback.options[0]["ls_structured_output_format"]["schema"], dict
)
assert ainvoke_callback.options[0]["ls_structured_output_format"][
"schema"
] == convert_to_json_schema(schema)
astream_callback = _TestCallbackHandler()
async for chunk in chat.astream(
"Tell me a joke about cats.", config={"callbacks": [astream_callback]}
):
validation_function(chunk)
assert chunk
assert len(astream_callback.options) == 1, (
"Expected on_chat_model_start to be called once"
)
assert isinstance(astream_callback.options[0], dict)
assert isinstance(
astream_callback.options[0]["ls_structured_output_format"]["schema"], dict
)
assert astream_callback.options[0]["ls_structured_output_format"][
"schema"
] == convert_to_json_schema(schema)
def test_structured_output_optional_param(self, model: BaseChatModel) -> None:
"""Test to verify we can generate structured output that includes optional
parameters.
This test is optional and should be skipped if the model does not support
structured output (see Configuration below).
.. dropdown:: Configuration
To disable structured output tests, set ``has_structured_output`` to False
in your test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def has_structured_output(self) -> bool:
return False
By default, ``has_structured_output`` is True if a model overrides the
``with_structured_output`` or ``bind_tools`` methods.
.. dropdown:: Troubleshooting
If this test fails, ensure that the model's ``bind_tools`` method
properly handles Pydantic V2 models with optional parameters.
``langchain_core`` implements `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html>`__
that will accommodate most formats.
See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
of ``with_structured_output``.
"""
if not self.has_structured_output:
pytest.skip("Test requires structured output.")
# Pydantic
class Joke(BaseModel):
"""Joke to tell user."""
setup: str = Field(description="question to set up a joke")
punchline: Optional[str] = Field(
default=None, description="answer to resolve the joke"
)
chat = model.with_structured_output(Joke, **self.structured_output_kwargs)
setup_result = chat.invoke(
"Give me the setup to a joke about cats, no punchline."
)
assert isinstance(setup_result, Joke)
joke_result = chat.invoke("Give me a joke about cats, include the punchline.")
assert isinstance(joke_result, Joke)
# Schema
chat = model.with_structured_output(
Joke.model_json_schema(), **self.structured_output_kwargs
)
result = chat.invoke("Tell me a joke about cats.")
assert isinstance(result, dict)
# TypedDict
class JokeDict(TypedDict):
"""Joke to tell user."""
setup: Annotated[str, ..., "question to set up a joke"]
punchline: Annotated[Optional[str], None, "answer to resolve the joke"]
chat = model.with_structured_output(JokeDict, **self.structured_output_kwargs)
result = chat.invoke("Tell me a joke about cats.")
assert isinstance(result, dict)
def test_json_mode(self, model: BaseChatModel) -> None:
"""Test structured output via `JSON mode. <https://python.langchain.com/docs/concepts/structured_outputs/#json-mode>`_.
This test is optional and should be skipped if the model does not support
the JSON mode feature (see Configuration below).
.. dropdown:: Configuration
To disable this test, set ``supports_json_mode`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def supports_json_mode(self) -> bool:
return False
.. dropdown:: Troubleshooting
See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
of ``with_structured_output``.
"""
if not self.supports_json_mode:
pytest.skip("Test requires json mode support.")
from pydantic import BaseModel as BaseModelProper
from pydantic import Field as FieldProper
class Joke(BaseModelProper):
"""Joke to tell user."""
setup: str = FieldProper(description="question to set up a joke")
punchline: str = FieldProper(description="answer to resolve the joke")
# Pydantic class
# Type ignoring since the interface only officially supports pydantic 1
# or pydantic.v1.BaseModel but not pydantic.BaseModel from pydantic 2.
# We'll need to do a pass updating the type signatures.
chat = model.with_structured_output(Joke, method="json_mode")
msg = (
"Tell me a joke about cats. Return the result as a JSON with 'setup' and "
"'punchline' keys. Return nothing other than JSON."
)
result = chat.invoke(msg)
assert isinstance(result, Joke)
for chunk in chat.stream(msg):
assert isinstance(chunk, Joke)
# Schema
chat = model.with_structured_output(
Joke.model_json_schema(), method="json_mode"
)
result = chat.invoke(msg)
assert isinstance(result, dict)
assert set(result.keys()) == {"setup", "punchline"}
for chunk in chat.stream(msg):
assert isinstance(chunk, dict)
assert isinstance(chunk, dict) # for mypy
assert set(chunk.keys()) == {"setup", "punchline"}
def test_pdf_inputs(self, model: BaseChatModel) -> None:
"""Test that the model can process PDF inputs.
This test should be skipped (see Configuration below) if the model does not
support PDF inputs. These will take the form:
.. code-block:: python
{
"type": "image",
"source_type": "base64",
"data": "<base64 image data>",
"mime_type": "application/pdf",
}
See https://python.langchain.com/docs/concepts/multimodality/
.. dropdown:: Configuration
To disable this test, set ``supports_pdf_inputs`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def supports_pdf_inputs(self) -> bool:
return False
.. dropdown:: Troubleshooting
If this test fails, check that the model can correctly handle messages
with pdf content blocks, including base64-encoded files. Otherwise, set
the ``supports_pdf_inputs`` property to False.
"""
pytest.xfail("Test not implemented yet.")
# TODO
# if not self.supports_pdf_inputs:
# pytest.skip("Model does not support PDF inputs.")
# url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
# pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
# message = HumanMessage(
# [
# {
# "type": "text",
# "text": "Summarize this document:",
# },
# {
# "type": "file",
# "source_type": "base64",
# "mime_type": "application/pdf",
# "data": pdf_data,
# },
# ]
# )
# _ = model.invoke([message])
# # Test OpenAI Chat Completions format
# message = HumanMessage(
# [
# {
# "type": "text",
# "text": "Summarize this document:",
# },
# {
# "type": "file",
# "file": {
# "filename": "test file.pdf",
# "file_data": f"data:application/pdf;base64,{pdf_data}",
# },
# },
# ]
# )
# _ = model.invoke([message])
def test_audio_inputs(self, model: BaseChatModel) -> None:
"""Test that the model can process audio inputs.
This test should be skipped (see Configuration below) if the model does not
support audio inputs. These will take the form:
.. code-block:: python
# AudioContentBlock
{
"type": "audio",
"base64": "<base64 audio data>",
"mime_type": "audio/wav", # or appropriate mime-type
}
See https://python.langchain.com/docs/concepts/multimodality/
.. dropdown:: Configuration
To disable this test, set ``supports_audio_content_blocks`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def supports_audio_content_blocks(self) -> bool:
return False
.. dropdown:: Troubleshooting
If this test fails, check that the model can correctly handle messages
with audio content blocks. Otherwise, set the ``supports_audio_content_blocks``
property to False.
""" # noqa: E501
if not self.supports_audio_content_blocks:
pytest.skip("Model does not support AudioContentBlock inputs.")
url = "https://upload.wikimedia.org/wikipedia/commons/3/3d/Alcal%C3%A1_de_Henares_%28RPS_13-04-2024%29_canto_de_ruise%C3%B1or_%28Luscinia_megarhynchos%29_en_el_Soto_del_Henares.wav"
audio_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
message = HumanMessage(
[
create_text_block("Describe this audio:"),
create_audio_block(
base64=audio_data,
mime_type="audio/wav",
),
]
)
_ = model.invoke([message])
# TODO?
# Test OpenAI Chat Completions format
# message = HumanMessage(
# [
# {
# "type": "text",
# "text": "Describe this audio:",
# },
# {
# "type": "input_audio",
# "input_audio": {"data": audio_data, "format": "wav"},
# },
# ]
# )
# _ = model.invoke([message])
def test_image_inputs(self, model: BaseChatModel) -> None:
"""Test that the model can process image inputs.
This test should be skipped (see Configuration below) if the model does not
support image inputs. These will take the form:
.. code-block:: python
# ImageContentBlock
{
"type": "image",
"base64": "<base64 audio data>",
"mime_type": "image/png", # or appropriate mime-type
}
TODO: verify this
For backward-compatibility, we must also support OpenAI-style
image content blocks:
.. code-block:: python
[
{"type": "text", "text": "describe the weather in this image"},
{
"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
},
]
See https://python.langchain.com/docs/concepts/multimodality/
.. dropdown:: Configuration
To disable this test, set ``supports_image_content_blocks`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def supports_image_content_blocks(self) -> bool:
return False
# Can also explicitly disable testing image URLs:
@property
def supports_image_urls(self) -> bool:
return False
.. dropdown:: Troubleshooting
If this test fails, check that the model can correctly handle messages
with image content blocks, including base64-encoded images. Otherwise, set
the ``supports_image_content_blocks`` property to False.
"""
if not self.supports_image_content_blocks:
pytest.skip("Model does not support image message.")
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
# TODO?
# OpenAI format, base64 data
# message = HumanMessage(
# content=[
# {"type": "text", "text": "describe the weather in this image"},
# {
# "type": "image_url",
# "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
# },
# ],
# )
# _ = model.invoke([message])
# Standard format, base64 data
message = HumanMessage(
[
create_text_block("describe the weather in this image"),
create_image_block(
base64=image_data,
mime_type="image/jpeg",
),
],
)
_ = model.invoke([message])
# TODO?
# Standard format, URL
# if self.supports_image_urls:
# message = HumanMessage(
# content=[
# {"type": "text", "text": "describe the weather in this image"},
# {
# "type": "image",
# "source_type": "url",
# "url": image_url,
# },
# ],
# )
# _ = model.invoke([message])
def test_image_tool_message(self, model: BaseChatModel) -> None:
"""Test that the model can process ToolMessages with image inputs.
TODO: is this needed?
This test should be skipped if the model does not support messages of the
form:
.. code-block:: python
ToolMessage(
content=[
{
"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
},
],
tool_call_id="1",
)
containing image content blocks in OpenAI Chat Completions format, in addition
to messages of the form:
.. code-block:: python
ToolMessage(
content=[
{
"type": "image",
"source_type": "base64",
"data": image_data,
"mime_type": "image/jpeg",
},
],
tool_call_id="1",
)
containing image content blocks in standard format.
This test can be skipped by setting the ``supports_image_tool_message`` property
to False (see Configuration below).
.. dropdown:: Configuration
To disable this test, set ``supports_image_tool_message`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def supports_image_tool_message(self) -> bool:
return False
.. dropdown:: Troubleshooting
If this test fails, check that the model can correctly handle messages
with image content blocks in ToolMessages, including base64-encoded
images. Otherwise, set the ``supports_image_tool_message`` property to
False.
"""
pytest.xfail("Test not implemented yet.")
# TODO
# if not self.supports_image_tool_message:
# pytest.skip("Model does not support image tool message.")
# image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
# image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
# # Support both OpenAI and standard formats
# oai_format_message = ToolMessage(
# content=[
# {
# "type": "image_url",
# "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
# },
# ],
# tool_call_id="1",
# name="random_image",
# )
# standard_format_message = ToolMessage(
# content=[
# {
# "type": "image",
# "source_type": "base64",
# "data": image_data,
# "mime_type": "image/jpeg",
# },
# ],
# tool_call_id="1",
# name="random_image",
# )
# for tool_message in [oai_format_message, standard_format_message]:
# messages = [
# HumanMessage(
# "get a random image using the tool and describe the weather"
# ),
# AIMessage(
# [],
# tool_calls=[
# {
# "type": "tool_call",
# "id": "1",
# "name": "random_image",
# "args": {},
# }
# ],
# ),
# tool_message,
# ]
# def random_image() -> str:
# """Return a random image."""
# return ""
# _ = model.bind_tools([random_image]).invoke(messages)
def test_anthropic_inputs(self, model: BaseChatModel) -> None:
"""Test that model can process Anthropic-style message histories.
TODO?
These message histories will include ``AIMessage`` objects with ``tool_use``
content blocks, e.g.,
.. code-block:: python
AIMessage(
[
{"type": "text", "text": "Hmm let me think about that"},
{
"type": "tool_use",
"input": {"fav_color": "green"},
"id": "foo",
"name": "color_picker",
},
]
)
as well as ``HumanMessage`` objects containing ``tool_result`` content blocks:
.. code-block:: python
HumanMessage(
[
{
"type": "tool_result",
"tool_use_id": "foo",
"content": [
{
"type": "text",
"text": "green is a great pick! that's my sister's favorite color", # noqa: E501
}
],
"is_error": False,
},
{"type": "text", "text": "what's my sister's favorite color"},
]
)
This test should be skipped if the model does not support messages of this
form (or doesn't support tool calling generally). See Configuration below.
.. dropdown:: Configuration
To disable this test, set ``supports_anthropic_inputs`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def supports_anthropic_inputs(self) -> bool:
return False
.. dropdown:: Troubleshooting
If this test fails, check that:
1. The model can correctly handle message histories that include message objects with list content.
2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
3. HumanMessages with "tool_result" content blocks are correctly handled.
Otherwise, if Anthropic tool call and result formats are not supported,
set the ``supports_anthropic_inputs`` property to False.
""" # noqa: E501
pytest.xfail("Test not implemented yet.")
# TODO
# if not self.supports_anthropic_inputs:
# pytest.skip("Model does not explicitly support Anthropic inputs.")
# # Anthropic-format tool
# color_picker = {
# "name": "color_picker",
# "input_schema": {
# "type": "object",
# "properties": {
# "fav_color": {"type": "string"},
# },
# "required": ["fav_color"],
# },
# "description": "Input your fav color and get a random fact about it.",
# "cache_control": {"type": "ephemeral"},
# }
# human_content: list[dict] = [
# {
# "type": "text",
# "text": "what's your favorite color in this image",
# "cache_control": {"type": "ephemeral"},
# },
# ]
# if self.supports_image_inputs:
# image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
# image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8") # noqa: E501
# human_content.append(
# {
# "type": "image",
# "source": {
# "type": "base64",
# "media_type": "image/jpeg",
# "data": image_data,
# },
# }
# )
# messages = [
# SystemMessage("you're a good assistant"),
# HumanMessage(human_content), # type: ignore[arg-type]
# AIMessage(
# [
# {"type": "text", "text": "Hmm let me think about that"},
# {
# "type": "tool_use",
# "input": {"fav_color": "green"},
# "id": "foo",
# "name": "color_picker",
# },
# ],
# tool_calls=[
# {
# "name": "color_picker",
# "args": {"fav_color": "green"},
# "id": "foo",
# "type": "tool_call",
# }
# ],
# ),
# ToolMessage("That's a great pick!", tool_call_id="foo"),
# ]
# response = model.bind_tools([color_picker]).invoke(messages)
# assert isinstance(response, AIMessage)
# # Test thinking blocks
# messages = [
# HumanMessage(
# [
# {
# "type": "text",
# "text": "Hello",
# },
# ]
# ),
# AIMessage(
# [
# {
# "type": "thinking",
# "thinking": "I'm thinking...",
# "signature": "abc123",
# },
# {
# "type": "text",
# "text": "Hello, how are you?",
# },
# ]
# ),
# HumanMessage(
# [
# {
# "type": "text",
# "text": "Well, thanks.",
# },
# ]
# ),
# ]
# response = model.invoke(messages)
# assert isinstance(response, AIMessage)
def test_message_with_name(self, model: BaseChatModel) -> None:
"""Test that ``HumanMessage`` with values for the ``name`` field can be handled.
This test expects that the model responds with a non-empty ``TextContentBlock``.
These messages may take the form:
.. code-block:: python
HumanMessage("hello", name="example_user")
If possible, the ``name`` field should be parsed and passed appropriately
to the model. Otherwise, it should be ignored.
.. dropdown:: Troubleshooting
If this test fails, check that the ``name`` field on ``HumanMessage``
objects is either ignored or passed to the model appropriately.
"""
result = model.invoke([HumanMessage("hello", name="example_user")])
assert result is not None
assert isinstance(result, AIMessage)
assert len(result.content) > 0
assert isinstance(result.text, str)
assert len(result.text) > 0
def test_agent_loop(self, model: BaseChatModel) -> None:
"""Test that the model supports a simple ReAct agent loop. This test is skipped
if the ``has_tool_calling`` property on the test class is set to False.
This test is optional and should be skipped if the model does not support
tool calling (see Configuration below).
.. dropdown:: Configuration
To disable tool calling tests, set ``has_tool_calling`` to False in your
test class:
.. code-block:: python
class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
@property
def has_tool_calling(self) -> bool:
return False
.. dropdown:: Troubleshooting
If this test fails, check that ``bind_tools`` is implemented to correctly
translate LangChain tool objects into the appropriate schema for your
chat model.
Check also that all required information (e.g., tool calling identifiers)
from ``AIMessage`` objects is propagated correctly to model payloads.
This test may fail if the chat model does not consistently generate tool
calls in response to an appropriate query. In these cases you can ``xfail``
the test:
.. code-block:: python
@pytest.mark.xfail(reason=("Does not support tool_choice."))
def test_agent_loop(self, model: BaseChatModel) -> None:
super().test_agent_loop(model)
"""
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
@tool
def get_weather(location: str) -> str:
"""Call to surf the web."""
return "It's sunny."
llm_with_tools = model.bind_tools([get_weather])
input_message = HumanMessage("What is the weather in San Francisco, CA?")
tool_call_message = llm_with_tools.invoke([input_message])
assert isinstance(tool_call_message, AIMessage)
tool_calls = tool_call_message.tool_calls
assert len(tool_calls) == 1
tool_call = tool_calls[0]
tool_message = get_weather.invoke(tool_call)
assert isinstance(tool_message, ToolMessage)
response = llm_with_tools.invoke(
[
input_message,
tool_call_message,
tool_message,
]
)
assert isinstance(response, AIMessage)
@pytest.mark.benchmark
@pytest.mark.vcr
def test_stream_time(
self, model: BaseChatModel, benchmark: BenchmarkFixture, vcr: Cassette
) -> None:
"""Test that streaming does not introduce undue overhead.
See ``enable_vcr_tests`` dropdown :class:`above <ChatModelV1IntegrationTests>`
for more information.
.. dropdown:: Configuration
This test can be enabled or disabled using the ``enable_vcr_tests``
property. For example, to disable the test, set this property to ``False``:
.. code-block:: python
@property
def enable_vcr_tests(self) -> bool:
return False
.. important::
VCR will by default record authentication headers and other sensitive
information in cassettes. See ``enable_vcr_tests`` dropdown
:class:`above <ChatModelV1IntegrationTests>` for how to configure what
information is recorded in cassettes.
"""
if not self.enable_vcr_tests:
pytest.skip("VCR not set up.")
def _run() -> None:
for _ in model.stream("Write a story about a cat."):
pass
if not vcr.responses:
_run()
else:
benchmark(_run)
def invoke_with_audio_input(self, *, stream: bool = False) -> AIMessage:
""":private:"""
# To be implemented in test subclass
raise NotImplementedError
def invoke_with_audio_output(self, *, stream: bool = False) -> AIMessage:
""":private:"""
# To be implemented in test subclass
raise NotImplementedError
def invoke_with_reasoning_output(self, *, stream: bool = False) -> AIMessage:
""":private:"""
# To be implemented in test subclass
raise NotImplementedError
def invoke_with_cache_read_input(self, *, stream: bool = False) -> AIMessage:
""":private:"""
# To be implemented in test subclass
raise NotImplementedError
def invoke_with_cache_creation_input(self, *, stream: bool = False) -> AIMessage:
""":private:"""
# To be implemented in test subclass
raise NotImplementedError
def test_unicode_tool_call_integration(
self,
model: BaseChatModel,
*,
tool_choice: Optional[str] = None,
force_tool_call: bool = True,
) -> None:
"""Generic integration test for Unicode characters in tool calls.
Args:
model: The chat model to test
tool_choice: Tool choice parameter to pass to ``bind_tools()`` (provider-specific)
force_tool_call: Whether to force a tool call (if True and ``tool_choice`` is None, ``"any"`` is used)
Tests that Unicode characters in tool call arguments are preserved correctly,
not escaped as ``\\uXXXX`` sequences.
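A provider-specific ``tool_choice`` can be supplied by overriding this test in
your integration's test class. A minimal sketch (the ``"required"`` value below
is illustrative; use whatever value your provider accepts):
.. code-block:: python
    def test_unicode_tool_call_integration(self, model: BaseChatModel) -> None:
        super().test_unicode_tool_call_integration(
            model, tool_choice="required", force_tool_call=True
        )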
""" # noqa: E501
if not self.has_tool_calling:
pytest.skip("Test requires tool calling support.")
# Configure tool choice based on provider capabilities
if tool_choice is None and force_tool_call:
tool_choice = "any"
if tool_choice is not None:
llm_with_tool = model.bind_tools(
[unicode_customer], tool_choice=tool_choice
)
else:
llm_with_tool = model.bind_tools([unicode_customer])
# Test with Chinese characters
msgs = [
HumanMessage(
"Create a customer named '你好啊集团' (Hello Group) - a Chinese "
"technology company"
)
]
ai_msg = llm_with_tool.invoke(msgs)
assert isinstance(ai_msg, AIMessage)
assert isinstance(ai_msg.tool_calls, list)
if force_tool_call:
assert len(ai_msg.tool_calls) >= 1, (
f"Expected at least 1 tool call, got {len(ai_msg.tool_calls)}"
)
if ai_msg.tool_calls:
tool_call = ai_msg.tool_calls[0]
assert tool_call["name"] == "unicode_customer"
assert "args" in tool_call
# Verify Unicode characters are properly handled
args = tool_call["args"]
assert "customer_name" in args
customer_name = args["customer_name"]
# The model should include the Unicode characters, not escaped sequences
assert (
"你好" in customer_name
or "集团" in customer_name
), f"Unicode characters not found in: {customer_name}"
# Test with additional Unicode examples - Japanese
msgs_jp = [
HumanMessage(
"Create a customer named 'こんにちは株式会社' (Hello Corporation) - a "
"Japanese company"
)
]
ai_msg_jp = llm_with_tool.invoke(msgs_jp)
assert isinstance(ai_msg_jp, AIMessage)
if force_tool_call:
assert len(ai_msg_jp.tool_calls) >= 1
if ai_msg_jp.tool_calls:
tool_call_jp = ai_msg_jp.tool_calls[0]
args_jp = tool_call_jp["args"]
customer_name_jp = args_jp["customer_name"]
# Verify Japanese Unicode characters are preserved
assert (
"こんにちは" in customer_name_jp
or "株式会社" in customer_name_jp
), f"Japanese Unicode characters not found in: {customer_name_jp}"
# TODO
# def test_multimodal_reasoning(self, model: BaseChatModel) -> None:
# """Test complex reasoning with multiple content types.
# TODO: expand docstring
# """
# if not self.supports_multimodal_reasoning:
# pytest.skip("Model does not support multimodal reasoning.")
# content_blocks: list[types.ContentBlock] = [
# create_text_block(
# "Compare these media files and provide reasoning analysis:"
# ),
# create_image_block(
# base64=_get_test_image_base64(),
# mime_type="image/png",
# ),
# ]
# if self.supports_audio_content_blocks:
# content_blocks.append(
# create_audio_block(
# base64=_get_test_audio_base64(),
# mime_type="audio/wav",
# )
# )
# message = HumanMessage(content=cast("list[types.ContentBlock]", content_blocks)) # noqa: E501
# result = model.invoke([message])
# assert isinstance(result, AIMessage)
# if self.supports_reasoning_content_blocks:
# reasoning_blocks = [
# block
# for block in result.content
# if isinstance(block, dict) and is_reasoning_block(block)
# ]
# assert len(reasoning_blocks) > 0
def test_citation_generation_with_sources(self, model: BaseChatModel) -> None:
"""Test that the model can generate ``Citations`` with source links.
TODO: expand docstring
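Citations are surfaced as ``annotations`` on a ``TextContentBlock``. The exact
fields are provider-dependent; this test only checks for the keys asserted in
its body. A citation annotation might look like:
.. code-block:: python
    {
        "type": "citation",
        "cited_text": "The Moon is about 384,400 km from Earth.",
        "start_index": 0,  # character offsets into the block's text
        "end_index": 40,
        # Providers may also include fields such as a source URL or title.
    }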
"""
if not self.supports_structured_citations:
pytest.skip("Model does not support structured citations.")
message = HumanMessage(
"Provide factual information about the distance to the moon with proper "
"citations to scientific sources."
)
result = model.invoke([message])
assert isinstance(result, AIMessage)
# Check for text blocks with citations
text_blocks_with_citations = []
for block in result.content:
if (
isinstance(block, dict)
and is_text_block(block)
and "annotations" in block
):
annotations = cast("list[dict[str, Any]]", block.get("annotations", []))
citations = [
ann
for ann in annotations
if isinstance(ann, dict) and ann.get("type") == "citation"
]
if citations:
text_blocks_with_citations.append(block)
assert len(text_blocks_with_citations) > 0
# Validate citation structure
for block in text_blocks_with_citations:
annotations = cast("list[dict[str, Any]]", block.get("annotations", []))
for annotation in annotations:
if annotation.get("type") == "citation":
# TODO: evaluate these since none are *technically* required
# This may be a test that needs adjustment on per-integration basis
assert "cited_text" in annotation
assert "start_index" in annotation
assert "end_index" in annotation
def test_web_search_integration(self, model: BaseChatModel) -> None:
"""Test web search content blocks integration.
TODO: expand docstring
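Only the ``type`` key of each content block is inspected; any block of type
``"web_search_call"`` or ``"web_search_result"`` satisfies the assertion, and
all other fields are provider-dependent. For example:
.. code-block:: python
    {"type": "web_search_call"}  # the model issued a search
    {"type": "web_search_result"}  # the model surfaced search results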
"""
if not self.supports_web_search_blocks:
pytest.skip("Model does not support web search blocks.")
message = HumanMessage(
"Search for the latest developments in quantum computing."
)
result = model.invoke([message])
assert isinstance(result, AIMessage)
# Check for web search blocks
search_call_blocks = [
block
for block in result.content
if isinstance(block, dict) and block.get("type") == "web_search_call"
]
search_result_blocks = [
block
for block in result.content
if isinstance(block, dict) and block.get("type") == "web_search_result"
]
# TODO: should this be one or the other or both?
assert len(search_call_blocks) > 0 or len(search_result_blocks) > 0
def test_code_interpreter_blocks(self, model: BaseChatModel) -> None:
"""Test code interpreter content blocks.
TODO: expand docstring
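Only the ``type`` key of each content block is inspected; the assertion passes
if the response contains at least one block with any of these types (other
fields are provider-dependent):
.. code-block:: python
    {"type": "code_interpreter_call"}
    {"type": "code_interpreter_output"}
    {"type": "code_interpreter_result"}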
"""
if not self.supports_code_interpreter:
pytest.skip("Model does not support code interpreter blocks.")
message = HumanMessage("Calculate the factorial of 10 using Python code.")
result = model.invoke([message])
assert isinstance(result, AIMessage)
# Check for code interpreter blocks
code_blocks = [
block
for block in result.content
if isinstance(block, dict)
and block.get("type")
in [
"code_interpreter_call",
"code_interpreter_output",
"code_interpreter_result",
]
]
# TODO: should we require all three types or just an output/result?
assert len(code_blocks) > 0
def test_tool_calling_with_content_blocks(self, model: BaseChatModel) -> None:
"""Test tool calling with content blocks.
TODO: expand docstring
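A passing run produces an ``AIMessage`` whose ``tool_calls`` include a call to
the bound tool. A tool call block has this general shape (the ``id`` and
``args`` values shown here are only illustrative):
.. code-block:: python
    {
        "type": "tool_call",
        "name": "calculate_area",
        "args": {"length": 5, "width": 3},
        "id": "call_123",  # provider-generated identifier
    }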
"""
if not self.has_tool_calling:
pytest.skip("Model does not support tool calls.")
@tool
def calculate_area(length: float, width: float) -> str:
"""Calculate the area of a rectangle."""
area = length * width
return f"The area is {area} square units."
model_with_tools = model.bind_tools([calculate_area])
message = HumanMessage(
"Calculate the area of a rectangle with length 5 and width 3."
)
result = model_with_tools.invoke([message])
_validate_tool_call_message(result)
def test_plaintext_content_blocks_from_documents(
self, model: BaseChatModel
) -> None:
"""Test PlainTextContentBlock for document plaintext content.
TODO: expand docstring
"""
if not self.supports_plaintext_content_blocks:
pytest.skip("Model does not support PlainTextContentBlock.")
# Test with PlainTextContentBlock (plaintext from document)
plaintext_block = create_plaintext_block(
text="This is plaintext content extracted from a document.",
file_id="doc_123",
)
message = HumanMessage(
content=cast("list[types.ContentBlock]", [plaintext_block])
)
result = model.invoke([message])
assert isinstance(result, AIMessage)
# TODO expand
def test_content_block_streaming_integration(self, model: BaseChatModel) -> None:
"""Test streaming with content blocks.
TODO: expand docstring
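Chunks are aggregated with the ``+`` operator, as in the body of this test. A
minimal sketch of the pattern an integration is expected to support:
.. code-block:: python
    full = None
    for chunk in model.stream("Write a haiku about the sea."):
        full = chunk if full is None else full + chunk
    assert isinstance(full.content, list)  # aggregated content blocks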
"""
if not self.supports_content_blocks_v1:
pytest.skip("Model does not support content blocks v1.")
message = HumanMessage(
content=[
{
"type": "text",
"text": "Write a detailed explanation of machine learning.",
}
]
)
chunks = []
for chunk in model.stream([message]):
chunks.append(chunk)
assert isinstance(chunk, (AIMessage, AIMessageChunk))
assert len(chunks) > 1 # Should receive multiple chunks
# Aggregate chunks
final_message = chunks[0]
for chunk in chunks[1:]:
final_message = final_message + chunk
assert isinstance(final_message.content, list)
def test_error_handling_with_invalid_content_blocks(
self, model: BaseChatModel
) -> None:
"""Test error handling with various invalid content block configurations.
TODO: expand docstring
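The malformed blocks exercised below are intentionally missing required fields;
an integration may either tolerate them or raise a clear validation error
(``ValueError``, ``TypeError``, or ``KeyError``). For example:
.. code-block:: python
    {"type": "text"}  # missing the "text" field
    {"type": "tool_call", "name": "test"}  # missing "args" and "id"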
"""
if not self.supports_content_blocks_v1:
pytest.skip("Model does not support content blocks v1.")
test_cases = [
{"type": "text"}, # Missing text field
{"type": "image"}, # Missing url/mime_type
{"type": "tool_call", "name": "test"}, # Missing args/id
]
for invalid_block in test_cases:
message = HumanMessage([invalid_block]) # type: ignore[list-item]
# Should either handle gracefully or raise appropriate error
try:
result = model.invoke([message])
assert isinstance(result, AIMessage)
except (ValueError, TypeError, KeyError) as e:
# Acceptable to raise validation errors
assert len(str(e)) > 0
async def test_async_content_blocks_processing(self, model: BaseChatModel) -> None:
"""Test asynchronous processing of content blocks.
TODO: expand docstring
"""
if not self.supports_content_blocks_v1:
pytest.skip("Model does not support content blocks v1.")
message = HumanMessage("Generate a creative story about space exploration.")
result = await model.ainvoke([message])
assert isinstance(result, AIMessage)
def test_input_conversion_string(self, model: BaseChatModel) -> None:
"""Test that string input is properly converted to messages.
TODO: expand docstring
"""
result = model.invoke("Test string input")
assert isinstance(result, AIMessage)
assert result.content is not None
def test_input_conversion_empty_string(self, model: BaseChatModel) -> None:
"""Test that empty string input is handled gracefully.
TODO: expand docstring
"""
result = model.invoke("")
assert isinstance(result, AIMessage)
def test_input_conversion_message_v1_list(self, model: BaseChatModel) -> None:
"""Test that v1 message list input is handled correctly.
TODO: expand docstring
"""
messages = [HumanMessage("Test message")]
result = model.invoke(messages)
assert isinstance(result, AIMessage)
assert result.content is not None
def test_text_content_blocks_basic(self, model: BaseChatModel) -> None:
"""Test that the model can handle the ``TextContentBlock`` format."""
if not self.supports_text_content_blocks:
pytest.skip("Model does not support TextContentBlock (rare!)")
text_block = create_text_block("Hello, world!")
message = HumanMessage(content=[text_block])
result = model.invoke([message])
assert isinstance(result, AIMessage)
assert result.content is not None
def test_mixed_content_blocks_basic(self, model: BaseChatModel) -> None:
"""Test that the model can handle messages with mixed content blocks."""
if not (
self.supports_text_content_blocks and self.supports_image_content_blocks
):
pytest.skip(
"Model doesn't support mixed content blocks (concurrent text and image)"
)
content_blocks: list[types.ContentBlock] = [
create_text_block("Describe this image:"),
create_image_block(
base64="iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==",
mime_type="image/png",
),
]
message = HumanMessage(content=content_blocks)
result = model.invoke([message])
assert isinstance(result, AIMessage)
assert result.content is not None
def test_reasoning_content_blocks_basic(self, model: BaseChatModel) -> None:
"""Test that the model can generate ``ReasoningContentBlock``.
If your integration requires a reasoning parameter to be explicitly set, you
will need to override this test to set it appropriately.
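For example, assuming the standard ``chat_model_class`` / ``chat_model_params``
properties and an illustrative ``reasoning_effort`` constructor parameter (use
whatever your provider actually expects):
.. code-block:: python
    def test_reasoning_content_blocks_basic(self, model: BaseChatModel) -> None:
        reasoning_model = self.chat_model_class(
            **self.chat_model_params,
            reasoning_effort="low",  # illustrative parameter name
        )
        super().test_reasoning_content_blocks_basic(reasoning_model)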
"""
if not self.supports_reasoning_content_blocks:
pytest.skip("Model does not support ReasoningContentBlock.")
message = HumanMessage("Think step by step: What is 2 + 2?")
result = model.invoke([message])
assert isinstance(result, AIMessage)
if isinstance(result.content, list):
reasoning_blocks = [
block
for block in result.content
if isinstance(block, dict) and is_reasoning_block(block)
]
assert len(reasoning_blocks) > 0, (
"Expected reasoning content blocks but found none. "
f"Content blocks: {[block.get('type') for block in result.content]}"
)
def test_non_standard_content_blocks_basic(self, model: BaseChatModel) -> None:
"""Test that the model can handle ``NonStandardContentBlock``."""
if not self.supports_non_standard_blocks:
pytest.skip("Model does not support NonStandardContentBlock.")
non_standard_block = create_non_standard_block(
{
"custom_field": "custom_value",
"data": [1, 2, 3],
}
)
message = HumanMessage(content=[non_standard_block])
# Should not raise an error
result = model.invoke([message])
assert isinstance(result, AIMessage)
def test_invalid_tool_call_handling_basic(self, model: BaseChatModel) -> None:
"""Test that the model can handle ``InvalidToolCall`` blocks gracefully."""
if not self.supports_invalid_tool_calls:
pytest.skip("Model does not support InvalidToolCall handling.")
invalid_tool_call: InvalidToolCall = {
"type": "invalid_tool_call",
"name": "nonexistent_tool",
"args": None,
"id": "invalid_123",
"error": "Tool not found",
}
# Create a message with invalid tool call in history
ai_message = AIMessage(content=[invalid_tool_call])
follow_up = HumanMessage("Please try again with a valid approach.")
result = model.invoke([ai_message, follow_up])
assert isinstance(result, AIMessage)
assert result.content is not None
def test_file_content_blocks_basic(self, model: BaseChatModel) -> None:
"""Test that the model can handle ``FileContentBlock``."""
if not self.supports_file_content_blocks:
pytest.skip("Model does not support FileContentBlock.")
file_block = create_file_block(
base64="SGVsbG8sIHdvcmxkIQ==", # "Hello, world!"
mime_type="text/plain",
)
message = HumanMessage(content=[file_block])
result = model.invoke([message])
assert isinstance(result, AIMessage)
assert result.content is not None