diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py index ef64ec15a16..e09bd204f3a 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py +++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models_v1.py @@ -1,14 +1,20 @@ """Integration tests for v1 chat models. This module provides comprehensive integration tests for the new messages and standard -content block system introduced in ``langchain_core.messages.content_blocks``. +content block system introduced in ``langchain_core.v1.messages`` and +``langchain_core.messages.content_blocks``. """ -from typing import Any, Union, cast +import base64 +import json +from typing import Annotated, Any, Literal, Optional, TypedDict, Union, cast +from unittest.mock import MagicMock +import httpx import langchain_core.messages.content_blocks as types import pytest from langchain_core.callbacks import BaseCallbackHandler +from langchain_core.language_models.fake_chat_models import GenericFakeChatModel from langchain_core.messages.base import BaseMessage from langchain_core.messages.content_blocks import ( AudioContentBlock, @@ -29,19 +35,41 @@ from langchain_core.messages.content_blocks import ( WebSearchCall, WebSearchResult, create_audio_block, + create_file_block, create_image_block, + create_non_standard_block, create_plaintext_block, create_text_block, + create_tool_call, create_video_block, is_reasoning_block, is_text_block, is_tool_call_block, ) +from langchain_core.output_parsers.string import StrOutputParser +from langchain_core.prompts.chat import ChatPromptTemplate from langchain_core.tools import tool +from langchain_core.tools.base import BaseTool +from langchain_core.utils.function_calling import ( + convert_to_json_schema, + tool_example_to_messages, +) from langchain_core.v1.chat_models import BaseChatModel -from langchain_core.v1.messages import AIMessage, AIMessageChunk, HumanMessage +from langchain_core.v1.messages import ( + AIMessage, + AIMessageChunk, + HumanMessage, + SystemMessage, + ToolMessage, +) +from pydantic import BaseModel, Field +from pydantic.v1 import BaseModel as BaseModelV1 +from pydantic.v1 import Field as FieldV1 +from pytest_benchmark.fixture import BenchmarkFixture +from vcr.cassette import Cassette from langchain_tests.unit_tests.chat_models_v1 import ChatModelV1Tests +from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION # Content block type definitions for testing ContentBlock = Union[ @@ -85,11 +113,79 @@ def _get_test_video_base64() -> str: return "PLACEHOLDER_VIDEO_DATA" +def _get_joke_class( + schema_type: Literal["pydantic", "typeddict", "json_schema"], +) -> Any: + """:private:""" + + class Joke(BaseModel): + """Joke to tell user.""" + + setup: str = Field(description="question to set up a joke") + punchline: str = Field(description="answer to resolve the joke") + + def validate_joke(result: Any) -> bool: + return isinstance(result, Joke) + + class JokeDict(TypedDict): + """Joke to tell user.""" + + setup: Annotated[str, ..., "question to set up a joke"] + punchline: Annotated[str, ..., "answer to resolve the joke"] + + def validate_joke_dict(result: Any) -> bool: + return all(key in ["setup", "punchline"] for key in result) + + if schema_type == "pydantic": + return Joke, validate_joke + + if schema_type == "typeddict": + return JokeDict, validate_joke_dict + + if schema_type == "json_schema": + return Joke.model_json_schema(), 
validate_joke_dict + msg = "Invalid schema type" + raise ValueError(msg) + + +class _TestCallbackHandler(BaseCallbackHandler): + options: list[Optional[dict]] + + def __init__(self) -> None: + super().__init__() + self.options = [] + + def on_chat_model_start( + self, + serialized: Any, + messages: Any, + *, + options: Optional[dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + self.options.append(options) + + +class _MagicFunctionSchema(BaseModel): + input: int = Field(..., gt=-1000, lt=1000) + + +@tool(args_schema=_MagicFunctionSchema) +def magic_function(_input: int) -> int: + """Applies a magic function to an input.""" + return _input + 2 + + +@tool +def magic_function_no_args() -> int: + """Calculates a magic function.""" + return 5 + + def _validate_tool_call_message(message: AIMessage) -> None: """Validate that a message contains tool calls in content blocks format.""" if isinstance(message.content, list): - # Check for tool calls in content blocks tool_call_blocks = [ block for block in message.content @@ -97,67 +193,2203 @@ def _validate_tool_call_message(message: AIMessage) -> None: ] assert len(tool_call_blocks) >= 1 - tool_call = tool_call_blocks[0] - assert "name" in tool_call - assert "args" in tool_call - assert "id" in tool_call - # TODO: review if this is necessary - # else: - # # Fallback to legacy tool_calls attribute - # assert hasattr(message, "tool_calls") - # assert len(message.tool_calls) >= 1 + for tool_call in tool_call_blocks: + # Ensure each tool call has the required fields + assert "name" in tool_call + assert "args" in tool_call + assert "id" in tool_call + # (No fallback, since the tools attribute makes the same search as the list + # comprehension above) -def _validate_multimodal_content_blocks( - message: BaseMessage, expected_types: list[str] -) -> None: - """Validate that a message contains expected content block types.""" - assert isinstance(message, (HumanMessage, AIMessage)) - assert isinstance(message.content, list) +def _validate_tool_call_message_no_args(message: AIMessage) -> None: + """Validate that a message contains a single tool call with no arguments. - found_types = [] - for block in message.content: - if isinstance(block, dict) and "type" in block: - found_types.append(block["type"]) + Used for testing tool calls without arguments, such as + ``magic_function_no_args``. + """ + assert isinstance(message, AIMessage) + assert len(message.tool_calls) == 1 + tool_call = message.tool_calls[0] + assert tool_call["name"] == "magic_function_no_args" + assert tool_call["args"] == {} + assert tool_call["id"] is not None - for type_ in expected_types: - assert type_ in found_types, f"Expected content block type '{type_}' not found" + +@tool +def unicode_customer(customer_name: str, description: str) -> str: + """Tool for creating a customer with a name containing Unicode characters. + + Args: + customer_name: The customer's name in their native language. + description: Description of the customer. + + Returns: + A confirmation message about the customer creation. + """ + return f"Created customer: {customer_name} - {description}" class ChatModelV1IntegrationTests(ChatModelV1Tests): - """Integration tests for v1 chat models with standard content blocks support. + """Base class for v1 chat model integration tests. - Inherits from ``ChatModelV1Tests`` to provide comprehensive testing of content - block functionality with real external services. - """ + TODO: verify this entire docstring! 
+ + Test subclasses must implement the ``chat_model_class`` and + ``chat_model_params`` properties to specify what model to test and its + initialization parameters. + + Example: + + .. code-block:: python + + from typing import Type + + from langchain_tests.integration_tests import ChatModelV1IntegrationTests + from my_package.chat_models import MyChatModel + + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def chat_model_class(self) -> Type[MyV1ChatModel]: + # Return the chat model class to test here + return MyChatModel + + @property + def chat_model_params(self) -> dict: + # Return initialization parameters for the v1 model. + return {"model": "model-001", "temperature": 0} + + .. note:: + API references for individual test methods include troubleshooting tips. + + + Test subclasses **must** implement the following two properties: + + chat_model_class + The chat model class to test, e.g., ``ChatParrotLinkV1``. + + Example: + + .. code-block:: python + + @property + def chat_model_class(self) -> Type[ChatParrotLinkV1]: + return ChatParrotLinkV1 + + chat_model_params + Initialization parameters for the chat model. + + Example: + + .. code-block:: python + + @property + def chat_model_params(self) -> dict: + return {"model": "bird-brain-001", "temperature": 0} + + In addition, test subclasses can control what features are tested (such as tool + calling or multi-modality) by selectively overriding the following properties. + Expand to see details: + + .. dropdown:: has_tool_calling + + TODO + + .. dropdown:: tool_choice_value + + TODO + + .. dropdown:: has_tool_choice + + TODO + + .. dropdown:: has_structured_output + + TODO + + .. dropdown:: structured_output_kwargs + + TODO + + .. dropdown:: supports_json_mode + + TODO + + .. dropdown:: returns_usage_metadata + + TODO + + .. dropdown:: supports_anthropic_inputs + + TODO + + .. dropdown:: supports_image_tool_message + + TODO + + .. dropdown:: supported_usage_metadata_details + + TODO + + .. dropdown:: enable_vcr_tests + + Property controlling whether to enable select tests that rely on + `VCR `_ caching of HTTP calls, such + as benchmarking tests. + + To enable these tests, follow these steps: + + 1. Override the ``enable_vcr_tests`` property to return ``True``: + + .. code-block:: python + + @property + def enable_vcr_tests(self) -> bool: + return True + + 2. Configure VCR to exclude sensitive headers and other information from cassettes. + + .. important:: + VCR will by default record authentication headers and other sensitive + information in cassettes. Read below for how to configure what + information is recorded in cassettes. + + To add configuration to VCR, add a ``conftest.py`` file to the ``tests/`` + directory and implement the ``vcr_config`` fixture there. + + ``langchain-tests`` excludes the headers ``'authorization'``, + ``'x-api-key'``, and ``'api-key'`` from VCR cassettes. To pick up this + configuration, you will need to add ``conftest.py`` as shown below. You can + also exclude additional headers, override the default exclusions, or apply + other customizations to the VCR configuration. See example below: + + .. code-block:: python + :caption: tests/conftest.py + + import pytest + from langchain_tests.conftest import _base_vcr_config as _base_vcr_config + + _EXTRA_HEADERS = [ + # Specify additional headers to redact + ("user-agent", "PLACEHOLDER"), + ] + + + def remove_response_headers(response: dict) -> dict: + # If desired, remove or modify headers in the response. 
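+                        # (Assumption: clearing every header is acceptable for your
+                        # cassettes; retain any headers your own tests rely on.)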
+ response["headers"] = {} + return response + + + @pytest.fixture(scope="session") + def vcr_config(_base_vcr_config: dict) -> dict: # noqa: F811 + \"\"\"Extend the default configuration from langchain_tests.\"\"\" + config = _base_vcr_config.copy() + config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS) + config["before_record_response"] = remove_response_headers + + return config + + .. dropdown:: Compressing cassettes + + ``langchain-tests`` includes a custom VCR serializer that compresses + cassettes using gzip. To use it, register the ``yaml.gz`` serializer + to your VCR fixture and enable this serializer in the config. See + example below: + + .. code-block:: python + :caption: tests/conftest.py + + import pytest + from langchain_tests.conftest import CustomPersister, CustomSerializer + from langchain_tests.conftest import _base_vcr_config as _base_vcr_config + from vcr import VCR + + _EXTRA_HEADERS = [ + # Specify additional headers to redact + ("user-agent", "PLACEHOLDER"), + ] + + + def remove_response_headers(response: dict) -> dict: + # If desired, remove or modify headers in the response. + response["headers"] = {} + return response + + + @pytest.fixture(scope="session") + def vcr_config(_base_vcr_config: dict) -> dict: # noqa: F811 + \"\"\"Extend the default configuration from langchain_tests.\"\"\" + config = _base_vcr_config.copy() + config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS) + config["before_record_response"] = remove_response_headers + # New: enable serializer and set file extension + config["serializer"] = "yaml.gz" + config["path_transformer"] = VCR.ensure_suffix(".yaml.gz") + + return config + + + def pytest_recording_configure(config: dict, vcr: VCR) -> None: + vcr.register_persister(CustomPersister()) + vcr.register_serializer("yaml.gz", CustomSerializer()) + + + You can inspect the contents of the compressed cassettes (e.g., to + ensure no sensitive information is recorded) using + + .. code-block:: bash + + gunzip -k /path/to/tests/cassettes/TestClass_test.yaml.gz + + or by using the serializer: + + .. code-block:: python + + from langchain_tests.conftest import CustomPersister, CustomSerializer + + cassette_path = "/path/to/tests/cassettes/TestClass_test.yaml.gz" + requests, responses = CustomPersister().load_cassette(path, CustomSerializer()) + + 3. Run tests to generate VCR cassettes. + + Example: + + .. code-block:: bash + + uv run python -m pytest tests/integration_tests/test_chat_models.py::TestMyModel::test_stream_time + + This will generate a VCR cassette for the test in + ``tests/integration_tests/cassettes/``. + + .. important:: + You should inspect the generated cassette to ensure that it does not + contain sensitive information. If it does, you can modify the + ``vcr_config`` fixture to exclude headers or modify the response + before it is recorded. + + You can then commit the cassette to your repository. Subsequent test runs + will use the cassette instead of making HTTP calls. + + """ # noqa: E501 - # Additional multimodal support properties for integration testing @property - def supports_multimodal_reasoning(self) -> bool: - """Whether the model can reason about multimodal content.""" - return ( - self.supports_image_content_blocks - and self.supports_reasoning_content_blocks + def standard_chat_model_params(self) -> dict: + """:private:""" + return {} + + def test_invoke(self, model: BaseChatModel) -> None: + """Test to verify that ``model.invoke(simple_message)`` works. 
+ + A model should be able to produce a non-empty ``AIMessage`` in response to + ``"Hello"``. The message should at least contain a ``TextContentBlock`` with + text populated. + + .. important:: + This should pass for all integrations! + + .. dropdown:: Troubleshooting + + TODO + + """ + result = model.invoke("Hello") + assert result is not None + assert isinstance(result, AIMessage) + assert isinstance(result.text, str) + assert len(result.content) > 0 + + text_contentblock = result.content[0] + assert is_text_block(text_contentblock) + + async def test_ainvoke(self, model: BaseChatModel) -> None: + """Test to verify that ``await model.ainvoke(simple_message)`` works. + + A model should be able to produce a non-empty ``AIMessage`` in response to + ``"Hello"``. The message should at least contain a ``TextContentBlock`` with + text populated. + + .. important:: + This should pass for all integrations! + + Passing this test does not indicate a "natively async" implementation, but + rather that the model can be used in an async context. + + .. dropdown:: Troubleshooting + + First, debug + :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke`. + because ``ainvoke`` has a default implementation that calls ``invoke`` in an + async context. + + """ + result = await model.ainvoke("Hello") + assert result is not None + assert isinstance(result, AIMessage) + assert isinstance(result.text, str) + assert len(result.content) > 0 + + text_contentblock = result.content[0] + assert is_text_block(text_contentblock) + + def test_stream(self, model: BaseChatModel) -> None: + """Test to verify that ``model.stream(simple_message)`` works. + + .. important:: + This should pass for all integrations! + + Passing this test does not indicate a "streaming" implementation, but rather + that the model can be used in a streaming context. For instance, a model + that yields at least one chunk in response to ``"Hello"``. + + .. dropdown:: Troubleshooting + + First, debug + :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke`. + because ``stream`` has a default implementation that calls ``invoke`` and + yields the result as a single chunk. + + """ + num_chunks = 0 + for chunk in model.stream("Hello"): + assert chunk is not None + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, list) + num_chunks += 1 + assert num_chunks > 0 + + async def test_astream(self, model: BaseChatModel) -> None: + """Test to verify that ``await model.astream(simple_message)`` works. + + .. important:: + This should pass for all integrations! + + Passing this test does not indicate a "natively async" or "streaming" + implementation, but rather that the model can be used in an async streaming + context. + + .. dropdown:: Troubleshooting + + First, debug + :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_stream`. + and + :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_ainvoke`. + because ``astream`` has a default implementation that calls ``_stream`` in + an async context if it is implemented, or ``ainvoke`` and yields the result + as a single ``AIMessageChunk`` chunk if not. 
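+
+        .. dropdown:: Example
+
+            A minimal sketch of consuming the async stream. (The aggregation via
+            ``+`` is illustrative; this test itself only counts chunks.)
+
+            .. code-block:: python
+
+                full = None
+                async for chunk in model.astream("Hello"):
+                    # AIMessageChunk supports ``+`` for incremental aggregation
+                    full = chunk if full is None else full + chunk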
+ + """ + num_chunks = 0 + async for chunk in model.astream("Hello"): + assert chunk is not None + assert isinstance(chunk, AIMessageChunk) + assert isinstance(chunk.content, list) + num_chunks += 1 + assert num_chunks > 0 + + def test_batch(self, model: BaseChatModel) -> None: + """Test to verify that ``model.batch([messages])`` works. + + .. important:: + This should pass for all integrations! + + Tests the model's ability to process multiple prompts in a single batch. We + expect that the ``TextContentBlock`` of each response is populated with text. + + Passing this test does not indicate a "natively batching" or "batching" + implementation, but rather that the model can be used in a batching context. For + instance, your model may internally call ``invoke`` for each message in the + batch, even if the model provider does not support batching natively. + + .. dropdown:: Troubleshooting + + First, debug + :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke` + because ``batch`` has a default implementation that calls ``invoke`` for + each message in the batch. + + If that test passes but not this one, you should make sure your ``batch`` + method does not raise any exceptions, and that it returns a list of valid + :class:`~langchain_core.v1.messages.AIMessage` objects. + + """ + batch_results = model.batch(["Hello", "Hey"]) + assert batch_results is not None + assert isinstance(batch_results, list) + assert len(batch_results) == 2 + for result in batch_results: + assert result is not None + assert isinstance(result, AIMessage) + assert len(result.content) > 0 + assert isinstance(result.text, str) + assert len(result.text) > 0 + + async def test_abatch(self, model: BaseChatModel) -> None: + """Test to verify that ``await model.abatch([messages])`` works. + + .. important:: + This should pass for all integrations! + + Tests the model's ability to process multiple prompts in a single batch + asynchronously. We expect that the ``TextContentBlock`` of each response is + populated with text. + + Passing this test does not indicate a "natively batching" or "batching" + implementation, but rather that the model can be used in a batching context. For + instance, your model may internally call ``ainvoke`` for each message in the + batch, even if the model provider does not support batching natively. + + .. dropdown:: Troubleshooting + + First, debug + :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_batch` + and + :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_ainvoke` + because ``abatch`` has a default implementation that calls ``ainvoke`` for + each message in the batch. + + If those tests pass but not this one, you should make sure your ``abatch`` + method does not raise any exceptions, and that it returns a list of valid + :class:`~langchain_core.v1.messages.AIMessage` objects. + + """ + batch_results = await model.abatch(["Hello", "Hey"]) + assert batch_results is not None + assert isinstance(batch_results, list) + assert len(batch_results) == 2 + for result in batch_results: + assert result is not None + assert isinstance(result, AIMessage) + assert len(result.content) > 0 + assert isinstance(result.text, str) + assert len(result.text) > 0 + + def test_conversation(self, model: BaseChatModel) -> None: + """Test to verify that the model can handle multi-turn conversations. + + .. important:: + This should pass for all integrations! 
+ + Tests the model's ability to process a sequence of alternating human and AI + messages as context for generating the next response. We expect that the + ``TextContentBlock`` of each response is populated with text. + + .. dropdown:: Troubleshooting + + First, debug + :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke` + because this test also uses ``model.invoke()``. + + If that test passes but not this one, you should verify that: + 1. Your model correctly processes the message history + 2. The model maintains appropriate context from previous messages + 3. The response is a valid :class:`~langchain_core.v1.messages.AIMessage` + + """ + messages = [ + HumanMessage("hello"), + AIMessage("hello"), + HumanMessage("how are you"), + ] + result = model.invoke(messages) + assert result is not None + assert isinstance(result, AIMessage) + assert len(result.content) > 0 + assert isinstance(result.text, str) + assert len(result.text) > 0 + + def test_double_messages_conversation(self, model: BaseChatModel) -> None: + """Test to verify that the model can handle double-message conversations. + + .. important:: + This should pass for all integrations! + + Tests the model's ability to process a sequence of double-system, double-human, + and double-ai messages as context for generating the next response. We expect + that the ``TextContentBlock`` of each response is populated with text. + + .. dropdown:: Troubleshooting + + First, debug + :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_invoke` + because this test also uses ``model.invoke()``. + + Second, debug + :meth:`~langchain_tests.integration_tests.chat_models_v1.ChatModelV1IntegrationTests.test_conversation` + because this test is the "basic case" without double messages. + + If that test passes those but not this one, you should verify that: + 1. Your model API can handle double messages, or the integration should merge messages before sending them to the API. + 2. The response is a valid :class:`~langchain_core.v1.messages.AIMessage` + + """ # noqa: E501 + messages = [ + SystemMessage("hello"), + SystemMessage("hello"), + HumanMessage("hello"), + HumanMessage("hello"), + AIMessage("hello"), + AIMessage("hello"), + HumanMessage("how are you"), + ] + result = model.invoke(messages) + assert result is not None + assert isinstance(result, AIMessage) + assert len(result.content) > 0 + assert isinstance(result.text, str) + assert len(result.text) > 0 + + def test_usage_metadata(self, model: BaseChatModel) -> None: + """Test to verify that the model returns correct usage metadata. + + This test is optional and should be skipped if the model does not return + usage metadata (see Configuration below). + + .. versionchanged:: 0.3.17 + + Additionally check for the presence of ``model_name`` in the response + metadata, which is needed for usage tracking in callback handlers. + + .. dropdown:: Configuration + + By default, this test is run. + + To disable this feature, set the ``returns_usage_metadata`` property to + ``False`` in your test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def returns_usage_metadata(self) -> bool: + return False + + This test can also check the format of specific kinds of usage metadata + based on the ``supported_usage_metadata_details`` property. 
This property + should be configured as follows with the types of tokens that the model + supports tracking: + + TODO: check this! + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def supported_usage_metadata_details(self) -> dict: + return { + "invoke": [ + "audio_input", + "audio_output", + "reasoning_output", + "cache_read_input", + "cache_creation_input", + ], + "stream": [ + "audio_input", + "audio_output", + "reasoning_output", + "cache_read_input", + "cache_creation_input", + ], + } + + + .. dropdown:: Troubleshooting + + TODO + + """ + if not self.returns_usage_metadata: + pytest.skip("Not implemented.") + + result = model.invoke("Hello") + assert result is not None + assert isinstance(result, AIMessage) + + assert result.usage_metadata is not None + assert isinstance(result.usage_metadata["input_tokens"], int) + assert isinstance(result.usage_metadata["output_tokens"], int) + assert isinstance(result.usage_metadata["total_tokens"], int) + + # Check model_name is in response_metadata + # (Needed for langchain_core.callbacks.usage) + model_name = result.response_metadata.get("model_name") + assert isinstance(model_name, str) + assert model_name != "", "model_name is empty" + + # TODO: check these + # `input_tokens` is the total, possibly including other unclassified or + # system-level tokens. + if "audio_input" in self.supported_usage_metadata_details["invoke"]: + # Checks if the specific chat model integration being tested has declared + # that it supports reporting token counts specifically for `audio_input` + msg = self.invoke_with_audio_input() # To be implemented in test subclass + assert (usage_metadata := msg.usage_metadata) is not None + assert ( + input_token_details := usage_metadata.get("input_token_details") + ) is not None + assert isinstance(input_token_details.get("audio"), int) + # Asserts that total input tokens are at least the sum of the token counts + total_detailed_tokens = sum( + v for v in input_token_details.values() if isinstance(v, int) + ) + assert usage_metadata.get("input_tokens", 0) >= total_detailed_tokens + if "audio_output" in self.supported_usage_metadata_details["invoke"]: + msg = self.invoke_with_audio_output() + assert (usage_metadata := msg.usage_metadata) is not None + assert ( + output_token_details := usage_metadata.get("output_token_details") + ) is not None + assert isinstance(output_token_details.get("audio"), int) + # Asserts that total output tokens are at least the sum of the token counts + total_detailed_tokens = sum( + v for v in output_token_details.values() if isinstance(v, int) + ) + assert usage_metadata.get("output_tokens", 0) >= total_detailed_tokens + if "reasoning_output" in self.supported_usage_metadata_details["invoke"]: + msg = self.invoke_with_reasoning_output() + assert (usage_metadata := msg.usage_metadata) is not None + assert ( + output_token_details := usage_metadata.get("output_token_details") + ) is not None + assert isinstance(output_token_details.get("reasoning"), int) + # Asserts that total output tokens are at least the sum of the token counts + total_detailed_tokens = sum( + v for v in output_token_details.values() if isinstance(v, int) + ) + assert usage_metadata.get("output_tokens", 0) >= total_detailed_tokens + if "cache_read_input" in self.supported_usage_metadata_details["invoke"]: + msg = self.invoke_with_cache_read_input() + assert (usage_metadata := msg.usage_metadata) is not None + assert ( + input_token_details := 
usage_metadata.get("input_token_details") + ) is not None + assert isinstance(input_token_details.get("cache_read"), int) + # Asserts that total input tokens are at least the sum of the token counts + total_detailed_tokens = sum( + v for v in input_token_details.values() if isinstance(v, int) + ) + assert usage_metadata.get("input_tokens", 0) >= total_detailed_tokens + if "cache_creation_input" in self.supported_usage_metadata_details["invoke"]: + msg = self.invoke_with_cache_creation_input() + assert (usage_metadata := msg.usage_metadata) is not None + assert ( + input_token_details := usage_metadata.get("input_token_details") + ) is not None + assert isinstance(input_token_details.get("cache_creation"), int) + # Asserts that total input tokens are at least the sum of the token counts + total_detailed_tokens = sum( + v for v in input_token_details.values() if isinstance(v, int) + ) + assert usage_metadata.get("input_tokens", 0) >= total_detailed_tokens + + def test_usage_metadata_streaming(self, model: BaseChatModel) -> None: + """Test usage metadata in streaming mode. + + Test to verify that the model returns correct usage metadata in streaming mode. + + .. versionchanged:: 0.3.17 + + Additionally check for the presence of ``model_name`` in the response + metadata, which is needed for usage tracking in callback handlers. + + .. dropdown:: Configuration + + By default, this test is run. + To disable this feature, set ``returns_usage_metadata`` to ``False`` in your + test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def returns_usage_metadata(self) -> bool: + return False + + This test can also check the format of specific kinds of usage metadata + based on the ``supported_usage_metadata_details`` property. This property + should be configured as follows with the types of tokens that the model + supports tracking: + + TODO: check this! + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def supported_usage_metadata_details(self) -> dict: + return { + "invoke": [ + "audio_input", + "audio_output", + "reasoning_output", + "cache_read_input", + "cache_creation_input", + ], + "stream": [ + "audio_input", + "audio_output", + "reasoning_output", + "cache_read_input", + "cache_creation_input", + ], + } + + .. dropdown:: Troubleshooting + + TODO + + """ + if not self.returns_usage_metadata: + pytest.skip("Not implemented.") + + full: Optional[AIMessageChunk] = None + for chunk in model.stream("Write me 2 haikus. 
Only include the haikus."): + assert isinstance(chunk, AIMessageChunk) + # Only one chunk is allowed to set usage_metadata.input_tokens + # if multiple do, it's likely a bug that will result in overcounting + # input tokens (since the total number of input tokens applies to the full + # generation, not individual chunks) + if full and full.usage_metadata and full.usage_metadata["input_tokens"]: + assert ( + not chunk.usage_metadata or not chunk.usage_metadata["input_tokens"] + ), ( + "Only one chunk should set input_tokens," + " the rest should be 0 or None" + ) + full = chunk if full is None else cast("AIMessageChunk", full + chunk) + + assert isinstance(full, AIMessageChunk) + assert full.usage_metadata is not None + assert isinstance(full.usage_metadata["input_tokens"], int) + assert isinstance(full.usage_metadata["output_tokens"], int) + assert isinstance(full.usage_metadata["total_tokens"], int) + + # Check model_name is in response_metadata + # (Needed for langchain_core.callbacks.usage) + model_name = full.response_metadata.get("model_name") + assert isinstance(model_name, str) + assert model_name != "", "model_name is empty" + + # TODO: check these + if "audio_input" in self.supported_usage_metadata_details["stream"]: + msg = self.invoke_with_audio_input(stream=True) + assert msg.usage_metadata is not None + assert isinstance( + msg.usage_metadata.get("input_token_details", {}).get("audio"), int + ) + if "audio_output" in self.supported_usage_metadata_details["stream"]: + msg = self.invoke_with_audio_output(stream=True) + assert msg.usage_metadata is not None + assert isinstance( + msg.usage_metadata.get("output_token_details", {}).get("audio"), int + ) + if "reasoning_output" in self.supported_usage_metadata_details["stream"]: + msg = self.invoke_with_reasoning_output(stream=True) + assert msg.usage_metadata is not None + assert isinstance( + msg.usage_metadata.get("output_token_details", {}).get("reasoning"), int + ) + if "cache_read_input" in self.supported_usage_metadata_details["stream"]: + msg = self.invoke_with_cache_read_input(stream=True) + assert msg.usage_metadata is not None + assert isinstance( + msg.usage_metadata.get("input_token_details", {}).get("cache_read"), int + ) + if "cache_creation_input" in self.supported_usage_metadata_details["stream"]: + msg = self.invoke_with_cache_creation_input(stream=True) + assert msg.usage_metadata is not None + assert isinstance( + msg.usage_metadata.get("input_token_details", {}).get("cache_creation"), + int, + ) + + def test_stop_sequence(self, model: BaseChatModel) -> None: + """Test that model does not fail when invoked with the ``stop`` parameter, + which is a standard parameter for stopping generation at a certain token. + + `More on standard parameters `__ + + .. important:: + This should pass for all integrations! + + .. dropdown:: Troubleshooting + + TODO + + """ + result = model.invoke("hi", stop=["you"]) + assert isinstance(result, AIMessage) + + custom_model = self.chat_model_class( + **{ + **self.chat_model_params, + "stop": ["you"], + } + ) + result = custom_model.invoke("hi") + assert isinstance(result, AIMessage) + + def test_tool_calling(self, model: BaseChatModel) -> None: + """Test that the model generates tool calls. This test is skipped if the + ``has_tool_calling`` property on the test class is set to False. + + This test is optional and should be skipped if the model does not support + tool calling (see Configuration below). + + .. 
dropdown:: Configuration + + To disable tool calling tests, set ``has_tool_calling`` to False in your + test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def has_tool_calling(self) -> bool: + return False + + .. dropdown:: Troubleshooting + + If this test fails, check that ``bind_tools`` is implemented to correctly + translate LangChain tool objects into the appropriate schema for your + chat model. + + This test may fail if the chat model does not support a ``tool_choice`` + parameter. This parameter can be used to force a tool call. If + ``tool_choice`` is not supported and the model consistently fails this + test, you can ``xfail`` the test: + + .. code-block:: python + + @pytest.mark.xfail(reason=("Does not support tool_choice.")) + def test_tool_calling(self, model: BaseChatModelV1) -> None: + super().test_tool_calling(model) + + Otherwise, in the case that only one tool is bound, ensure that + ``tool_choice`` supports the string ``'any'`` to force calling that tool. + + """ + if not self.has_tool_calling: + pytest.skip("Test requires tool calling.") + + tool_choice_value = None if not self.has_tool_choice else "any" + # Emit warning if tool_choice_value property is overridden + + # TODO remove since deprecated? + # if inspect.getattr_static( + # self, "tool_choice_value" + # ) is not inspect.getattr_static( + # ChatModelV1IntegrationTests, "tool_choice_value" + # ): + # warn_deprecated( + # "0.3.15", + # message=( + # "`tool_choice_value` will be removed in version 0.3.20. If a " + # "model supports `tool_choice`, it should accept `tool_choice='any' " # noqa: E501 + # "and `tool_choice=`. If the model does not " + # "support `tool_choice`, override the `supports_tool_choice` " + # "property to return `False`." + # ), + # removal="0.3.20", + # ) + + model_with_tools = model.bind_tools( + [magic_function], tool_choice=tool_choice_value + ) + query = "What is the value of magic_function(3)? Use the tool." + result = model_with_tools.invoke(query) + _validate_tool_call_message(result) + + # Test stream() + full: Optional[AIMessageChunk] = None + for chunk in model_with_tools.stream(query): + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessage) + _validate_tool_call_message(full) + + async def test_tool_calling_async(self, model: BaseChatModel) -> None: + """Test that the model generates tool calls. This test is skipped if the + ``has_tool_calling`` property on the test class is set to False. + + This test is optional and should be skipped if the model does not support + tool calling (see Configuration below). + + .. dropdown:: Configuration + + To disable tool calling tests, set ``has_tool_calling`` to False in your + test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def has_tool_calling(self) -> bool: + return False + + .. dropdown:: Troubleshooting + + If this test fails, check that ``bind_tools`` is implemented to correctly + translate LangChain tool objects into the appropriate schema for your + chat model. + + This test may fail if the chat model does not support a ``tool_choice`` + parameter. This parameter can be used to force a tool call. If + ``tool_choice`` is not supported and the model consistently fails this + test, you can ``xfail`` the test: + + .. 
code-block:: python + + @pytest.mark.xfail(reason=("Does not support tool_choice.")) + async def test_tool_calling_async(self, model: BaseChatModelV1) -> None: + await super().test_tool_calling_async(model) + + Otherwise, in the case that only one tool is bound, ensure that + ``tool_choice`` supports the string ``'any'`` to force calling that tool. + + """ + if not self.has_tool_calling: + pytest.skip("Test requires tool calling.") + + tool_choice_value = None if not self.has_tool_choice else "any" + model_with_tools = model.bind_tools( + [magic_function], tool_choice=tool_choice_value + ) + query = "What is the value of magic_function(3)? Use the tool." + result = await model_with_tools.ainvoke(query) + _validate_tool_call_message(result) + + # Test astream() + full: Optional[AIMessageChunk] = None + async for chunk in model_with_tools.astream(query): + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessage) + _validate_tool_call_message(full) + + def test_bind_runnables_as_tools(self, model: BaseChatModel) -> None: + """Test that the model generates tool calls for tools that are derived from + LangChain runnables. This test is skipped if the ``has_tool_calling`` property + on the test class is set to False. + + This test is optional and should be skipped if the model does not support + tool calling (see Configuration below). + + .. dropdown:: Configuration + + To disable tool calling tests, set ``has_tool_calling`` to False in your + test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def has_tool_calling(self) -> bool: + return False + + .. dropdown:: Troubleshooting + + If this test fails, check that ``bind_tools`` is implemented to correctly + translate LangChain tool objects into the appropriate schema for your + chat model. + + This test may fail if the chat model does not support a ``tool_choice`` + parameter. This parameter can be used to force a tool call. If + ``tool_choice`` is not supported and the model consistently fails this + test, you can ``xfail`` the test: + + .. code-block:: python + + @pytest.mark.xfail(reason=("Does not support tool_choice.")) + def test_bind_runnables_as_tools(self, model: BaseChatModelV1) -> None: + super().test_bind_runnables_as_tools(model) + + Otherwise, ensure that the ``tool_choice_value`` property is correctly + specified on the test class. + + """ + if not self.has_tool_calling: + pytest.skip("Test requires tool calling.") + + prompt = ChatPromptTemplate.from_messages( + [("human", "Hello. Please respond in the style of {answer_style}.")] + ) + llm = GenericFakeChatModel(messages=iter(["hello matey"])) + chain = prompt | llm | StrOutputParser() + tool_ = chain.as_tool( + name="greeting_generator", + description="Generate a greeting in a particular style of speaking.", ) - @property - def supports_code_interpreter(self) -> bool: - """Whether the model supports code interpreter blocks.""" - return False + if self.has_tool_choice: + tool_choice: Optional[str] = "any" + else: + tool_choice = None - @property - def supports_structured_citations(self) -> bool: - """Whether the model supports structured citation generation.""" - return self.supports_citations + model_with_tools = model.bind_tools([tool_], tool_choice=tool_choice) + query = "Using the tool, generate a Pirate greeting." 
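+        # The query names a style ("Pirate"), so a forced tool call should
+        # populate the ``answer_style`` argument of the runnable-backed tool.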
+ result = model_with_tools.invoke(query) + assert isinstance(result, AIMessage) + assert result.tool_calls + tool_call = result.tool_calls[0] + assert tool_call["args"].get( + "answer_style" + ) # TODO: do we need to handle if args is str? # noqa: E501 + assert is_tool_call_block(tool_call) - @property - def requires_api_key(self) -> bool: - """Whether integration tests require an API key.""" - return True + def test_tool_message_histories_string_content( + self, model: BaseChatModel, my_adder_tool: BaseTool + ) -> None: + """Test that message histories are compatible with string tool contents + (e.g. OpenAI format). If a model passes this test, it should be compatible + with messages generated from providers following OpenAI format. + + This test should be skipped if the model does not support tool calling + (see Configuration below). + + .. dropdown:: Configuration + + To disable tool calling tests, set ``has_tool_calling`` to False in your + test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def has_tool_calling(self) -> bool: + return False + + .. dropdown:: Troubleshooting + + TODO: verify this! + + If this test fails, check that: + + 1. The model can correctly handle message histories that include ``AIMessage`` objects with ``""`` ``TextContentBlock``s. + 2. The ``tool_calls`` attribute on ``AIMessage`` objects is correctly handled and passed to the model in an appropriate format. + 3. The model can correctly handle ``ToolMessage`` objects with string content and arbitrary string values for ``tool_call_id``. + + You can ``xfail`` the test if tool calling is implemented but this format + is not supported. + + .. code-block:: python + + @pytest.mark.xfail(reason=("Not implemented.")) + def test_tool_message_histories_string_content(self, *args: Any) -> None: + super().test_tool_message_histories_string_content(*args) + + """ # noqa: E501 + if not self.has_tool_calling: + pytest.skip("Test requires tool calling.") + + model_with_tools = model.bind_tools([my_adder_tool]) + function_name = "my_adder_tool" + function_args = {"a": "1", "b": "2"} + + messages_string_content = [ + HumanMessage("What is 1 + 2"), + # String content (e.g. OpenAI) + create_tool_call(function_name, function_args, id="abc123"), + ToolMessage( + json.dumps({"result": 3}), tool_call_id="abc123", status="success" + ), + ] + result_string_content = model_with_tools.invoke(messages_string_content) # TODO + assert isinstance(result_string_content, AIMessage) + + def test_tool_message_histories_list_content( + self, + model: BaseChatModel, + my_adder_tool: BaseTool, + ) -> None: + """Test that message histories are compatible with list tool contents + (e.g. Anthropic format). + + These message histories will include AIMessage objects with "tool use" and + content blocks, e.g., + + .. code-block:: python + + [ + {"type": "text", "text": "Hmm let me think about that"}, + { + "type": "tool_use", + "input": {"fav_color": "green"}, + "id": "foo", + "name": "color_picker", + }, + ] + + This test should be skipped if the model does not support tool calling + (see Configuration below). + + .. dropdown:: Configuration + + To disable tool calling tests, set ``has_tool_calling`` to False in your + test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def has_tool_calling(self) -> bool: + return False + + .. dropdown:: Troubleshooting + + If this test fails, check that: + + 1. 
The model can correctly handle message histories that include ``AIMessage`` objects with list content. + 2. The ``tool_calls`` attribute on ``AIMessage`` objects is correctly handled and passed to the model in an appropriate format. + 3. The model can correctly handle ``ToolMessage`` objects with string content and arbitrary string values for ``tool_call_id``. + + You can ``xfail`` the test if tool calling is implemented but this format + is not supported. + + .. code-block:: python + + @pytest.mark.xfail(reason=("Not implemented.")) + def test_tool_message_histories_list_content(self, *args: Any) -> None: + super().test_tool_message_histories_list_content(*args) + + """ # noqa: E501 + pytest.fail("Test not implemented yet.") + + # TODO + # if not self.has_tool_calling: + # pytest.skip("Test requires tool calling.") + + # model_with_tools = model.bind_tools([my_adder_tool]) + # function_name = "my_adder_tool" + # function_args = {"a": 1, "b": 2} + + # messages_list_content = [ + # HumanMessage("What is 1 + 2"), + # # List content (e.g., Anthropic) + # AIMessage( + # [ + # {"type": "text", "text": "some text"}, + # { + # "type": "tool_use", + # "id": "abc123", + # "name": function_name, + # "input": function_args, + # }, + # ], + # tool_calls=[ + # { + # "name": function_name, + # "args": function_args, + # "id": "abc123", + # "type": "tool_call", + # }, + # ], + # ), + # ToolMessage( + # json.dumps({"result": 3}), + # name=function_name, + # tool_call_id="abc123", + # ), + # ] + # result_list_content = model_with_tools.invoke(messages_list_content) + # assert isinstance(result_list_content, AIMessage) + + def test_tool_choice(self, model: BaseChatModel) -> None: + """Test that the model can force tool calling via the ``tool_choice`` + parameter. This test is skipped if the ``has_tool_choice`` property on the + test class is set to False. + + This test is optional and should be skipped if the model does not support + tool calling (see Configuration below). + + .. dropdown:: Configuration + + To disable tool calling tests, set ``has_tool_choice`` to False in your + test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def has_tool_choice(self) -> bool: + return False + + .. dropdown:: Troubleshooting + + If this test fails, check whether the ``test_tool_calling`` test is passing. + If it is not, refer to the troubleshooting steps in that test first. + + If ``test_tool_calling`` is passing, check that the underlying model + supports forced tool calling. If it does, ``bind_tools`` should accept a + ``tool_choice`` parameter that can be used to force a tool call. + + It should accept: + + 1. The string ``'any'`` to force calling the bound tool, and, + 2. The string name of the tool to force calling that tool. + + """ + if not self.has_tool_choice or not self.has_tool_calling: + pytest.skip("Test requires tool choice.") + + @tool + def get_weather(location: str) -> str: + """Get weather at a location.""" + return "It's sunny." 
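+
+        # Two tools are bound so that forcing ``magic_function`` by name (below)
+        # is a meaningful constraint rather than the only option.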
+ + for tool_choice in ["any", "magic_function"]: + model_with_tools = model.bind_tools( + [magic_function, get_weather], tool_choice=tool_choice + ) + result = model_with_tools.invoke("Hello!") + assert isinstance(result, AIMessage) + assert result.tool_calls + if tool_choice == "magic_function": + assert result.tool_calls[0]["name"] == "magic_function" + + def test_tool_calling_with_no_arguments(self, model: BaseChatModel) -> None: + """Test that the model generates tool calls for tools with no arguments. + This test is skipped if the ``has_tool_calling`` property on the test class + is set to False. + + This test is optional and should be skipped if the model does not support + tool calling (see Configuration below). + + .. dropdown:: Configuration + + To disable tool calling tests, set ``has_tool_calling`` to False in your + test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def has_tool_calling(self) -> bool: + return False + + .. dropdown:: Troubleshooting + + If this test fails, check that ``bind_tools`` is implemented to correctly + translate LangChain tool objects into the appropriate schema for your + chat model. It should correctly handle the case where a tool has no + arguments. + + This test may fail if the chat model does not support a ``tool_choice`` + parameter. This parameter can be used to force a tool call. It may also + fail if a provider does not support this form of tool. In these cases, + you can ``xfail`` the test: + + .. code-block:: python + + @pytest.mark.xfail(reason=("Does not support tool_choice.")) + def test_tool_calling_with_no_arguments(self, model: BaseChatModelV1) -> None: + super().test_tool_calling_with_no_arguments(model) + + Otherwise, in the case that only one tool is bound, ensure that + ``tool_choice`` supports the string ``'any'`` to force calling that tool. + + """ # noqa: E501 + if not self.has_tool_calling: + pytest.skip("Test requires tool calling.") + + tool_choice_value = None if not self.has_tool_choice else "any" + model_with_tools = model.bind_tools( + [magic_function_no_args], tool_choice=tool_choice_value + ) + query = "What is the value of magic_function_no_args()? Use the tool." + result = model_with_tools.invoke(query) + _validate_tool_call_message_no_args(result) + + full: Optional[AIMessageChunk] = None + for chunk in model_with_tools.stream(query): + full = chunk if full is None else full + chunk + assert isinstance(full, AIMessage) + _validate_tool_call_message_no_args(full) + + def test_tool_message_error_status( + self, model: BaseChatModel, my_adder_tool: BaseTool + ) -> None: + """Test that ``ToolMessage`` with ``status="error"`` can be handled. + + These messages may take the form: + + .. code-block:: python + + ToolMessage( + content="Error: Missing required argument 'b'.", + status="error", + ) + + If possible, the ``status`` field should be parsed and passed appropriately + to the model. + + This test is optional and should be skipped if the model does not support + tool calling (see Configuration below). + + .. dropdown:: Configuration + + To disable tool calling tests, set ``has_tool_calling`` to False in your + test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def has_tool_calling(self) -> bool: + return False + + .. 
dropdown:: Troubleshooting + + If this test fails, check that the ``status`` field on ``ToolMessage`` + objects is either ignored or passed to the model appropriately. + + """ + if not self.has_tool_calling: + pytest.skip("Test requires tool calling.") + + model_with_tools = model.bind_tools([my_adder_tool]) + messages = [ + HumanMessage("What is 1 + 2?"), + create_tool_call( + "my_adder_tool", {"a": 1}, id="abc123" + ), # Missing required argument 'b' + ToolMessage( + "Error: Missing required argument 'b'.", + tool_call_id="abc123", + status="error", + ), + ] + result = model_with_tools.invoke(messages) + assert isinstance(result, AIMessage) + + def test_structured_few_shot_examples( + self, model: BaseChatModel, my_adder_tool: BaseTool + ) -> None: + """Test that the model can process few-shot examples with tool calls. + + These are represented as a sequence of messages of the following form: + + - ``HumanMessage`` with ``TextContentBlock`` content; + - ``AIMessage`` with the ``tool_calls`` attribute populated; + - ``ToolMessage`` with string content; + - ``ToolMessage`` with content block content; + - ``AIMessage`` with ``TextContentBlock`` content (an answer); + - ``HumanMessage`` with ``TextContentBlock`` content (a follow-up question). + + This test should be skipped if the model does not support tool calling + (see Configuration below). + + .. dropdown:: Configuration + + To disable tool calling tests, set ``has_tool_calling`` to False in your + test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def has_tool_calling(self) -> bool: + return False + + .. dropdown:: Troubleshooting + + This test uses `a utility function `__ + in ``langchain_core`` to generate a sequence of messages representing + "few-shot" examples. + + If this test fails, check that the model can correctly handle this + sequence of messages. + + You can ``xfail`` the test if tool calling is implemented but this format + is not supported. + + .. code-block:: python + + @pytest.mark.xfail(reason=("Not implemented.")) + def test_structured_few_shot_examples(self, *args: Any) -> None: + super().test_structured_few_shot_examples(*args) + + """ + if not self.has_tool_calling: + pytest.skip("Test requires tool calling.") + + model_with_tools = model.bind_tools([my_adder_tool], tool_choice="any") + function_result = json.dumps({"result": 3}) + + tool_schema = my_adder_tool.args_schema + assert isinstance(tool_schema, type) + assert issubclass(tool_schema, BaseModel) + # TODO verify this is correct + few_shot_messages = tool_example_to_messages( + "What is 1 + 2", + [tool_schema(a=1, b=2)], + tool_outputs=[function_result], + ai_response=function_result, + ) + + messages = [*few_shot_messages, HumanMessage("What is 3 + 4")] + result = model_with_tools.invoke(messages) + assert isinstance(result, AIMessage) + + @pytest.mark.parametrize("schema_type", ["pydantic", "typeddict", "json_schema"]) + def test_structured_output(self, model: BaseChatModel, schema_type: str) -> None: + """Test to verify structured output is generated both on ``invoke()`` and ``stream()``. + + This test is optional and should be skipped if the model does not support + structured output (see Configuration below). + + .. dropdown:: Configuration + + To disable structured output tests, set ``has_structured_output`` to False + in your test class: + + .. 
code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def has_structured_output(self) -> bool: + return False + + By default, ``has_structured_output`` is True if a model overrides the + ``with_structured_output`` or ``bind_tools`` methods. + + .. dropdown:: Troubleshooting + + If this test fails, ensure that the model's ``bind_tools`` method + properly handles both JSON Schema and Pydantic V2 models. + + ``langchain_core`` implements `a utility function `__ + that will accommodate most formats. + + See `example implementation `__ + of ``with_structured_output``. + + """ # noqa: E501 + if not self.has_structured_output: + pytest.skip("Test requires structured output.") + + schema, validation_function = _get_joke_class(schema_type) + chat = model.with_structured_output(schema, **self.structured_output_kwargs) + mock_callback = MagicMock() + mock_callback.on_chat_model_start = MagicMock() + + invoke_callback = _TestCallbackHandler() + + result = chat.invoke( + "Tell me a joke about cats.", config={"callbacks": [invoke_callback]} + ) + validation_function(result) + + assert len(invoke_callback.options) == 1, ( + "Expected on_chat_model_start to be called once" + ) + assert isinstance(invoke_callback.options[0], dict) + assert isinstance( + invoke_callback.options[0]["ls_structured_output_format"]["schema"], dict + ) + assert invoke_callback.options[0]["ls_structured_output_format"][ + "schema" + ] == convert_to_json_schema(schema) + + stream_callback = _TestCallbackHandler() + + for chunk in chat.stream( + "Tell me a joke about cats.", config={"callbacks": [stream_callback]} + ): + validation_function(chunk) + assert chunk + + assert len(stream_callback.options) == 1, ( + "Expected on_chat_model_start to be called once" + ) + assert isinstance(stream_callback.options[0], dict) + assert isinstance( + stream_callback.options[0]["ls_structured_output_format"]["schema"], dict + ) + assert stream_callback.options[0]["ls_structured_output_format"][ + "schema" + ] == convert_to_json_schema(schema) + + @pytest.mark.parametrize("schema_type", ["pydantic", "typeddict", "json_schema"]) + async def test_structured_output_async( + self, model: BaseChatModel, schema_type: str + ) -> None: + """Test to verify structured output is generated both on ``invoke()`` and ``stream()``. + + This test is optional and should be skipped if the model does not support + structured output (see Configuration below). + + .. dropdown:: Configuration + + To disable structured output tests, set ``has_structured_output`` to False + in your test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def has_structured_output(self) -> bool: + return False + + By default, ``has_structured_output`` is True if a model overrides the + ``with_structured_output`` or ``bind_tools`` methods. + + .. dropdown:: Troubleshooting + + If this test fails, ensure that the model's ``bind_tools`` method + properly handles both JSON Schema and Pydantic V2 models. + + ``langchain_core`` implements `a utility function `__ + that will accommodate most formats. + + See `example implementation `__ + of ``with_structured_output``. 
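+
+        .. dropdown:: Example
+
+            A minimal sketch of the async call pattern this test exercises
+            (the ``Joke`` schema here is illustrative):
+
+            .. code-block:: python
+
+                from pydantic import BaseModel
+
+
+                class Joke(BaseModel):
+                    setup: str
+                    punchline: str
+
+
+                structured = model.with_structured_output(Joke)
+                result = await structured.ainvoke("Tell me a joke about cats.")
+                assert isinstance(result, Joke)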
+ + """ # noqa: E501 + if not self.has_structured_output: + pytest.skip("Test requires structured output.") + + schema, validation_function = _get_joke_class(schema_type) + + chat = model.with_structured_output(schema, **self.structured_output_kwargs) + ainvoke_callback = _TestCallbackHandler() + + result = await chat.ainvoke( + "Tell me a joke about cats.", config={"callbacks": [ainvoke_callback]} + ) + validation_function(result) + + assert len(ainvoke_callback.options) == 1, ( + "Expected on_chat_model_start to be called once" + ) + assert isinstance(ainvoke_callback.options[0], dict) + assert isinstance( + ainvoke_callback.options[0]["ls_structured_output_format"]["schema"], dict + ) + assert ainvoke_callback.options[0]["ls_structured_output_format"][ + "schema" + ] == convert_to_json_schema(schema) + + astream_callback = _TestCallbackHandler() + + async for chunk in chat.astream( + "Tell me a joke about cats.", config={"callbacks": [astream_callback]} + ): + validation_function(chunk) + assert chunk + + assert len(astream_callback.options) == 1, ( + "Expected on_chat_model_start to be called once" + ) + + assert isinstance(astream_callback.options[0], dict) + assert isinstance( + astream_callback.options[0]["ls_structured_output_format"]["schema"], dict + ) + assert astream_callback.options[0]["ls_structured_output_format"][ + "schema" + ] == convert_to_json_schema(schema) + + @pytest.mark.skipif(PYDANTIC_MAJOR_VERSION != 2, reason="Test requires pydantic 2.") + def test_structured_output_pydantic_2_v1(self, model: BaseChatModel) -> None: + """Test to verify we can generate structured output using ``pydantic.v1.BaseModel``. + + ``pydantic.v1.BaseModel`` is available in the Pydantic 2 package. + + This test is optional and should be skipped if the model does not support + structured output (see Configuration below). + + .. dropdown:: Configuration + + To disable structured output tests, set ``has_structured_output`` to False + in your test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def has_structured_output(self) -> bool: + return False + + By default, ``has_structured_output`` is True if a model overrides the + ``with_structured_output`` or ``bind_tools`` methods. + + .. dropdown:: Troubleshooting + + If this test fails, ensure that the model's ``bind_tools`` method + properly handles both JSON Schema and Pydantic V1 models. + + ``langchain_core`` implements `a utility function `__ + that will accommodate most formats. + + See `example implementation `__ + of ``with_structured_output``. 
+ + """ # noqa: E501 + if not self.has_structured_output: + pytest.skip("Test requires structured output.") + + class Joke(BaseModelV1): # Uses langchain_core.pydantic_v1.BaseModel + """Joke to tell user.""" + + setup: str = FieldV1(description="question to set up a joke") + punchline: str = FieldV1(description="answer to resolve the joke") + + # Pydantic class + chat = model.with_structured_output(Joke, **self.structured_output_kwargs) + result = chat.invoke("Tell me a joke about cats.") + assert isinstance(result, Joke) + + for chunk in chat.stream("Tell me a joke about cats."): + assert isinstance(chunk, Joke) + + # Schema + chat = model.with_structured_output( + Joke.schema(), **self.structured_output_kwargs + ) + result = chat.invoke("Tell me a joke about cats.") + assert isinstance(result, dict) + assert set(result.keys()) == {"setup", "punchline"} + + for chunk in chat.stream("Tell me a joke about cats."): + assert isinstance(chunk, dict) + assert isinstance(chunk, dict) # for mypy + assert set(chunk.keys()) == {"setup", "punchline"} + + def test_structured_output_optional_param(self, model: BaseChatModel) -> None: + """Test to verify we can generate structured output that includes optional + parameters. + + This test is optional and should be skipped if the model does not support + structured output (see Configuration below). + + .. dropdown:: Configuration + + To disable structured output tests, set ``has_structured_output`` to False + in your test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def has_structured_output(self) -> bool: + return False + + By default, ``has_structured_output`` is True if a model overrides the + ``with_structured_output`` or ``bind_tools`` methods. + + .. dropdown:: Troubleshooting + + If this test fails, ensure that the model's ``bind_tools`` method + properly handles Pydantic V2 models with optional parameters. + + ``langchain_core`` implements `a utility function `__ + that will accommodate most formats. + + See `example implementation `__ + of ``with_structured_output``. + + """ + if not self.has_structured_output: + pytest.skip("Test requires structured output.") + + # Pydantic + class Joke(BaseModel): + """Joke to tell user.""" + + setup: str = Field(description="question to set up a joke") + punchline: Optional[str] = Field( + default=None, description="answer to resolve the joke" + ) + + chat = model.with_structured_output(Joke, **self.structured_output_kwargs) + setup_result = chat.invoke( + "Give me the setup to a joke about cats, no punchline." + ) + assert isinstance(setup_result, Joke) + + joke_result = chat.invoke("Give me a joke about cats, include the punchline.") + assert isinstance(joke_result, Joke) + + # Schema + chat = model.with_structured_output( + Joke.model_json_schema(), **self.structured_output_kwargs + ) + result = chat.invoke("Tell me a joke about cats.") + assert isinstance(result, dict) + + # TypedDict + class JokeDict(TypedDict): + """Joke to tell user.""" + + setup: Annotated[str, ..., "question to set up a joke"] + punchline: Annotated[Optional[str], None, "answer to resolve the joke"] + + chat = model.with_structured_output(JokeDict, **self.structured_output_kwargs) + result = chat.invoke("Tell me a joke about cats.") + assert isinstance(result, dict) + + def test_json_mode(self, model: BaseChatModel) -> None: + """Test structured output via `JSON mode. `_. 
+
+        This test is optional and should be skipped if the model does not support
+        the JSON mode feature (see Configuration below).
+
+        .. dropdown:: Configuration
+
+            To disable this test, set ``supports_json_mode`` to False in your
+            test class:
+
+            .. code-block:: python
+
+                class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+                    @property
+                    def supports_json_mode(self) -> bool:
+                        return False
+
+        .. dropdown:: Troubleshooting
+
+            See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
+            of ``with_structured_output``.
+
+        """
+        if not self.supports_json_mode:
+            pytest.skip("Test requires json mode support.")
+
+        from pydantic import BaseModel as BaseModelProper
+        from pydantic import Field as FieldProper
+
+        class Joke(BaseModelProper):
+            """Joke to tell user."""
+
+            setup: str = FieldProper(description="question to set up a joke")
+            punchline: str = FieldProper(description="answer to resolve the joke")
+
+        # Pydantic class
+        # Type ignoring since the interface only officially supports pydantic 1
+        # or pydantic.v1.BaseModel but not pydantic.BaseModel from pydantic 2.
+        # We'll need to do a pass updating the type signatures.
+        chat = model.with_structured_output(Joke, method="json_mode")
+        msg = (
+            "Tell me a joke about cats. Return the result as a JSON with 'setup' and "
+            "'punchline' keys. Return nothing other than JSON."
+        )
+        result = chat.invoke(msg)
+        assert isinstance(result, Joke)
+
+        for chunk in chat.stream(msg):
+            assert isinstance(chunk, Joke)
+
+        # Schema
+        chat = model.with_structured_output(
+            Joke.model_json_schema(), method="json_mode"
+        )
+        result = chat.invoke(msg)
+        assert isinstance(result, dict)
+        assert set(result.keys()) == {"setup", "punchline"}
+
+        for chunk in chat.stream(msg):
+            assert isinstance(chunk, dict)
+        assert isinstance(chunk, dict)  # for mypy
+        assert set(chunk.keys()) == {"setup", "punchline"}
+
+    def test_pdf_inputs(self, model: BaseChatModel) -> None:
+        """Test that the model can process PDF inputs.
+
+        This test should be skipped (see Configuration below) if the model does not
+        support PDF inputs. These will take the form:
+
+        .. code-block:: python
+
+            # FileContentBlock
+            {
+                "type": "file",
+                "base64": "<base64 data string>",
+                "mime_type": "application/pdf",
+            }
+
+        See https://python.langchain.com/docs/concepts/multimodality/
+
+        .. dropdown:: Configuration
+
+            To disable this test, set ``supports_pdf_inputs`` to False in your
+            test class:
+
+            .. code-block:: python
+
+                class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+
+                    @property
+                    def supports_pdf_inputs(self) -> bool:
+                        return False
+
+        .. dropdown:: Troubleshooting
+
+            If this test fails, check that the model can correctly handle messages
+            with PDF content blocks, including base64-encoded files. Otherwise, set
+            the ``supports_pdf_inputs`` property to False.
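+
+            A sketch of constructing the input above with the v1 factory
+            helpers used elsewhere in this module (the PDF payload is a
+            placeholder):
+
+            .. code-block:: python
+
+                # Pair a text prompt with a base64-encoded PDF file block
+                message = HumanMessage(
+                    [
+                        create_text_block("Summarize this document:"),
+                        create_file_block(
+                            base64="<base64-encoded PDF data>",
+                            mime_type="application/pdf",
+                        ),
+                    ]
+                )
+                model.invoke([message])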
+ + """ + pytest.fail("Test not implemented yet.") + + # TODO + # if not self.supports_pdf_inputs: + # pytest.skip("Model does not support PDF inputs.") + # url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" + # pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8") + + # message = HumanMessage( + # [ + # { + # "type": "text", + # "text": "Summarize this document:", + # }, + # { + # "type": "file", + # "source_type": "base64", + # "mime_type": "application/pdf", + # "data": pdf_data, + # }, + # ] + # ) + # _ = model.invoke([message]) + + # # Test OpenAI Chat Completions format + # message = HumanMessage( + # [ + # { + # "type": "text", + # "text": "Summarize this document:", + # }, + # { + # "type": "file", + # "file": { + # "filename": "test file.pdf", + # "file_data": f"data:application/pdf;base64,{pdf_data}", + # }, + # }, + # ] + # ) + # _ = model.invoke([message]) + + def test_audio_inputs(self, model: BaseChatModel) -> None: + """Test that the model can process audio inputs. + + This test should be skipped (see Configuration below) if the model does not + support audio inputs. These will take the form: + + .. code-block:: python + + # AudioContentBlock + { + "type": "audio", + "base64": "", + "mime_type": "audio/wav", # or appropriate mime-type + } + + See https://python.langchain.com/docs/concepts/multimodality/ + + .. dropdown:: Configuration + + To disable this test, set ``supports_audio_content_blocks`` to False in your + test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + + @property + def supports_audio_content_blocks(self) -> bool: + return False + + .. dropdown:: Troubleshooting + + If this test fails, check that the model can correctly handle messages + with audio content blocks. Otherwise, set the ``supports_audio_content_blocks`` + property to False. + + """ # noqa: E501 + if not self.supports_audio_content_blocks: + pytest.skip("Model does not support AudioContentBlock inputs.") + + url = "https://upload.wikimedia.org/wikipedia/commons/3/3d/Alcal%C3%A1_de_Henares_%28RPS_13-04-2024%29_canto_de_ruise%C3%B1or_%28Luscinia_megarhynchos%29_en_el_Soto_del_Henares.wav" + audio_data = base64.b64encode(httpx.get(url).content).decode("utf-8") + + message = HumanMessage( + [ + create_text_block("Describe this audio:"), + create_audio_block( + base64=audio_data, + mime_type="audio/wav", + ), + ] + ) + _ = model.invoke([message]) + + # TODO? + # Test OpenAI Chat Completions format + # message = HumanMessage( + # [ + # { + # "type": "text", + # "text": "Describe this audio:", + # }, + # { + # "type": "input_audio", + # "input_audio": {"data": audio_data, "format": "wav"}, + # }, + # ] + # ) + # _ = model.invoke([message]) + + def test_audio_content_blocks_processing(self, model: BaseChatModel) -> None: + """Test audio content block processing with transcription. + + TODO: expand docstring + + """ + if not self.supports_audio_content_blocks: + pytest.skip("Model does not support audio inputs.") + + audio_block = create_audio_block( + base64=_get_test_audio_base64(), + mime_type="audio/wav", + ) + text_block = create_text_block("Transcribe this audio file.") + + result = model.invoke([HumanMessage([text_block, audio_block])]) + + assert isinstance(result, AIMessage) + if result.text: + assert len(result.text) > 10 # Substantial response + + def test_image_inputs(self, model: BaseChatModel) -> None: + """Test that the model can process image inputs. 
+ + This test should be skipped (see Configuration below) if the model does not + support image inputs. These will take the form: + + .. code-block:: python + + # ImageContentBlock + { + "type": "image", + "base64": "", + "mime_type": "image/png", # or appropriate mime-type + } + + TODO: verify this + For backward-compatibility, we must also support OpenAI-style + image content blocks: + + .. code-block:: python + + [ + {"type": "text", "text": "describe the weather in this image"}, + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, + }, + ] + + See https://python.langchain.com/docs/concepts/multimodality/ + + .. dropdown:: Configuration + + To disable this test, set ``supports_image_content_blocks`` to False in your + test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def supports_image_content_blocks(self) -> bool: + return False + + # Can also explicitly disable testing image URLs: + @property + def supports_image_urls(self) -> bool: + return False + + .. dropdown:: Troubleshooting + + If this test fails, check that the model can correctly handle messages + with image content blocks, including base64-encoded images. Otherwise, set + the ``supports_image_content_blocks`` property to False. + + """ + if not self.supports_image_content_blocks: + pytest.skip("Model does not support image message.") + + image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8") + + # TODO? + # OpenAI format, base64 data + # message = HumanMessage( + # content=[ + # {"type": "text", "text": "describe the weather in this image"}, + # { + # "type": "image_url", + # "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, + # }, + # ], + # ) + # _ = model.invoke([message]) + + # Standard format, base64 data + message = HumanMessage( + [ + create_text_block("describe the weather in this image"), + create_image_block( + base64=image_data, + mime_type="image/jpeg", + ), + ], + ) + _ = model.invoke([message]) + + # TODO? + # Standard format, URL + # if self.supports_image_urls: + # message = HumanMessage( + # content=[ + # {"type": "text", "text": "describe the weather in this image"}, + # { + # "type": "image", + # "source_type": "url", + # "url": image_url, + # }, + # ], + # ) + # _ = model.invoke([message]) + + def test_image_tool_message(self, model: BaseChatModel) -> None: + """Test that the model can process ToolMessages with image inputs. + + TODO: is this needed? + + This test should be skipped if the model does not support messages of the + form: + + .. code-block:: python + + ToolMessage( + content=[ + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, + }, + ], + tool_call_id="1", + ) + + containing image content blocks in OpenAI Chat Completions format, in addition + to messages of the form: + + .. code-block:: python + + ToolMessage( + content=[ + { + "type": "image", + "source_type": "base64", + "data": image_data, + "mime_type": "image/jpeg", + }, + ], + tool_call_id="1", + ) + + containing image content blocks in standard format. + + This test can be skipped by setting the ``supports_image_tool_message`` property + to False (see Configuration below). + + .. 
dropdown:: Configuration + + To disable this test, set ``supports_image_tool_message`` to False in your + test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def supports_image_tool_message(self) -> bool: + return False + + .. dropdown:: Troubleshooting + + If this test fails, check that the model can correctly handle messages + with image content blocks in ToolMessages, including base64-encoded + images. Otherwise, set the ``supports_image_tool_message`` property to + False. + + """ + pytest.fail("Test not implemented yet.") + + # TODO + # if not self.supports_image_tool_message: + # pytest.skip("Model does not support image tool message.") + # image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + # image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8") + + # # Support both OpenAI and standard formats + # oai_format_message = ToolMessage( + # content=[ + # { + # "type": "image_url", + # "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, + # }, + # ], + # tool_call_id="1", + # name="random_image", + # ) + + # standard_format_message = ToolMessage( + # content=[ + # { + # "type": "image", + # "source_type": "base64", + # "data": image_data, + # "mime_type": "image/jpeg", + # }, + # ], + # tool_call_id="1", + # name="random_image", + # ) + + # for tool_message in [oai_format_message, standard_format_message]: + # messages = [ + # HumanMessage( + # "get a random image using the tool and describe the weather" + # ), + # AIMessage( + # [], + # tool_calls=[ + # { + # "type": "tool_call", + # "id": "1", + # "name": "random_image", + # "args": {}, + # } + # ], + # ), + # tool_message, + # ] + + # def random_image() -> str: + # """Return a random image.""" + # return "" + + # _ = model.bind_tools([random_image]).invoke(messages) - # Multimodal testing def test_image_content_blocks_with_analysis(self, model: BaseChatModel) -> None: - """Test image analysis using ``ImageContentBlock``s.""" + """Test image analysis using ``ImageContentBlock``s. + + TODO: expand docstring + + """ if not self.supports_image_content_blocks: pytest.skip("Model does not support image inputs.") @@ -180,7 +2412,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): assert len(result.text) > 10 # Substantial response def test_video_content_blocks(self, model: BaseChatModel) -> None: - """Test video content block processing.""" + """Test video content block processing. + + TODO: expand docstring + + """ if not self.supports_video_content_blocks: pytest.skip("Model does not support video inputs.") @@ -196,30 +2432,438 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): if result.text: assert len(result.text) > 10 # Substantial response - def test_audio_content_blocks_processing(self, model: BaseChatModel) -> None: - """Test audio content block processing with transcription.""" - if not self.supports_audio_content_blocks: - pytest.skip("Model does not support audio inputs.") + def test_anthropic_inputs(self, model: BaseChatModel) -> None: + """Test that model can process Anthropic-style message histories. - audio_block = create_audio_block( - base64=_get_test_audio_base64(), - mime_type="audio/wav", - ) - text_block = create_text_block("Transcribe this audio file.") + TODO? 
- result = model.invoke([HumanMessage([text_block, audio_block])]) + These message histories will include ``AIMessage`` objects with ``tool_use`` + content blocks, e.g., + .. code-block:: python + + AIMessage( + [ + {"type": "text", "text": "Hmm let me think about that"}, + { + "type": "tool_use", + "input": {"fav_color": "green"}, + "id": "foo", + "name": "color_picker", + }, + ] + ) + + as well as ``HumanMessage`` objects containing ``tool_result`` content blocks: + + .. code-block:: python + + HumanMessage( + [ + { + "type": "tool_result", + "tool_use_id": "foo", + "content": [ + { + "type": "text", + "text": "green is a great pick! that's my sister's favorite color", # noqa: E501 + } + ], + "is_error": False, + }, + {"type": "text", "text": "what's my sister's favorite color"}, + ] + ) + + This test should be skipped if the model does not support messages of this + form (or doesn't support tool calling generally). See Configuration below. + + .. dropdown:: Configuration + + To disable this test, set ``supports_anthropic_inputs`` to False in your + test class: + + .. code-block:: python + + class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests): + @property + def supports_anthropic_inputs(self) -> bool: + return False + + .. dropdown:: Troubleshooting + + If this test fails, check that: + + 1. The model can correctly handle message histories that include message objects with list content. + 2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format. + 3. HumanMessages with "tool_result" content blocks are correctly handled. + + Otherwise, if Anthropic tool call and result formats are not supported, + set the ``supports_anthropic_inputs`` property to False. + + """ # noqa: E501 + pytest.fail("Test not implemented yet.") + + # TODO + # if not self.supports_anthropic_inputs: + # pytest.skip("Model does not explicitly support Anthropic inputs.") + + # # Anthropic-format tool + # color_picker = { + # "name": "color_picker", + # "input_schema": { + # "type": "object", + # "properties": { + # "fav_color": {"type": "string"}, + # }, + # "required": ["fav_color"], + # }, + # "description": "Input your fav color and get a random fact about it.", + # "cache_control": {"type": "ephemeral"}, + # } + + # human_content: list[dict] = [ + # { + # "type": "text", + # "text": "what's your favorite color in this image", + # "cache_control": {"type": "ephemeral"}, + # }, + # ] + # if self.supports_image_inputs: + # image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + # image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8") # noqa: E501 + # human_content.append( + # { + # "type": "image", + # "source": { + # "type": "base64", + # "media_type": "image/jpeg", + # "data": image_data, + # }, + # } + # ) + # messages = [ + # SystemMessage("you're a good assistant"), + # HumanMessage(human_content), # type: ignore[arg-type] + # AIMessage( + # [ + # {"type": "text", "text": "Hmm let me think about that"}, + # { + # "type": "tool_use", + # "input": {"fav_color": "green"}, + # "id": "foo", + # "name": "color_picker", + # }, + # ], + # tool_calls=[ + # { + # "name": "color_picker", + # "args": {"fav_color": "green"}, + # "id": "foo", + # "type": "tool_call", + # } + # ], + # ), + # ToolMessage("That's a great pick!", tool_call_id="foo"), + # ] + # response = 
model.bind_tools([color_picker]).invoke(messages)
+        # assert isinstance(response, AIMessage)
+
+        # # Test thinking blocks
+        # messages = [
+        #     HumanMessage(
+        #         [
+        #             {
+        #                 "type": "text",
+        #                 "text": "Hello",
+        #             },
+        #         ]
+        #     ),
+        #     AIMessage(
+        #         [
+        #             {
+        #                 "type": "thinking",
+        #                 "thinking": "I'm thinking...",
+        #                 "signature": "abc123",
+        #             },
+        #             {
+        #                 "type": "text",
+        #                 "text": "Hello, how are you?",
+        #             },
+        #         ]
+        #     ),
+        #     HumanMessage(
+        #         [
+        #             {
+        #                 "type": "text",
+        #                 "text": "Well, thanks.",
+        #             },
+        #         ]
+        #     ),
+        # ]
+        # response = model.invoke(messages)
+        # assert isinstance(response, AIMessage)
+
+    def test_message_with_name(self, model: BaseChatModel) -> None:
+        """Test that ``HumanMessage`` with values for the ``name`` field can be handled.
+
+        This test expects the model to respond with a non-empty ``TextContentBlock``.
+
+        These messages may take the form:
+
+        .. code-block:: python
+
+            HumanMessage("hello", name="example_user")
+
+        If possible, the ``name`` field should be parsed and passed appropriately
+        to the model. Otherwise, it should be ignored.
+
+        .. dropdown:: Troubleshooting
+
+            If this test fails, check that the ``name`` field on ``HumanMessage``
+            objects is either ignored or passed to the model appropriately.
+
+        """
+        result = model.invoke([HumanMessage("hello", name="example_user")])
+
+        assert result is not None
+        assert isinstance(result, AIMessage)
+        assert len(result.content) > 0
+        assert isinstance(result.text, str)
+        assert len(result.text) > 0
+
+    def test_agent_loop(self, model: BaseChatModel) -> None:
+        """Test that the model supports a simple ReAct agent loop.
+
+        This test is optional and should be skipped if the model does not support
+        tool calling (see Configuration below).
+
+        .. dropdown:: Configuration
+
+            To disable tool calling tests, set ``has_tool_calling`` to False in your
+            test class:
+
+            .. code-block:: python
+
+                class TestMyV1ChatModelIntegration(ChatModelV1IntegrationTests):
+                    @property
+                    def has_tool_calling(self) -> bool:
+                        return False
+
+        .. dropdown:: Troubleshooting
+
+            If this test fails, check that ``bind_tools`` is implemented to correctly
+            translate LangChain tool objects into the appropriate schema for your
+            chat model.
+
+            Check also that all required information (e.g., tool calling identifiers)
+            from ``AIMessage`` objects is propagated correctly to model payloads.
+
+            This test may fail if the chat model does not consistently generate tool
+            calls in response to an appropriate query. In these cases you can ``xfail``
+            the test:
+
+            .. code-block:: python
+
+                @pytest.mark.xfail(reason=("Does not support tool_choice."))
+                def test_agent_loop(self, model: BaseChatModel) -> None:
+                    super().test_agent_loop(model)
+
+        """
+        if not self.has_tool_calling:
+            pytest.skip("Test requires tool calling.")
+
+        @tool
+        def get_weather(location: str) -> str:
+            """Call to surf the web."""
+            return "It's sunny."
+ + llm_with_tools = model.bind_tools([get_weather]) + input_message = HumanMessage("What is the weather in San Francisco, CA?") + tool_call_message = llm_with_tools.invoke([input_message]) + assert isinstance(tool_call_message, AIMessage) + tool_calls = tool_call_message.tool_calls + assert len(tool_calls) == 1 + tool_call = tool_calls[0] + tool_message = get_weather.invoke(tool_call) + assert isinstance(tool_message, ToolMessage) + response = llm_with_tools.invoke( + [ + input_message, + tool_call_message, + tool_message, + ] + ) + assert isinstance(response, AIMessage) + + @pytest.mark.benchmark + @pytest.mark.vcr + def test_stream_time( + self, model: BaseChatModel, benchmark: BenchmarkFixture, vcr: Cassette + ) -> None: + """Test that streaming does not introduce undue overhead. + + See ``enable_vcr_tests`` dropdown :class:`above ` + for more information. + + .. dropdown:: Configuration + + This test can be enabled or disabled using the ``enable_vcr_tests`` + property. For example, to disable the test, set this property to ``False``: + + .. code-block:: python + + @property + def enable_vcr_tests(self) -> bool: + return False + + .. important:: + + VCR will by default record authentication headers and other sensitive + information in cassettes. See ``enable_vcr_tests`` dropdown + :class:`above ` for how to configure what + information is recorded in cassettes. + + """ + if not self.enable_vcr_tests: + pytest.skip("VCR not set up.") + + def _run() -> None: + for _ in model.stream("Write a story about a cat."): + pass + + if not vcr.responses: + _run() + else: + benchmark(_run) + + def invoke_with_audio_input(self, *, stream: bool = False) -> AIMessage: + """:private:""" + # To be implemented in test subclass + raise NotImplementedError + + def invoke_with_audio_output(self, *, stream: bool = False) -> AIMessage: + """:private:""" + # To be implemented in test subclass + raise NotImplementedError + + def invoke_with_reasoning_output(self, *, stream: bool = False) -> AIMessage: + """:private:""" + # To be implemented in test subclass + raise NotImplementedError + + def invoke_with_cache_read_input(self, *, stream: bool = False) -> AIMessage: + """:private:""" + # To be implemented in test subclass + raise NotImplementedError + + def invoke_with_cache_creation_input(self, *, stream: bool = False) -> AIMessage: + """:private:""" + # To be implemented in test subclass + raise NotImplementedError + + def test_unicode_tool_call_integration( + self, + model: BaseChatModel, + *, + tool_choice: Optional[str] = None, + force_tool_call: bool = True, + ) -> None: + """Generic integration test for Unicode characters in tool calls. + + Args: + model: The chat model to test + tool_choice: Tool choice parameter to pass to ``bind_tools()`` (provider-specific) + force_tool_call: Whether to force a tool call (use ``tool_choice=True`` if None) + + Tests that Unicode characters in tool call arguments are preserved correctly, + not escaped as ``\\uXXXX`` sequences. 
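+
+        Subclasses may re-invoke this test with provider-specific arguments,
+        e.g. (the ``"auto"`` choice shown here is a hypothetical
+        provider-specific value):
+
+        .. code-block:: python
+
+            def test_unicode_tool_call_integration(self, model: BaseChatModel) -> None:
+                super().test_unicode_tool_call_integration(
+                    model, tool_choice="auto", force_tool_call=True
+                )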
+ """ # noqa: E501 + if not self.has_tool_calling: + pytest.skip("Test requires tool calling support.") + + # Configure tool choice based on provider capabilities + if tool_choice is None and force_tool_call: + tool_choice = "any" + + if tool_choice is not None: + llm_with_tool = model.bind_tools( + [unicode_customer], tool_choice=tool_choice + ) + else: + llm_with_tool = model.bind_tools([unicode_customer]) + + # Test with Chinese characters + msgs = [ + HumanMessage( + "Create a customer named '你好啊集团' (Hello Group) - a Chinese " + "technology company" + ) + ] + ai_msg = llm_with_tool.invoke(msgs) + + assert isinstance(ai_msg, AIMessage) + assert isinstance(ai_msg.tool_calls, list) + + if force_tool_call: + assert len(ai_msg.tool_calls) >= 1, ( + f"Expected at least 1 tool call, got {len(ai_msg.tool_calls)}" + ) + + if ai_msg.tool_calls: + tool_call = ai_msg.tool_calls[0] + assert tool_call["name"] == "unicode_customer" + assert "args" in tool_call + + # Verify Unicode characters are properly handled + args = tool_call["args"] + assert "customer_name" in args + customer_name = args["customer_name"] + + # The model should include the Unicode characters, not escaped sequences + assert ( + "你好" in customer_name + or "你" in customer_name + or "好" in customer_name + ), f"Unicode characters not found in: {customer_name}" + + # Test with additional Unicode examples - Japanese + msgs_jp = [ + HumanMessage( + "Create a customer named 'こんにちは株式会社' (Hello Corporation) - a " + "Japanese company" + ) + ] + ai_msg_jp = llm_with_tool.invoke(msgs_jp) + + assert isinstance(ai_msg_jp, AIMessage) + + if force_tool_call: + assert len(ai_msg_jp.tool_calls) >= 1 + + if ai_msg_jp.tool_calls: + tool_call_jp = ai_msg_jp.tool_calls[0] + args_jp = tool_call_jp["args"] + customer_name_jp = args_jp["customer_name"] + + # Verify Japanese Unicode characters are preserved + assert ( + "こんにちは" in customer_name_jp + or "株式会社" in customer_name_jp + or "こ" in customer_name_jp + or "ん" in customer_name_jp + ), f"Japanese Unicode characters not found in: {customer_name_jp}" def test_complex_multimodal_reasoning(self, model: BaseChatModel) -> None: - """Test complex reasoning with multiple content types.""" - # TODO: come back to this, seems like a unique scenario + """Test complex reasoning with multiple content types. + + TODO: expand docstring + + """ if not self.supports_multimodal_reasoning: pytest.skip("Model does not support multimodal reasoning.") - content_blocks: list[ContentBlock] = [ + content_blocks: list[types.ContentBlock] = [ create_text_block( "Compare these media files and provide reasoning analysis:" ), @@ -242,7 +2886,6 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): assert isinstance(result, AIMessage) - # Check for reasoning blocks in response if self.supports_reasoning_content_blocks: reasoning_blocks = [ block @@ -252,7 +2895,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): assert len(reasoning_blocks) > 0 def test_citation_generation_with_sources(self, model: BaseChatModel) -> None: - """Test that the model can generate ``Citations`` with source links.""" + """Test that the model can generate ``Citations`` with source links. 
+ + TODO: expand docstring + + """ if not self.supports_structured_citations: pytest.skip("Model does not support structured citations.") @@ -294,7 +2941,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): assert "end_index" in annotation def test_web_search_integration(self, model: BaseChatModel) -> None: - """Test web search content blocks integration.""" + """Test web search content blocks integration. + + TODO: expand docstring + + """ if not self.supports_web_search_blocks: pytest.skip("Model does not support web search blocks.") @@ -320,7 +2971,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): assert len(search_call_blocks) > 0 or len(search_result_blocks) > 0 def test_code_interpreter_blocks(self, model: BaseChatModel) -> None: - """Test code interpreter content blocks.""" + """Test code interpreter content blocks. + + TODO: expand docstring + + """ if not self.supports_code_interpreter: pytest.skip("Model does not support code interpreter blocks.") @@ -345,7 +3000,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): assert len(code_blocks) > 0 def test_tool_calling_with_content_blocks(self, model: BaseChatModel) -> None: - """Test tool calling with content blocks.""" + """Test tool calling with content blocks. + + TODO: expand docstring + + """ if not self.has_tool_calling: pytest.skip("Model does not support tool calls.") @@ -366,7 +3025,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): def test_plaintext_content_blocks_from_documents( self, model: BaseChatModel ) -> None: - """Test PlainTextContentBlock for document plaintext content.""" + """Test PlainTextContentBlock for document plaintext content. + + TODO: expand docstring + + """ if not self.supports_plaintext_content_blocks: pytest.skip("Model does not support PlainTextContentBlock.") @@ -385,7 +3048,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): # TODO expand def test_content_block_streaming_integration(self, model: BaseChatModel) -> None: - """Test streaming with content blocks.""" + """Test streaming with content blocks. + + TODO: expand docstring + + """ if not self.supports_content_blocks_v1: pytest.skip("Model does not support content blocks v1.") @@ -415,7 +3082,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): def test_error_handling_with_invalid_content_blocks( self, model: BaseChatModel ) -> None: - """Test error handling with various invalid content block configurations.""" + """Test error handling with various invalid content block configurations. + + TODO: expand docstring + + """ if not self.supports_content_blocks_v1: pytest.skip("Model does not support content blocks v1.") @@ -437,7 +3108,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): assert len(str(e)) > 0 async def test_async_content_blocks_processing(self, model: BaseChatModel) -> None: - """Test asynchronous processing of content blocks.""" + """Test asynchronous processing of content blocks. + + TODO: expand docstring + + """ if not self.supports_content_blocks_v1: pytest.skip("Model does not support content blocks v1.") @@ -447,7 +3122,11 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): assert isinstance(result, AIMessage) def test_content_blocks_with_callbacks(self, model: BaseChatModel) -> None: - """Test that content blocks work correctly with callback handlers.""" + """Test that content blocks work correctly with callback handlers. 
+ + TODO: expand docstring + + """ if not self.supports_content_blocks_v1: pytest.skip("Model does not support content blocks v1.") @@ -475,3 +3154,140 @@ class ChatModelV1IntegrationTests(ChatModelV1Tests): hasattr(msg, "content") and isinstance(msg.content, list) for msg in callback_handler.messages_seen ) + + def test_input_conversion_string(self, model: BaseChatModel) -> None: + """Test that string input is properly converted to messages. + + TODO: expand docstring + + """ + result = model.invoke("Test string input") + assert isinstance(result, AIMessage) + assert result.content is not None + + def test_input_conversion_empty_string(self, model: BaseChatModel) -> None: + """Test that empty string input is handled gracefully. + + TODO: expand docstring + + """ + result = model.invoke("") + assert isinstance(result, AIMessage) + + def test_input_conversion_message_v1_list(self, model: BaseChatModel) -> None: + """Test that v1 message list input is handled correctly. + + TODO: expand docstring + + """ + messages = [HumanMessage("Test message")] + result = model.invoke(messages) + assert isinstance(result, AIMessage) + assert result.content is not None + + def test_text_content_blocks_basic(self, model: BaseChatModel) -> None: + """Test that the model can handle the ``TextContentBlock`` format.""" + if not self.supports_text_content_blocks: + pytest.skip("Model does not support TextContentBlock (rare!)") + + text_block = create_text_block("Hello, world!") + message = HumanMessage(content=[text_block]) + + result = model.invoke([message]) + assert isinstance(result, AIMessage) + assert result.content is not None + + def test_mixed_content_blocks_basic(self, model: BaseChatModel) -> None: + """Test that the model can handle messages with mixed content blocks.""" + if not ( + self.supports_text_content_blocks and self.supports_image_content_blocks + ): + pytest.skip( + "Model doesn't support mixed content blocks (concurrent text and image)" + ) + + content_blocks: list[types.ContentBlock] = [ + create_text_block("Describe this image:"), + create_image_block( + base64="iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==", + mime_type="image/png", + ), + ] + + message = HumanMessage(content=content_blocks) + result = model.invoke([message]) + + assert isinstance(result, AIMessage) + assert result.content is not None + + def test_reasoning_content_blocks_basic(self, model: BaseChatModel) -> None: + """Test that the model can generate ``ReasoningContentBlock``.""" + if not self.supports_reasoning_content_blocks: + pytest.skip("Model does not support ReasoningContentBlock.") + + message = HumanMessage("Think step by step: What is 2 + 2?") + result = model.invoke([message]) + + assert isinstance(result, AIMessage) + if isinstance(result.content, list): + reasoning_blocks = [ + block + for block in result.content + if isinstance(block, dict) and is_reasoning_block(block) + ] + assert len(reasoning_blocks) > 0 + + def test_non_standard_content_blocks_basic(self, model: BaseChatModel) -> None: + """Test that the model can handle ``NonStandardContentBlock``.""" + if not self.supports_non_standard_blocks: + pytest.skip("Model does not support NonStandardContentBlock.") + + non_standard_block = create_non_standard_block( + { + "custom_field": "custom_value", + "data": [1, 2, 3], + } + ) + + message = HumanMessage(content=[non_standard_block]) + + # Should not raise an error + result = model.invoke([message]) + assert isinstance(result, AIMessage) + + def 
test_invalid_tool_call_handling_basic(self, model: BaseChatModel) -> None: + """Test that the model can handle ``InvalidToolCall`` blocks gracefully.""" + if not self.supports_invalid_tool_calls: + pytest.skip("Model does not support InvalidToolCall handling.") + + invalid_tool_call: InvalidToolCall = { + "type": "invalid_tool_call", + "name": "nonexistent_tool", + "args": None, + "id": "invalid_123", + "error": "Tool not found", + } + + # Create a message with invalid tool call in history + ai_message = AIMessage(content=[invalid_tool_call]) + follow_up = HumanMessage("Please try again with a valid approach.") + + result = model.invoke([ai_message, follow_up]) + assert isinstance(result, AIMessage) + assert result.content is not None + + def test_file_content_blocks_basic(self, model: BaseChatModel) -> None: + """Test that the model can handle ``FileContentBlock``.""" + if not self.supports_file_content_blocks: + pytest.skip("Model does not support FileContentBlock.") + + file_block = create_file_block( + base64="SGVsbG8sIHdvcmxkIQ==", # "Hello, world!" + mime_type="text/plain", + ) + + message = HumanMessage(content=[file_block]) + result = model.invoke([message]) + + assert isinstance(result, AIMessage) + assert result.content is not None diff --git a/libs/standard-tests/langchain_tests/unit_tests/chat_models_v1.py b/libs/standard-tests/langchain_tests/unit_tests/chat_models_v1.py index 92f2b409884..bcf84c49632 100644 --- a/libs/standard-tests/langchain_tests/unit_tests/chat_models_v1.py +++ b/libs/standard-tests/langchain_tests/unit_tests/chat_models_v1.py @@ -7,27 +7,69 @@ This module provides updated test patterns for the new messages introduced in content blocks system. """ -from typing import Literal, cast +import inspect +import os +from abc import abstractmethod +from typing import Any, Literal, Optional +from unittest import mock import pytest from langchain_core.load import dumpd, load from langchain_core.messages.content_blocks import ( - ContentBlock, - InvalidToolCall, - TextContentBlock, - create_file_block, - create_image_block, - create_non_standard_block, create_text_block, - is_reasoning_block, - is_text_block, - is_tool_call_block, ) -from langchain_core.tools import tool +from langchain_core.runnables import RunnableBinding +from langchain_core.tools import BaseTool, tool from langchain_core.v1.chat_models import BaseChatModel -from langchain_core.v1.messages import AIMessage, HumanMessage +from langchain_core.v1.messages import HumanMessage +from pydantic import BaseModel, Field, SecretStr +from pydantic.v1 import BaseModel as BaseModelV1 +from pydantic.v1 import Field as FieldV1 +from pydantic.v1 import ValidationError as ValidationErrorV1 +from pytest_benchmark.fixture import BenchmarkFixture # type: ignore[import-untyped] +from syrupy.assertion import SnapshotAssertion from langchain_tests.base import BaseStandardTests +from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION + + +def generate_schema_pydantic_v1_from_2() -> Any: + """Use to generate a schema from v1 namespace in pydantic 2. + + :private: + """ + if PYDANTIC_MAJOR_VERSION != 2: + msg = "This function is only compatible with Pydantic v2." + raise AssertionError(msg) + + class PersonB(BaseModelV1): + """Record attributes of a person.""" + + name: str = FieldV1(..., description="The name of the person.") + age: int = FieldV1(..., description="The age of the person.") + + return PersonB + + +def generate_schema_pydantic() -> Any: + """Works with either pydantic 1 or 2. 
+ + :private: + """ + + class PersonA(BaseModel): + """Record attributes of a person.""" + + name: str = Field(..., description="The name of the person.") + age: int = Field(..., description="The age of the person.") + + return PersonA + + +TEST_PYDANTIC_MODELS = [generate_schema_pydantic()] + +if PYDANTIC_MAJOR_VERSION == 2: + TEST_PYDANTIC_MODELS.append(generate_schema_pydantic_v1_from_2()) class ChatModelV1Tests(BaseStandardTests): @@ -39,16 +81,79 @@ class ChatModelV1Tests(BaseStandardTests): :private: """ - # Core Model Properties - these should be implemented by subclasses + @property + @abstractmethod + def chat_model_class(self) -> type[BaseChatModel]: + """The chat model class to test, e.g., ``ChatParrotLink``.""" + ... + + @property + def chat_model_params(self) -> dict: + """Initialization parameters for the chat model.""" + return {} + + @property + def standard_chat_model_params(self) -> dict: + """:private:""" + return { + "temperature": 0, + "max_tokens": 100, + "timeout": 60, + "stop": [], + "max_retries": 2, + } + + @pytest.fixture + def model(self) -> BaseChatModel: + """:private:""" + return self.chat_model_class( + **{ + **self.standard_chat_model_params, + **self.chat_model_params, + } + ) + + @pytest.fixture + def my_adder_tool(self) -> BaseTool: + """:private:""" + + @tool + def my_adder_tool(a: int, b: int) -> int: + """Takes two integers, a and b, and returns their sum.""" + return a + b + + return my_adder_tool + @property def has_tool_calling(self) -> bool: """Whether the model supports tool calling.""" - return False + return self.chat_model_class.bind_tools is not BaseChatModel.bind_tools + + @property + def tool_choice_value(self) -> Optional[str]: + """(None or str) To use for tool choice when used in tests.""" + return None + + @property + def has_tool_choice(self) -> bool: + """Whether the model supports forcing tool calling via ``tool_choice``.""" + bind_tools_params = inspect.signature( + self.chat_model_class.bind_tools + ).parameters + return "tool_choice" in bind_tools_params @property def has_structured_output(self) -> bool: """Whether the model supports structured output.""" - return False + return ( + self.chat_model_class.with_structured_output + is not BaseChatModel.with_structured_output + ) or self.has_tool_calling + + @property + def structured_output_kwargs(self) -> dict: + """Additional kwargs for ``with_structured_output``.""" + return {} @property def supports_json_mode(self) -> bool: @@ -139,30 +244,43 @@ class ChatModelV1Tests(BaseStandardTests): """ return False + @property + def supports_multimodal_reasoning(self) -> bool: + """Whether the model can reason about multimodal content.""" + return ( + self.supports_image_content_blocks + and self.supports_reasoning_content_blocks + ) + @property def supports_citations(self) -> bool: """Whether the model supports ``Citation`` annotations.""" return False + @property + def supports_structured_citations(self) -> bool: + """Whether the model supports structured citation generation.""" + return self.supports_citations + @property def supports_web_search_blocks(self) -> bool: """Whether the model supports ``WebSearchCall``/``WebSearchResult`` blocks.""" return False + @property + def supports_code_interpreter(self) -> bool: + """Whether the model supports code interpreter blocks.""" + return False + @property def supports_invalid_tool_calls(self) -> bool: """Whether the model can handle ``InvalidToolCall`` blocks.""" return False @property - def has_tool_choice(self) -> bool: - """Whether the 
model supports forcing tool calling via ``tool_choice``.""" - return False - - @property - def structured_output_kwargs(self) -> dict: - """Additional kwargs for ``with_structured_output``.""" - return {} + def returns_usage_metadata(self) -> bool: + """Whether the model returns usage metadata on invoke and streaming.""" + return True @property def supports_anthropic_inputs(self) -> bool: @@ -170,10 +288,17 @@ class ChatModelV1Tests(BaseStandardTests): return False @property - def returns_usage_metadata(self) -> bool: - """Whether the model returns usage metadata on invoke and streaming.""" - return True + def enable_vcr_tests(self) -> bool: + """Whether to enable VCR tests for the chat model. + .. important:: + See ``enable_vcr_tests`` dropdown :class:`above ` for more + information. + + """ + return False + + # TODO: check this, since there is `reasoning_output` in usage metadata details ? @property def supported_usage_metadata_details( self, @@ -192,67 +317,503 @@ class ChatModelV1Tests(BaseStandardTests): """What usage metadata details are emitted in ``invoke()`` and ``stream()``.""" return {"invoke": [], "stream": []} - @property - def enable_vcr_tests(self) -> bool: - """Whether to enable VCR tests for the chat model.""" - return False - class ChatModelV1UnitTests(ChatModelV1Tests): - """Unit tests for chat models with content blocks v1 support. + """Base class for chat model v1 unit tests. These tests run in isolation without external dependencies. - """ - # Core Method Tests - def test_invoke_basic(self, model: BaseChatModel) -> None: - """Test basic invoke functionality with simple string input.""" - result = model.invoke("Hello, world!") - assert isinstance(result, AIMessage) - assert result.content is not None + Test subclasses must implement the ``chat_model_class`` and + ``chat_model_params`` properties to specify what model to test and its + initialization parameters. - def test_invoke_with_message_list(self, model: BaseChatModel) -> None: - """Test invoke with list of messages.""" - messages = [HumanMessage("Hello, world!")] - result = model.invoke(messages) - assert isinstance(result, AIMessage) - assert result.content is not None + Example: - async def test_ainvoke_basic(self, model: BaseChatModel) -> None: - """Test basic async invoke functionality.""" - result = await model.ainvoke("Hello, world!") - assert isinstance(result, AIMessage) - assert result.content is not None + .. 
code-block:: python - def test_stream_basic(self, model: BaseChatModel) -> None: - """Test basic streaming functionality.""" - chunks = [] - for chunk in model.stream("Hello, world!"): - chunks.append(chunk) - assert hasattr(chunk, "content") + from typing import Type - assert len(chunks) > 0 - # Verify chunks can be aggregated - if chunks: - final_message = chunks[0] - for chunk in chunks[1:]: - final_message = final_message + chunk - assert isinstance(final_message.content, (str, list)) + from langchain_tests.unit_tests import ChatModelV1UnitTests + from my_package.chat_models import MyChatModel - async def test_astream_basic(self, model: BaseChatModel) -> None: - """Test basic async streaming functionality.""" - chunks = [] - async for chunk in model.astream("Hello, world!"): - chunks.append(chunk) - assert hasattr(chunk, "content") - assert len(chunks) > 0 - # Verify chunks can be aggregated - if chunks: - final_message = chunks[0] - for chunk in chunks[1:]: - final_message = final_message + chunk - assert isinstance(final_message.content, (str, list)) + class TestMyChatModelUnit(ChatModelV1UnitTests): + @property + def chat_model_class(self) -> Type[MyChatModel]: + # Return the chat model class to test here + return MyChatModel + + @property + def chat_model_params(self) -> dict: + # Return initialization parameters for the v1 model. + return {"model": "model-001", "temperature": 0} + + .. note:: + API references for individual test methods include troubleshooting tips. + + + Test subclasses **must** implement the following two properties: + + chat_model_class + The chat model class to test, e.g., ``ChatParrotLinkV1``. + + Example: + + .. code-block:: python + + @property + def chat_model_class(self) -> Type[ChatParrotLinkV1]: + return ChatParrotLinkV1 + + chat_model_params + Initialization parameters for the chat model. + + Example: + + .. code-block:: python + + @property + def chat_model_params(self) -> dict: + return {"model": "bird-brain-001", "temperature": 0} + + In addition, test subclasses can control what features are tested (such as tool + calling or multi-modality) by selectively overriding the following properties. + Expand to see details: + + .. dropdown:: has_tool_calling + + TODO + + .. dropdown:: tool_choice_value + + TODO + + .. dropdown:: has_tool_choice + + TODO + + .. dropdown:: has_structured_output + + TODO + + .. dropdown:: structured_output_kwargs + + TODO + + .. dropdown:: supports_json_mode + + TODO + + .. dropdown:: returns_usage_metadata + + TODO + + .. dropdown:: supports_anthropic_inputs + + TODO + + .. dropdown:: supported_usage_metadata_details + + TODO + + .. dropdown:: enable_vcr_tests + + Property controlling whether to enable select tests that rely on + `VCR `_ caching of HTTP calls, such + as benchmarking tests. + + To enable these tests, follow these steps: + + 1. Override the ``enable_vcr_tests`` property to return ``True``: + + .. code-block:: python + + @property + def enable_vcr_tests(self) -> bool: + return True + + 2. Configure VCR to exclude sensitive headers and other information from cassettes. + + .. important:: + VCR will by default record authentication headers and other sensitive + information in cassettes. Read below for how to configure what + information is recorded in cassettes. + + To add configuration to VCR, add a ``conftest.py`` file to the ``tests/`` + directory and implement the ``vcr_config`` fixture there. 
+ + ``langchain-tests`` excludes the headers ``'authorization'``, + ``'x-api-key'``, and ``'api-key'`` from VCR cassettes. To pick up this + configuration, you will need to add ``conftest.py`` as shown below. You can + also exclude additional headers, override the default exclusions, or apply + other customizations to the VCR configuration. See example below: + + .. code-block:: python + :caption: tests/conftest.py + + import pytest + from langchain_tests.conftest import _base_vcr_config as _base_vcr_config + + _EXTRA_HEADERS = [ + # Specify additional headers to redact + ("user-agent", "PLACEHOLDER"), + ] + + + def remove_response_headers(response: dict) -> dict: + # If desired, remove or modify headers in the response. + response["headers"] = {} + return response + + + @pytest.fixture(scope="session") + def vcr_config(_base_vcr_config: dict) -> dict: # noqa: F811 + \"\"\"Extend the default configuration from langchain_tests.\"\"\" + config = _base_vcr_config.copy() + config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS) + config["before_record_response"] = remove_response_headers + + return config + + .. dropdown:: Compressing cassettes + + ``langchain-tests`` includes a custom VCR serializer that compresses + cassettes using gzip. To use it, register the ``yaml.gz`` serializer + to your VCR fixture and enable this serializer in the config. See + example below: + + .. code-block:: python + :caption: tests/conftest.py + + import pytest + from langchain_tests.conftest import CustomPersister, CustomSerializer + from langchain_tests.conftest import _base_vcr_config as _base_vcr_config + from vcr import VCR + + _EXTRA_HEADERS = [ + # Specify additional headers to redact + ("user-agent", "PLACEHOLDER"), + ] + + + def remove_response_headers(response: dict) -> dict: + # If desired, remove or modify headers in the response. + response["headers"] = {} + return response + + + @pytest.fixture(scope="session") + def vcr_config(_base_vcr_config: dict) -> dict: # noqa: F811 + \"\"\"Extend the default configuration from langchain_tests.\"\"\" + config = _base_vcr_config.copy() + config.setdefault("filter_headers", []).extend(_EXTRA_HEADERS) + config["before_record_response"] = remove_response_headers + # New: enable serializer and set file extension + config["serializer"] = "yaml.gz" + config["path_transformer"] = VCR.ensure_suffix(".yaml.gz") + + return config + + + def pytest_recording_configure(config: dict, vcr: VCR) -> None: + vcr.register_persister(CustomPersister()) + vcr.register_serializer("yaml.gz", CustomSerializer()) + + + You can inspect the contents of the compressed cassettes (e.g., to + ensure no sensitive information is recorded) using + + .. code-block:: bash + + gunzip -k /path/to/tests/cassettes/TestClass_test.yaml.gz + + or by using the serializer: + + .. code-block:: python + + from langchain_tests.conftest import CustomPersister, CustomSerializer + + cassette_path = "/path/to/tests/cassettes/TestClass_test.yaml.gz" + requests, responses = CustomPersister().load_cassette(path, CustomSerializer()) + + 3. Run tests to generate VCR cassettes. + + Example: + + .. code-block:: bash + + uv run python -m pytest tests/integration_tests/test_chat_models.py::TestMyModel::test_stream_time + + This will generate a VCR cassette for the test in + ``tests/integration_tests/cassettes/``. + + .. important:: + You should inspect the generated cassette to ensure that it does not + contain sensitive information. 
If it does, you can modify the + ``vcr_config`` fixture to exclude headers or modify the response + before it is recorded. + + You can then commit the cassette to your repository. Subsequent test runs + will use the cassette instead of making HTTP calls. + + Testing initialization from environment variables + Some unit tests may require testing initialization from environment variables. + These tests can be enabled by overriding the ``init_from_env_params`` + property (see below): + + .. dropdown:: init_from_env_params + + This property is used in unit tests to test initialization from + environment variables. It should return a tuple of three dictionaries + that specify the environment variables, additional initialization args, + and expected instance attributes to check. + + Defaults to empty dicts. If not overridden, the test is skipped. + + Example: + + .. code-block:: python + + @property + def init_from_env_params(self) -> Tuple[dict, dict, dict]: + return ( + { + "MY_API_KEY": "api_key", + }, + { + "model": "bird-brain-001", + }, + { + "my_api_key": "api_key", + }, + ) + + """ # noqa: E501 + + @property + def standard_chat_model_params(self) -> dict: + """:private:""" + params = super().standard_chat_model_params + params["api_key"] = "test" + return params + + @property + def init_from_env_params(self) -> tuple[dict, dict, dict]: + """Environment variables, additional initialization args, and expected + instance attributes for testing initialization from environment variables. + + """ + return {}, {}, {} + + # Initialization Tests + def test_init(self) -> None: + """Test model initialization. This should pass for all integrations. + + .. dropdown:: Troubleshooting + + If this test fails, ensure that: + + 1. ``chat_model_params`` is specified and the model can be initialized from those params; + 2. The model accommodates `standard parameters `__ + + """ # noqa: E501 + model = self.chat_model_class( + **{ + **self.standard_chat_model_params, + **self.chat_model_params, + } + ) + assert model is not None + + def test_init_from_env(self) -> None: + """Test initialization from environment variables. Relies on the + ``init_from_env_params`` property. Test is skipped if that property is not + set. + + .. dropdown:: Troubleshooting + + If this test fails, ensure that ``init_from_env_params`` is specified + correctly and that model parameters are properly set from environment + variables during initialization. + + """ + env_params, model_params, expected_attrs = self.init_from_env_params + if not env_params: + pytest.skip("init_from_env_params not specified.") + else: + with mock.patch.dict(os.environ, env_params): + model = self.chat_model_class(**model_params) + assert model is not None + for k, expected in expected_attrs.items(): + actual = getattr(model, k) + if isinstance(actual, SecretStr): + actual = actual.get_secret_value() + assert actual == expected + + def test_init_streaming( + self, + ) -> None: + """Test that model can be initialized with ``streaming=True``. This is for + backward-compatibility purposes. + + .. dropdown:: Troubleshooting + + If this test fails, ensure that the model can be initialized with a + boolean ``streaming`` parameter. 
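+
+            A minimal sketch of how a model class might accept the parameter,
+            assuming a Pydantic-style integration (the class and field shown
+            here are illustrative, not a required implementation):
+
+            .. code-block:: python
+
+                class ChatMyProvider(BaseChatModel):
+                    streaming: bool = False  # accepted for backward compatibility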
+
+        """
+        model = self.chat_model_class(
+            **{
+                **self.standard_chat_model_params,
+                **self.chat_model_params,
+                "streaming": True,
+            }
+        )
+        assert model is not None
+
+    def test_bind_tool_pydantic(
+        self,
+        model: BaseChatModel,
+        my_adder_tool: BaseTool,
+    ) -> None:
+        """Test that the chat model correctly handles Pydantic models that are
+        passed into ``bind_tools``. The test is skipped if the ``has_tool_calling``
+        property on the test class is False.
+
+        .. dropdown:: Troubleshooting
+
+            If this test fails, ensure that the model's ``bind_tools`` method
+            properly handles Pydantic V2 models. ``langchain_core`` implements
+            a utility function that will accommodate most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
+
+            See example implementation of ``bind_tools`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.bind_tools
+
+        """
+        if not self.has_tool_calling:
+            pytest.skip("Model does not support tool calling.")
+
+        def my_adder(a: int, b: int) -> int:
+            """Takes two integers, a and b, and returns their sum."""
+            return a + b
+
+        tools = [my_adder_tool, my_adder]
+
+        for pydantic_model in TEST_PYDANTIC_MODELS:
+            model_schema = (
+                pydantic_model.model_json_schema()
+                if hasattr(pydantic_model, "model_json_schema")
+                else pydantic_model.schema()
+            )
+            tools.extend([pydantic_model, model_schema])
+
+        # Doing a mypy ignore here since some of the tools are pydantic v2
+        # ``BaseModel`` classes, which aren't typed properly yet. This will need
+        # to be fixed so type checking does not become annoying to users.
+        tool_model = model.bind_tools(tools, tool_choice="any")  # type: ignore[arg-type]
+        assert isinstance(tool_model, RunnableBinding)
+
+    @pytest.mark.parametrize("schema", TEST_PYDANTIC_MODELS)
+    def test_with_structured_output(
+        self,
+        model: BaseChatModel,
+        schema: Any,
+    ) -> None:
+        """Test the ``with_structured_output`` method. The test is skipped if the
+        ``has_structured_output`` property on the test class is False.
+
+        .. dropdown:: Troubleshooting
+
+            If this test fails, ensure that the model's ``bind_tools`` method
+            properly handles Pydantic V2 models. ``langchain_core`` implements
+            a utility function that will accommodate most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
+
+            See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
+
+        """
+        if not self.has_structured_output:
+            pytest.skip("Model does not support structured output.")
+
+        assert model.with_structured_output(schema) is not None
+        for method in ["json_schema", "function_calling", "json_mode"]:
+            strict_values = [None, False, True] if method != "json_mode" else [None]
+            for strict in strict_values:
+                assert model.with_structured_output(
+                    schema, method=method, strict=strict
+                )
+
+    def test_standard_params(self, model: BaseChatModel) -> None:
+        """Test that the model properly generates standard parameters. These are
+        used for tracing purposes.
+
+        .. dropdown:: Troubleshooting
+
+            If this test fails, check that the model accommodates `standard
+            parameters <https://python.langchain.com/docs/concepts/chat_models/#standard-parameters>`__.
+
+            Check also that the model class is named according to convention
+            (e.g., ``ChatProviderName``).
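+
+            For reference, a conforming ``model._get_ls_params()`` result looks
+            roughly like the following (the values here are illustrative, not
+            required; only the keys and types are checked):
+
+            .. code-block:: python
+
+                {
+                    "ls_provider": "providername",
+                    "ls_model_name": "bird-brain-001",
+                    "ls_model_type": "chat",
+                    "ls_temperature": 0.7,
+                    "ls_max_tokens": None,
+                    "ls_stop": None,
+                }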
+        """
+
+        class ExpectedParams(BaseModelV1):
+            ls_provider: str
+            ls_model_name: str
+            ls_model_type: Literal["chat"]
+            ls_temperature: Optional[float]
+            ls_max_tokens: Optional[int]
+            ls_stop: Optional[list[str]]
+
+        ls_params = model._get_ls_params()
+        try:
+            ExpectedParams(**ls_params)  # type: ignore[arg-type]
+        except ValidationErrorV1 as e:
+            pytest.fail(f"Validation error: {e}")
+
+        # Test optional params
+        model = self.chat_model_class(
+            max_tokens=10,  # type: ignore[call-arg]
+            stop=["test"],  # type: ignore[call-arg]
+            **self.chat_model_params,
+        )
+        ls_params = model._get_ls_params()
+        try:
+            ExpectedParams(**ls_params)  # type: ignore[arg-type]
+        except ValidationErrorV1 as e:
+            pytest.fail(f"Validation error: {e}")
+
+    def test_serdes(self, model: BaseChatModel, snapshot: SnapshotAssertion) -> None:
+        """Test serialization and deserialization of the model. The test is skipped
+        if the ``is_lc_serializable`` method on the chat model class is not
+        overridden to return ``True``.
+
+        .. dropdown:: Troubleshooting
+
+            If this test fails, check that the ``init_from_env_params`` property is
+            correctly set on the test class.
+
+        """
+        if not self.chat_model_class.is_lc_serializable():
+            pytest.skip("Model is not serializable.")
+        env_params, _model_params, _expected_attrs = self.init_from_env_params
+        with mock.patch.dict(os.environ, env_params):
+            ser = dumpd(model)
+            assert ser == snapshot(name="serialized")
+            assert (
+                model.model_dump()
+                == load(
+                    dumpd(model), valid_namespaces=model.get_lc_namespace()[:1]
+                ).model_dump()
+            )
+
+    @pytest.mark.benchmark
+    def test_init_time(self, benchmark: BenchmarkFixture) -> None:
+        """Test initialization time of the chat model. If this test fails, check
+        that we are not introducing undue overhead in the model's initialization.
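+
+        A common culprit is eager network or client setup in ``__init__``. One
+        possible sketch of deferring it (``MyProviderClient`` is a hypothetical
+        SDK client; this assumes a pydantic v2 model, which supports
+        ``functools.cached_property``):
+
+        .. code-block:: python
+
+            from functools import cached_property
+
+            class ChatParrotLink(BaseChatModel):
+                @cached_property
+                def _client(self) -> MyProviderClient:
+                    # Build the client lazily on first use so __init__ stays cheap.
+                    return MyProviderClient(api_key=self.api_key)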
+ """ + + def _init_in_loop() -> None: + for _ in range(10): + self.chat_model_class(**self.chat_model_params) + + benchmark(_init_in_loop) # Property Tests def test_llm_type_property(self, model: BaseChatModel) -> None: @@ -279,275 +840,10 @@ class ChatModelV1UnitTests(ChatModelV1Tests): assert key in dumped assert dumped[key] == value - # Input Conversion Tests - def test_input_conversion_string(self, model: BaseChatModel) -> None: - """Test that string input is properly converted to messages.""" - # This test verifies the _convert_input method works correctly - result = model.invoke("Test string input") - assert isinstance(result, AIMessage) - assert result.content is not None - - def test_input_conversion_empty_string(self, model: BaseChatModel) -> None: - """Test that empty string input is handled gracefully.""" - result = model.invoke("") - assert isinstance(result, AIMessage) - # Content might be empty or some default response - - def test_input_conversion_message_v1_list(self, model: BaseChatModel) -> None: - """Test that v1 message list input is handled correctly.""" - messages = [HumanMessage("Test message")] - result = model.invoke(messages) - assert isinstance(result, AIMessage) - assert result.content is not None - - # Batch Processing Tests - def test_batch_basic(self, model: BaseChatModel) -> None: - """Test basic batch processing functionality.""" - inputs = ["Hello", "How are you?", "Goodbye"] - results = model.batch(inputs) # type: ignore[arg-type] - - assert isinstance(results, list) - assert len(results) == len(inputs) - for result in results: - assert isinstance(result, AIMessage) - assert result.content is not None - - async def test_abatch_basic(self, model: BaseChatModel) -> None: - """Test basic async batch processing functionality.""" - inputs = ["Hello", "How are you?", "Goodbye"] - results = await model.abatch(inputs) # type: ignore[arg-type] - - assert isinstance(results, list) - assert len(results) == len(inputs) - for result in results: - assert isinstance(result, AIMessage) - assert result.content is not None - - # Content Block Tests - def test_text_content_blocks(self, model: BaseChatModel) -> None: - """Test that the model can handle the ``TextContentBlock`` format. - - This test verifies that the model correctly processes messages containing - ``TextContentBlock`` objects instead of plain strings. 
- """ - if not self.supports_text_content_blocks: - pytest.skip("Model does not support TextContentBlock (rare!)") - - text_block = create_text_block("Hello, world!") - message = HumanMessage(content=[text_block]) - - result = model.invoke([message]) - assert isinstance(result, AIMessage) - assert result.content is not None - - def test_mixed_content_blocks(self, model: BaseChatModel) -> None: - """Test that the model can handle messages with mixed content blocks.""" - if not ( - self.supports_text_content_blocks and self.supports_image_content_blocks - ): - pytest.skip( - "Model doesn't support mixed content blocks (concurrent text and image)" - ) - - content_blocks: list[ContentBlock] = [ - create_text_block("Describe this image:"), - create_image_block( - base64="iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==", - mime_type="image/png", - ), - ] - - message = HumanMessage(content=content_blocks) - result = model.invoke([message]) - - assert isinstance(result, AIMessage) - assert result.content is not None - - def test_reasoning_content_blocks(self, model: BaseChatModel) -> None: - """Test that the model can generate ``ReasoningContentBlock``.""" - if not self.supports_reasoning_content_blocks: - pytest.skip("Model does not support ReasoningContentBlock.") - - message = HumanMessage("Think step by step: What is 2 + 2?") - result = model.invoke([message]) - - assert isinstance(result, AIMessage) - if isinstance(result.content, list): - reasoning_blocks = [ - block - for block in result.content - if isinstance(block, dict) and is_reasoning_block(block) - ] - assert len(reasoning_blocks) > 0 - - def test_citations_in_response(self, model: BaseChatModel) -> None: - """Test that the model can generate ``Citations`` in text blocks.""" - if not self.supports_citations: - pytest.skip("Model does not support citations.") - - message = HumanMessage("Provide information about Python with citations.") - result = model.invoke([message]) - - assert isinstance(result, AIMessage) - if isinstance(result.content, list): - content_list = result.content - text_blocks_with_citations: list[TextContentBlock] = [] - for block in content_list: - if ( - isinstance(block, dict) - and is_text_block(block) - and "annotations" in block - and isinstance(block.get("annotations"), list) - and len(cast(list, block.get("annotations", []))) > 0 - ): - text_block = cast(TextContentBlock, block) - text_blocks_with_citations.append(text_block) - assert len(text_blocks_with_citations) > 0 - - # Verify that at least one known citation type is present - has_citation = any( - "citation" in annotation.get("type", "") - for block in text_blocks_with_citations - for annotation in block.get("annotations", []) - ) or any( - "non_standard_annotation" in annotation.get("type", "") - for block in text_blocks_with_citations - for annotation in block.get("annotations", []) - ) - assert has_citation, "No citations found in text blocks." 
- - def test_non_standard_content_blocks(self, model: BaseChatModel) -> None: - """Test that the model can handle ``NonStandardContentBlock``.""" - if not self.supports_non_standard_blocks: - pytest.skip("Model does not support NonStandardContentBlock.") - - non_standard_block = create_non_standard_block( - { - "custom_field": "custom_value", - "data": [1, 2, 3], - } - ) - - message = HumanMessage(content=[non_standard_block]) - - # Should not raise an error - result = model.invoke([message]) - assert isinstance(result, AIMessage) - - def test_enhanced_tool_calls_with_content_blocks( - self, model: BaseChatModel - ) -> None: - """Test enhanced tool calling with content blocks format.""" - if not self.has_tool_calling: - pytest.skip("Model does not support enhanced tool calls.") - - @tool - def sample_tool(query: str) -> str: - """A sample tool for testing.""" - return f"Result for: {query}" - - model_with_tools = model.bind_tools([sample_tool]) - message = HumanMessage("Use the sample tool with query 'test'") - - result = model_with_tools.invoke([message]) - assert isinstance(result, AIMessage) - - # Check if tool calls are in content blocks format - if isinstance(result.content, list): - tool_call_blocks = [ - block - for block in result.content - if isinstance(block, dict) and is_tool_call_block(block) - ] - assert len(tool_call_blocks) > 0 - # Backwards compat? - # else: - # # Fallback to legacy tool_calls attribute - # assert hasattr(result, "tool_calls") and result.tool_calls - - def test_invalid_tool_call_handling(self, model: BaseChatModel) -> None: - """Test that the model can handle ``InvalidToolCall`` blocks gracefully.""" - if not self.supports_invalid_tool_calls: - pytest.skip("Model does not support InvalidToolCall handling.") - - invalid_tool_call: InvalidToolCall = { - "type": "invalid_tool_call", - "name": "nonexistent_tool", - "args": None, - "id": "invalid_123", - "error": "Tool not found", - } - - # Create a message with invalid tool call in history - ai_message = AIMessage(content=[invalid_tool_call]) - follow_up = HumanMessage("Please try again with a valid approach.") - - result = model.invoke([ai_message, follow_up]) - assert isinstance(result, AIMessage) - assert result.content is not None - # TODO: enhance/double check this - - def test_web_search_content_blocks(self, model: BaseChatModel) -> None: - """Test generating ``WebSearchCall``/``WebSearchResult`` blocks.""" - if not self.supports_web_search_blocks: - pytest.skip("Model does not support web search blocks.") - - message = HumanMessage("Search for recent news about AI developments.") - result = model.invoke([message]) - - assert isinstance(result, AIMessage) - if isinstance(result.content, list): - search_blocks = [ - block - for block in result.content - if isinstance(block, dict) - and block.get("type") in ["web_search_call", "web_search_result"] - ] - assert len(search_blocks) > 0 - - def test_file_content_blocks(self, model: BaseChatModel) -> None: - """Test that the model can handle ``FileContentBlock``.""" - if not self.supports_file_content_blocks: - pytest.skip("Model does not support FileContentBlock.") - - file_block = create_file_block( - base64="SGVsbG8sIHdvcmxkIQ==", # "Hello, world!" - mime_type="text/plain", - ) - - message = HumanMessage(content=[file_block]) - result = model.invoke([message]) - - assert isinstance(result, AIMessage) - assert result.content is not None - # TODO: make more robust? 
- - def test_content_block_streaming(self, model: BaseChatModel) -> None: - """Test that content blocks work correctly with streaming.""" - if not self.supports_content_blocks_v1: - pytest.skip("Model does not support content blocks v1.") - - text_block = create_text_block("Generate a short story.") - message = HumanMessage(content=[text_block]) - - chunks = [] - for chunk in model.stream([message]): - chunks.append(chunk) - assert hasattr(chunk, "content") - - assert len(chunks) > 0 - - # Verify final aggregated message - final_message = chunks[0] - for chunk in chunks[1:]: - final_message = final_message + chunk - - assert isinstance(final_message.content, (str, list)) - def test_content_block_serialization(self, model: BaseChatModel) -> None: """Test that messages with content blocks can be serialized/deserialized.""" if not self.supports_content_blocks_v1: - pytest.skip("Model does not support content blocks v1.") + pytest.skip("Model does not support v1 content blocks.") text_block = create_text_block("Test serialization") message = HumanMessage(content=[text_block]) @@ -560,36 +856,4 @@ class ChatModelV1UnitTests(ChatModelV1Tests): deserialized = load(serialized) assert isinstance(deserialized, HumanMessage) assert deserialized.content == message.content - # TODO: make more robust - - def test_backwards_compatibility(self, model: BaseChatModel) -> None: - """Test that models still work with legacy string content.""" - # This should work regardless of content blocks support - legacy_message = HumanMessage("Hello, world!") - result = model.invoke([legacy_message]) - - assert isinstance(result, AIMessage) - assert result.content is not None - - legacy_message_named_param = HumanMessage(content="Hello, world!") - result_named_param = model.invoke([legacy_message_named_param]) - - assert isinstance(result_named_param, AIMessage) - assert result_named_param.content is not None - - def test_content_block_validation(self, model: BaseChatModel) -> None: - """Test that invalid content blocks are handled gracefully.""" - if not self.supports_content_blocks_v1: - pytest.skip("Model does not support content blocks v1.") - - # Test with invalid content block structure - invalid_block = {"type": "invalid_type", "invalid_field": "value"} - message = HumanMessage(content=[invalid_block]) # type: ignore[list-item] - - # Should handle gracefully (either convert to NonStandardContentBlock or reject) - try: - result = model.invoke([message]) - assert isinstance(result, AIMessage) - except (ValueError, TypeError) as e: - # Acceptable to raise validation errors for truly invalid blocks - assert "invalid" in str(e).lower() or "unknown" in str(e).lower() + # TODO: make more robust? include more fields
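+        # One possible robustness extension (sketch, untested): also round-trip
+        # the payload through JSON, e.g.
+        #   assert load(json.loads(json.dumps(serialized))).content == message.content
+        # and exercise blocks carrying extra fields such as ids or annotations.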