tests[patch]: populate API reference for chat models (#28487)

Populate API reference for test class properties and test methods for chat models. Also: - Make `standard_chat_model_params` private. - `pytest.skip` some tests that were previously passed if features are not supported.
2025-12-15 20:04:15 +00:00 · 2024-12-03 15:24:54 -05:00
parent 50ddf13692
commit ab831ce05c
2 changed files with 1028 additions and 31 deletions
--- a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
+++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
@@ -73,8 +73,46 @@ def _validate_tool_call_message_no_args(message: BaseMessage) -> None:
 class ChatModelIntegrationTests(ChatModelTests):
    """Base class for chat model integration tests.
    Test subclasses must implement the following two properties:
    chat_model_class
        The chat model class to test, e.g., ``ChatParrotLink``.
        Example:
        .. code-block:: python
            @property
            def chat_model_class(self) -> Type[ChatParrotLink]:
                return ChatParrotLink
    chat_model_params
        Initialization parameters for the chat model.
        Example:
        .. code-block:: python
            @property
            def chat_model_params(self) -> dict:
                return {"model": "bird-brain-001", "temperature": 0}
    .. note::
          API references for individual test methods include troubleshooting tips.
    .. note::
        Test subclasses can control what features are tested (such as tool
        calling or multi-modality) by selectively overriding the properties on the
        class. Relevant properties are mentioned in the references for each method.
        See this page for detail on all properties:
        https://python.langchain.com/api_reference/standard_tests/unit_tests/langchain_tests.unit_tests.chat_models.ChatModelTests.html
    """
    @property
    def standard_chat_model_params(self) -> dict:
        """:meta private:"""
        return {}
    def test_invoke(self, model: BaseChatModel) -> None:
@@ -295,8 +333,8 @@ class ChatModelIntegrationTests(ChatModelTests):
        .. dropdown:: Configuration
            By default, this test is run.
-            To disable this feature, set `returns_usage_metadata` to False in your test
+            To disable this feature, set `returns_usage_metadata` to False in your
-            class:
+            test class:
            .. code-block:: python
@@ -435,8 +473,8 @@ class ChatModelIntegrationTests(ChatModelTests):
        .. dropdown:: Configuration
            By default, this test is run.
-            To disable this feature, set `returns_usage_metadata` to False in your test
+            To disable this feature, set `returns_usage_metadata` to False in your
-            class:
+            test class:
            .. code-block:: python
@@ -564,6 +602,28 @@ class ChatModelIntegrationTests(ChatModelTests):
            )
    def test_stop_sequence(self, model: BaseChatModel) -> None:
        """Test that model does not fail when invoked with the ``stop`` parameter,
        which is a standard parameter for stopping generation at a certain token.
        More on standard parameters here: https://python.langchain.com/docs/concepts/chat_models/#standard-parameters
        This should pass for all integrations.
        .. dropdown:: Troubleshooting
            If this test fails, check that the function signature for ``_generate``
            (as well as ``_stream`` and async variants) accepts the ``stop`` parameter:
            .. code-block:: python
                def _generate(
                    self,
                    messages: List[BaseMessage],
                    stop: Optional[List[str]] = None,
                    run_manager: Optional[CallbackManagerForLLMRun] = None,
                    **kwargs: Any,
                ) -> ChatResult:
        """  # noqa: E501
        result = model.invoke("hi", stop=["you"])
        assert isinstance(result, AIMessage)
@@ -574,6 +634,44 @@ class ChatModelIntegrationTests(ChatModelTests):
        assert isinstance(result, AIMessage)
    def test_tool_calling(self, model: BaseChatModel) -> None:
        """Test that the model generates tool calls. This test is skipped if the
        ``has_tool_calling`` property on the test class is set to False.
        This test is optional and should be skipped if the model does not support
        tool calling (see Configuration below).
        .. dropdown:: Configuration
            To disable tool calling tests, set ``has_tool_calling`` to False in your
            test class:
            .. code-block:: python
                class TestMyChatModelIntegration(ChatModelIntegrationTests):
                    @property
                    def has_tool_calling(self) -> bool:
                        return False
        .. dropdown:: Troubleshooting
            If this test fails, check that ``bind_tools`` is implemented to correctly
            translate LangChain tool objects into the appropriate schema for your
            chat model.
            This test may fail if the chat model does not support a ``tool_choice``
            parameter. This parameter can be used to force a tool call. If
            ``tool_choice`` is not supported and the model consistently fails this
            test, you can ``xfail`` the test:
            .. code-block:: python
                @pytest.mark.xfail(reason=("Does not support tool_choice."))
                def test_tool_calling(self, model: BaseChatModel) -> None:
                    super().test_tool_calling(model)
            Otherwise, ensure that the ``tool_choice_value`` property is correctly
            specified on the test class.
        """
        if not self.has_tool_calling:
            pytest.skip("Test requires tool calling.")
        if self.tool_choice_value == "tool_name":
@@ -595,6 +693,44 @@ class ChatModelIntegrationTests(ChatModelTests):
        _validate_tool_call_message(full)
    async def test_tool_calling_async(self, model: BaseChatModel) -> None:
        """Test that the model generates tool calls. This test is skipped if the
        ``has_tool_calling`` property on the test class is set to False.
        This test is optional and should be skipped if the model does not support
        tool calling (see Configuration below).
        .. dropdown:: Configuration
            To disable tool calling tests, set ``has_tool_calling`` to False in your
            test class:
            .. code-block:: python
                class TestMyChatModelIntegration(ChatModelIntegrationTests):
                    @property
                    def has_tool_calling(self) -> bool:
                        return False
        .. dropdown:: Troubleshooting
            If this test fails, check that ``bind_tools`` is implemented to correctly
            translate LangChain tool objects into the appropriate schema for your
            chat model.
            This test may fail if the chat model does not support a ``tool_choice``
            parameter. This parameter can be used to force a tool call. If
            ``tool_choice`` is not supported and the model consistently fails this
            test, you can ``xfail`` the test:
            .. code-block:: python
                @pytest.mark.xfail(reason=("Does not support tool_choice."))
                async def test_tool_calling_async(self, model: BaseChatModel) -> None:
                    await super().test_tool_calling_async(model)
            Otherwise, ensure that the ``tool_choice_value`` property is correctly
            specified on the test class.
        """
        if not self.has_tool_calling:
            pytest.skip("Test requires tool calling.")
        if self.tool_choice_value == "tool_name":
@@ -616,6 +752,46 @@ class ChatModelIntegrationTests(ChatModelTests):
        _validate_tool_call_message(full)
    def test_tool_calling_with_no_arguments(self, model: BaseChatModel) -> None:
        """Test that the model generates tool calls for tools with no arguments.
        This test is skipped if the ``has_tool_calling`` property on the test class
        is set to False.
        This test is optional and should be skipped if the model does not support
        tool calling (see Configuration below).
        .. dropdown:: Configuration
            To disable tool calling tests, set ``has_tool_calling`` to False in your
            test class:
            .. code-block:: python
                class TestMyChatModelIntegration(ChatModelIntegrationTests):
                    @property
                    def has_tool_calling(self) -> bool:
                        return False
        .. dropdown:: Troubleshooting
            If this test fails, check that ``bind_tools`` is implemented to correctly
            translate LangChain tool objects into the appropriate schema for your
            chat model. It should correctly handle the case where a tool has no
            arguments.
            This test may fail if the chat model does not support a ``tool_choice``
            parameter. This parameter can be used to force a tool call. It may also
            fail if a provider does not support this form of tool. In these cases,
            you can ``xfail`` the test:
            .. code-block:: python
                @pytest.mark.xfail(reason=("Does not support tool_choice."))
                def test_tool_calling_with_no_arguments(self, model: BaseChatModel) -> None:
                    super().test_tool_calling_with_no_arguments(model)
            Otherwise, ensure that the ``tool_choice_value`` property is correctly
            specified on the test class.
        """  # noqa: E501
        if not self.has_tool_calling:
            pytest.skip("Test requires tool calling.")
@@ -637,6 +813,45 @@ class ChatModelIntegrationTests(ChatModelTests):
        _validate_tool_call_message_no_args(full)
    def test_bind_runnables_as_tools(self, model: BaseChatModel) -> None:
        """Test that the model generates tool calls for tools that are derived from
        LangChain runnables. This test is skipped if the ``has_tool_calling`` property
        on the test class is set to False.
        This test is optional and should be skipped if the model does not support
        tool calling (see Configuration below).
        .. dropdown:: Configuration
            To disable tool calling tests, set ``has_tool_calling`` to False in your
            test class:
            .. code-block:: python
                class TestMyChatModelIntegration(ChatModelIntegrationTests):
                    @property
                    def has_tool_calling(self) -> bool:
                        return False
        .. dropdown:: Troubleshooting
            If this test fails, check that ``bind_tools`` is implemented to correctly
            translate LangChain tool objects into the appropriate schema for your
            chat model.
            This test may fail if the chat model does not support a ``tool_choice``
            parameter. This parameter can be used to force a tool call. If
            ``tool_choice`` is not supported and the model consistently fails this
            test, you can ``xfail`` the test:
            .. code-block:: python
                @pytest.mark.xfail(reason=("Does not support tool_choice."))
                def test_bind_runnables_as_tools(self, model: BaseChatModel) -> None:
                    super().test_bind_runnables_as_tools(model)
            Otherwise, ensure that the ``tool_choice_value`` property is correctly
            specified on the test class.
        """
        if not self.has_tool_calling:
            pytest.skip("Test requires tool calling.")
@@ -663,7 +878,32 @@ class ChatModelIntegrationTests(ChatModelTests):
        assert tool_call["type"] == "tool_call"
    def test_structured_output(self, model: BaseChatModel) -> None:
-        """Test to verify structured output with a Pydantic model."""
+        """Test to verify structured output is generated both on invoke and stream.
        This test is optional and should be skipped if the model does not support
        tool calling (see Configuration below).
        .. dropdown:: Configuration
            To disable tool calling tests, set ``has_tool_calling`` to False in your
            test class:
            .. code-block:: python
                class TestMyChatModelIntegration(ChatModelIntegrationTests):
                    @property
                    def has_tool_calling(self) -> bool:
                        return False
        .. dropdown:: Troubleshooting
            If this test fails, ensure that the model's ``bind_tools`` method
            properly handles both JSON Schema and Pydantic V2 models.
            ``langchain_core`` implements a utility function that will accommodate
            most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
            See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
        """  # noqa: E501
        if not self.has_tool_calling:
            pytest.skip("Test requires tool calling.")
@@ -690,7 +930,32 @@ class ChatModelIntegrationTests(ChatModelTests):
        assert set(chunk.keys()) == {"setup", "punchline"}
    async def test_structured_output_async(self, model: BaseChatModel) -> None:
-        """Test to verify structured output with a Pydantic model."""
+        """Test to verify structured output is generated both on invoke and stream.
        This test is optional and should be skipped if the model does not support
        tool calling (see Configuration below).
        .. dropdown:: Configuration
            To disable tool calling tests, set ``has_tool_calling`` to False in your
            test class:
            .. code-block:: python
                class TestMyChatModelIntegration(ChatModelIntegrationTests):
                    @property
                    def has_tool_calling(self) -> bool:
                        return False
        .. dropdown:: Troubleshooting
            If this test fails, ensure that the model's ``bind_tools`` method
            properly handles both JSON Schema and Pydantic V2 models.
            ``langchain_core`` implements a utility function that will accommodate
            most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
            See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
        """  # noqa: E501
        if not self.has_tool_calling:
            pytest.skip("Test requires tool calling.")
@@ -718,9 +983,34 @@ class ChatModelIntegrationTests(ChatModelTests):
    @pytest.mark.skipif(PYDANTIC_MAJOR_VERSION != 2, reason="Test requires pydantic 2.")
    def test_structured_output_pydantic_2_v1(self, model: BaseChatModel) -> None:
-        """Test to verify compatibility with pydantic.v1.BaseModel.
+        """Test to verify we can generate structured output using
        pydantic.v1.BaseModel.
        pydantic.v1.BaseModel is available in the pydantic 2 package.
        This test is optional and should be skipped if the model does not support
        tool calling (see Configuration below).
        .. dropdown:: Configuration
            To disable tool calling tests, set ``has_tool_calling`` to False in your
            test class:
            .. code-block:: python
                class TestMyChatModelIntegration(ChatModelIntegrationTests):
                    @property
                    def has_tool_calling(self) -> bool:
                        return False
        .. dropdown:: Troubleshooting
            If this test fails, ensure that the model's ``bind_tools`` method
            properly handles both JSON Schema and Pydantic V1 models.
            ``langchain_core`` implements a utility function that will accommodate
            most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
            See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
        """
        if not self.has_tool_calling:
            pytest.skip("Test requires tool calling.")
@@ -751,7 +1041,33 @@ class ChatModelIntegrationTests(ChatModelTests):
        assert set(chunk.keys()) == {"setup", "punchline"}
    def test_structured_output_optional_param(self, model: BaseChatModel) -> None:
-        """Test to verify structured output with an optional param."""
+        """Test to verify we can generate structured output that includes optional
        parameters.
        This test is optional and should be skipped if the model does not support
        tool calling (see Configuration below).
        .. dropdown:: Configuration
            To disable tool calling tests, set ``has_tool_calling`` to False in your
            test class:
            .. code-block:: python
                class TestMyChatModelIntegration(ChatModelIntegrationTests):
                    @property
                    def has_tool_calling(self) -> bool:
                        return False
        .. dropdown:: Troubleshooting
            If this test fails, ensure that the model's ``bind_tools`` method
            properly handles Pydantic V2 models with optional parameters.
            ``langchain_core`` implements a utility function that will accommodate
            most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
            See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
        """
        if not self.has_tool_calling:
            pytest.skip("Test requires tool calling.")
@@ -773,10 +1089,42 @@ class ChatModelIntegrationTests(ChatModelTests):
        assert isinstance(joke_result, Joke)
    def test_tool_message_histories_string_content(self, model: BaseChatModel) -> None:
-        """
+        """Test that message histories are compatible with string tool contents
-        Test that message histories are compatible with string tool contents
+        (e.g. OpenAI format). If a model passes this test, it should be compatible
-        (e.g. OpenAI).
+        with messages generated from providers following OpenAI format.
-        """
+
        This test should be skipped if the model does not support tool calling
        (see Configuration below).
        .. dropdown:: Configuration
            To disable tool calling tests, set ``has_tool_calling`` to False in your
            test class:
            .. code-block:: python
                class TestMyChatModelIntegration(ChatModelIntegrationTests):
                    @property
                    def has_tool_calling(self) -> bool:
                        return False
        .. dropdown:: Troubleshooting
            If this test fails, check that:
            1. The model can correctly handle message histories that include AIMessage objects with ``""`` content.
            2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
            3. The model can correctly handle ToolMessage objects with string content and arbitrary string values for ``tool_call_id``.
            You can ``xfail`` the test if tool calling is implemented but this format
            is not supported.
            .. code-block:: python
                @pytest.mark.xfail(reason=("Not implemented."))
                def test_tool_message_histories_string_content(self, model: BaseChatModel) -> None:
                    super().test_tool_message_histories_string_content(model)
        """  # noqa: E501
        if not self.has_tool_calling:
            pytest.skip("Test requires tool calling.")
        model_with_tools = model.bind_tools([my_adder_tool])
@@ -810,10 +1158,56 @@ class ChatModelIntegrationTests(ChatModelTests):
        self,
        model: BaseChatModel,
    ) -> None:
-        """
+        """Test that message histories are compatible with list tool contents
-        Test that message histories are compatible with list tool contents
+        (e.g. Anthropic format).
-        (e.g. Anthropic).
+
-        """
+        These message histories will include AIMessage objects with "tool use" and
        content blocks, e.g.,
        .. code-block:: python
            [
                {"type": "text", "text": "Hmm let me think about that"},
                {
                    "type": "tool_use",
                    "input": {"fav_color": "green"},
                    "id": "foo",
                    "name": "color_picker",
                },
            ]
        This test should be skipped if the model does not support tool calling
        (see Configuration below).
        .. dropdown:: Configuration
            To disable tool calling tests, set ``has_tool_calling`` to False in your
            test class:
            .. code-block:: python
                class TestMyChatModelIntegration(ChatModelIntegrationTests):
                    @property
                    def has_tool_calling(self) -> bool:
                        return False
        .. dropdown:: Troubleshooting
            If this test fails, check that:
            1. The model can correctly handle message histories that include AIMessage objects with list content.
            2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
            3. The model can correctly handle ToolMessage objects with string content and arbitrary string values for ``tool_call_id``.
            You can ``xfail`` the test if tool calling is implemented but this format
            is not supported.
            .. code-block:: python
                @pytest.mark.xfail(reason=("Not implemented."))
                def test_tool_message_histories_list_content(self, model: BaseChatModel) -> None:
                    super().test_tool_message_histories_list_content(model)
        """  # noqa: E501
        if not self.has_tool_calling:
            pytest.skip("Test requires tool calling.")
        model_with_tools = model.bind_tools([my_adder_tool])
@@ -852,9 +1246,48 @@ class ChatModelIntegrationTests(ChatModelTests):
        assert isinstance(result_list_content, AIMessage)
    def test_structured_few_shot_examples(self, model: BaseChatModel) -> None:
-        """
+        """Test that the model can process few-shot examples with tool calls.
-        Test that model can process few-shot examples with tool calls.
+
-        """
+        These are represented as a sequence of messages of the following form:
        - ``HumanMessage`` with string content;
        - ``AIMessage`` with the ``tool_calls`` attribute populated;
        - ``ToolMessage`` with string content;
        - ``AIMessage`` with string content (an answer);
        - ``HuamnMessage`` with string content (a follow-up question).
        This test should be skipped if the model does not support tool calling
        (see Configuration below).
        .. dropdown:: Configuration
            To disable tool calling tests, set ``has_tool_calling`` to False in your
            test class:
            .. code-block:: python
                class TestMyChatModelIntegration(ChatModelIntegrationTests):
                    @property
                    def has_tool_calling(self) -> bool:
                        return False
        .. dropdown:: Troubleshooting
            This test uses a utility function in ``langchain_core`` to generate a
            sequence of messages representing "few-shot" examples: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.tool_example_to_messages.html
            If this test fails, check that the model can correctly handle this
            sequence of messages.
            You can ``xfail`` the test if tool calling is implemented but this format
            is not supported.
            .. code-block:: python
                @pytest.mark.xfail(reason=("Not implemented."))
                def test_structured_few_shot_examples(self, model: BaseChatModel) -> None:
                    super().test_structured_few_shot_examples(model)
        """  # noqa: E501
        if not self.has_tool_calling:
            pytest.skip("Test requires tool calling.")
        model_with_tools = model.bind_tools([my_adder_tool], tool_choice="any")
@@ -874,6 +1307,42 @@ class ChatModelIntegrationTests(ChatModelTests):
        assert isinstance(result, AIMessage)
    def test_image_inputs(self, model: BaseChatModel) -> None:
        """Test that the model can process image inputs.
        This test should be skipped (see Configuration below) if the model does not
        support image inputs These will take the form of messages with OpenAI-style
        image content blocks:
        .. code-block:: python
            [
                {"type": "text", "text": "describe the weather in this image"},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
                },
            ]
        See https://python.langchain.com/docs/concepts/multimodality/
        .. dropdown:: Configuration
            To disable this test, set ``supports_image_inputs`` to False in your
            test class:
            .. code-block:: python
                class TestMyChatModelIntegration(ChatModelIntegrationTests):
                    @property
                    def supports_image_inputs(self) -> bool:
                        return False
        .. dropdown:: Troubleshooting
            If this test fails, check that the model can correctly handle messages
            with image content blocks in OpenAI format, including base64-encoded
            images. Otherwise, set the ``supports_image_inputs`` property to False.
        """
        if not self.supports_image_inputs:
            return
        image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
@@ -890,6 +1359,46 @@ class ChatModelIntegrationTests(ChatModelTests):
        model.invoke([message])
    def test_image_tool_message(self, model: BaseChatModel) -> None:
        """Test that the model can process ToolMessages with image inputs.
        This test should be skipped if the model does not support messages of the
        form:
        .. code-block:: python
            ToolMessage(
                content=[
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
                    },
                ],
                tool_call_id="1",
                name="random_image",
            )
        This test can be skipped by setting the ``supports_image_tool_message`` property
        to False (see Configuration below).
        .. dropdown:: Configuration
            To disable this test, set ``supports_image_tool_message`` to False in your
            test class:
            .. code-block:: python
                class TestMyChatModelIntegration(ChatModelIntegrationTests):
                    @property
                    def supports_image_tool_message(self) -> bool:
                        return False
        .. dropdown:: Troubleshooting
            If this test fails, check that the model can correctly handle messages
            with image content blocks in ToolMessages, including base64-encoded
            images. Otherwise, set the ``supports_image_tool_message`` property to
            False.
        """
        if not self.supports_image_tool_message:
            return
        image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
@@ -921,6 +1430,72 @@ class ChatModelIntegrationTests(ChatModelTests):
        model.bind_tools([random_image]).invoke(messages)
    def test_anthropic_inputs(self, model: BaseChatModel) -> None:
        """Test that model can process Anthropic-style message histories.
        These message histories will include ``AIMessage`` objects with ``tool_use``
        content blocks, e.g.,
        .. code-block:: python
            AIMessage(
                [
                    {"type": "text", "text": "Hmm let me think about that"},
                    {
                        "type": "tool_use",
                        "input": {"fav_color": "green"},
                        "id": "foo",
                        "name": "color_picker",
                    },
                ]
            )
        as well as ``HumanMessage`` objects containing ``tool_result`` content blocks:
        .. code-block:: python
            HumanMessage(
                [
                    {
                        "type": "tool_result",
                        "tool_use_id": "foo",
                        "content": [
                            {
                                "type": "text",
                                "text": "green is a great pick! that's my sister's favorite color",  # noqa: E501
                            }
                        ],
                        "is_error": False,
                    },
                    {"type": "text", "text": "what's my sister's favorite color"},
                ]
            )
        This test should be skipped if the model does not support messages of this
        form (or doesn't support tool calling generally). See Configuration below.
        .. dropdown:: Configuration
            To disable this test, set ``supports_anthropic_inputs`` to False in your
            test class:
            .. code-block:: python
                class TestMyChatModelIntegration(ChatModelIntegrationTests):
                    @property
                    def supports_anthropic_inputs(self) -> bool:
                        return False
        .. dropdown:: Troubleshooting
            If this test fails, check that:
            1. The model can correctly handle message histories that include message objects with list content.
            2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
            3. HumanMessages with "tool_result" content blocks are correctly handled.
            Otherwise, if Anthropic tool call and result formats are not supported,
            set the ``supports_anthropic_inputs`` property to False.
        """  # noqa: E501
        if not self.supports_anthropic_inputs:
            return
@@ -982,7 +1557,45 @@ class ChatModelIntegrationTests(ChatModelTests):
        model.bind_tools([color_picker]).invoke(messages)
    def test_tool_message_error_status(self, model: BaseChatModel) -> None:
-        """Test that ToolMessage with status='error' can be handled."""
+        """Test that ToolMessage with ``status="error"`` can be handled.
        These messages may take the form:
        .. code-block:: python
            ToolMessage(
                "Error: Missing required argument 'b'.",
                name="my_adder_tool",
                tool_call_id="abc123",
                status="error",
            )
        If possible, the ``status`` field should be parsed and passed appropriately
        to the model.
        This test is optional and should be skipped if the model does not support
        tool calling (see Configuration below).
        .. dropdown:: Configuration
            To disable tool calling tests, set ``has_tool_calling`` to False in your
            test class:
            .. code-block:: python
                class TestMyChatModelIntegration(ChatModelIntegrationTests):
                    @property
                    def has_tool_calling(self) -> bool:
                        return False
        .. dropdown:: Troubleshooting
            If this test fails, check that the ``status`` field on ``ToolMessage``
            objects is either ignored or passed to the model appropriately.
            Otherwise, ensure that the ``tool_choice_value`` property is correctly
            specified on the test class.
        """
        if not self.has_tool_calling:
            pytest.skip("Test requires tool calling.")
        model_with_tools = model.bind_tools([my_adder_tool])
@@ -1010,6 +1623,22 @@ class ChatModelIntegrationTests(ChatModelTests):
        assert isinstance(result, AIMessage)
    def test_message_with_name(self, model: BaseChatModel) -> None:
        """Test that HumanMessage with values for the ``name`` field can be handled.
        These messages may take the form:
        .. code-block:: python
            HumanMessage("hello", name="example_user")
        If possible, the ``name`` field should be parsed and passed appropriately
        to the model. Otherwise, it should be ignored.
        .. dropdown:: Troubleshooting
            If this test fails, check that the ``name`` field on ``HumanMessage``
            objects is either ignored or passed to the model appropriately.
        """
        result = model.invoke([HumanMessage("hello", name="example_user")])
        assert result is not None
        assert isinstance(result, AIMessage)
--- a/libs/standard-tests/langchain_tests/unit_tests/chat_models.py
+++ b/libs/standard-tests/langchain_tests/unit_tests/chat_models.py
@@ -1,4 +1,6 @@
-"""Unit tests for chat models."""
+"""
 :autodoc-options: autoproperty
 """
 import os
 from abc import abstractmethod
@@ -77,16 +79,218 @@ def my_adder(a: int, b: int) -> int:
 class ChatModelTests(BaseStandardTests):
    """Base class for chat model tests.
    Test subclasses must implement the following two properties:
    chat_model_class
        The chat model class to test, e.g., ``ChatParrotLink``.
        Example:
        .. code-block:: python
            @property
            def chat_model_class(self) -> Type[ChatParrotLink]:
                return ChatParrotLink
    chat_model_params
        Initialization parameters for the chat model.
        Example:
        .. code-block:: python
            @property
            def chat_model_params(self) -> dict:
                return {"model": "bird-brain-001", "temperature": 0}
    In addition, test subclasses can control what features are tested (such as tool
    calling or multi-modality) by selectively overriding the following properties.
    Expand to see details:
    .. dropdown:: has_tool_calling
        Boolean property indicating whether the chat model supports tool calling.
        By default, this is determined by whether the chat model's `bind_tools` method
        is overridden. It typically does not need to be overridden on the test class.
    .. dropdown:: tool_choice_value
        Value to use for tool choice when used in tests.
        Some tests for tool calling features attempt to force tool calling via a
        `tool_choice` parameter. A common value for this parameter is "any". Defaults
        to `None`.
        Note: if the value is set to "tool_name", the name of the tool used in each
        test will be set as the value for `tool_choice`.
        Example:
        .. code-block:: python
            @property
            def tool_choice_value(self) -> Optional[str]:
                return "any"
    .. dropdown:: has_structured_output
        Boolean property indicating whether the chat model supports structured
        output.
        By default, this is determined by whether the chat model's
        `with_structured_output` method is overridden. If the base implementation is
        intended to be used, this method should be overridden.
        See: https://python.langchain.com/docs/concepts/structured_outputs/
        Example:
        .. code-block:: python
            @property
            def has_structured_output(self) -> bool:
                return True
    .. dropdown:: supports_image_inputs
        Boolean property indicating whether the chat model supports image inputs.
        Defaults to ``False``.
        If set to ``True``, the chat model will be tested using content blocks of the
        form
        .. code-block:: python
            [
                {"type": "text", "text": "describe the weather in this image"},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
                },
            ]
        See https://python.langchain.com/docs/concepts/multimodality/
        Example:
        .. code-block:: python
            @property
            def supports_image_inputs(self) -> bool:
                return True
    .. dropdown:: supports_video_inputs
        Boolean property indicating whether the chat model supports image inputs.
        Defaults to ``False``. No current tests are written for this feature.
    .. dropdown:: returns_usage_metadata
        Boolean property indicating whether the chat model returns usage metadata
        on invoke and streaming responses.
        ``usage_metadata`` is an optional dict attribute on AIMessages that track input
        and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
        Example:
        .. code-block:: python
            @property
            def returns_usage_metadata(self) -> bool:
                return False
    .. dropdown:: supports_anthropic_inputs
        Boolean property indicating whether the chat model supports Anthropic-style
        inputs.
        These inputs might feature "tool use" and "tool result" content blocks, e.g.,
        .. code-block:: python
            [
                {"type": "text", "text": "Hmm let me think about that"},
                {
                    "type": "tool_use",
                    "input": {"fav_color": "green"},
                    "id": "foo",
                    "name": "color_picker",
                },
            ]
        If set to ``True``, the chat model will be tested using content blocks of this
        form.
        Example:
        .. code-block:: python
            @property
            def supports_anthropic_inputs(self) -> bool:
                return False
    .. dropdown:: supports_image_tool_message
        Boolean property indicating whether the chat model supports ToolMessages
        that include image content, e.g.,
        .. code-block:: python
            ToolMessage(
                content=[
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
                    },
                ],
                tool_call_id="1",
                name="random_image",
            )
        If set to ``True``, the chat model will be tested with message sequences that
        include ToolMessages of this form.
        Example:
        .. code-block:: python
            @property
            def supports_image_tool_message(self) -> bool:
                return False
    .. dropdown:: supported_usage_metadata_details
        Property controlling what usage metadata details are emitted in both invoke
        and stream.
        ``usage_metadata`` is an optional dict attribute on AIMessages that track input
        and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
        It includes optional keys ``input_token_details`` and ``output_token_details``
        that can track usage details associated with special types of tokens, such as
        cached, audio, or reasoning.
        Only needs to be overridden if these details are supplied.
    """  # noqa: E501
    @property
    @abstractmethod
-    def chat_model_class(self) -> Type[BaseChatModel]: ...
+    def chat_model_class(self) -> Type[BaseChatModel]:
        """The chat model class to test, e.g., `ChatParrotLink`."""
        ...
    @property
    def chat_model_params(self) -> dict:
        """Initialization parameters for the chat mobdel."""
        return {}
    @property
    def standard_chat_model_params(self) -> dict:
        """:meta private:"""
        return {
            "temperature": 0,
            "max_tokens": 100,
@@ -97,12 +301,15 @@ class ChatModelTests(BaseStandardTests):
    @pytest.fixture
    def model(self) -> BaseChatModel:
        """Fixture that returns an instance of the chat model. Should not be
        overridden."""
        return self.chat_model_class(
            **{**self.standard_chat_model_params, **self.chat_model_params}
        )
    @property
    def has_tool_calling(self) -> bool:
        """Boolean property indicating whether the model supports tool calling."""
        return self.chat_model_class.bind_tools is not BaseChatModel.bind_tools
    @property
@@ -112,6 +319,8 @@ class ChatModelTests(BaseStandardTests):
    @property
    def has_structured_output(self) -> bool:
        """Boolean property indicating whether the chat model supports structured
        output."""
        return (
            self.chat_model_class.with_structured_output
            is not BaseChatModel.with_structured_output
@@ -119,22 +328,32 @@ class ChatModelTests(BaseStandardTests):
    @property
    def supports_image_inputs(self) -> bool:
        """Boolean property indicating whether the chat model supports image inputs.
        Defaults to ``False``."""
        return False
    @property
    def supports_video_inputs(self) -> bool:
        """Boolean property indicating whether the chat model supports image inputs.
        Defaults to ``False``. No current tests are written for this feature."""
        return False
    @property
    def returns_usage_metadata(self) -> bool:
        """Boolean property indicating whether the chat model returns usage metadata
        on invoke and streaming responses."""
        return True
    @property
    def supports_anthropic_inputs(self) -> bool:
        """Boolean property indicating whether the chat model supports Anthropic-style
        inputs."""
        return False
    @property
    def supports_image_tool_message(self) -> bool:
        """Boolean property indicating whether the chat model supports ToolMessages
        that include image content."""
        return False
    @property
@@ -152,31 +371,127 @@ class ChatModelTests(BaseStandardTests):
            ]
        ],
    ]:
        """Property controlling what usage metadata details are emitted in both invoke
        and stream. Only needs to be overridden if these details are returned by the
        model."""
        return {"invoke": [], "stream": []}
 class ChatModelUnitTests(ChatModelTests):
    """Base class for chat model unit tests.
    Test subclasses must implement the following two properties:
    chat_model_class
        The chat model class to test, e.g., ``ChatParrotLink``.
        Example:
        .. code-block:: python
            @property
            def chat_model_class(self) -> Type[ChatParrotLink]:
                return ChatParrotLink
    chat_model_params
        Initialization parameters for the chat model.
        Example:
        .. code-block:: python
            @property
            def chat_model_params(self) -> dict:
                return {"model": "bird-brain-001", "temperature": 0}
    .. note::
          API references for individual test methods include troubleshooting tips.
    .. note::
        Test subclasses can control what features are tested (such as tool
        calling or multi-modality) by selectively overriding the properties on the
        class. Relevant properties are mentioned in the references for each method.
        See this page for detail on all properties:
        https://python.langchain.com/api_reference/standard_tests/unit_tests/langchain_tests.unit_tests.chat_models.ChatModelTests.html
    Testing initialization from environment variables
        Some unit tests may require testing initialization from environment variables.
        These tests can be enabled by overriding the ``init_from_env_params``
        property (see below):
        .. dropdown:: init_from_env_params
            This property is used in unit tests to test initialization from
            environment variables. It should return a tuple of three dictionaries
            that specify the environment variables, additional initialization args,
            and expected instance attributes to check.
            Defaults to empty dicts. If not overridden, the test is skipped.
            Example:
            .. code-block:: python
                @property
                def init_from_env_params(self) -> Tuple[dict, dict, dict]:
                    return (
                        {
                            "MY_API_KEY": "api_key",
                        },
                        {
                            "model": "bird-brain-001",
                        },
                        {
                            "my_api_key": "api_key",
                        },
                    )
    """  # noqa: E501
    @property
    def standard_chat_model_params(self) -> dict:
        """:meta private:"""
        params = super().standard_chat_model_params
        params["api_key"] = "test"
        return params
    @property
    def init_from_env_params(self) -> Tuple[dict, dict, dict]:
-        """Return env vars, init args, and expected instance attrs for initializing
+        """This property is used in unit tests to test initialization from environment
-        from env vars."""
+        variables. It should return a tuple of three dictionaries that specify the
        environment variables, additional initialization args, and expected instance
        attributes to check."""
        return {}, {}, {}
    def test_init(self) -> None:
        """Test model initialization. This should pass for all integrations.
        .. dropdown:: Troubleshooting
            If this test fails, ensure that:
            1. ``chat_model_params`` is specified and the model can be initialized from those params;
            2. The model accommodates standard parameters: https://python.langchain.com/docs/concepts/chat_models/#standard-parameters
        """  # noqa: E501
        model = self.chat_model_class(
            **{**self.standard_chat_model_params, **self.chat_model_params}
        )
        assert model is not None
    def test_init_from_env(self) -> None:
        """Test initialization from environment variables. Relies on the
        ``init_from_env_params`` property. Test is skipped if that property is not
        set.
        .. dropdown:: Troubleshooting
            If this test fails, ensure that ``init_from_env_params`` is specified
            correctly.
        """
        env_params, model_params, expected_attrs = self.init_from_env_params
-        if env_params:
+        if not env_params:
            pytest.skip("init_from_env_params not specified.")
        else:
            with mock.patch.dict(os.environ, env_params):
                model = self.chat_model_class(**model_params)
            assert model is not None
@@ -189,6 +504,14 @@ class ChatModelUnitTests(ChatModelTests):
    def test_init_streaming(
        self,
    ) -> None:
        """Test that model can be initialized with ``streaming=True``. This is for
        backward-compatibility purposes.
        .. dropdown:: Troubleshooting
            If this test fails, ensure that the model can be initialized with a
            boolean ``streaming`` parameter.
        """
        model = self.chat_model_class(
            **{
                **self.standard_chat_model_params,
@@ -202,6 +525,18 @@ class ChatModelUnitTests(ChatModelTests):
        self,
        model: BaseChatModel,
    ) -> None:
        """Test that chat model correctly handles Pydantic models that are passed
        into ``bind_tools``. Test is skipped if the ``has_tool_calling`` property
        on the test class is False.
        .. dropdown:: Troubleshooting
            If this test fails, ensure that the model's ``bind_tools`` method
            properly handles Pydantic V2 models. ``langchain_core`` implements
            a utility function that will accommodate most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
            See example implementation of ``bind_tools`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.bind_tools
        """  # noqa: E501
        if not self.has_tool_calling:
            return
@@ -227,12 +562,35 @@ class ChatModelUnitTests(ChatModelTests):
        model: BaseChatModel,
        schema: Any,
    ) -> None:
        """Test ``with_structured_output`` method. Test is skipped if the
        ``has_structured_output`` property on the test class is False.
        .. dropdown:: Troubleshooting
            If this test fails, ensure that the model's ``bind_tools`` method
            properly handles Pydantic V2 models. ``langchain_core`` implements
            a utility function that will accommodate most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
            See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
        """  # noqa: E501
        if not self.has_structured_output:
            return
        assert model.with_structured_output(schema) is not None
    def test_standard_params(self, model: BaseChatModel) -> None:
        """Test that model properly generates standard parameters. These are used
        for tracing purposes.
        .. dropdown:: Troubleshooting
            If this test fails, check that the model accommodates standard parameters:
            https://python.langchain.com/docs/concepts/chat_models/#standard-parameters
            Check also that the model class is named according to convention
            (e.g., ``ChatProviderName``).
        """
        class ExpectedParams(BaseModelV1):
            ls_provider: str
            ls_model_name: str
@@ -260,10 +618,20 @@ class ChatModelUnitTests(ChatModelTests):
            pytest.fail(f"Validation error: {e}")
    def test_serdes(self, model: BaseChatModel, snapshot: SnapshotAssertion) -> None:
        """Test serialization and deserialization of the model. Test is skipped if the
        ``is_lc_serializable`` property on the chat model class is not overwritten
        to return ``True``.
        .. dropdown:: Troubleshooting
            If this test fails, check that the ``init_from_env_params`` property is
            correctly set on the test class.
        """
        if not self.chat_model_class.is_lc_serializable():
-            return
+            pytest.skip("Model is not serializable.")
-        env_params, model_params, expected_attrs = self.init_from_env_params
+        else:
-        with mock.patch.dict(os.environ, env_params):
+            env_params, model_params, expected_attrs = self.init_from_env_params
-            ser = dumpd(model)
+            with mock.patch.dict(os.environ, env_params):
-            assert ser == snapshot(name="serialized")
+                ser = dumpd(model)
-            assert model.dict() == load(dumpd(model)).dict()
+                assert ser == snapshot(name="serialized")
                assert model.dict() == load(dumpd(model)).dict()