feat: port various nit changes from wip-v0.4 (#32506)

Lots of work that wasn't directly related to core improvements, messages, or testing functionality.
Mason Daugherty
2025-08-11 15:09:08 -04:00
committed by GitHub
parent 7db9e60601
commit ee4c2510eb
63 changed files with 2213 additions and 2862 deletions


@@ -14,62 +14,68 @@ also break your CI if we introduce tests that your integration doesn't pass.
Pip:

```bash
pip install -U langchain-tests
```

Poetry:

```bash
poetry add langchain-tests
```

uv:

```bash
uv add langchain-tests
```
## Usage
-To add standard tests to an integration package's e.g. ChatModel, you need to create
+To add standard tests to an integration package (e.g., for a ChatModel), you need to create
-1. A unit test class that inherits from ChatModelUnitTests
-2. An integration test class that inherits from ChatModelIntegrationTests
+1. A unit test class that inherits from `ChatModelUnitTests`
+2. An integration test class that inherits from `ChatModelIntegrationTests`
`tests/unit_tests/test_standard.py`:
```python
"""Standard LangChain interface tests"""

from typing import Type

import pytest
from langchain_core.language_models import BaseChatModel
from langchain_tests.unit_tests import ChatModelUnitTests

from langchain_parrot_chain import ChatParrotChain


class TestParrotChainStandard(ChatModelUnitTests):
    @pytest.fixture
    def chat_model_class(self) -> Type[BaseChatModel]:
        return ChatParrotChain
```
`tests/integration_tests/test_standard.py`:
```python
"""Standard LangChain interface tests"""

from typing import Type

import pytest
from langchain_core.language_models import BaseChatModel
from langchain_tests.integration_tests import ChatModelIntegrationTests

from langchain_parrot_chain import ChatParrotChain


class TestParrotChainStandard(ChatModelIntegrationTests):
    @pytest.fixture
    def chat_model_class(self) -> Type[BaseChatModel]:
        return ChatParrotChain
```
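The test base classes also expose optional configuration properties (e.g., `chat_model_params` and `returns_usage_metadata`, both described in the test-class docstrings). A minimal sketch of overriding them; the parameter values are placeholders:

```python
"""Illustrative sketch: configuring optional standard-test properties."""

from typing import Type

import pytest
from langchain_core.language_models import BaseChatModel
from langchain_tests.integration_tests import ChatModelIntegrationTests

from langchain_parrot_chain import ChatParrotChain


class TestParrotChainStandard(ChatModelIntegrationTests):
    @pytest.fixture
    def chat_model_class(self) -> Type[BaseChatModel]:
        return ChatParrotChain

    @property
    def chat_model_params(self) -> dict:
        # Constructor kwargs for the model under test (values are placeholders).
        return {"model": "parrot-001", "temperature": 0}

    @property
    def returns_usage_metadata(self) -> bool:
        # Disable usage-metadata assertions if the API reports no token counts.
        return False
```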
## Reference


@@ -9,7 +9,7 @@ class BaseStandardTests(ABC):
:private:
"""
-# find path to standard test implementations
+# Find path to standard test implementations
comparison_class = None
def explore_bases(cls: type) -> None:


@@ -32,9 +32,7 @@ from pytest_benchmark.fixture import BenchmarkFixture # type: ignore[import-unt
from typing_extensions import TypedDict
from vcr.cassette import Cassette
-from langchain_tests.unit_tests.chat_models import (
-    ChatModelTests,
-)
+from langchain_tests.unit_tests.chat_models import ChatModelTests
from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION
@@ -110,6 +108,7 @@ def magic_function_no_args() -> int:
def _validate_tool_call_message(message: BaseMessage) -> None:
    assert isinstance(message, AIMessage)
    assert len(message.tool_calls) == 1
    tool_call = message.tool_calls[0]
    assert tool_call["name"] == "magic_function"
    assert tool_call["args"] == {"input": 3}
@@ -120,6 +119,7 @@ def _validate_tool_call_message(message: BaseMessage) -> None:
def _validate_tool_call_message_no_args(message: BaseMessage) -> None:
    assert isinstance(message, AIMessage)
    assert len(message.tool_calls) == 1
    tool_call = message.tool_calls[0]
    assert tool_call["name"] == "magic_function_no_args"
    assert tool_call["args"] == {}
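For orientation, these validators assert on the ``ToolCall`` dicts exposed via ``AIMessage.tool_calls``; a hedged sketch of that shape (the ``id`` value is illustrative):

```python
# Illustrative only: the dict shape the validators above check.
tool_call = {
    "name": "magic_function",
    "args": {"input": 3},
    "id": "call_abc123",  # provider-assigned; value illustrative
    "type": "tool_call",
}
assert tool_call["name"] == "magic_function"
assert tool_call["args"] == {"input": 3}
assert tool_call.get("type") == "tool_call"
```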
@@ -137,6 +137,7 @@ def unicode_customer(customer_name: str, description: str) -> str:
    Returns:
        A confirmation message about the customer creation.
    """
    return f"Created customer: {customer_name} - {description}"
@@ -173,7 +174,7 @@ class ChatModelIntegrationTests(ChatModelTests):
API references for individual test methods include troubleshooting tips.
-Test subclasses must implement the following two properties:
+Test subclasses **must** implement the following two properties:
chat_model_class
The chat model class to test, e.g., ``ChatParrotLink``.
@@ -426,10 +427,10 @@ class ChatModelIntegrationTests(ChatModelTests):
.. dropdown:: returns_usage_metadata
Boolean property indicating whether the chat model returns usage metadata
-on invoke and streaming responses.
+on invoke and streaming responses. Defaults to ``True``.
-``usage_metadata`` is an optional dict attribute on AIMessages that track input
-and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
+``usage_metadata`` is an optional dict attribute on ``AIMessage``s that track input
+and output tokens. `See more. <https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html>`__
Example:
@@ -440,7 +441,7 @@ class ChatModelIntegrationTests(ChatModelTests):
return False
Models supporting ``usage_metadata`` should also return the name of the
-underlying model in the ``response_metadata`` of the AIMessage.
+underlying model in the ``response_metadata`` of the ``AIMessage``.
.. dropdown:: supports_anthropic_inputs
@@ -525,8 +526,8 @@ class ChatModelIntegrationTests(ChatModelTests):
Property controlling what usage metadata details are emitted in both invoke
and stream.
-``usage_metadata`` is an optional dict attribute on AIMessages that track input
-and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
+``usage_metadata`` is an optional dict attribute on ``AIMessage``s that track input
+and output tokens. `See more. <https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html>`__
It includes optional keys ``input_token_details`` and ``output_token_details``
that can track usage details associated with special types of tokens, such as
@@ -682,13 +683,13 @@ class ChatModelIntegrationTests(ChatModelTests):
return {}
def test_invoke(self, model: BaseChatModel) -> None:
"""Test to verify that `model.invoke(simple_message)` works.
"""Test to verify that ``model.invoke(simple_message)`` works.
This should pass for all integrations.
.. dropdown:: Troubleshooting
-If this test fails, you should make sure your _generate method
+If this test fails, you should make sure your ``_generate`` method
does not raise any exceptions, and that it returns a valid
:class:`~langchain_core.outputs.chat_result.ChatResult` like so:
@@ -708,7 +709,7 @@ class ChatModelIntegrationTests(ChatModelTests):
assert len(result.content) > 0
async def test_ainvoke(self, model: BaseChatModel) -> None:
"""Test to verify that `await model.ainvoke(simple_message)` works.
"""Test to verify that ``await model.ainvoke(simple_message)`` works.
This should pass for all integrations. Passing this test does not indicate
a "natively async" implementation, but rather that the model can be used
@@ -718,7 +719,7 @@ class ChatModelIntegrationTests(ChatModelTests):
First, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`.
-because `ainvoke` has a default implementation that calls `invoke` in an
+because ``ainvoke`` has a default implementation that calls ``invoke`` in an
async context.
If that test passes but not this one, you should make sure your _agenerate
@@ -741,7 +742,7 @@ class ChatModelIntegrationTests(ChatModelTests):
assert len(result.content) > 0
def test_stream(self, model: BaseChatModel) -> None:
"""Test to verify that `model.stream(simple_message)` works.
"""Test to verify that ``model.stream(simple_message)`` works.
This should pass for all integrations. Passing this test does not indicate
a "streaming" implementation, but rather that the model can be used in a
@@ -751,10 +752,10 @@ class ChatModelIntegrationTests(ChatModelTests):
First, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`.
-because `stream` has a default implementation that calls `invoke` and yields
-the result as a single chunk.
+because ``stream`` has a default implementation that calls ``invoke`` and
+yields the result as a single chunk.

-If that test passes but not this one, you should make sure your _stream
+If that test passes but not this one, you should make sure your ``_stream``
method does not raise any exceptions, and that it yields valid
:class:`~langchain_core.outputs.chat_generation.ChatGenerationChunk`
objects like so:
@@ -770,11 +771,12 @@ class ChatModelIntegrationTests(ChatModelTests):
for chunk in model.stream("Hello"):
    assert chunk is not None
    assert isinstance(chunk, AIMessageChunk)
    assert isinstance(chunk.content, (str, list))
    num_chunks += 1
assert num_chunks > 0
async def test_astream(self, model: BaseChatModel) -> None:
"""Test to verify that `await model.astream(simple_message)` works.
"""Test to verify that ``await model.astream(simple_message)`` works.
This should pass for all integrations. Passing this test does not indicate
a "natively async" or "streaming" implementation, but rather that the model can
@@ -786,11 +788,11 @@ class ChatModelIntegrationTests(ChatModelTests):
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_stream`.
and
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke`.
-because `astream` has a default implementation that calls `_stream` in an
-async context if it is implemented, or `ainvoke` and yields the result as a
-single chunk if not.
+because ``astream`` has a default implementation that calls ``_stream`` in
+an async context if it is implemented, or ``ainvoke`` and yields the result
+as a single chunk if not.

-If those tests pass but not this one, you should make sure your _astream
+If those tests pass but not this one, you should make sure your ``_astream``
method does not raise any exceptions, and that it yields valid
:class:`~langchain_core.outputs.chat_generation.ChatGenerationChunk`
objects like so:
@@ -811,7 +813,7 @@ class ChatModelIntegrationTests(ChatModelTests):
assert num_chunks > 0
def test_batch(self, model: BaseChatModel) -> None:
"""Test to verify that `model.batch([messages])` works.
"""Test to verify that ``model.batch([messages])`` works.
This should pass for all integrations. Tests the model's ability to process
multiple prompts in a single batch.
@@ -820,12 +822,13 @@ class ChatModelIntegrationTests(ChatModelTests):
First, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`
-because `batch` has a default implementation that calls `invoke` for each
-message in the batch.
+because ``batch`` has a default implementation that calls ``invoke`` for
+each message in the batch.

-If that test passes but not this one, you should make sure your `batch`
+If that test passes but not this one, you should make sure your ``batch``
method does not raise any exceptions, and that it returns a list of valid
:class:`~langchain_core.messages.AIMessage` objects.
"""
batch_results = model.batch(["Hello", "Hey"])
assert batch_results is not None
@@ -838,7 +841,7 @@ class ChatModelIntegrationTests(ChatModelTests):
assert len(result.content) > 0
async def test_abatch(self, model: BaseChatModel) -> None:
"""Test to verify that `await model.abatch([messages])` works.
"""Test to verify that ``await model.abatch([messages])`` works.
This should pass for all integrations. Tests the model's ability to process
multiple prompts in a single batch asynchronously.
@@ -849,12 +852,13 @@ class ChatModelIntegrationTests(ChatModelTests):
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_batch`
and
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_ainvoke`
-because `abatch` has a default implementation that calls `ainvoke` for each
-message in the batch.
+because ``abatch`` has a default implementation that calls ``ainvoke`` for
+each message in the batch.

-If those tests pass but not this one, you should make sure your `abatch`
+If those tests pass but not this one, you should make sure your ``abatch``
method does not raise any exceptions, and that it returns a list of valid
:class:`~langchain_core.messages.AIMessage` objects.
"""
batch_results = await model.abatch(["Hello", "Hey"])
assert batch_results is not None
@@ -877,18 +881,20 @@ class ChatModelIntegrationTests(ChatModelTests):
First, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`
-because this test also uses `model.invoke()`.
+because this test also uses ``model.invoke()``.
If that test passes but not this one, you should verify that:
1. Your model correctly processes the message history
2. The model maintains appropriate context from previous messages
3. The response is a valid :class:`~langchain_core.messages.AIMessage`
"""
messages = [
    HumanMessage("hello"),
    AIMessage("hello"),
    HumanMessage("how are you"),
]
result = model.invoke(messages)
assert result is not None
assert isinstance(result, AIMessage)
@@ -906,17 +912,17 @@ class ChatModelIntegrationTests(ChatModelTests):
First, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_invoke`
-because this test also uses `model.invoke()`.
+because this test also uses ``model.invoke()``.
Second, debug
:meth:`~langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_conversation`
because this test is the "basic case" without double messages.
If that test passes those but not this one, you should verify that:
-1. Your model API can handle double messages, or the integration should
-   merge messages before sending them to the API.
+1. Your model API can handle double messages, or the integration should merge messages before sending them to the API.
2. The response is a valid :class:`~langchain_core.messages.AIMessage`
"""
""" # noqa: E501
messages = [
    SystemMessage("hello"),
    SystemMessage("hello"),
@@ -926,6 +932,7 @@ class ChatModelIntegrationTests(ChatModelTests):
AIMessage("hello"),
HumanMessage("how are you"),
]
result = model.invoke(messages)
assert result is not None
assert isinstance(result, AIMessage)
@@ -940,13 +947,14 @@ class ChatModelIntegrationTests(ChatModelTests):
.. versionchanged:: 0.3.17
-Additionally check for the presence of `model_name` in the response
+Additionally check for the presence of ``model_name`` in the response
metadata, which is needed for usage tracking in callback handlers.
.. dropdown:: Configuration
By default, this test is run.
-To disable this feature, set `returns_usage_metadata` to False in your
+To disable this feature, set ``returns_usage_metadata`` to ``False`` in your
test class:
.. code-block:: python
@@ -957,7 +965,7 @@ class ChatModelIntegrationTests(ChatModelTests):
return False
This test can also check the format of specific kinds of usage metadata
-based on the `supported_usage_metadata_details` property. This property
+based on the ``supported_usage_metadata_details`` property. This property
should be configured as follows with the types of tokens that the model
supports tracking:
@@ -988,7 +996,7 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, first verify that your model returns
:class:`~langchain_core.messages.ai.UsageMetadata` dicts
-attached to the returned AIMessage object in `_generate`:
+attached to the returned AIMessage object in ``_generate``:
.. code-block:: python
@@ -1020,9 +1028,11 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.returns_usage_metadata:
    pytest.skip("Not implemented.")
result = model.invoke("Hello")
assert result is not None
assert isinstance(result, AIMessage)
assert result.usage_metadata is not None
assert isinstance(result.usage_metadata["input_tokens"], int)
assert isinstance(result.usage_metadata["output_tokens"], int)
@@ -1106,13 +1116,13 @@ class ChatModelIntegrationTests(ChatModelTests):
.. versionchanged:: 0.3.17
-Additionally check for the presence of `model_name` in the response
+Additionally check for the presence of ``model_name`` in the response
metadata, which is needed for usage tracking in callback handlers.
.. dropdown:: Configuration
By default, this test is run.
-To disable this feature, set `returns_usage_metadata` to False in your
+To disable this feature, set ``returns_usage_metadata`` to ``False`` in your
test class:
.. code-block:: python
@@ -1123,7 +1133,7 @@ class ChatModelIntegrationTests(ChatModelTests):
return False
This test can also check the format of specific kinds of usage metadata
-based on the `supported_usage_metadata_details` property. This property
+based on the ``supported_usage_metadata_details`` property. This property
should be configured as follows with the types of tokens that the model
supports tracking:
@@ -1153,16 +1163,16 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, first verify that your model yields
:class:`~langchain_core.messages.ai.UsageMetadata` dicts
-attached to the returned AIMessage object in `_stream`
+attached to the returned AIMessage object in ``_stream``
that sum up to the total usage metadata.
-Note that `input_tokens` should only be included on one of the chunks
-(typically the first or the last chunk), and the rest should have 0 or None
-to avoid counting input tokens multiple times.
+Note that ``input_tokens`` should only be included on one of the chunks
+(typically the first or the last chunk), and the rest should have ``0`` or
+``None`` to avoid counting input tokens multiple times.

-`output_tokens` typically count the number of tokens in each chunk, not the
-sum. This test will pass as long as the sum of `output_tokens` across all
-chunks is not 0.
+``output_tokens`` typically count the number of tokens in each chunk, not
+the sum. This test will pass as long as the sum of ``output_tokens`` across
+all chunks is not ``0``.
.. code-block:: python
@@ -1198,6 +1208,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.returns_usage_metadata:
    pytest.skip("Not implemented.")
full: Optional[AIMessageChunk] = None
for chunk in model.stream("Write me 2 haikus. Only include the haikus."):
    assert isinstance(chunk, AIMessageChunk)
@@ -1262,7 +1273,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""Test that model does not fail when invoked with the ``stop`` parameter,
which is a standard parameter for stopping generation at a certain token.
-More on standard parameters here: https://python.langchain.com/docs/concepts/chat_models/#standard-parameters
+`More on standard parameters <https://python.langchain.com/docs/concepts/chat_models/#standard-parameters>`__
This should pass for all integrations.
@@ -1336,6 +1347,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling.")
tool_choice_value = None if not self.has_tool_choice else "any"
# Emit warning if tool_choice_value property is overridden
if inspect.getattr_static(
@@ -1410,6 +1422,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling.")
tool_choice_value = None if not self.has_tool_choice else "any"
model_with_tools = model.bind_tools(
    [magic_function], tool_choice=tool_choice_value
@@ -1519,10 +1532,10 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, check that:
-1. The model can correctly handle message histories that include AIMessage objects with ``""`` content.
-2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
-3. The model can correctly handle ToolMessage objects with string content and arbitrary string values for ``tool_call_id``.
+1. The model can correctly handle message histories that include ``AIMessage`` objects with ``""`` content.
+2. The ``tool_calls`` attribute on ``AIMessage`` objects is correctly handled and passed to the model in an appropriate format.
+3. The model can correctly handle ``ToolMessage`` objects with string content and arbitrary string values for ``tool_call_id``.
You can ``xfail`` the test if tool calling is implemented but this format
is not supported.
@@ -1535,6 +1548,7 @@ class ChatModelIntegrationTests(ChatModelTests):
""" # noqa: E501
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling.")
model_with_tools = model.bind_tools([my_adder_tool])
function_name = "my_adder_tool"
function_args = {"a": "1", "b": "2"}
@@ -1570,7 +1584,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""Test that message histories are compatible with list tool contents
(e.g. Anthropic format).
-These message histories will include AIMessage objects with "tool use" and
+These message histories will include ``AIMessage`` objects with "tool use" and
content blocks, e.g.,
.. code-block:: python
@@ -1604,8 +1618,8 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, check that:
-1. The model can correctly handle message histories that include AIMessage objects with list content.
-2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
+1. The model can correctly handle message histories that include ``AIMessage`` objects with list content.
+2. The ``tool_calls`` attribute on ``AIMessage`` objects is correctly handled and passed to the model in an appropriate format.
3. The model can correctly handle ToolMessage objects with string content and arbitrary string values for ``tool_call_id``.
You can ``xfail`` the test if tool calling is implemented but this format
@@ -1620,6 +1634,7 @@ class ChatModelIntegrationTests(ChatModelTests):
""" # noqa: E501
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling.")
model_with_tools = model.bind_tools([my_adder_tool])
function_name = "my_adder_tool"
function_args = {"a": 1, "b": 2}
@@ -1692,7 +1707,7 @@ class ChatModelIntegrationTests(ChatModelTests):
pytest.skip("Test requires tool choice.")
@tool
-def get_weather(location: str) -> str:  # pylint: disable=unused-argument
+def get_weather(location: str) -> str:
    """Get weather at a location."""
    return "It's sunny."
@@ -1750,6 +1765,7 @@ class ChatModelIntegrationTests(ChatModelTests):
""" # noqa: E501
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling.")
tool_choice_value = None if not self.has_tool_choice else "any"
model_with_tools = model.bind_tools(
    [magic_function_no_args], tool_choice=tool_choice_value
@@ -1767,7 +1783,7 @@ class ChatModelIntegrationTests(ChatModelTests):
def test_tool_message_error_status(
    self, model: BaseChatModel, my_adder_tool: BaseTool
) -> None:
"""Test that ToolMessage with ``status="error"`` can be handled.
"""Test that ``ToolMessage`` with ``status="error"`` can be handled.
These messages may take the form:
@@ -1806,6 +1822,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling.")
model_with_tools = model.bind_tools([my_adder_tool])
messages = [
    HumanMessage("What is 1 + 2"),
@@ -1860,8 +1877,9 @@ class ChatModelIntegrationTests(ChatModelTests):
.. dropdown:: Troubleshooting
-This test uses a utility function in ``langchain_core`` to generate a
-sequence of messages representing "few-shot" examples: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.tool_example_to_messages.html
+This test uses `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.tool_example_to_messages.html>`__
+in ``langchain_core`` to generate a sequence of messages representing
+"few-shot" examples.
If this test fails, check that the model can correctly handle this
sequence of messages.
@@ -1878,6 +1896,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling.")
model_with_tools = model.bind_tools([my_adder_tool], tool_choice="any")
function_result = json.dumps({"result": 3})
@@ -1921,10 +1940,12 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, ensure that the model's ``bind_tools`` method
properly handles both JSON Schema and Pydantic V2 models.
-``langchain_core`` implements a utility function that will accommodate
-most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
-See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
+``langchain_core`` implements `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html>`__
+that will accommodate most formats.
+See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
+of ``with_structured_output``.
"""
if not self.has_structured_output:
@@ -2000,10 +2021,12 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, ensure that the model's ``bind_tools`` method
properly handles both JSON Schema and Pydantic V2 models.
-``langchain_core`` implements a utility function that will accommodate
-most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
-See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
+``langchain_core`` implements `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html>`__
+that will accommodate most formats.
+See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
+of ``with_structured_output``.
"""
if not self.has_structured_output:
@@ -2052,10 +2075,9 @@ class ChatModelIntegrationTests(ChatModelTests):
@pytest.mark.skipif(PYDANTIC_MAJOR_VERSION != 2, reason="Test requires pydantic 2.")
def test_structured_output_pydantic_2_v1(self, model: BaseChatModel) -> None:
"""Test to verify we can generate structured output using
pydantic.v1.BaseModel.
"""Test to verify we can generate structured output using ``pydantic.v1.BaseModel``.
pydantic.v1.BaseModel is available in the pydantic 2 package.
``pydantic.v1.BaseModel`` is available in the Pydantic 2 package.
This test is optional and should be skipped if the model does not support
structured output (see Configuration below).
@@ -2079,12 +2101,14 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, ensure that the model's ``bind_tools`` method
properly handles both JSON Schema and Pydantic V1 models.
-``langchain_core`` implements a utility function that will accommodate
-most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
-See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
-"""
+``langchain_core`` implements `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html>`__
+that will accommodate most formats.
+See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
+of ``with_structured_output``.
+""" # noqa: E501
if not self.has_structured_output:
    pytest.skip("Test requires structured output.")
@@ -2141,10 +2165,12 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, ensure that the model's ``bind_tools`` method
properly handles Pydantic V2 models with optional parameters.
-``langchain_core`` implements a utility function that will accommodate
-most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
-See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
+``langchain_core`` implements `a utility function <https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html>`__
+that will accommodate most formats.
+See `example implementation <https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output>`__
+of ``with_structured_output``.
"""
if not self.has_structured_output:
@@ -2225,7 +2251,7 @@ class ChatModelIntegrationTests(ChatModelTests):
# Type ignoring since the interface only officially supports pydantic 1
# or pydantic.v1.BaseModel but not pydantic.BaseModel from pydantic 2.
# We'll need to do a pass updating the type signatures.
-chat = model.with_structured_output(Joke, method="json_mode")  # type: ignore[arg-type]
+chat = model.with_structured_output(Joke, method="json_mode")
msg = (
    "Tell me a joke about cats. Return the result as a JSON with 'setup' and "
    "'punchline' keys. Return nothing other than JSON."
@@ -2288,6 +2314,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.supports_pdf_inputs:
    pytest.skip("Model does not support PDF inputs.")
url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
pdf_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
@@ -2364,6 +2391,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.supports_audio_inputs:
    pytest.skip("Model does not support audio inputs.")
url = "https://upload.wikimedia.org/wikipedia/commons/3/3d/Alcal%C3%A1_de_Henares_%28RPS_13-04-2024%29_canto_de_ruise%C3%B1or_%28Luscinia_megarhynchos%29_en_el_Soto_del_Henares.wav"
audio_data = base64.b64encode(httpx.get(url).content).decode("utf-8")
@@ -2465,6 +2493,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.supports_image_inputs:
    pytest.skip("Model does not support image message.")
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
@@ -2572,6 +2601,7 @@ class ChatModelIntegrationTests(ChatModelTests):
"""
if not self.supports_image_tool_message:
    pytest.skip("Model does not support image tool message.")
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
@@ -2687,7 +2717,7 @@ class ChatModelIntegrationTests(ChatModelTests):
1. The model can correctly handle message histories that include message objects with list content.
2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
-3. HumanMessages with "tool_result" content blocks are correctly handled.
+3. ``HumanMessage``s with "tool_result" content blocks are correctly handled.
Otherwise, if Anthropic tool call and result formats are not supported,
set the ``supports_anthropic_inputs`` property to False.
@@ -2793,7 +2823,7 @@ class ChatModelIntegrationTests(ChatModelTests):
assert isinstance(response, AIMessage)
def test_message_with_name(self, model: BaseChatModel) -> None:
"""Test that HumanMessage with values for the ``name`` field can be handled.
"""Test that ``HumanMessage`` with values for the ``name`` field can be handled.
These messages may take the form:
@@ -2842,7 +2872,7 @@ class ChatModelIntegrationTests(ChatModelTests):
chat model.
Check also that all required information (e.g., tool calling identifiers)
-from AIMessage objects is propagated correctly to model payloads.
+from ``AIMessage`` objects is propagated correctly to model payloads.
This test may fail if the chat model does not consistently generate tool
calls in response to an appropriate query. In these cases you can ``xfail``
@@ -2859,7 +2889,7 @@ class ChatModelIntegrationTests(ChatModelTests):
pytest.skip("Test requires tool calling.")
@tool
-def get_weather(location: str) -> str:  # pylint: disable=unused-argument
+def get_weather(location: str) -> str:
    """Call to surf the web."""
    return "It's sunny."
@@ -2953,12 +2983,13 @@ class ChatModelIntegrationTests(ChatModelTests):
Args:
    model: The chat model to test
-    tool_choice: Tool choice parameter to pass to bind_tools (provider-specific)
-    force_tool_call: Whether to force a tool call (use tool_choice=True if None)
+    tool_choice: Tool choice parameter to pass to ``bind_tools()`` (provider-specific)
+    force_tool_call: Whether to force a tool call (use ``tool_choice=True`` if None)

Tests that Unicode characters in tool call arguments are preserved correctly,
-not escaped as \\uXXXX sequences.
-"""
+not escaped as ``\\uXXXX`` sequences.
+""" # noqa: E501
if not self.has_tool_calling:
    pytest.skip("Test requires tool calling support.")


@@ -12,17 +12,11 @@ from langchain_core.load import dumpd, load
from langchain_core.runnables import RunnableBinding
from langchain_core.tools import BaseTool, tool
from pydantic import BaseModel, Field, SecretStr
-from pydantic.v1 import (
-    BaseModel as BaseModelV1,
-)
-from pydantic.v1 import (
-    Field as FieldV1,
-)
-from pydantic.v1 import (
-    ValidationError as ValidationErrorV1,
-)
+from pydantic.v1 import BaseModel as BaseModelV1
+from pydantic.v1 import Field as FieldV1
+from pydantic.v1 import ValidationError as ValidationErrorV1
from pytest_benchmark.fixture import BenchmarkFixture # type: ignore[import-untyped]
-from syrupy import SnapshotAssertion
+from syrupy.assertion import SnapshotAssertion
from langchain_tests.base import BaseStandardTests
from langchain_tests.utils.pydantic import PYDANTIC_MAJOR_VERSION
@@ -32,6 +26,7 @@ def generate_schema_pydantic_v1_from_2() -> Any:
"""Use to generate a schema from v1 namespace in pydantic 2.
:private:
"""
if PYDANTIC_MAJOR_VERSION != 2:
    msg = "This function is only compatible with Pydantic v2."
@@ -50,6 +45,7 @@ def generate_schema_pydantic() -> Any:
"""Works with either pydantic 1 or 2.
:private:
"""
class PersonA(BaseModel):
@@ -71,6 +67,7 @@ class ChatModelTests(BaseStandardTests):
"""Base class for chat model tests.
:private:
"""
@property
@@ -154,16 +151,12 @@ class ChatModelTests(BaseStandardTests):
@property
def supports_image_inputs(self) -> bool:
-    """(bool) whether the chat model supports image inputs, defaults to
-    ``False``.
-    """
+    """(bool) whether the chat model supports image inputs, defaults to ``False``.""" # noqa: E501
    return False

@property
def supports_image_urls(self) -> bool:
-    """(bool) whether the chat model supports image inputs from URLs, defaults to
-    ``False``.
-    """
+    """(bool) whether the chat model supports image inputs from URLs, defaults to ``False``.""" # noqa: E501
    return False
@property
@@ -173,23 +166,21 @@ class ChatModelTests(BaseStandardTests):
@property
def supports_audio_inputs(self) -> bool:
-    """(bool) whether the chat model supports audio inputs, defaults to
-    ``False``.
-    """
+    """(bool) whether the chat model supports audio inputs, defaults to ``False``.""" # noqa: E501
    return False

@property
def supports_video_inputs(self) -> bool:
    """(bool) whether the chat model supports video inputs, defaults to ``False``.

    No current tests are written for this feature.
    """
    return False

@property
def returns_usage_metadata(self) -> bool:
-    """(bool) whether the chat model returns usage metadata on invoke and streaming
-    responses.
-    """
+    """(bool) whether the chat model returns usage metadata on invoke and streaming responses.""" # noqa: E501
    return True
@property
@@ -199,9 +190,7 @@ class ChatModelTests(BaseStandardTests):
@property
def supports_image_tool_message(self) -> bool:
-    """(bool) whether the chat model supports ToolMessages that include image
-    content.
-    """
+    """(bool) whether the chat model supports ``ToolMessage``s that include image content.""" # noqa: E501
    return False
@property
@@ -211,6 +200,7 @@ class ChatModelTests(BaseStandardTests):
.. important::
See ``enable_vcr_tests`` dropdown :class:`above <ChatModelTests>` for more
information.
"""
return False
@@ -267,7 +257,7 @@ class ChatModelUnitTests(ChatModelTests):
API references for individual test methods include troubleshooting tips.
-Test subclasses must implement the following two properties:
+Test subclasses **must** implement the following two properties:
chat_model_class
The chat model class to test, e.g., ``ChatParrotLink``.
@@ -299,7 +289,7 @@ class ChatModelUnitTests(ChatModelTests):
Boolean property indicating whether the chat model supports tool calling.
-By default, this is determined by whether the chat model's `bind_tools` method
+By default, this is determined by whether the chat model's ``bind_tools`` method
is overridden. It typically does not need to be overridden on the test class.
Example override:
@@ -401,7 +391,7 @@ class ChatModelUnitTests(ChatModelTests):
Defaults to ``False``.
If set to ``True``, the chat model will be tested using content blocks of the
-form
+form.
.. code-block:: python
@@ -437,7 +427,7 @@ class ChatModelUnitTests(ChatModelTests):
URLs. Defaults to ``False``.
If set to ``True``, the chat model will be tested using content blocks of the
-form
+form.
.. code-block:: python
@@ -463,7 +453,7 @@ class ChatModelUnitTests(ChatModelTests):
Defaults to ``False``.
If set to ``True``, the chat model will be tested using content blocks of the
-form
+form.
.. code-block:: python
@@ -490,7 +480,7 @@ class ChatModelUnitTests(ChatModelTests):
Defaults to ``False``.
If set to ``True``, the chat model will be tested using content blocks of the
-form
+form.
.. code-block:: python
@@ -519,10 +509,10 @@ class ChatModelUnitTests(ChatModelTests):
.. dropdown:: returns_usage_metadata
Boolean property indicating whether the chat model returns usage metadata
-on invoke and streaming responses.
+on invoke and streaming responses. Defaults to ``True``.
-``usage_metadata`` is an optional dict attribute on AIMessages that track input
-and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
+``usage_metadata`` is an optional dict attribute on ``AIMessage``s that track input
+and output tokens. `See more. <https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html>`__
Example:
@@ -533,7 +523,7 @@ class ChatModelUnitTests(ChatModelTests):
return False
Models supporting ``usage_metadata`` should also return the name of the
-underlying model in the ``response_metadata`` of the AIMessage.
+underlying model in the ``response_metadata`` of the ``AIMessage``.
.. dropdown:: supports_anthropic_inputs
@@ -567,7 +557,7 @@ class ChatModelUnitTests(ChatModelTests):
.. dropdown:: supports_image_tool_message
-Boolean property indicating whether the chat model supports ToolMessages
+Boolean property indicating whether the chat model supports ``ToolMessage``s
that include image content, e.g.,
.. code-block:: python
@@ -615,11 +605,11 @@ class ChatModelUnitTests(ChatModelTests):
.. dropdown:: supported_usage_metadata_details
-Property controlling what usage metadata details are emitted in both invoke
-and stream.
+Property controlling what usage metadata details are emitted in both ``invoke``
+and ``stream``.

-``usage_metadata`` is an optional dict attribute on AIMessages that track input
-and output tokens: https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html
+``usage_metadata`` is an optional dict attribute on ``AIMessage``s that track input
+and output tokens. `See more. <https://python.langchain.com/api_reference/core/messages/langchain_core.messages.ai.UsageMetadata.html>`__
It includes optional keys ``input_token_details`` and ``output_token_details``
that can track usage details associated with special types of tokens, such as
@@ -812,6 +802,7 @@ class ChatModelUnitTests(ChatModelTests):
def init_from_env_params(self) -> tuple[dict, dict, dict]:
"""(tuple) environment variables, additional initialization args, and expected
instance attributes for testing initialization from environment variables.
"""
return {}, {}, {}
@@ -823,7 +814,8 @@ class ChatModelUnitTests(ChatModelTests):
If this test fails, ensure that:
1. ``chat_model_params`` is specified and the model can be initialized from those params;
-2. The model accommodates standard parameters: https://python.langchain.com/docs/concepts/chat_models/#standard-parameters
+2. The model accommodates `standard parameters <https://python.langchain.com/docs/concepts/chat_models/#standard-parameters>`__
""" # noqa: E501
model = self.chat_model_class(
**{
@@ -843,6 +835,7 @@ class ChatModelUnitTests(ChatModelTests):
If this test fails, ensure that ``init_from_env_params`` is specified
correctly and that model parameters are properly set from environment
variables during initialization.
"""
env_params, model_params, expected_attrs = self.init_from_env_params
if not env_params:
@@ -867,6 +860,7 @@ class ChatModelUnitTests(ChatModelTests):
If this test fails, ensure that the model can be initialized with a
boolean ``streaming`` parameter.
"""
model = self.chat_model_class(
**{
@@ -893,6 +887,7 @@ class ChatModelUnitTests(ChatModelTests):
a utility function that will accommodate most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
See example implementation of ``bind_tools`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.bind_tools
"""
if not self.has_tool_calling:
    return
@@ -933,6 +928,7 @@ class ChatModelUnitTests(ChatModelTests):
a utility function that will accommodate most formats: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.convert_to_openai_tool.html
See example implementation of ``with_structured_output`` here: https://python.langchain.com/api_reference/_modules/langchain_openai/chat_models/base.html#BaseChatOpenAI.with_structured_output
"""
if not self.has_structured_output:
    return
@@ -955,6 +951,7 @@ class ChatModelUnitTests(ChatModelTests):
Check also that the model class is named according to convention
(e.g., ``ChatProviderName``).
"""
class ExpectedParams(BaseModelV1):
@@ -992,6 +989,7 @@ class ChatModelUnitTests(ChatModelTests):
If this test fails, check that the ``init_from_env_params`` property is
correctly set on the test class.
"""
if not self.chat_model_class.is_lc_serializable():
    pytest.skip("Model is not serializable.")
@@ -1011,6 +1009,7 @@ class ChatModelUnitTests(ChatModelTests):
def test_init_time(self, benchmark: BenchmarkFixture) -> None:
"""Test initialization time of the chat model. If this test fails, check that
we are not introducing undue overhead in the model's initialization.
"""
def _init_in_loop() -> None:


@@ -1,15 +1,9 @@
from collections.abc import Iterator
from typing import Any, Optional
-from langchain_core.callbacks import (
-    CallbackManagerForLLMRun,
-)
+from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import (
-    AIMessage,
-    AIMessageChunk,
-    BaseMessage,
-)
+from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage
from langchain_core.messages.ai import UsageMetadata
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from pydantic import Field
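These imports back a minimal test-only chat model. A hedged sketch of the smallest implementation the standard tests exercise — the class name and buffer parameter are illustrative; the troubleshooting docstrings above only require that ``_generate`` returns a valid ``ChatResult`` without raising:

```python
class ChatParrot(BaseChatModel):
    """Illustrative fake model that parrots the last message back."""

    parrot_buffer_length: int = Field(default=50)  # placeholder parameter

    @property
    def _llm_type(self) -> str:
        return "chat-parrot"

    def _generate(
        self,
        messages: list[BaseMessage],
        stop: Optional[list[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        # Echo a prefix of the last message; wrap it in ChatGeneration/ChatResult.
        text = str(messages[-1].content)[: self.parrot_buffer_length]
        return ChatResult(
            generations=[ChatGeneration(message=AIMessage(content=text))]
        )
```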


@@ -356,7 +356,7 @@ test = [
test-integration = []
typing = [
{ name = "langchain-text-splitters", directory = "../text-splitters" },
{ name = "mypy", specifier = ">=1.15,<1.16" },
{ name = "mypy", specifier = ">=1.17.1,<1.18" },
{ name = "types-pyyaml", specifier = ">=6.0.12.2,<7.0.0.0" },
{ name = "types-requests", specifier = ">=2.28.11.5,<3.0.0.0" },
]