multiple: enforce standards on tool_choice (#30372)

- Test if models support forcing tool calls via `tool_choice`. If they
do, they should support
  - `"any"` to specify any tool
  - the tool name as a string to force calling a particular tool
- Add `tool_choice` to signature of `BaseChatModel.bind_tools` in core
- Deprecate `tool_choice_value` in standard tests in favor of a boolean
`has_tool_choice`

Will follow up with PRs in external repos (tested in AWS and Google
already).
This commit is contained in:
ccurme
2025-03-20 13:48:59 -04:00
committed by GitHub
parent b86cd8270c
commit de3960d285
10 changed files with 190 additions and 56 deletions

View File

@@ -1,10 +1,12 @@
import base64
import inspect
import json
from typing import Any, List, Literal, Optional, cast
from unittest.mock import MagicMock
import httpx
import pytest
from langchain_core._api import warn_deprecated
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.language_models import BaseChatModel, GenericFakeChatModel
from langchain_core.messages import (
@@ -204,12 +206,12 @@ class ChatModelIntegrationTests(ChatModelTests):
Value to use for tool choice when used in tests.
Some tests for tool calling features attempt to force tool calling via a
`tool_choice` parameter. A common value for this parameter is "any". Defaults
to `None`.
Note: if the value is set to "tool_name", the name of the tool used in each
test will be set as the value for `tool_choice`.
.. warning:: Deprecated since version 0.3.15:
This property will be removed in version 0.3.20. If a model supports
``tool_choice``, it should accept ``tool_choice="any"`` and
``tool_choice=<string name of tool>``. If a model does not
support forcing tool calling, override the ``has_tool_choice`` property to
return ``False``.
Example:
@@ -219,6 +221,26 @@ class ChatModelIntegrationTests(ChatModelTests):
def tool_choice_value(self) -> Optional[str]:
return "any"
.. dropdown:: has_tool_choice
Boolean property indicating whether the chat model supports forcing tool
calling via a ``tool_choice`` parameter.
By default, this is determined by whether the parameter is included in the
signature for the corresponding ``bind_tools`` method.
If ``True``, the minimum requirement for this feature is that
``tool_choice="any"`` will force a tool call, and ``tool_choice=<tool name>``
will force a call to a specific tool.
Example override:
.. code-block:: python
@property
def has_tool_choice(self) -> bool:
return False
.. dropdown:: has_structured_output
Boolean property indicating whether the chat model supports structured
@@ -989,16 +1011,33 @@ class ChatModelIntegrationTests(ChatModelTests):
def test_tool_calling(self, model: BaseChatModel) -> None:
super().test_tool_calling(model)
Otherwise, ensure that the ``tool_choice_value`` property is correctly
specified on the test class.
Otherwise, in the case that only one tool is bound, ensure that
``tool_choice`` supports the string ``"any"`` to force calling that tool.
"""
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
if self.tool_choice_value == "tool_name":
tool_choice: Optional[str] = "magic_function"
if not self.has_tool_choice:
tool_choice_value = None
else:
tool_choice = self.tool_choice_value
model_with_tools = model.bind_tools([magic_function], tool_choice=tool_choice)
tool_choice_value = "any"
# Emit warning if tool_choice_value property is overridden
if inspect.getattr_static(
self, "tool_choice_value"
) is not inspect.getattr_static(ChatModelIntegrationTests, "tool_choice_value"):
warn_deprecated(
"0.3.15",
message=(
"`tool_choice_value` will be removed in version 0.3.20. If a "
"model supports `tool_choice`, it should accept `tool_choice='any' "
"and `tool_choice=<string name of tool>`. If the model does not "
"support `tool_choice`, override the `supports_tool_choice` "
"property to return `False`."
),
removal="0.3.20",
)
model_with_tools = model.bind_tools(
[magic_function], tool_choice=tool_choice_value
)
# Test invoke
query = "What is the value of magic_function(3)? Use the tool."
@@ -1012,6 +1051,57 @@ class ChatModelIntegrationTests(ChatModelTests):
assert isinstance(full, AIMessage)
_validate_tool_call_message(full)
def test_tool_choice(self, model: BaseChatModel) -> None:
"""Test that the model can force tool calling via the ``tool_choice``
parameter. This test is skipped if the ``has_tool_choice`` property on the
test class is set to False.
This test is optional and should be skipped if the model does not support
tool calling (see Configuration below).
.. dropdown:: Configuration
To disable tool calling tests, set ``has_tool_choice`` to False in your
test class:
.. code-block:: python
class TestMyChatModelIntegration(ChatModelIntegrationTests):
@property
def has_tool_choice(self) -> bool:
return False
.. dropdown:: Troubleshooting
If this test fails, check whether the ``test_tool_calling`` test is passing.
If it is not, refer to the troubleshooting steps in that test first.
If ``test_tool_calling`` is passing, check that the underlying model
supports forced tool calling. If it does, ``bind_tools`` should accept a
``tool_choice`` parameter that can be used to force a tool call.
It should accept (1) the string ``"any"`` to force calling the bound tool,
and (2) the string name of the tool to force calling that tool.
"""
if not self.has_tool_choice or not self.has_tool_calling:
pytest.skip("Test requires tool choice.")
@tool
def get_weather(location: str) -> str:
"""Get weather at a location."""
return "It's sunny."
for tool_choice in ["any", "magic_function"]:
model_with_tools = model.bind_tools(
[magic_function, get_weather], tool_choice=tool_choice
)
result = model_with_tools.invoke("Hello!")
assert isinstance(result, AIMessage)
assert result.tool_calls
if tool_choice == "magic_function":
assert result.tool_calls[0]["name"] == "magic_function"
async def test_tool_calling_async(self, model: BaseChatModel) -> None:
"""Test that the model generates tool calls. This test is skipped if the
``has_tool_calling`` property on the test class is set to False.
@@ -1048,16 +1138,18 @@ class ChatModelIntegrationTests(ChatModelTests):
async def test_tool_calling_async(self, model: BaseChatModel) -> None:
await super().test_tool_calling_async(model)
Otherwise, ensure that the ``tool_choice_value`` property is correctly
specified on the test class.
Otherwise, in the case that only one tool is bound, ensure that
``tool_choice`` supports the string ``"any"`` to force calling that tool.
"""
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
if self.tool_choice_value == "tool_name":
tool_choice: Optional[str] = "magic_function"
if not self.has_tool_choice:
tool_choice_value = None
else:
tool_choice = self.tool_choice_value
model_with_tools = model.bind_tools([magic_function], tool_choice=tool_choice)
tool_choice_value = "any"
model_with_tools = model.bind_tools(
[magic_function], tool_choice=tool_choice_value
)
# Test ainvoke
query = "What is the value of magic_function(3)? Use the tool."
@@ -1109,18 +1201,17 @@ class ChatModelIntegrationTests(ChatModelTests):
def test_tool_calling_with_no_arguments(self, model: BaseChatModel) -> None:
super().test_tool_calling_with_no_arguments(model)
Otherwise, ensure that the ``tool_choice_value`` property is correctly
specified on the test class.
Otherwise, in the case that only one tool is bound, ensure that
``tool_choice`` supports the string ``"any"`` to force calling that tool.
""" # noqa: E501
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")
if self.tool_choice_value == "tool_name":
tool_choice: Optional[str] = "magic_function_no_args"
if not self.has_tool_choice:
tool_choice_value = None
else:
tool_choice = self.tool_choice_value
tool_choice_value = "any"
model_with_tools = model.bind_tools(
[magic_function_no_args], tool_choice=tool_choice
[magic_function_no_args], tool_choice=tool_choice_value
)
query = "What is the value of magic_function_no_args()? Use the tool."
result = model_with_tools.invoke(query)
@@ -1184,10 +1275,10 @@ class ChatModelIntegrationTests(ChatModelTests):
name="greeting_generator",
description="Generate a greeting in a particular style of speaking.",
)
if self.tool_choice_value == "tool_name":
tool_choice: Optional[str] = "greeting_generator"
if not self.has_tool_choice:
tool_choice: Optional[str] = "any"
else:
tool_choice = self.tool_choice_value
tool_choice = None
model_with_tools = model.bind_tools([tool_], tool_choice=tool_choice)
query = "Using the tool, generate a Pirate greeting."
result = model_with_tools.invoke(query)
@@ -2086,9 +2177,6 @@ class ChatModelIntegrationTests(ChatModelTests):
If this test fails, check that the ``status`` field on ``ToolMessage``
objects is either ignored or passed to the model appropriately.
Otherwise, ensure that the ``tool_choice_value`` property is correctly
specified on the test class.
"""
if not self.has_tool_calling:
pytest.skip("Test requires tool calling.")

View File

@@ -2,6 +2,7 @@
:autodoc-options: autoproperty
"""
import inspect
import os
from abc import abstractmethod
from typing import Any, Dict, List, Literal, Optional, Tuple, Type
@@ -127,6 +128,14 @@ class ChatModelTests(BaseStandardTests):
"""(None or str) to use for tool choice when used in tests."""
return None
@property
def has_tool_choice(self) -> bool:
"""(bool) whether the model supports tool calling."""
bind_tools_params = inspect.signature(
self.chat_model_class.bind_tools
).parameters
return "tool_choice" in bind_tools_params
@property
def has_structured_output(self) -> bool:
"""(bool) whether the chat model supports structured output."""
@@ -273,12 +282,11 @@ class ChatModelUnitTests(ChatModelTests):
Value to use for tool choice when used in tests.
Some tests for tool calling features attempt to force tool calling via a
`tool_choice` parameter. A common value for this parameter is "any". Defaults
to `None`.
Note: if the value is set to "tool_name", the name of the tool used in each
test will be set as the value for `tool_choice`.
.. warning:: Deprecated since version 0.3.15:
This property will be removed in version 0.3.20. If a model does not
support forcing tool calling, override the ``has_tool_choice`` property to
return ``False``. Otherwise, models should accept values of ``"any"`` or
the name of a tool in ``tool_choice``.
Example:
@@ -288,6 +296,26 @@ class ChatModelUnitTests(ChatModelTests):
def tool_choice_value(self) -> Optional[str]:
return "any"
.. dropdown:: has_tool_choice
Boolean property indicating whether the chat model supports forcing tool
calling via a ``tool_choice`` parameter.
By default, this is determined by whether the parameter is included in the
signature for the corresponding ``bind_tools`` method.
If ``True``, the minimum requirement for this feature is that
``tool_choice="any"`` will force a tool call, and ``tool_choice=<tool name>``
will force a call to a specific tool.
Example override:
.. code-block:: python
@property
def has_tool_choice(self) -> bool:
return False
.. dropdown:: has_structured_output
Boolean property indicating whether the chat model supports structured