together, standard-tests: specify tool_choice in standard tests (#25548)

This change lets standard tests specify a value for `tool_choice` via a `tool_choice_value` property, which defaults to `None`.

Chat models [available in Together](https://docs.together.ai/docs/chat-models) have issues passing the standard tool-calling tests:

- Llama 3.1 models currently [appear to rely on user-side parsing](https://docs.together.ai/docs/llama-3-function-calling) in Together;
- Mixtral-8x7B and Mistral-7B (currently tested) consistently do not call tools in some tests.

Specifying `tool_choice` also lets us remove an existing `xfail` and use a smaller model in the Groq tests.
2025-09-16 23:13:31 +00:00 · 2024-08-19 16:37:36 -04:00
parent 015ab91b83
commit c5bf114c0f
6 changed files with 83 additions and 10 deletions
--- a/libs/standard-tests/langchain_standard_tests/integration_tests/chat_models.py
+++ b/libs/standard-tests/langchain_standard_tests/integration_tests/chat_models.py
@@ -170,7 +170,11 @@ class ChatModelIntegrationTests(ChatModelTests):
    def test_tool_calling(self, model: BaseChatModel) -> None:
        if not self.has_tool_calling:
            pytest.skip("Test requires tool calling.")
-        model_with_tools = model.bind_tools([magic_function])
+        if self.tool_choice_value == "tool_name":
+            tool_choice: Optional[str] = "magic_function"
+        else:
+            tool_choice = self.tool_choice_value
+        model_with_tools = model.bind_tools([magic_function], tool_choice=tool_choice)

        # Test invoke
        query = "What is the value of magic_function(3)? Use the tool."
@@ -188,7 +192,13 @@ class ChatModelIntegrationTests(ChatModelTests):
        if not self.has_tool_calling:
            pytest.skip("Test requires tool calling.")

-        model_with_tools = model.bind_tools([magic_function_no_args])
+        if self.tool_choice_value == "tool_name":
+            tool_choice: Optional[str] = "magic_function_no_args"
+        else:
+            tool_choice = self.tool_choice_value
+        model_with_tools = model.bind_tools(
+            [magic_function_no_args], tool_choice=tool_choice
+        )
        query = "What is the value of magic_function()? Use the tool."
        result = model_with_tools.invoke(query)
        _validate_tool_call_message_no_args(result)
@@ -212,7 +222,11 @@ class ChatModelIntegrationTests(ChatModelTests):
            name="greeting_generator",
            description="Generate a greeting in a particular style of speaking.",
        )
-        model_with_tools = model.bind_tools([tool_])
+        if self.tool_choice_value == "tool_name":
+            tool_choice: Optional[str] = "greeting_generator"
+        else:
+            tool_choice = self.tool_choice_value
+        model_with_tools = model.bind_tools([tool_], tool_choice=tool_choice)
        query = "Using the tool, generate a Pirate greeting."
        result = model_with_tools.invoke(query)
        assert isinstance(result, AIMessage)
--- a/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py
+++ b/libs/standard-tests/langchain_standard_tests/unit_tests/chat_models.py
@@ -96,6 +96,11 @@ class ChatModelTests(BaseStandardTests):
    def has_tool_calling(self) -> bool:
        return self.chat_model_class.bind_tools is not BaseChatModel.bind_tools

+    @property
+    def tool_choice_value(self) -> Optional[str]:
+        """Value to use for tool choice when used in tests."""
+        return None
+
    @property
    def has_structured_output(self) -> bool:
        return (