From 6d71b6b6ee7433716a59e73c8e859737800a0a86 Mon Sep 17 00:00:00 2001
From: Mason Daugherty
Date: Mon, 23 Jun 2025 19:22:31 -0400
Subject: [PATCH] standard-tests: refactoring and fixes (#31703)

- `libs/core/langchain_core/messages/base.py`: add model name to examples
  [per docs](https://python.langchain.com/api_reference/standard_tests/integration_tests/langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.html#langchain_tests.integration_tests.chat_models.ChatModelIntegrationTests.test_usage_metadata)
  ("0.3.17: Additionally check for the presence of model_name in the response
  metadata, which is needed for usage tracking in callback handlers")
- `libs/core/langchain_core/utils/function_calling.py`: correct typo
- `libs/standard-tests/langchain_tests/integration_tests/chat_models.py`:
  - `magic_function(input)` -> `magic_function(_input)` to prevent a warning
    about redefining the built-in `input`
  - relocate a few tests for better grouping and narrative flow
  - suppress some type hint warnings, following the pattern of similar tests
  - fix a few more typos
  - validate not only that `model_name` is defined, but also that it is not
    empty (test_usage_metadata)
---
 libs/core/langchain_core/messages/base.py     |   3 +-
 .../langchain_core/utils/function_calling.py  |   4 +-
 .../integration_tests/chat_models.py          | 953 +++++++++---------
 3 files changed, 490 insertions(+), 470 deletions(-)

diff --git a/libs/core/langchain_core/messages/base.py b/libs/core/langchain_core/messages/base.py
index 4bd56b4cf7f..ba976286b75 100644
--- a/libs/core/langchain_core/messages/base.py
+++ b/libs/core/langchain_core/messages/base.py
@@ -34,7 +34,8 @@ class BaseMessage(Serializable):
     """
 
     response_metadata: dict = Field(default_factory=dict)
-    """Response metadata. For example: response headers, logprobs, token counts."""
+    """Response metadata. For example: response headers, logprobs, token counts, model
+    name."""
 
     type: str
     """The type of the message. Must be a string that is unique to the message type.
diff --git a/libs/core/langchain_core/utils/function_calling.py b/libs/core/langchain_core/utils/function_calling.py
index 0c3e2b88cdb..729898fe9da 100644
--- a/libs/core/langchain_core/utils/function_calling.py
+++ b/libs/core/langchain_core/utils/function_calling.py
@@ -634,7 +634,7 @@ def tool_example_to_messages(
     1) HumanMessage: contains the content from which content should be extracted.
     2) AIMessage: contains the extracted information from the model
     3) ToolMessage: contains confirmation to the model that the model requested a tool
-        correctly.
+        correctly. If `ai_response` is specified, there will be a final AIMessage with that response.
@@ -668,7 +668,7 @@ def tool_example_to_messages(
                 ..., description="The color of the person's hair if known"
             )
             height_in_meters: Optional[str] = Field(
-                ..., description="Height in METERs"
+                ..., description="Height in METERS"
             )
 
         examples = [
diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
index 0a699e1c828..90b7777df07 100644
--- a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
+++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py
@@ -98,9 +98,9 @@ class _MagicFunctionSchema(BaseModel):
 
 
 @tool(args_schema=_MagicFunctionSchema)
-def magic_function(input: int) -> int:
+def magic_function(_input: int) -> int:
     """Applies a magic function to an input."""
-    return input + 2
+    return _input + 2
 
 
 @tool
@@ -116,7 +116,7 @@ def _validate_tool_call_message(message: BaseMessage) -> None:
     assert tool_call["name"] == "magic_function"
     assert tool_call["args"] == {"input": 3}
     assert tool_call["id"] is not None
-    assert tool_call["type"] == "tool_call"
+    assert tool_call.get("type") == "tool_call"
 
 
 def _validate_tool_call_message_no_args(message: BaseMessage) -> None:
@@ -126,7 +126,7 @@ def _validate_tool_call_message_no_args(message: BaseMessage) -> None:
     assert tool_call["name"] == "magic_function_no_args"
     assert tool_call["args"] == {}
     assert tool_call["id"] is not None
-    assert tool_call["type"] == "tool_call"
+    assert tool_call.get("type") == "tool_call"
 
 
 class ChatModelIntegrationTests(ChatModelTests):
@@ -750,12 +750,12 @@ class ChatModelIntegrationTests(ChatModelTests):
                 message=AIMessageChunk(content="chunk text")
             )
 
         """
-        num_tokens = 0
-        for token in model.stream("Hello"):
-            assert token is not None
-            assert isinstance(token, AIMessageChunk)
-            num_tokens += len(token.content)
-        assert num_tokens > 0
+        num_chunks = 0
+        for chunk in model.stream("Hello"):
+            assert chunk is not None
+            assert isinstance(chunk, AIMessageChunk)
+            num_chunks += 1
+        assert num_chunks > 0
 
     async def test_astream(self, model: BaseChatModel) -> None:
         """Test to verify that `await model.astream(simple_message)` works.
@@ -785,12 +785,13 @@ class ChatModelIntegrationTests(ChatModelTests):
                 message=AIMessageChunk(content="chunk text")
             )
 
         """
-        num_tokens = 0
-        async for token in model.astream("Hello"):
-            assert token is not None
-            assert isinstance(token, AIMessageChunk)
-            num_tokens += len(token.content)
-        assert num_tokens > 0
+        num_chunks = 0
+        async for chunk in model.astream("Hello"):
+            assert chunk is not None
+            assert isinstance(chunk, AIMessageChunk)
+            assert isinstance(chunk.content, (str, list))
+            num_chunks += 1
+        assert num_chunks > 0
 
     def test_batch(self, model: BaseChatModel) -> None:
         """Test to verify that `model.batch([messages])` works.
@@ -1014,62 +1015,72 @@ class ChatModelIntegrationTests(ChatModelTests):
         # Needed for langchain_core.callbacks.usage
         model_name = result.response_metadata.get("model_name")
         assert isinstance(model_name, str)
-        assert model_name
+        assert model_name != "", "model_name is empty"
+        # `input_tokens` is the total, possibly including other unclassified or
+        # system-level tokens.
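+        # For example (hypothetical numbers): with
+        #     input_token_details == {"audio": 10, "cache_read": 5}
+        # the checks below only require input_tokens >= 15, since providers may
+        # fold additional uncategorized tokens into the total.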
if "audio_input" in self.supported_usage_metadata_details["invoke"]: - msg = self.invoke_with_audio_input() - assert msg.usage_metadata is not None - assert msg.usage_metadata["input_token_details"] is not None - assert isinstance(msg.usage_metadata["input_token_details"]["audio"], int) - assert msg.usage_metadata["input_tokens"] >= sum( - (v or 0) # type: ignore[misc] - for v in msg.usage_metadata["input_token_details"].values() + # Checks if the specific chat model integration being tested has declared + # that it supports reporting token counts specifically for `audio_input` + msg = self.invoke_with_audio_input() # To be implemented in test subclass + assert (usage_metadata := msg.usage_metadata) is not None + assert ( + input_token_details := usage_metadata.get("input_token_details") + ) is not None + assert isinstance(input_token_details.get("audio"), int) + # Asserts that total input tokens are at least the sum of the token counts + total_detailed_tokens = sum( + v for v in input_token_details.values() if isinstance(v, int) ) + assert usage_metadata.get("input_tokens", 0) >= total_detailed_tokens if "audio_output" in self.supported_usage_metadata_details["invoke"]: msg = self.invoke_with_audio_output() - assert msg.usage_metadata is not None - assert msg.usage_metadata["output_token_details"] is not None - assert isinstance(msg.usage_metadata["output_token_details"]["audio"], int) - assert int(msg.usage_metadata["output_tokens"]) >= sum( - (v or 0) # type: ignore[misc] - for v in msg.usage_metadata["output_token_details"].values() + assert (usage_metadata := msg.usage_metadata) is not None + assert ( + output_token_details := usage_metadata.get("output_token_details") + ) is not None + assert isinstance(output_token_details.get("audio"), int) + # Asserts that total output tokens are at least the sum of the token counts + total_detailed_tokens = sum( + v for v in output_token_details.values() if isinstance(v, int) ) + assert usage_metadata.get("output_tokens", 0) >= total_detailed_tokens if "reasoning_output" in self.supported_usage_metadata_details["invoke"]: msg = self.invoke_with_reasoning_output() - assert msg.usage_metadata is not None - assert msg.usage_metadata["output_token_details"] is not None - assert isinstance( - msg.usage_metadata["output_token_details"]["reasoning"], - int, - ) - assert msg.usage_metadata["output_tokens"] >= sum( - (v or 0) # type: ignore[misc] - for v in msg.usage_metadata["output_token_details"].values() + assert (usage_metadata := msg.usage_metadata) is not None + assert ( + output_token_details := usage_metadata.get("output_token_details") + ) is not None + assert isinstance(output_token_details.get("reasoning"), int) + # Asserts that total output tokens are at least the sum of the token counts + total_detailed_tokens = sum( + v for v in output_token_details.values() if isinstance(v, int) ) + assert usage_metadata.get("output_tokens", 0) >= total_detailed_tokens if "cache_read_input" in self.supported_usage_metadata_details["invoke"]: msg = self.invoke_with_cache_read_input() - assert msg.usage_metadata is not None - assert msg.usage_metadata["input_token_details"] is not None - assert isinstance( - msg.usage_metadata["input_token_details"]["cache_read"], - int, - ) - assert msg.usage_metadata["input_tokens"] >= sum( - (v or 0) # type: ignore[misc] - for v in msg.usage_metadata["input_token_details"].values() + assert (usage_metadata := msg.usage_metadata) is not None + assert ( + input_token_details := usage_metadata.get("input_token_details") + ) 
is not None + assert isinstance(input_token_details.get("cache_read"), int) + # Asserts that total input tokens are at least the sum of the token counts + total_detailed_tokens = sum( + v for v in input_token_details.values() if isinstance(v, int) ) + assert usage_metadata.get("input_tokens", 0) >= total_detailed_tokens if "cache_creation_input" in self.supported_usage_metadata_details["invoke"]: msg = self.invoke_with_cache_creation_input() - assert msg.usage_metadata is not None - assert msg.usage_metadata["input_token_details"] is not None - assert isinstance( - msg.usage_metadata["input_token_details"]["cache_creation"], - int, - ) - assert msg.usage_metadata["input_tokens"] >= sum( - (v or 0) # type: ignore[misc] - for v in msg.usage_metadata["input_token_details"].values() + assert (usage_metadata := msg.usage_metadata) is not None + assert ( + input_token_details := usage_metadata.get("input_token_details") + ) is not None + assert isinstance(input_token_details.get("cache_creation"), int) + # Asserts that total input tokens are at least the sum of the token counts + total_detailed_tokens = sum( + v for v in input_token_details.values() if isinstance(v, int) ) + assert usage_metadata.get("input_tokens", 0) >= total_detailed_tokens def test_usage_metadata_streaming(self, model: BaseChatModel) -> None: """ @@ -1173,7 +1184,8 @@ class ChatModelIntegrationTests(ChatModelTests): assert isinstance(chunk, AIMessageChunk) # only one chunk is allowed to set usage_metadata.input_tokens # if multiple do, it's likely a bug that will result in overcounting - # input tokens + # input tokens (since the total number of input tokens applies to the full + # generation, not individual chunks) if full and full.usage_metadata and full.usage_metadata["input_tokens"]: assert ( not chunk.usage_metadata or not chunk.usage_metadata["input_tokens"] @@ -1193,30 +1205,37 @@ class ChatModelIntegrationTests(ChatModelTests): # Needed for langchain_core.callbacks.usage model_name = full.response_metadata.get("model_name") assert isinstance(model_name, str) - assert model_name + assert model_name != "", "model_name is empty" if "audio_input" in self.supported_usage_metadata_details["stream"]: msg = self.invoke_with_audio_input(stream=True) - assert isinstance(msg.usage_metadata["input_token_details"]["audio"], int) # type: ignore[index] + assert msg.usage_metadata is not None + assert isinstance( + msg.usage_metadata.get("input_token_details", {}).get("audio"), int + ) if "audio_output" in self.supported_usage_metadata_details["stream"]: msg = self.invoke_with_audio_output(stream=True) - assert isinstance(msg.usage_metadata["output_token_details"]["audio"], int) # type: ignore[index] + assert msg.usage_metadata is not None + assert isinstance( + msg.usage_metadata.get("output_token_details", {}).get("audio"), int + ) if "reasoning_output" in self.supported_usage_metadata_details["stream"]: msg = self.invoke_with_reasoning_output(stream=True) + assert msg.usage_metadata is not None assert isinstance( - msg.usage_metadata["output_token_details"]["reasoning"], # type: ignore[index] - int, + msg.usage_metadata.get("output_token_details", {}).get("reasoning"), int ) if "cache_read_input" in self.supported_usage_metadata_details["stream"]: msg = self.invoke_with_cache_read_input(stream=True) + assert msg.usage_metadata is not None assert isinstance( - msg.usage_metadata["input_token_details"]["cache_read"], # type: ignore[index] - int, + msg.usage_metadata.get("input_token_details", {}).get("cache_read"), int ) if 
"cache_creation_input" in self.supported_usage_metadata_details["stream"]: msg = self.invoke_with_cache_creation_input(stream=True) + assert msg.usage_metadata is not None assert isinstance( - msg.usage_metadata["input_token_details"]["cache_creation"], # type: ignore[index] + msg.usage_metadata.get("input_token_details", {}).get("cache_creation"), int, ) @@ -1331,57 +1350,6 @@ class ChatModelIntegrationTests(ChatModelTests): assert isinstance(full, AIMessage) _validate_tool_call_message(full) - def test_tool_choice(self, model: BaseChatModel) -> None: - """Test that the model can force tool calling via the ``tool_choice`` - parameter. This test is skipped if the ``has_tool_choice`` property on the - test class is set to False. - - This test is optional and should be skipped if the model does not support - tool calling (see Configuration below). - - .. dropdown:: Configuration - - To disable tool calling tests, set ``has_tool_choice`` to False in your - test class: - - .. code-block:: python - - class TestMyChatModelIntegration(ChatModelIntegrationTests): - @property - def has_tool_choice(self) -> bool: - return False - - .. dropdown:: Troubleshooting - - If this test fails, check whether the ``test_tool_calling`` test is passing. - If it is not, refer to the troubleshooting steps in that test first. - - If ``test_tool_calling`` is passing, check that the underlying model - supports forced tool calling. If it does, ``bind_tools`` should accept a - ``tool_choice`` parameter that can be used to force a tool call. - - It should accept (1) the string ``"any"`` to force calling the bound tool, - and (2) the string name of the tool to force calling that tool. - - """ - if not self.has_tool_choice or not self.has_tool_calling: - pytest.skip("Test requires tool choice.") - - @tool - def get_weather(location: str) -> str: - """Get weather at a location.""" - return "It's sunny." - - for tool_choice in ["any", "magic_function"]: - model_with_tools = model.bind_tools( - [magic_function, get_weather], tool_choice=tool_choice - ) - result = model_with_tools.invoke("Hello!") - assert isinstance(result, AIMessage) - assert result.tool_calls - if tool_choice == "magic_function": - assert result.tool_calls[0]["name"] == "magic_function" - async def test_tool_calling_async(self, model: BaseChatModel) -> None: """Test that the model generates tool calls. This test is skipped if the ``has_tool_calling`` property on the test class is set to False. @@ -1443,66 +1411,6 @@ class ChatModelIntegrationTests(ChatModelTests): assert isinstance(full, AIMessage) _validate_tool_call_message(full) - def test_tool_calling_with_no_arguments(self, model: BaseChatModel) -> None: - """Test that the model generates tool calls for tools with no arguments. - This test is skipped if the ``has_tool_calling`` property on the test class - is set to False. - - This test is optional and should be skipped if the model does not support - tool calling (see Configuration below). - - .. dropdown:: Configuration - - To disable tool calling tests, set ``has_tool_calling`` to False in your - test class: - - .. code-block:: python - - class TestMyChatModelIntegration(ChatModelIntegrationTests): - @property - def has_tool_calling(self) -> bool: - return False - - .. dropdown:: Troubleshooting - - If this test fails, check that ``bind_tools`` is implemented to correctly - translate LangChain tool objects into the appropriate schema for your - chat model. It should correctly handle the case where a tool has no - arguments. 
-
-            This test may fail if the chat model does not support a ``tool_choice``
-            parameter. This parameter can be used to force a tool call. It may also
-            fail if a provider does not support this form of tool. In these cases,
-            you can ``xfail`` the test:
-
-            .. code-block:: python
-
-                @pytest.mark.xfail(reason=("Does not support tool_choice."))
-                def test_tool_calling_with_no_arguments(self, model: BaseChatModel) -> None:
-                    super().test_tool_calling_with_no_arguments(model)
-
-            Otherwise, in the case that only one tool is bound, ensure that
-            ``tool_choice`` supports the string ``"any"`` to force calling that tool.
-        """  # noqa: E501
-        if not self.has_tool_calling:
-            pytest.skip("Test requires tool calling.")
-        if not self.has_tool_choice:
-            tool_choice_value = None
-        else:
-            tool_choice_value = "any"
-        model_with_tools = model.bind_tools(
-            [magic_function_no_args], tool_choice=tool_choice_value
-        )
-        query = "What is the value of magic_function_no_args()? Use the tool."
-        result = model_with_tools.invoke(query)
-        _validate_tool_call_message_no_args(result)
-
-        full: Optional[BaseMessageChunk] = None
-        for chunk in model_with_tools.stream(query):
-            full = chunk if full is None else full + chunk  # type: ignore
-        assert isinstance(full, AIMessage)
-        _validate_tool_call_message_no_args(full)
-
     def test_bind_runnables_as_tools(self, model: BaseChatModel) -> None:
         """Test that the model generates tool calls for tools that are derived from
         LangChain runnables. This test is skipped if the ``has_tool_calling`` property
@@ -1566,7 +1474,406 @@ class ChatModelIntegrationTests(ChatModelTests):
         assert result.tool_calls
         tool_call = result.tool_calls[0]
         assert tool_call["args"].get("answer_style")
-        assert tool_call["type"] == "tool_call"
+        assert tool_call.get("type") == "tool_call"
+
+    def test_tool_message_histories_string_content(
+        self, model: BaseChatModel, my_adder_tool: BaseTool
+    ) -> None:
+        """Test that message histories are compatible with string tool contents
+        (e.g. OpenAI format). If a model passes this test, it should be compatible
+        with messages generated from providers following OpenAI format.
+
+        This test should be skipped if the model does not support tool calling
+        (see Configuration below).
+
+        .. dropdown:: Configuration
+
+            To disable tool calling tests, set ``has_tool_calling`` to False in your
+            test class:
+
+            .. code-block:: python
+
+                class TestMyChatModelIntegration(ChatModelIntegrationTests):
+                    @property
+                    def has_tool_calling(self) -> bool:
+                        return False
+
+        .. dropdown:: Troubleshooting
+
+            If this test fails, check that:
+
+            1. The model can correctly handle message histories that include AIMessage objects with ``""`` content.
+            2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
+            3. The model can correctly handle ToolMessage objects with string content and arbitrary string values for ``tool_call_id``.
+
+            You can ``xfail`` the test if tool calling is implemented but this format
+            is not supported.
+
+            .. code-block:: python
+
+                @pytest.mark.xfail(reason=("Not implemented."))
+                def test_tool_message_histories_string_content(self, *args: Any) -> None:
+                    super().test_tool_message_histories_string_content(*args)
+        """  # noqa: E501
+        if not self.has_tool_calling:
+            pytest.skip("Test requires tool calling.")
+        model_with_tools = model.bind_tools([my_adder_tool])
+        function_name = "my_adder_tool"
+        function_args = {"a": "1", "b": "2"}
+
+        messages_string_content = [
+            HumanMessage("What is 1 + 2"),
+            # string content (e.g. OpenAI)
+            AIMessage(
+                "",
+                tool_calls=[
+                    {
+                        "name": function_name,
+                        "args": function_args,
+                        "id": "abc123",
+                        "type": "tool_call",
+                    },
+                ],
+            ),
+            ToolMessage(
+                json.dumps({"result": 3}),
+                name=function_name,
+                tool_call_id="abc123",
+            ),
+        ]
+        result_string_content = model_with_tools.invoke(messages_string_content)
+        assert isinstance(result_string_content, AIMessage)
+
+    def test_tool_message_histories_list_content(
+        self,
+        model: BaseChatModel,
+        my_adder_tool: BaseTool,
+    ) -> None:
+        """Test that message histories are compatible with list tool contents
+        (e.g. Anthropic format).
+
+        These message histories will include AIMessage objects with "tool use" and
+        content blocks, e.g.,
+
+        .. code-block:: python
+
+            [
+                {"type": "text", "text": "Hmm let me think about that"},
+                {
+                    "type": "tool_use",
+                    "input": {"fav_color": "green"},
+                    "id": "foo",
+                    "name": "color_picker",
+                },
+            ]
+
+        This test should be skipped if the model does not support tool calling
+        (see Configuration below).
+
+        .. dropdown:: Configuration
+
+            To disable tool calling tests, set ``has_tool_calling`` to False in your
+            test class:
+
+            .. code-block:: python
+
+                class TestMyChatModelIntegration(ChatModelIntegrationTests):
+                    @property
+                    def has_tool_calling(self) -> bool:
+                        return False
+
+        .. dropdown:: Troubleshooting
+
+            If this test fails, check that:
+
+            1. The model can correctly handle message histories that include AIMessage objects with list content.
+            2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
+            3. The model can correctly handle ToolMessage objects with string content and arbitrary string values for ``tool_call_id``.
+
+            You can ``xfail`` the test if tool calling is implemented but this format
+            is not supported.
+
+            .. code-block:: python
+
+                @pytest.mark.xfail(reason=("Not implemented."))
+                def test_tool_message_histories_list_content(self, *args: Any) -> None:
+                    super().test_tool_message_histories_list_content(*args)
+        """  # noqa: E501
+        if not self.has_tool_calling:
+            pytest.skip("Test requires tool calling.")
+        model_with_tools = model.bind_tools([my_adder_tool])
+        function_name = "my_adder_tool"
+        function_args = {"a": 1, "b": 2}
+
+        messages_list_content = [
+            HumanMessage("What is 1 + 2"),
+            # List content (e.g., Anthropic)
+            AIMessage(
+                [
+                    {"type": "text", "text": "some text"},
+                    {
+                        "type": "tool_use",
+                        "id": "abc123",
+                        "name": function_name,
+                        "input": function_args,
+                    },
+                ],
+                tool_calls=[
+                    {
+                        "name": function_name,
+                        "args": function_args,
+                        "id": "abc123",
+                        "type": "tool_call",
+                    },
+                ],
+            ),
+            ToolMessage(
+                json.dumps({"result": 3}),
+                name=function_name,
+                tool_call_id="abc123",
+            ),
+        ]
+        result_list_content = model_with_tools.invoke(messages_list_content)
+        assert isinstance(result_list_content, AIMessage)
+
+    def test_tool_choice(self, model: BaseChatModel) -> None:
+        """Test that the model can force tool calling via the ``tool_choice``
+        parameter. This test is skipped if the ``has_tool_choice`` property on the
+        test class is set to False.
+
+        This test is optional and should be skipped if the model does not support
+        tool calling (see Configuration below).
+
+        .. dropdown:: Configuration
+
+            To disable tool calling tests, set ``has_tool_choice`` to False in your
+            test class:
+
+            .. code-block:: python
+
+                class TestMyChatModelIntegration(ChatModelIntegrationTests):
+                    @property
+                    def has_tool_choice(self) -> bool:
+                        return False
+
+        .. dropdown:: Troubleshooting
+
+            If this test fails, check whether the ``test_tool_calling`` test is passing.
+            If it is not, refer to the troubleshooting steps in that test first.
+
+            If ``test_tool_calling`` is passing, check that the underlying model
+            supports forced tool calling. If it does, ``bind_tools`` should accept a
+            ``tool_choice`` parameter that can be used to force a tool call.
+
+            It should accept (1) the string ``"any"`` to force calling the bound tool,
+            and (2) the string name of the tool to force calling that tool.
+
+        """
+        if not self.has_tool_choice or not self.has_tool_calling:
+            pytest.skip("Test requires tool choice.")
+
+        @tool
+        def get_weather(location: str) -> str:  # pylint: disable=unused-argument
+            """Get weather at a location."""
+            return "It's sunny."
+
+        for tool_choice in ["any", "magic_function"]:
+            model_with_tools = model.bind_tools(
+                [magic_function, get_weather], tool_choice=tool_choice
+            )
+            result = model_with_tools.invoke("Hello!")
+            assert isinstance(result, AIMessage)
+            assert result.tool_calls
+            if tool_choice == "magic_function":
+                assert result.tool_calls[0]["name"] == "magic_function"
+
+    def test_tool_calling_with_no_arguments(self, model: BaseChatModel) -> None:
+        """Test that the model generates tool calls for tools with no arguments.
+        This test is skipped if the ``has_tool_calling`` property on the test class
+        is set to False.
+
+        This test is optional and should be skipped if the model does not support
+        tool calling (see Configuration below).
+
+        .. dropdown:: Configuration
+
+            To disable tool calling tests, set ``has_tool_calling`` to False in your
+            test class:
+
+            .. code-block:: python
+
+                class TestMyChatModelIntegration(ChatModelIntegrationTests):
+                    @property
+                    def has_tool_calling(self) -> bool:
+                        return False
+
+        .. dropdown:: Troubleshooting
+
+            If this test fails, check that ``bind_tools`` is implemented to correctly
+            translate LangChain tool objects into the appropriate schema for your
+            chat model. It should correctly handle the case where a tool has no
+            arguments.
+
+            This test may fail if the chat model does not support a ``tool_choice``
+            parameter. This parameter can be used to force a tool call. It may also
+            fail if a provider does not support this form of tool. In these cases,
+            you can ``xfail`` the test:
+
+            .. code-block:: python
+
+                @pytest.mark.xfail(reason=("Does not support tool_choice."))
+                def test_tool_calling_with_no_arguments(self, model: BaseChatModel) -> None:
+                    super().test_tool_calling_with_no_arguments(model)
+
+            Otherwise, in the case that only one tool is bound, ensure that
+            ``tool_choice`` supports the string ``"any"`` to force calling that tool.
+        """  # noqa: E501
+        if not self.has_tool_calling:
+            pytest.skip("Test requires tool calling.")
+        if not self.has_tool_choice:
+            tool_choice_value = None
+        else:
+            tool_choice_value = "any"
+        model_with_tools = model.bind_tools(
+            [magic_function_no_args], tool_choice=tool_choice_value
+        )
+        query = "What is the value of magic_function_no_args()? Use the tool."
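+        # The expected call has empty args, e.g. (illustrative shape only):
+        #     {"name": "magic_function_no_args", "args": {}, "id": "...", "type": "tool_call"}
+        # which is what _validate_tool_call_message_no_args checks below.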
+        result = model_with_tools.invoke(query)
+        _validate_tool_call_message_no_args(result)
+
+        full: Optional[BaseMessageChunk] = None
+        for chunk in model_with_tools.stream(query):
+            full = chunk if full is None else full + chunk  # type: ignore
+        assert isinstance(full, AIMessage)
+        _validate_tool_call_message_no_args(full)
+
+    def test_tool_message_error_status(
+        self, model: BaseChatModel, my_adder_tool: BaseTool
+    ) -> None:
+        """Test that ToolMessage with ``status="error"`` can be handled.
+
+        These messages may take the form:
+
+        .. code-block:: python
+
+            ToolMessage(
+                "Error: Missing required argument 'b'.",
+                name="my_adder_tool",
+                tool_call_id="abc123",
+                status="error",
+            )
+
+        If possible, the ``status`` field should be parsed and passed appropriately
+        to the model.
+
+        This test is optional and should be skipped if the model does not support
+        tool calling (see Configuration below).
+
+        .. dropdown:: Configuration
+
+            To disable tool calling tests, set ``has_tool_calling`` to False in your
+            test class:
+
+            .. code-block:: python
+
+                class TestMyChatModelIntegration(ChatModelIntegrationTests):
+                    @property
+                    def has_tool_calling(self) -> bool:
+                        return False
+
+        .. dropdown:: Troubleshooting
+
+            If this test fails, check that the ``status`` field on ``ToolMessage``
+            objects is either ignored or passed to the model appropriately.
+        """
+        if not self.has_tool_calling:
+            pytest.skip("Test requires tool calling.")
+        model_with_tools = model.bind_tools([my_adder_tool])
+        messages = [
+            HumanMessage("What is 1 + 2"),
+            AIMessage(
+                "",
+                tool_calls=[
+                    {
+                        "name": "my_adder_tool",
+                        "args": {"a": 1},
+                        "id": "abc123",
+                        "type": "tool_call",
+                    },
+                ],
+            ),
+            ToolMessage(
+                "Error: Missing required argument 'b'.",
+                name="my_adder_tool",
+                tool_call_id="abc123",
+                status="error",
+            ),
+        ]
+        result = model_with_tools.invoke(messages)
+        assert isinstance(result, AIMessage)
+
+    def test_structured_few_shot_examples(
+        self, model: BaseChatModel, my_adder_tool: BaseTool
+    ) -> None:
+        """Test that the model can process few-shot examples with tool calls.
+
+        These are represented as a sequence of messages of the following form:
+
+        - ``HumanMessage`` with string content;
+        - ``AIMessage`` with the ``tool_calls`` attribute populated;
+        - ``ToolMessage`` with string content;
+        - ``AIMessage`` with string content (an answer);
+        - ``HumanMessage`` with string content (a follow-up question).
+
+        This test should be skipped if the model does not support tool calling
+        (see Configuration below).
+
+        .. dropdown:: Configuration
+
+            To disable tool calling tests, set ``has_tool_calling`` to False in your
+            test class:
+
+            .. code-block:: python
+
+                class TestMyChatModelIntegration(ChatModelIntegrationTests):
+                    @property
+                    def has_tool_calling(self) -> bool:
+                        return False
+
+        .. dropdown:: Troubleshooting
+
+            This test uses a utility function in ``langchain_core`` to generate a
+            sequence of messages representing "few-shot" examples: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.tool_example_to_messages.html
+
+            If this test fails, check that the model can correctly handle this
+            sequence of messages.
+
+            You can ``xfail`` the test if tool calling is implemented but this format
+            is not supported.
+
+            .. code-block:: python
+
+                @pytest.mark.xfail(reason=("Not implemented."))
+                def test_structured_few_shot_examples(self, *args: Any) -> None:
+                    super().test_structured_few_shot_examples(*args)
+        """  # noqa: E501
+        if not self.has_tool_calling:
+            pytest.skip("Test requires tool calling.")
+        model_with_tools = model.bind_tools([my_adder_tool], tool_choice="any")
+        function_result = json.dumps({"result": 3})
+
+        tool_schema = my_adder_tool.args_schema
+        assert isinstance(tool_schema, type) and issubclass(tool_schema, BaseModel)
+        few_shot_messages = tool_example_to_messages(
+            "What is 1 + 2",
+            [tool_schema(a=1, b=2)],
+            tool_outputs=[function_result],
+            ai_response=function_result,
+        )
+
+        messages = few_shot_messages + [HumanMessage("What is 3 + 4")]
+        result = model_with_tools.invoke(messages)
+        assert isinstance(result, AIMessage)
 
     @pytest.mark.parametrize("schema_type", ["pydantic", "typeddict", "json_schema"])
     def test_structured_output(self, model: BaseChatModel, schema_type: str) -> None:
@@ -1917,229 +2224,6 @@ class ChatModelIntegrationTests(ChatModelTests):
             assert isinstance(chunk, dict)  # for mypy
             assert set(chunk.keys()) == {"setup", "punchline"}
 
-    def test_tool_message_histories_string_content(
-        self, model: BaseChatModel, my_adder_tool: BaseTool
-    ) -> None:
-        """Test that message histories are compatible with string tool contents
-        (e.g. OpenAI format). If a model passes this test, it should be compatible
-        with messages generated from providers following OpenAI format.
-
-        This test should be skipped if the model does not support tool calling
-        (see Configuration below).
-
-        .. dropdown:: Configuration
-
-            To disable tool calling tests, set ``has_tool_calling`` to False in your
-            test class:
-
-            .. code-block:: python
-
-                class TestMyChatModelIntegration(ChatModelIntegrationTests):
-                    @property
-                    def has_tool_calling(self) -> bool:
-                        return False
-
-        .. dropdown:: Troubleshooting
-
-            If this test fails, check that:
-
-            1. The model can correctly handle message histories that include AIMessage objects with ``""`` content.
-            2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
-            3. The model can correctly handle ToolMessage objects with string content and arbitrary string values for ``tool_call_id``.
-
-            You can ``xfail`` the test if tool calling is implemented but this format
-            is not supported.
-
-            .. code-block:: python
-
-                @pytest.mark.xfail(reason=("Not implemented."))
-                def test_tool_message_histories_string_content(self, *args: Any) -> None:
-                    super().test_tool_message_histories_string_content(*args)
-        """  # noqa: E501
-        if not self.has_tool_calling:
-            pytest.skip("Test requires tool calling.")
-        model_with_tools = model.bind_tools([my_adder_tool])
-        function_name = "my_adder_tool"
-        function_args = {"a": "1", "b": "2"}
-
-        messages_string_content = [
-            HumanMessage("What is 1 + 2"),
-            # string content (e.g. OpenAI)
-            AIMessage(
-                "",
-                tool_calls=[
-                    {
-                        "name": function_name,
-                        "args": function_args,
-                        "id": "abc123",
-                        "type": "tool_call",
-                    },
-                ],
-            ),
-            ToolMessage(
-                json.dumps({"result": 3}),
-                name=function_name,
-                tool_call_id="abc123",
-            ),
-        ]
-        result_string_content = model_with_tools.invoke(messages_string_content)
-        assert isinstance(result_string_content, AIMessage)
-
-    def test_tool_message_histories_list_content(
-        self,
-        model: BaseChatModel,
-        my_adder_tool: BaseTool,
-    ) -> None:
-        """Test that message histories are compatible with list tool contents
-        (e.g. Anthropic format).
-
-        These message histories will include AIMessage objects with "tool use" and
-        content blocks, e.g.,
-
-        .. code-block:: python
-
-            [
-                {"type": "text", "text": "Hmm let me think about that"},
-                {
-                    "type": "tool_use",
-                    "input": {"fav_color": "green"},
-                    "id": "foo",
-                    "name": "color_picker",
-                },
-            ]
-
-        This test should be skipped if the model does not support tool calling
-        (see Configuration below).
-
-        .. dropdown:: Configuration
-
-            To disable tool calling tests, set ``has_tool_calling`` to False in your
-            test class:
-
-            .. code-block:: python
-
-                class TestMyChatModelIntegration(ChatModelIntegrationTests):
-                    @property
-                    def has_tool_calling(self) -> bool:
-                        return False
-
-        .. dropdown:: Troubleshooting
-
-            If this test fails, check that:
-
-            1. The model can correctly handle message histories that include AIMessage objects with list content.
-            2. The ``tool_calls`` attribute on AIMessage objects is correctly handled and passed to the model in an appropriate format.
-            3. The model can correctly handle ToolMessage objects with string content and arbitrary string values for ``tool_call_id``.
-
-            You can ``xfail`` the test if tool calling is implemented but this format
-            is not supported.
-
-            .. code-block:: python
-
-                @pytest.mark.xfail(reason=("Not implemented."))
-                def test_tool_message_histories_list_content(self, *args: Any) -> None:
-                    super().test_tool_message_histories_list_content(*args)
-        """  # noqa: E501
-        if not self.has_tool_calling:
-            pytest.skip("Test requires tool calling.")
-        model_with_tools = model.bind_tools([my_adder_tool])
-        function_name = "my_adder_tool"
-        function_args = {"a": 1, "b": 2}
-
-        messages_list_content = [
-            HumanMessage("What is 1 + 2"),
-            # List content (e.g., Anthropic)
-            AIMessage(
-                [
-                    {"type": "text", "text": "some text"},
-                    {
-                        "type": "tool_use",
-                        "id": "abc123",
-                        "name": function_name,
-                        "input": function_args,
-                    },
-                ],
-                tool_calls=[
-                    {
-                        "name": function_name,
-                        "args": function_args,
-                        "id": "abc123",
-                        "type": "tool_call",
-                    },
-                ],
-            ),
-            ToolMessage(
-                json.dumps({"result": 3}),
-                name=function_name,
-                tool_call_id="abc123",
-            ),
-        ]
-        result_list_content = model_with_tools.invoke(messages_list_content)
-        assert isinstance(result_list_content, AIMessage)
-
-    def test_structured_few_shot_examples(
-        self, model: BaseChatModel, my_adder_tool: BaseTool
-    ) -> None:
-        """Test that the model can process few-shot examples with tool calls.
-
-        These are represented as a sequence of messages of the following form:
-
-        - ``HumanMessage`` with string content;
-        - ``AIMessage`` with the ``tool_calls`` attribute populated;
-        - ``ToolMessage`` with string content;
-        - ``AIMessage`` with string content (an answer);
-        - ``HuamnMessage`` with string content (a follow-up question).
-
-        This test should be skipped if the model does not support tool calling
-        (see Configuration below).
-
-        .. dropdown:: Configuration
-
-            To disable tool calling tests, set ``has_tool_calling`` to False in your
-            test class:
-
-            .. code-block:: python
-
-                class TestMyChatModelIntegration(ChatModelIntegrationTests):
-                    @property
-                    def has_tool_calling(self) -> bool:
-                        return False
-
-        .. dropdown:: Troubleshooting
-
-            This test uses a utility function in ``langchain_core`` to generate a
-            sequence of messages representing "few-shot" examples: https://python.langchain.com/api_reference/core/utils/langchain_core.utils.function_calling.tool_example_to_messages.html
-
-            If this test fails, check that the model can correctly handle this
-            sequence of messages.
-
-            You can ``xfail`` the test if tool calling is implemented but this format
-            is not supported.
-
-            .. code-block:: python
-
-                @pytest.mark.xfail(reason=("Not implemented."))
-                def test_structured_few_shot_examples(self, *args: Any) -> None:
-                    super().test_structured_few_shot_examples(*args)
-        """  # noqa: E501
-        if not self.has_tool_calling:
-            pytest.skip("Test requires tool calling.")
-        model_with_tools = model.bind_tools([my_adder_tool], tool_choice="any")
-        function_result = json.dumps({"result": 3})
-
-        tool_schema = my_adder_tool.args_schema
-        assert isinstance(tool_schema, type) and issubclass(tool_schema, BaseModel)
-        few_shot_messages = tool_example_to_messages(
-            "What is 1 + 2",
-            [tool_schema(a=1, b=2)],
-            tool_outputs=[function_result],
-            ai_response=function_result,
-        )
-
-        messages = few_shot_messages + [HumanMessage("What is 3 + 4")]
-        result = model_with_tools.invoke(messages)
-        assert isinstance(result, AIMessage)
-
     def test_pdf_inputs(self, model: BaseChatModel) -> None:
         """Test that the model can process PDF inputs.
 
@@ -2678,71 +2762,6 @@ class ChatModelIntegrationTests(ChatModelTests):
         response = model.invoke(messages)
         assert isinstance(response, AIMessage)
 
-    def test_tool_message_error_status(
-        self, model: BaseChatModel, my_adder_tool: BaseTool
-    ) -> None:
-        """Test that ToolMessage with ``status="error"`` can be handled.
-
-        These messages may take the form:
-
-        .. code-block:: python
-
-            ToolMessage(
-                "Error: Missing required argument 'b'.",
-                name="my_adder_tool",
-                tool_call_id="abc123",
-                status="error",
-            )
-
-        If possible, the ``status`` field should be parsed and passed appropriately
-        to the model.
-
-        This test is optional and should be skipped if the model does not support
-        tool calling (see Configuration below).
-
-        .. dropdown:: Configuration
-
-            To disable tool calling tests, set ``has_tool_calling`` to False in your
-            test class:
-
-            .. code-block:: python
-
-                class TestMyChatModelIntegration(ChatModelIntegrationTests):
-                    @property
-                    def has_tool_calling(self) -> bool:
-                        return False
-
-        .. dropdown:: Troubleshooting
-
-            If this test fails, check that the ``status`` field on ``ToolMessage``
-            objects is either ignored or passed to the model appropriately.
-        """
-        if not self.has_tool_calling:
-            pytest.skip("Test requires tool calling.")
-        model_with_tools = model.bind_tools([my_adder_tool])
-        messages = [
-            HumanMessage("What is 1 + 2"),
-            AIMessage(
-                "",
-                tool_calls=[
-                    {
-                        "name": "my_adder_tool",
-                        "args": {"a": 1},
-                        "id": "abc123",
-                        "type": "tool_call",
-                    },
-                ],
-            ),
-            ToolMessage(
-                "Error: Missing required argument 'b'.",
-                name="my_adder_tool",
-                tool_call_id="abc123",
-                status="error",
-            ),
-        ]
-        result = model_with_tools.invoke(messages)
-        assert isinstance(result, AIMessage)
-
     def test_message_with_name(self, model: BaseChatModel) -> None:
         """Test that HumanMessage with values for the ``name`` field can be handled.
 
@@ -2809,7 +2828,7 @@ class ChatModelIntegrationTests(ChatModelTests):
             pytest.skip("Test requires tool calling.")
 
         @tool
-        def get_weather(location: str) -> str:
+        def get_weather(location: str) -> str:  # pylint: disable=unused-argument
            """Call to surf the web."""
            return "It's sunny."
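--
Usage sketch (illustrative, placed after the patch trailer): a minimal test
class consuming these standard tests, mirroring the ``Configuration`` examples
in the docstrings above. ``ChatMyModel``, ``my_package``, and the constructor
parameters are hypothetical placeholders, not part of this change.

    from langchain_tests.integration_tests import ChatModelIntegrationTests

    from my_package.chat_models import ChatMyModel  # hypothetical integration


    class TestChatMyModelIntegration(ChatModelIntegrationTests):
        @property
        def chat_model_class(self) -> type[ChatMyModel]:
            return ChatMyModel

        @property
        def chat_model_params(self) -> dict:
            # Hypothetical constructor arguments for the model under test.
            return {"model": "my-model-001", "temperature": 0}

        @property
        def has_tool_calling(self) -> bool:
            return True

        @property
        def has_tool_choice(self) -> bool:
            # Returning False skips test_tool_choice, per its docstring.
            return False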