# Extracted from patch 0cebb8514bdb37cc3b86b9936fdb8f328a85f8ed
# ("Apply patch [skip ci]", open-swe[bot], Wed, 6 Aug 2025 16:53:40 +0000).
# Target path: libs/partners/ollama/tests/integration_tests/test_gpt_oss_tools.py
"""Integration tests for gpt-oss model tool calling support in ChatOllama.

These tests require an actual Ollama instance running with a gpt-oss model
installed. To run these tests:

1. Install Ollama: https://ollama.ai/
2. Pull a gpt-oss model: `ollama pull gpt-oss:20b`
3. Export `OLLAMA_BASE_URL` (for example `http://localhost:11434`).
4. Run these tests with: `pytest tests/integration_tests/test_gpt_oss_tools.py`

Note: These tests will be skipped if `OLLAMA_BASE_URL` is not set, if Ollama is
not reachable, or if the required model is not installed.
"""

import functools
import http.client
import json
import os
import urllib.request
from typing import Any, Dict, FrozenSet, Optional

import pytest
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.tools import tool

from langchain_ollama import ChatOllama

# Model every gpt-oss specific test in this module runs against.
GPT_OSS_MODEL = "gpt-oss:20b"

# Used by the availability probes when OLLAMA_BASE_URL is unset.
_DEFAULT_BASE_URL = "http://localhost:11434"

# Upper bound for the availability probe so an unreachable host cannot stall
# the test session.
_PROBE_TIMEOUT_SECONDS = 5.0

# Skip all tests in this module if OLLAMA_BASE_URL is not set.
pytestmark = pytest.mark.skipif(
    os.environ.get("OLLAMA_BASE_URL") is None,
    reason="OLLAMA_BASE_URL not set, skipping Ollama integration tests",
)


@tool
def get_weather(location: str, unit: str = "celsius") -> str:
    """Get the current weather for a location.

    Args:
        location: The city to get weather for.
        unit: Temperature unit (celsius or fahrenheit).
    """
    # Mock implementation for testing
    return f"The weather in {location} is sunny and 22 {unit}"


@tool
def search_web(query: str, max_results: int = 5) -> str:
    """Search the web for information.

    Args:
        query: The search query.
        max_results: Maximum number of results to return.
    """
    return f"Found {max_results} results for '{query}'"


@tool
def calculate(expression: str) -> str:
    """Calculate a mathematical expression.

    Args:
        expression: The mathematical expression to evaluate.
    """
    # Simple mock calculation: the expression is intentionally not evaluated.
    return "42"


def _base_url() -> str:
    """Return the Ollama base URL used by the probes, without a trailing slash."""
    return (os.environ.get("OLLAMA_BASE_URL") or _DEFAULT_BASE_URL).rstrip("/")


@functools.lru_cache(maxsize=None)
def _installed_models(base_url: str) -> Optional[FrozenSet[str]]:
    """List the model tags installed on the Ollama server at ``base_url``.

    The result is cached per URL so the server is queried at most once per
    test session, including when it is unreachable.

    Args:
        base_url: Server root URL without a trailing slash.

    Returns:
        The installed model names, or ``None`` when the server cannot be
        reached or does not answer with the expected JSON document.
    """
    # Only speak HTTP(S); urlopen would otherwise accept file:// and friends.
    if not base_url.startswith(("http://", "https://")):
        return None
    try:
        with urllib.request.urlopen(
            f"{base_url}/api/tags", timeout=_PROBE_TIMEOUT_SECONDS
        ) as response:
            payload = json.load(response)
    except (OSError, ValueError, http.client.HTTPException):
        # URLError/timeouts are OSError; invalid JSON is ValueError.
        return None

    entries = payload.get("models") if isinstance(payload, dict) else None
    if not isinstance(entries, list):
        return None

    names = set()
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        for key in ("name", "model"):
            value = entry.get(key)
            if isinstance(value, str):
                names.add(value)
    return frozenset(names)


def check_ollama_available() -> bool:
    """Check if Ollama is available and running.

    Reachability is decided by listing the installed models, so the answer
    does not depend on any particular model being present.
    """
    return _installed_models(_base_url()) is not None


def check_model_available(model_name: str) -> bool:
    """Check if a specific model is available in Ollama.

    Args:
        model_name: Model name, with or without a tag. A name without a tag is
            looked up as ``<name>:latest``.

    Returns:
        ``True`` when the server is reachable and lists the model.
    """
    installed = _installed_models(_base_url())
    if not installed:
        return False
    # Only the last path segment can carry a tag; earlier colons may belong to
    # a registry host such as ``host:5000/model``.
    has_tag = ":" in model_name.rsplit("/", 1)[-1]
    wanted = model_name if has_tag else f"{model_name}:latest"
    return wanted in installed


def _make_llm(model: str, **kwargs: Any) -> ChatOllama:
    """Build a ChatOllama client bound to the server the probes talk to.

    ``base_url`` is ``None`` when OLLAMA_BASE_URL is unset, which leaves the
    client on its own default.
    """
    return ChatOllama(
        model=model, base_url=os.environ.get("OLLAMA_BASE_URL"), **kwargs
    )


@pytest.fixture
def require_ollama() -> None:
    """Skip the requesting test unless an Ollama server is reachable.

    Evaluated at test setup rather than at import time, so collecting this
    module never contacts the server.
    """
    if not check_ollama_available():
        pytest.skip("Ollama is not available or not running")


@pytest.fixture
def require_gpt_oss(require_ollama: None) -> None:
    """Skip the requesting test unless the gpt-oss test model is installed."""
    if not check_model_available(GPT_OSS_MODEL):
        pytest.skip(f"{GPT_OSS_MODEL} model is not installed")


@pytest.mark.integration
@pytest.mark.usefixtures("require_gpt_oss")
class TestGptOssToolCallingIntegration:
    """Integration tests for gpt-oss model tool calling."""

    def test_single_tool_call(self) -> None:
        """Test calling a single tool with gpt-oss model."""
        llm_with_tools = _make_llm(GPT_OSS_MODEL, temperature=0).bind_tools(
            [get_weather]
        )

        # Ask a question that should trigger tool use
        response = llm_with_tools.invoke(
            "What's the weather like in London? Please use the available tool."
        )

        assert isinstance(response, AIMessage)

        # The model might not always call tools; validate the call only if
        # one was made.
        if response.tool_calls:
            tool_call = response.tool_calls[0]
            assert tool_call["name"] == "get_weather"
            assert "location" in tool_call["args"]
            # The model should identify London as the location
            assert "london" in tool_call["args"]["location"].lower()

    def test_multiple_tools_binding(self) -> None:
        """Test binding multiple tools to gpt-oss model."""
        llm_with_tools = _make_llm(GPT_OSS_MODEL, temperature=0).bind_tools(
            [get_weather, search_web, calculate]
        )

        # Test that tools are properly bound
        assert hasattr(llm_with_tools, "kwargs")
        assert "tools" in llm_with_tools.kwargs
        tools = llm_with_tools.kwargs["tools"]
        assert len(tools) == 3

        # Verify tool names
        tool_names = {spec["function"]["name"] for spec in tools}
        assert tool_names == {"get_weather", "search_web", "calculate"}

        # Test invocation with a query that might use search
        response = llm_with_tools.invoke(
            "Search for information about Python programming. Use the search tool."
        )

        # The model may or may not call the tool depending on its decision
        assert isinstance(response, AIMessage)

    def test_tool_call_with_conversation(self) -> None:
        """Test tool calling within a conversation context."""
        llm_with_tools = _make_llm(GPT_OSS_MODEL, temperature=0).bind_tools(
            [get_weather, calculate]
        )

        # Create a conversation
        messages = [
            HumanMessage(content="Hi, I need help with two things."),
            AIMessage(content="Hello! I'd be happy to help. What do you need?"),
            HumanMessage(
                content="First, what's the weather in Paris? Second, calculate 15 * 28. "
                "Please use the available tools for both tasks."
            ),
        ]

        response = llm_with_tools.invoke(messages)

        assert isinstance(response, AIMessage)
        # The model might call one or both tools, or none at all.
        if response.tool_calls:
            called = {call["name"] for call in response.tool_calls}
            # The tools called should be from our available tools
            assert called.issubset({"get_weather", "calculate"})

    def test_streaming_with_tools(self) -> None:
        """Test streaming responses with tool calls."""
        llm_with_tools = _make_llm(GPT_OSS_MODEL, temperature=0).bind_tools(
            [get_weather]
        )

        # Stream a response
        chunks = list(
            llm_with_tools.stream("What's the weather in Tokyo? Use the weather tool.")
        )

        # Should have received chunks
        assert len(chunks) > 0

        # Combine chunks to get the full response
        final_message = chunks[0]
        for chunk in chunks[1:]:
            final_message += chunk

        # Check if tool calls were made in the final combined message
        tool_calls = getattr(final_message, "tool_calls", None)
        if tool_calls:
            tool_call = tool_calls[0]
            assert tool_call["name"] == "get_weather"
            assert "location" in tool_call["args"]

    # NOTE(review): this coroutine test relies on the project's pytest async
    # plugin configuration (e.g. an "auto" asyncio mode) - confirm.
    async def test_async_tool_calling(self) -> None:
        """Test asynchronous tool calling with gpt-oss model."""
        llm_with_tools = _make_llm(GPT_OSS_MODEL, temperature=0).bind_tools(
            [calculate]
        )

        # Test async invocation
        response = await llm_with_tools.ainvoke(
            "Calculate 42 times 10. Please use the calculate tool."
        )

        assert isinstance(response, AIMessage)
        # Check if tool was called
        if response.tool_calls:
            tool_call = response.tool_calls[0]
            assert tool_call["name"] == "calculate"
            assert "expression" in tool_call["args"]


@pytest.mark.integration
@pytest.mark.usefixtures("require_ollama")
class TestGptOssModelCompatibility:
    """Test compatibility of different gpt-oss model variants."""

    @pytest.mark.parametrize(
        "model_name",
        ["gpt-oss", "gpt-oss:latest", "gpt-oss:20b", "gpt-oss:7b"],
    )
    def test_gpt_oss_variants(self, model_name: str) -> None:
        """Test that different gpt-oss model variants are detected correctly."""
        # Report a skip instead of passing silently when the variant is absent.
        if not check_model_available(model_name):
            pytest.skip(f"{model_name} model is not installed")

        llm_with_tools = _make_llm(model_name).bind_tools([get_weather])

        # Verify tools are in Harmony format
        tools = llm_with_tools.kwargs["tools"]
        assert len(tools) == 1
        tool_spec = tools[0]
        assert tool_spec["type"] == "function"
        assert "function" in tool_spec

        # Check parameter types are strings
        props = tool_spec["function"]["parameters"]["properties"]
        for prop in props.values():
            if "type" in prop:
                assert isinstance(prop["type"], str)

    def test_non_gpt_oss_models_unchanged(self) -> None:
        """Test that non-gpt-oss models still work with standard format."""
        # Test with the first non-gpt-oss model that is installed.
        candidates = ("llama2", "mistral", "codellama")
        model_name = next(
            (name for name in candidates if check_model_available(name)), None
        )
        if model_name is None:
            pytest.skip(
                "No non-gpt-oss model is installed (tried: " + ", ".join(candidates) + ")"
            )

        llm_with_tools = _make_llm(model_name).bind_tools([get_weather])

        # Tools should still be bound
        tools = llm_with_tools.kwargs["tools"]
        assert len(tools) == 1

        # Should use standard OpenAI format
        tool_spec = tools[0]
        assert tool_spec["type"] == "function"
        assert "function" in tool_spec


@pytest.mark.integration
@pytest.mark.usefixtures("require_gpt_oss")
class TestGptOssErrorHandling:
    """Test error handling for gpt-oss models with tools."""

    def test_malformed_tool_response_handling(self) -> None:
        """Test that malformed tool responses are handled gracefully."""
        # High temperature for randomness
        llm = _make_llm(GPT_OSS_MODEL, temperature=1.5)

        # Create a tool that might cause parsing issues
        @tool
        def complex_tool(
            data: Dict[str, Any],
            nested: Optional[Dict[str, Any]] = None,
        ) -> str:
            """A complex tool with nested parameters.

            Args:
                data: Complex data structure.
                nested: Optional nested data.
            """
            return "processed"

        llm_with_tools = llm.bind_tools([complex_tool])

        # This should not raise even if the model returns malformed tool
        # calls; any exception propagates and fails the test with its original
        # traceback intact.
        response = llm_with_tools.invoke("Use the complex tool with some data.")
        assert isinstance(response, AIMessage)

    def test_empty_tool_list(self) -> None:
        """Test that binding an empty tool list works correctly."""
        llm = _make_llm(GPT_OSS_MODEL)

        # Binding empty tool list should work
        llm_with_no_tools = llm.bind_tools([])

        # Should still be able to invoke
        response = llm_with_no_tools.invoke("Hello, how are you?")
        assert isinstance(response, AIMessage)

        # Should have no tool calls
        assert not response.tool_calls