From 0cebb8514bdb37cc3b86b9936fdb8f328a85f8ed Mon Sep 17 00:00:00 2001
From: "open-swe[bot]" <open-swe@users.noreply.github.com>
Date: Wed, 6 Aug 2025 16:53:40 +0000
Subject: [PATCH] Apply patch [skip ci]

---
 .../integration_tests/test_gpt_oss_tools.py   | 369 ++++++++++++++++++
 1 file changed, 369 insertions(+)
 create mode 100644 libs/partners/ollama/tests/integration_tests/test_gpt_oss_tools.py

diff --git a/libs/partners/ollama/tests/integration_tests/test_gpt_oss_tools.py b/libs/partners/ollama/tests/integration_tests/test_gpt_oss_tools.py
new file mode 100644
index 00000000000..d748ff19676
--- /dev/null
+++ b/libs/partners/ollama/tests/integration_tests/test_gpt_oss_tools.py
@@ -0,0 +1,369 @@
+"""Integration tests for gpt-oss model tool calling support in ChatOllama.
+
+These tests require an actual Ollama instance running with a gpt-oss model installed.
+To run these tests:
+1. Install Ollama: https://ollama.ai/
+2. Pull a gpt-oss model: `ollama pull gpt-oss:20b`
+3. Run these tests with: `pytest tests/integration_tests/test_gpt_oss_tools.py`
+
+Note: These tests will be skipped if Ollama is not available or the model is not installed.
+"""
+
+import os
+from typing import Any, Dict, Optional
+
+import pytest
+from langchain_core.messages import AIMessage, HumanMessage
+from langchain_core.tools import tool
+
+from langchain_ollama import ChatOllama
+
+
+# Skip all tests in this module if OLLAMA_BASE_URL is not set or Ollama is not available
+pytestmark = pytest.mark.skipif(
+    os.environ.get("OLLAMA_BASE_URL") is None,
+    reason="OLLAMA_BASE_URL not set, skipping Ollama integration tests",
+)
+
+
+@tool
+def get_weather(location: str, unit: str = "celsius") -> str:
+    """Get the current weather for a location.
+    
+    Args:
+        location: The city to get weather for.
+        unit: Temperature unit (celsius or fahrenheit).
+    """
+    # Mock implementation for testing
+    return f"The weather in {location} is sunny and 22 {unit}"
+
+
+@tool
+def search_web(query: str, max_results: int = 5) -> str:
+    """Search the web for information.
+    
+    Args:
+        query: The search query.
+        max_results: Maximum number of results to return.
+    """
+    return f"Found {max_results} results for '{query}'"
+
+
+@tool
+def calculate(expression: str) -> str:
+    """Calculate a mathematical expression.
+    
+    Args:
+        expression: The mathematical expression to evaluate.
+    """
+    # Simple mock calculation
+    return "42"
+
+
+def check_ollama_available() -> bool:
+    """Check if Ollama is available and running."""
+    try:
+        llm = ChatOllama(model="llama2")  # Use a common model to test connectivity
+        llm.invoke("test")
+        return True
+    except Exception:
+        return False
+
+
+def check_model_available(model_name: str) -> bool:
+    """Check if a specific model is available in Ollama."""
+    try:
+        llm = ChatOllama(model=model_name)
+        llm.invoke("test")
+        return True
+    except Exception:
+        return False
+
+
+@pytest.mark.integration
+class TestGptOssToolCallingIntegration:
+    """Integration tests for gpt-oss model tool calling."""
+    
+    @pytest.mark.skipif(
+        not check_ollama_available(),
+        reason="Ollama is not available or not running"
+    )
+    @pytest.mark.skipif(
+        not check_model_available("gpt-oss:20b"),
+        reason="gpt-oss:20b model is not installed"
+    )
+    def test_single_tool_call(self) -> None:
+        """Test calling a single tool with gpt-oss model."""
+        llm = ChatOllama(model="gpt-oss:20b", temperature=0)
+        llm_with_tools = llm.bind_tools([get_weather])
+        
+        # Ask a question that should trigger tool use
+        response = llm_with_tools.invoke(
+            "What's the weather like in London? Please use the available tool."
+        )
+        
+        # Check that the response is an AIMessage
+        assert isinstance(response, AIMessage)
+        
+        # Check if tool calls were made (model might not always call tools)
+        if response.tool_calls:
+            assert len(response.tool_calls) > 0
+            tool_call = response.tool_calls[0]
+            assert tool_call["name"] == "get_weather"
+            assert "location" in tool_call["args"]
+            # The model should identify London as the location
+            assert "london" in tool_call["args"]["location"].lower()
+    
+    @pytest.mark.skipif(
+        not check_ollama_available(),
+        reason="Ollama is not available or not running"
+    )
+    @pytest.mark.skipif(
+        not check_model_available("gpt-oss:20b"),
+        reason="gpt-oss:20b model is not installed"
+    )
+    def test_multiple_tools_binding(self) -> None:
+        """Test binding multiple tools to gpt-oss model."""
+        llm = ChatOllama(model="gpt-oss:20b", temperature=0)
+        llm_with_tools = llm.bind_tools([get_weather, search_web, calculate])
+        
+        # Test that tools are properly bound
+        assert hasattr(llm_with_tools, "kwargs")
+        assert "tools" in llm_with_tools.kwargs
+        tools = llm_with_tools.kwargs["tools"]
+        assert len(tools) == 3
+        
+        # Verify tool names
+        tool_names = {tool["function"]["name"] for tool in tools}
+        assert tool_names == {"get_weather", "search_web", "calculate"}
+        
+        # Test invocation with a query that might use search
+        response = llm_with_tools.invoke(
+            "Search for information about Python programming. Use the search tool."
+        )
+        
+        assert isinstance(response, AIMessage)
+        # The model may or may not call the tool depending on its decision
+    
+    @pytest.mark.skipif(
+        not check_ollama_available(),
+        reason="Ollama is not available or not running"
+    )
+    @pytest.mark.skipif(
+        not check_model_available("gpt-oss:20b"),
+        reason="gpt-oss:20b model is not installed"
+    )
+    def test_tool_call_with_conversation(self) -> None:
+        """Test tool calling within a conversation context."""
+        llm = ChatOllama(model="gpt-oss:20b", temperature=0)
+        llm_with_tools = llm.bind_tools([get_weather, calculate])
+        
+        # Create a conversation
+        messages = [
+            HumanMessage(content="Hi, I need help with two things."),
+            AIMessage(content="Hello! I'd be happy to help. What do you need?"),
+            HumanMessage(
+                content="First, what's the weather in Paris? Second, calculate 15 * 28. "
+                "Please use the available tools for both tasks."
+            ),
+        ]
+        
+        response = llm_with_tools.invoke(messages)
+        
+        assert isinstance(response, AIMessage)
+        # Check if the model made tool calls
+        if response.tool_calls:
+            # The model might call one or both tools
+            tool_names = {call["name"] for call in response.tool_calls}
+            # At least one tool should be called
+            assert len(tool_names) > 0
+            # The tools called should be from our available tools
+            assert tool_names.issubset({"get_weather", "calculate"})
+    
+    @pytest.mark.skipif(
+        not check_ollama_available(),
+        reason="Ollama is not available or not running"
+    )
+    @pytest.mark.skipif(
+        not check_model_available("gpt-oss:20b"),
+        reason="gpt-oss:20b model is not installed"
+    )
+    def test_streaming_with_tools(self) -> None:
+        """Test streaming responses with tool calls."""
+        llm = ChatOllama(model="gpt-oss:20b", temperature=0)
+        llm_with_tools = llm.bind_tools([get_weather])
+        
+        # Stream a response
+        chunks = []
+        for chunk in llm_with_tools.stream(
+            "What's the weather in Tokyo? Use the weather tool."
+        ):
+            chunks.append(chunk)
+        
+        # Should have received chunks
+        assert len(chunks) > 0
+        
+        # Combine chunks to get the full response
+        final_message = chunks[0]
+        for chunk in chunks[1:]:
+            final_message += chunk
+        
+        # Check if tool calls were made in the final combined message
+        if hasattr(final_message, "tool_calls") and final_message.tool_calls:
+            tool_call = final_message.tool_calls[0]
+            assert tool_call["name"] == "get_weather"
+            assert "location" in tool_call["args"]
+    
+    @pytest.mark.skipif(
+        not check_ollama_available(),
+        reason="Ollama is not available or not running"
+    )
+    @pytest.mark.skipif(
+        not check_model_available("gpt-oss:20b"),
+        reason="gpt-oss:20b model is not installed"
+    )
+    async def test_async_tool_calling(self) -> None:
+        """Test asynchronous tool calling with gpt-oss model."""
+        llm = ChatOllama(model="gpt-oss:20b", temperature=0)
+        llm_with_tools = llm.bind_tools([calculate])
+        
+        # Test async invocation
+        response = await llm_with_tools.ainvoke(
+            "Calculate 42 times 10. Please use the calculate tool."
+        )
+        
+        assert isinstance(response, AIMessage)
+        # Check if tool was called
+        if response.tool_calls:
+            assert len(response.tool_calls) > 0
+            tool_call = response.tool_calls[0]
+            assert tool_call["name"] == "calculate"
+            assert "expression" in tool_call["args"]
+
+
+@pytest.mark.integration
+class TestGptOssModelCompatibility:
+    """Test compatibility of different gpt-oss model variants."""
+    
+    @pytest.mark.skipif(
+        not check_ollama_available(),
+        reason="Ollama is not available or not running"
+    )
+    def test_gpt_oss_variants(self) -> None:
+        """Test that different gpt-oss model variants are detected correctly."""
+        # Test various gpt-oss model names that might be available
+        model_variants = [
+            "gpt-oss",
+            "gpt-oss:latest",
+            "gpt-oss:20b",
+            "gpt-oss:7b",
+        ]
+        
+        for model_name in model_variants:
+            if check_model_available(model_name):
+                llm = ChatOllama(model=model_name)
+                llm_with_tools = llm.bind_tools([get_weather])
+                
+                # Verify tools are in Harmony format
+                tools = llm_with_tools.kwargs["tools"]
+                assert len(tools) == 1
+                tool = tools[0]
+                assert tool["type"] == "function"
+                assert "function" in tool
+                
+                # Check parameter types are strings
+                props = tool["function"]["parameters"]["properties"]
+                for prop in props.values():
+                    if "type" in prop:
+                        assert isinstance(prop["type"], str)
+    
+    @pytest.mark.skipif(
+        not check_ollama_available(),
+        reason="Ollama is not available or not running"
+    )
+    def test_non_gpt_oss_models_unchanged(self) -> None:
+        """Test that non-gpt-oss models still work with standard format."""
+        # Test with a non-gpt-oss model if available
+        non_gpt_models = ["llama2", "mistral", "codellama"]
+        
+        for model_name in non_gpt_models:
+            if check_model_available(model_name):
+                llm = ChatOllama(model=model_name)
+                llm_with_tools = llm.bind_tools([get_weather])
+                
+                # Tools should still be bound
+                tools = llm_with_tools.kwargs["tools"]
+                assert len(tools) == 1
+                
+                # Should use standard OpenAI format
+                tool = tools[0]
+                assert tool["type"] == "function"
+                assert "function" in tool
+                
+                # The format should be compatible with standard Ollama models
+                break  # Test with at least one non-gpt-oss model
+
+
+@pytest.mark.integration
+class TestGptOssErrorHandling:
+    """Test error handling for gpt-oss models with tools."""
+    
+    @pytest.mark.skipif(
+        not check_ollama_available(),
+        reason="Ollama is not available or not running"
+    )
+    @pytest.mark.skipif(
+        not check_model_available("gpt-oss:20b"),
+        reason="gpt-oss:20b model is not installed"
+    )
+    def test_malformed_tool_response_handling(self) -> None:
+        """Test that malformed tool responses are handled gracefully."""
+        llm = ChatOllama(model="gpt-oss:20b", temperature=1.5)  # High temp for randomness
+        
+        # Create a tool that might cause parsing issues
+        @tool
+        def complex_tool(
+            data: Dict[str, Any],
+            nested: Optional[Dict[str, Any]] = None,
+        ) -> str:
+            """A complex tool with nested parameters.
+            
+            Args:
+                data: Complex data structure.
+                nested: Optional nested data.
+            """
+            return "processed"
+        
+        llm_with_tools = llm.bind_tools([complex_tool])
+        
+        # This should not raise an error even if the model returns malformed tool calls
+        try:
+            response = llm_with_tools.invoke(
+                "Use the complex tool with some data."
+            )
+            assert isinstance(response, AIMessage)
+        except Exception as e:
+            # The error should be handled gracefully
+            pytest.fail(f"Tool calling raised an unexpected error: {e}")
+    
+    @pytest.mark.skipif(
+        not check_ollama_available(),
+        reason="Ollama is not available or not running"
+    )
+    @pytest.mark.skipif(
+        not check_model_available("gpt-oss:20b"),
+        reason="gpt-oss:20b model is not installed"
+    )
+    def test_empty_tool_list(self) -> None:
+        """Test that binding an empty tool list works correctly."""
+        llm = ChatOllama(model="gpt-oss:20b")
+        
+        # Binding empty tool list should work
+        llm_with_no_tools = llm.bind_tools([])
+        
+        # Should still be able to invoke
+        response = llm_with_no_tools.invoke("Hello, how are you?")
+        assert isinstance(response, AIMessage)
+        
+        # Should have no tool calls
+        assert not response.tool_calls or len(response.tool_calls) == 0