Apply patch [skip ci]

This commit is contained in:
open-swe[bot]
2025-08-06 16:53:40 +00:00
parent b16578fbce
commit 0cebb8514b

View File

@@ -0,0 +1,369 @@
"""Integration tests for gpt-oss model tool calling support in ChatOllama.

These tests require a running Ollama instance with a gpt-oss model installed.

To run these tests:
1. Install Ollama: https://ollama.ai/
2. Pull a gpt-oss model: `ollama pull gpt-oss:20b`
3. Set the `OLLAMA_BASE_URL` environment variable (for example
   `http://localhost:11434`); the whole module is skipped when it is unset.
4. Run these tests with: `pytest tests/integration_tests/test_gpt_oss_tools.py`

Note: Individual tests are also skipped if Ollama is not available or the
required model is not installed.
"""
import functools
import http.client
import json
import os
import urllib.request
from typing import Any, Dict, Optional

import pytest
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.tools import tool
from langchain_ollama import ChatOllama

# Module-wide guard: every test in this file is skipped unless the
# OLLAMA_BASE_URL environment variable is present. This only looks at the
# environment; whether Ollama is actually reachable is handled by the
# separate skip conditions further down.
pytestmark = pytest.mark.skipif(
    "OLLAMA_BASE_URL" not in os.environ,
    reason="OLLAMA_BASE_URL not set, skipping Ollama integration tests",
)
# NOTE: the docstring below is not only documentation -- the `@tool`
# decorator uses it as the tool description, so its wording is kept as is.
@tool
def get_weather(location: str, unit: str = "celsius") -> str:
    """Get the current weather for a location.

    Args:
        location: The city to get weather for.
        unit: Temperature unit (celsius or fahrenheit).
    """
    # Canned reply for tests: no real weather service is contacted.
    report = f"The weather in {location} is sunny and 22 {unit}"
    return report
# NOTE: the docstring below is not only documentation -- the `@tool`
# decorator uses it as the tool description, so its wording is kept as is.
@tool
def search_web(query: str, max_results: int = 5) -> str:
    """Search the web for information.

    Args:
        query: The search query.
        max_results: Maximum number of results to return.
    """
    # Canned reply for tests: nothing is actually searched.
    summary = f"Found {max_results} results for '{query}'"
    return summary
# NOTE: the docstring below is not only documentation -- the `@tool`
# decorator uses it as the tool description, so its wording is kept as is.
@tool
def calculate(expression: str) -> str:
    """Calculate a mathematical expression.

    Args:
        expression: The mathematical expression to evaluate.
    """
    # Fixed stand-in result; `expression` is intentionally never evaluated.
    mock_result = "42"
    return mock_result
@functools.lru_cache(maxsize=None)
def check_ollama_available() -> bool:
    """Check if an Ollama server is reachable.

    The server at ``OLLAMA_BASE_URL`` (falling back to Ollama's default
    ``http://localhost:11434``) is probed with a single ``GET /api/tags``
    request. Unlike running a chat completion, this does not depend on any
    particular model being installed and does not load a model.

    The result is cached for the lifetime of the process because this
    function is evaluated in ``skipif`` conditions at import time, once per
    decorated test.

    Returns:
        True if the server answered the probe with HTTP 200, False if it
        could not be reached, the URL is malformed, or it answered with an
        error.
    """
    base_url = os.environ.get("OLLAMA_BASE_URL") or "http://localhost:11434"
    probe_url = f"{base_url.rstrip('/')}/api/tags"
    try:
        with urllib.request.urlopen(probe_url, timeout=5) as response:
            return response.status == 200
    except (OSError, ValueError, http.client.HTTPException):
        # OSError covers URLError/HTTPError, refused connections and
        # timeouts; ValueError covers a malformed base URL.
        return False
@functools.lru_cache(maxsize=None)
def check_model_available(model_name: str) -> bool:
    """Check if a specific model is installed in Ollama.

    The model list is read from ``GET /api/tags`` on the server at
    ``OLLAMA_BASE_URL`` (falling back to Ollama's default
    ``http://localhost:11434``). No inference is run, so the check stays
    cheap even for large models. Results are cached per model name because
    this function is evaluated in ``skipif`` conditions at import time.

    Args:
        model_name: Model name as passed to Ollama, e.g. ``"gpt-oss:20b"``.
            A name without a tag is treated as ``<name>:latest``.

    Returns:
        True if the model appears in the server's model list, False if it
        does not or if the server could not be queried.
    """
    base_url = os.environ.get("OLLAMA_BASE_URL") or "http://localhost:11434"
    listing_url = f"{base_url.rstrip('/')}/api/tags"
    try:
        with urllib.request.urlopen(listing_url, timeout=5) as response:
            payload = json.loads(response.read().decode("utf-8"))
    except (OSError, ValueError, http.client.HTTPException):
        # OSError covers URLError/HTTPError, refused connections and
        # timeouts; ValueError covers a malformed URL or an invalid body.
        return False
    if not isinstance(payload, dict):
        return False

    # Only look for a tag after the last "/" so that a registry host with a
    # port (host:port/name) is not mistaken for a tagged name.
    has_tag = ":" in model_name.rsplit("/", 1)[-1]
    wanted = model_name if has_tag else f"{model_name}:latest"

    installed = set()
    for entry in payload.get("models") or []:
        if isinstance(entry, dict):
            installed.update(
                value for value in (entry.get("name"), entry.get("model")) if value
            )
    return wanted in installed
# The availability probes are attached once to the class instead of once per
# test method: pytest applies class-level marks to every test in the class,
# and the probes (evaluated at import time) then run once instead of once
# per method.
@pytest.mark.integration
@pytest.mark.skipif(
    not check_ollama_available(),
    reason="Ollama is not available or not running",
)
@pytest.mark.skipif(
    not check_model_available("gpt-oss:20b"),
    reason="gpt-oss:20b model is not installed",
)
class TestGptOssToolCallingIntegration:
    """Integration tests for gpt-oss model tool calling.

    Every test talks to a real ``gpt-oss:20b`` model. Because the model is
    free to answer without calling a tool, the assertions about tool calls
    are only checked when the response actually contains tool calls.
    """

    @staticmethod
    def _make_llm(**kwargs: Any) -> ChatOllama:
        """Build a gpt-oss:20b client that targets ``OLLAMA_BASE_URL``.

        The module is only run when ``OLLAMA_BASE_URL`` is set, so the tests
        should talk to that server rather than to the client's default host.
        """
        return ChatOllama(
            model="gpt-oss:20b",
            base_url=os.environ.get("OLLAMA_BASE_URL"),
            **kwargs,
        )

    def test_single_tool_call(self) -> None:
        """Test calling a single tool with gpt-oss model."""
        llm_with_tools = self._make_llm(temperature=0).bind_tools([get_weather])

        # Ask a question that should trigger tool use.
        response = llm_with_tools.invoke(
            "What's the weather like in London? Please use the available tool."
        )

        assert isinstance(response, AIMessage)
        # The model might not always call tools; only inspect a call if made.
        if response.tool_calls:
            tool_call = response.tool_calls[0]
            assert tool_call["name"] == "get_weather"
            assert "location" in tool_call["args"]
            # The model should identify London as the location.
            assert "london" in tool_call["args"]["location"].lower()

    def test_multiple_tools_binding(self) -> None:
        """Test binding multiple tools to gpt-oss model."""
        llm_with_tools = self._make_llm(temperature=0).bind_tools(
            [get_weather, search_web, calculate]
        )

        # The bound tools are exposed through the binding's kwargs.
        assert hasattr(llm_with_tools, "kwargs")
        assert "tools" in llm_with_tools.kwargs
        bound_tools = llm_with_tools.kwargs["tools"]
        assert len(bound_tools) == 3

        bound_names = {spec["function"]["name"] for spec in bound_tools}
        assert bound_names == {"get_weather", "search_web", "calculate"}

        # Invoke with a query that might use the search tool. Whether the
        # model calls it is the model's decision, so only the type is checked.
        response = llm_with_tools.invoke(
            "Search for information about Python programming. Use the search tool."
        )
        assert isinstance(response, AIMessage)

    def test_tool_call_with_conversation(self) -> None:
        """Test tool calling within a conversation context."""
        llm_with_tools = self._make_llm(temperature=0).bind_tools(
            [get_weather, calculate]
        )

        messages = [
            HumanMessage(content="Hi, I need help with two things."),
            AIMessage(content="Hello! I'd be happy to help. What do you need?"),
            HumanMessage(
                content="First, what's the weather in Paris? Second, calculate 15 * 28. "
                "Please use the available tools for both tasks."
            ),
        ]
        response = llm_with_tools.invoke(messages)

        assert isinstance(response, AIMessage)
        if response.tool_calls:
            # The model might call one or both tools, but never an unknown one.
            called_names = {call["name"] for call in response.tool_calls}
            assert called_names.issubset({"get_weather", "calculate"})

    def test_streaming_with_tools(self) -> None:
        """Test streaming responses with tool calls."""
        llm_with_tools = self._make_llm(temperature=0).bind_tools([get_weather])

        chunks = list(
            llm_with_tools.stream("What's the weather in Tokyo? Use the weather tool.")
        )
        assert len(chunks) > 0

        # Message chunks support "+"; adding them up yields the full response.
        final_message = chunks[0]
        for chunk in chunks[1:]:
            final_message += chunk

        tool_calls = getattr(final_message, "tool_calls", None)
        if tool_calls:
            tool_call = tool_calls[0]
            assert tool_call["name"] == "get_weather"
            assert "location" in tool_call["args"]

    # Explicit marker so the coroutine is also run when pytest-asyncio is in
    # its default "strict" mode; it is redundant but harmless in "auto" mode.
    # NOTE(review): assumes pytest-asyncio is the async test plugin in use.
    @pytest.mark.asyncio
    async def test_async_tool_calling(self) -> None:
        """Test asynchronous tool calling with gpt-oss model."""
        llm_with_tools = self._make_llm(temperature=0).bind_tools([calculate])

        response = await llm_with_tools.ainvoke(
            "Calculate 42 times 10. Please use the calculate tool."
        )

        assert isinstance(response, AIMessage)
        if response.tool_calls:
            tool_call = response.tool_calls[0]
            assert tool_call["name"] == "calculate"
            assert "expression" in tool_call["args"]
# The reachability probe is attached once to the class; pytest applies
# class-level marks to every test method in it.
@pytest.mark.integration
@pytest.mark.skipif(
    not check_ollama_available(),
    reason="Ollama is not available or not running",
)
class TestGptOssModelCompatibility:
    """Test compatibility of different gpt-oss model variants.

    These tests only inspect the tool schema produced by ``bind_tools``; they
    never send a prompt to the model themselves. When none of the candidate
    models is installed the test is reported as skipped instead of passing
    without having checked anything.
    """

    def test_gpt_oss_variants(self) -> None:
        """Test that different gpt-oss model variants are detected correctly."""
        # Model names that might be available on the server.
        model_variants = [
            "gpt-oss",
            "gpt-oss:latest",
            "gpt-oss:20b",
            "gpt-oss:7b",
        ]
        installed = [name for name in model_variants if check_model_available(name)]
        if not installed:
            pytest.skip("No gpt-oss model variant is installed")

        for model_name in installed:
            llm = ChatOllama(
                model=model_name,
                base_url=os.environ.get("OLLAMA_BASE_URL"),
            )
            llm_with_tools = llm.bind_tools([get_weather])

            # Verify the tool schema (called "Harmony format" by the original
            # author) has the function-style layout.
            bound_tools = llm_with_tools.kwargs["tools"]
            assert len(bound_tools) == 1
            # Named `tool_spec` so the imported `tool` decorator is not shadowed.
            tool_spec = bound_tools[0]
            assert tool_spec["type"] == "function"
            assert "function" in tool_spec

            # Every declared parameter type must be a plain string.
            properties = tool_spec["function"]["parameters"]["properties"]
            for prop in properties.values():
                if "type" in prop:
                    assert isinstance(prop["type"], str)

    def test_non_gpt_oss_models_unchanged(self) -> None:
        """Test that non-gpt-oss models still work with standard format."""
        non_gpt_models = ["llama2", "mistral", "codellama"]
        # One installed non-gpt-oss model is enough for this check.
        model_name = next(
            (name for name in non_gpt_models if check_model_available(name)),
            None,
        )
        if model_name is None:
            pytest.skip("No non-gpt-oss model is installed")

        llm = ChatOllama(
            model=model_name,
            base_url=os.environ.get("OLLAMA_BASE_URL"),
        )
        llm_with_tools = llm.bind_tools([get_weather])

        # Tools should still be bound, in the standard function-style format.
        bound_tools = llm_with_tools.kwargs["tools"]
        assert len(bound_tools) == 1
        tool_spec = bound_tools[0]
        assert tool_spec["type"] == "function"
        assert "function" in tool_spec
# The availability probes are attached once to the class; pytest applies
# class-level marks to every test method in it.
@pytest.mark.integration
@pytest.mark.skipif(
    not check_ollama_available(),
    reason="Ollama is not available or not running",
)
@pytest.mark.skipif(
    not check_model_available("gpt-oss:20b"),
    reason="gpt-oss:20b model is not installed",
)
class TestGptOssErrorHandling:
    """Test error handling for gpt-oss models with tools."""

    def test_malformed_tool_response_handling(self) -> None:
        """Test that malformed tool responses are handled gracefully."""
        # High temperature for randomness.
        llm = ChatOllama(
            model="gpt-oss:20b",
            temperature=1.5,
            base_url=os.environ.get("OLLAMA_BASE_URL"),
        )

        # A tool with nested, loosely typed parameters that might cause
        # parsing issues.
        @tool
        def complex_tool(
            data: Dict[str, Any],
            nested: Optional[Dict[str, Any]] = None,
        ) -> str:
            """A complex tool with nested parameters.

            Args:
                data: Complex data structure.
                nested: Optional nested data.
            """
            return "processed"

        llm_with_tools = llm.bind_tools([complex_tool])

        # This must not raise even if the model returns malformed tool calls.
        # The call is deliberately not wrapped in try/except: any exception
        # fails the test and pytest reports it with its full traceback.
        response = llm_with_tools.invoke("Use the complex tool with some data.")
        assert isinstance(response, AIMessage)

    def test_empty_tool_list(self) -> None:
        """Test that binding an empty tool list works correctly."""
        llm = ChatOllama(
            model="gpt-oss:20b",
            base_url=os.environ.get("OLLAMA_BASE_URL"),
        )

        # Binding an empty tool list should work and still allow invocation.
        llm_with_no_tools = llm.bind_tools([])
        response = llm_with_no_tools.invoke("Hello, how are you?")

        assert isinstance(response, AIMessage)
        # With no tools bound there must be no tool calls (empty or unset).
        assert not response.tool_calls