fix(groq): use qwen for tool choice integration tests (#38380)

Groq's `openai/gpt-oss-20b` can return reasoning content instead of
honoring forced tool choice in live integration tests. The forced
tool-choice coverage now uses Groq's recommended `qwen/qwen3.6-27b`
replacement with reasoning disabled, while leaving gpt-oss coverage in
place for reasoning-specific tests.

## Changes
- Add a dedicated tool-calling model constant for forced tool-choice
integration coverage.
- Update sync and async tool-choice tests to use `qwen/qwen3.6-27b` with
`reasoning_effort="none"`.
- Remove stale xfail markers from streaming tool-call tests and assert
the normalized aggregated stream output, including preserved tool-call
IDs.
This commit is contained in:
Mason Daugherty
2026-06-22 21:10:25 -04:00
committed by GitHub
parent 844cef444b
commit c669da7662

View File

@@ -25,6 +25,8 @@ from tests.unit_tests.fake.callbacks import (
)
DEFAULT_MODEL_NAME = "openai/gpt-oss-20b"
TOOL_CALLING_MODEL_NAME = "qwen/qwen3.6-27b"
TOOL_CALLING_MODEL_KWARGS: dict[str, Any] = {"reasoning_effort": "none"}
# gpt-oss doesn't support `reasoning_effort`
REASONING_MODEL_NAME = "qwen/qwen3-32b"
@@ -405,7 +407,7 @@ def test_system_message() -> None:
def test_tool_choice() -> None:
"""Test that tool choice is respected."""
llm = ChatGroq(model=DEFAULT_MODEL_NAME)
llm = ChatGroq(model=TOOL_CALLING_MODEL_NAME, **TOOL_CALLING_MODEL_KWARGS)
class MyTool(BaseModel):
name: str
@@ -435,7 +437,7 @@ def test_tool_choice() -> None:
def test_tool_choice_bool() -> None:
"""Test that tool choice is respected just passing in True."""
llm = ChatGroq(model=DEFAULT_MODEL_NAME)
llm = ChatGroq(model=TOOL_CALLING_MODEL_NAME, **TOOL_CALLING_MODEL_KWARGS)
class MyTool(BaseModel):
name: str
@@ -457,10 +459,9 @@ def test_tool_choice_bool() -> None:
assert tool_call["type"] == "function"
@pytest.mark.xfail(reason="Groq tool_choice doesn't currently force a tool call")
def test_streaming_tool_call() -> None:
"""Test that tool choice is respected."""
llm = ChatGroq(model=DEFAULT_MODEL_NAME)
llm = ChatGroq(model=TOOL_CALLING_MODEL_NAME, **TOOL_CALLING_MODEL_KWARGS)
class MyTool(BaseModel):
name: str
@@ -468,37 +469,32 @@ def test_streaming_tool_call() -> None:
with_tool = llm.bind_tools([MyTool], tool_choice="MyTool")
resp = with_tool.stream("Who was the 27 year old named Erick?")
additional_kwargs = None
resp = with_tool.stream("Who was the 27 year old named Erick? Use the tool.")
full: AIMessageChunk | None = None
for chunk in resp:
assert isinstance(chunk, AIMessageChunk)
assert chunk.content == "" # should just be tool call
additional_kwargs = chunk.additional_kwargs
full = chunk if full is None else full + chunk
assert additional_kwargs is not None
tool_calls = additional_kwargs["tool_calls"]
assert len(tool_calls) == 1
tool_call = tool_calls[0]
assert tool_call["function"]["name"] == "MyTool"
assert json.loads(tool_call["function"]["arguments"]) == {
"age": 27,
"name": "Erick",
}
assert tool_call["type"] == "function"
assert full is not None
assert full.content == "" # should just be tool call
assert len(full.tool_calls) == 1
tool_call = full.tool_calls[0]
assert tool_call["name"] == "MyTool"
assert tool_call["args"] == {"name": "Erick", "age": 27}
assert tool_call["id"] is not None
assert isinstance(chunk, AIMessageChunk)
assert isinstance(chunk.tool_call_chunks, list)
assert len(chunk.tool_call_chunks) == 1
tool_call_chunk = chunk.tool_call_chunks[0]
assert isinstance(full.tool_call_chunks, list)
assert len(full.tool_call_chunks) == 1
tool_call_chunk = full.tool_call_chunks[0]
assert tool_call_chunk["name"] == "MyTool"
assert tool_call_chunk["id"] == tool_call["id"]
assert isinstance(tool_call_chunk["args"], str)
assert json.loads(tool_call_chunk["args"]) == {"name": "Erick", "age": 27}
@pytest.mark.xfail(reason="Groq tool_choice doesn't currently force a tool call")
async def test_astreaming_tool_call() -> None:
"""Test that tool choice is respected."""
llm = ChatGroq(model=DEFAULT_MODEL_NAME)
llm = ChatGroq(model=TOOL_CALLING_MODEL_NAME, **TOOL_CALLING_MODEL_KWARGS)
class MyTool(BaseModel):
name: str
@@ -506,29 +502,25 @@ async def test_astreaming_tool_call() -> None:
with_tool = llm.bind_tools([MyTool], tool_choice="MyTool")
resp = with_tool.astream("Who was the 27 year old named Erick?")
additional_kwargs = None
resp = with_tool.astream("Who was the 27 year old named Erick? Use the tool.")
full: AIMessageChunk | None = None
async for chunk in resp:
assert isinstance(chunk, AIMessageChunk)
assert chunk.content == "" # should just be tool call
additional_kwargs = chunk.additional_kwargs
full = chunk if full is None else full + chunk
assert additional_kwargs is not None
tool_calls = additional_kwargs["tool_calls"]
assert len(tool_calls) == 1
tool_call = tool_calls[0]
assert tool_call["function"]["name"] == "MyTool"
assert json.loads(tool_call["function"]["arguments"]) == {
"age": 27,
"name": "Erick",
}
assert tool_call["type"] == "function"
assert full is not None
assert full.content == "" # should just be tool call
assert len(full.tool_calls) == 1
tool_call = full.tool_calls[0]
assert tool_call["name"] == "MyTool"
assert tool_call["args"] == {"name": "Erick", "age": 27}
assert tool_call["id"] is not None
assert isinstance(chunk, AIMessageChunk)
assert isinstance(chunk.tool_call_chunks, list)
assert len(chunk.tool_call_chunks) == 1
tool_call_chunk = chunk.tool_call_chunks[0]
assert isinstance(full.tool_call_chunks, list)
assert len(full.tool_call_chunks) == 1
tool_call_chunk = full.tool_call_chunks[0]
assert tool_call_chunk["name"] == "MyTool"
assert tool_call_chunk["id"] == tool_call["id"]
assert isinstance(tool_call_chunk["args"], str)
assert json.loads(tool_call_chunk["args"]) == {"name": "Erick", "age": 27}