feat(groq): openai-oss (#32411)

Use the new OpenAI gpt-oss model (`openai/gpt-oss-20b`) for integration tests, set module-level testing model names, and improve the robustness of the tool tests.
2025-09-26 05:48:40 +00:00 · 2025-08-05 14:18:56 -04:00
parent 4011257c25
commit 419c173225
1 changed files with 35 additions and 32 deletions
--- a/libs/partners/groq/tests/integration_tests/test_chat_models.py
+++ b/libs/partners/groq/tests/integration_tests/test_chat_models.py
@@ -24,7 +24,8 @@ from tests.unit_tests.fake.callbacks import (
    FakeCallbackHandlerWithChatStart,
 )

-MODEL_NAME = "llama-3.3-70b-versatile"
+DEFAULT_MODEL_NAME = "openai/gpt-oss-20b"
+REASONING_MODEL_NAME = "deepseek-r1-distill-llama-70b"


 #
@@ -34,7 +35,7 @@ MODEL_NAME = "llama-3.3-70b-versatile"
 def test_invoke() -> None:
    """Test Chat wrapper."""
    chat = ChatGroq(
-        model=MODEL_NAME,
+        model=DEFAULT_MODEL_NAME,
        temperature=0.7,
        base_url=None,
        groq_proxy=None,
@@ -55,7 +56,7 @@ def test_invoke() -> None:
@pytest.mark.scheduled
 async def test_ainvoke() -> None:
    """Test ainvoke tokens from ChatGroq."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)

    result = await chat.ainvoke("Welcome to the Groqetship!", config={"tags": ["foo"]})
    assert isinstance(result, BaseMessage)
@@ -65,7 +66,7 @@ async def test_ainvoke() -> None:
@pytest.mark.scheduled
 def test_batch() -> None:
    """Test batch tokens from ChatGroq."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)

    result = chat.batch(["Hello!", "Welcome to the Groqetship!"])
    for token in result:
@@ -76,7 +77,7 @@ def test_batch() -> None:
@pytest.mark.scheduled
 async def test_abatch() -> None:
    """Test abatch tokens from ChatGroq."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)

    result = await chat.abatch(["Hello!", "Welcome to the Groqetship!"])
    for token in result:
@@ -87,7 +88,7 @@ async def test_abatch() -> None:
@pytest.mark.scheduled
 async def test_stream() -> None:
    """Test streaming tokens from Groq."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)

    for token in chat.stream("Welcome to the Groqetship!"):
        assert isinstance(token, BaseMessageChunk)
@@ -97,7 +98,7 @@ async def test_stream() -> None:
@pytest.mark.scheduled
 async def test_astream() -> None:
    """Test streaming tokens from Groq."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)

    full: Optional[BaseMessageChunk] = None
    chunks_with_token_counts = 0
@@ -136,7 +137,7 @@ async def test_astream() -> None:
 def test_generate() -> None:
    """Test sync generate."""
    n = 1
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)
    message = HumanMessage(content="Hello", n=1)
    response = chat.generate([[message], [message]])
    assert isinstance(response, LLMResult)
@@ -155,7 +156,7 @@ def test_generate() -> None:
 async def test_agenerate() -> None:
    """Test async generation."""
    n = 1
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10, n=1)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10, n=1)
    message = HumanMessage(content="Hello")
    response = await chat.agenerate([[message], [message]])
    assert isinstance(response, LLMResult)
@@ -178,7 +179,7 @@ def test_invoke_streaming() -> None:
    """Test that streaming correctly invokes on_llm_new_token callback."""
    callback_handler = FakeCallbackHandler()
    chat = ChatGroq(
-        model=MODEL_NAME,
+        model=DEFAULT_MODEL_NAME,
        max_tokens=2,
        streaming=True,
        temperature=0,
@@ -195,7 +196,7 @@ async def test_agenerate_streaming() -> None:
    """Test that streaming correctly invokes on_llm_new_token callback."""
    callback_handler = FakeCallbackHandlerWithChatStart()
    chat = ChatGroq(
-        model=MODEL_NAME,
+        model=DEFAULT_MODEL_NAME,
        max_tokens=10,
        streaming=True,
        temperature=0,
@@ -222,7 +223,7 @@ async def test_agenerate_streaming() -> None:
 def test_reasoning_output_invoke() -> None:
    """Test reasoning output from ChatGroq with invoke."""
    chat = ChatGroq(
-        model="deepseek-r1-distill-llama-70b",
+        model=REASONING_MODEL_NAME,
        reasoning_format="parsed",
    )
    message = [
@@ -241,7 +242,7 @@ def test_reasoning_output_invoke() -> None:
 def test_reasoning_output_stream() -> None:
    """Test reasoning output from ChatGroq with stream."""
    chat = ChatGroq(
-        model="deepseek-r1-distill-llama-70b",
+        model=REASONING_MODEL_NAME,
        reasoning_format="parsed",
    )
    message = [
@@ -300,7 +301,7 @@ def test_streaming_generation_info() -> None:

    callback = _FakeCallback()
    chat = ChatGroq(
-        model=MODEL_NAME,
+        model="llama-3.1-8b-instant",  # Use a model that properly streams content
        max_tokens=2,
        temperature=0,
        callbacks=[callback],
@@ -314,7 +315,7 @@ def test_streaming_generation_info() -> None:

 def test_system_message() -> None:
    """Test ChatGroq wrapper with system message."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)
    system_message = SystemMessage(content="You are to chat with the user.")
    human_message = HumanMessage(content="Hello")
    response = chat.invoke([system_message, human_message])
@@ -324,7 +325,7 @@ def test_system_message() -> None:

 def test_tool_choice() -> None:
    """Test that tool choice is respected."""
-    llm = ChatGroq(model=MODEL_NAME)
+    llm = ChatGroq(model=DEFAULT_MODEL_NAME)

    class MyTool(BaseModel):
        name: str
@@ -332,7 +333,7 @@ def test_tool_choice() -> None:

    with_tool = llm.bind_tools([MyTool], tool_choice="MyTool")

-    resp = with_tool.invoke("Who was the 27 year old named Erick?")
+    resp = with_tool.invoke("Who was the 27 year old named Erick? Use the tool.")
    assert isinstance(resp, AIMessage)
    assert resp.content == ""  # should just be tool call
    tool_calls = resp.additional_kwargs["tool_calls"]
@@ -354,7 +355,7 @@ def test_tool_choice() -> None:

 def test_tool_choice_bool() -> None:
    """Test that tool choice is respected just passing in True."""
-    llm = ChatGroq(model=MODEL_NAME)
+    llm = ChatGroq(model=DEFAULT_MODEL_NAME)

    class MyTool(BaseModel):
        name: str
@@ -362,7 +363,7 @@ def test_tool_choice_bool() -> None:

    with_tool = llm.bind_tools([MyTool], tool_choice=True)

-    resp = with_tool.invoke("Who was the 27 year old named Erick?")
+    resp = with_tool.invoke("Who was the 27 year old named Erick? Use the tool.")
    assert isinstance(resp, AIMessage)
    assert resp.content == ""  # should just be tool call
    tool_calls = resp.additional_kwargs["tool_calls"]
@@ -379,7 +380,7 @@ def test_tool_choice_bool() -> None:
@pytest.mark.xfail(reason="Groq tool_choice doesn't currently force a tool call")
 def test_streaming_tool_call() -> None:
    """Test that tool choice is respected."""
-    llm = ChatGroq(model=MODEL_NAME)
+    llm = ChatGroq(model=DEFAULT_MODEL_NAME)

    class MyTool(BaseModel):
        name: str
@@ -417,7 +418,7 @@ def test_streaming_tool_call() -> None:
@pytest.mark.xfail(reason="Groq tool_choice doesn't currently force a tool call")
 async def test_astreaming_tool_call() -> None:
    """Test that tool choice is respected."""
-    llm = ChatGroq(model=MODEL_NAME)
+    llm = ChatGroq(model=DEFAULT_MODEL_NAME)

    class MyTool(BaseModel):
        name: str
@@ -462,7 +463,9 @@ def test_json_mode_structured_output() -> None:
        setup: str = Field(description="question to set up a joke")
        punchline: str = Field(description="answer to resolve the joke")

-    chat = ChatGroq(model=MODEL_NAME).with_structured_output(Joke, method="json_mode")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME).with_structured_output(
+        Joke, method="json_mode"
+    )
    result = chat.invoke(
        "Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys"
    )
@@ -476,38 +479,38 @@ def test_setting_service_tier_class() -> None:
    message = HumanMessage(content="Welcome to the Groqetship")

    # Initialization
-    chat = ChatGroq(model=MODEL_NAME, service_tier="auto")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="auto")
    assert chat.service_tier == "auto"
    response = chat.invoke([message])
    assert isinstance(response, BaseMessage)
    assert isinstance(response.content, str)
    assert response.response_metadata.get("service_tier") == "auto"

-    chat = ChatGroq(model=MODEL_NAME, service_tier="flex")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="flex")
    assert chat.service_tier == "flex"
    response = chat.invoke([message])
    assert response.response_metadata.get("service_tier") == "flex"

-    chat = ChatGroq(model=MODEL_NAME, service_tier="on_demand")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="on_demand")
    assert chat.service_tier == "on_demand"
    response = chat.invoke([message])
    assert response.response_metadata.get("service_tier") == "on_demand"

-    chat = ChatGroq(model=MODEL_NAME)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME)
    assert chat.service_tier == "on_demand"
    response = chat.invoke([message])
    assert response.response_metadata.get("service_tier") == "on_demand"

    with pytest.raises(ValueError):
-        ChatGroq(model=MODEL_NAME, service_tier=None)  # type: ignore[arg-type]
+        ChatGroq(model=DEFAULT_MODEL_NAME, service_tier=None)  # type: ignore[arg-type]
    with pytest.raises(ValueError):
-        ChatGroq(model=MODEL_NAME, service_tier="invalid")  # type: ignore[arg-type]
+        ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="invalid")  # type: ignore[arg-type]


 def test_setting_service_tier_request() -> None:
    """Test setting service tier defined at request level."""
    message = HumanMessage(content="Welcome to the Groqetship")
-    chat = ChatGroq(model=MODEL_NAME)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME)

    response = chat.invoke(
        [message],
@@ -537,7 +540,7 @@ def test_setting_service_tier_request() -> None:

    # If an `invoke` call is made with no service tier, we fall back to the class level
    # setting
-    chat = ChatGroq(model=MODEL_NAME, service_tier="auto")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="auto")
    response = chat.invoke(
        [message],
    )
@@ -564,7 +567,7 @@ def test_setting_service_tier_request() -> None:

 def test_setting_service_tier_streaming() -> None:
    """Test service tier settings for streaming calls."""
-    chat = ChatGroq(model=MODEL_NAME, service_tier="flex")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="flex")
    chunks = list(chat.stream("Why is the sky blue?", service_tier="auto"))

    assert chunks[-1].response_metadata.get("service_tier") == "auto"
@@ -572,7 +575,7 @@ def test_setting_service_tier_streaming() -> None:

 async def test_setting_service_tier_request_async() -> None:
    """Test async setting of service tier at the request level."""
-    chat = ChatGroq(model=MODEL_NAME, service_tier="flex")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="flex")
    response = await chat.ainvoke("Hello!", service_tier="on_demand")

    assert response.response_metadata.get("service_tier") == "on_demand"