From 419c17322547f9d2bc7b133dee86b577677991b5 Mon Sep 17 00:00:00 2001
From: Mason Daugherty
Date: Tue, 5 Aug 2025 14:18:56 -0400
Subject: [PATCH] feat(groq): openai-oss (#32411)

use new openai-oss for integration tests, set module-level testing model
names and improve robustness of tool tests
---
 .../integration_tests/test_chat_models.py | 67 ++++++++++---------
 1 file changed, 35 insertions(+), 32 deletions(-)

diff --git a/libs/partners/groq/tests/integration_tests/test_chat_models.py b/libs/partners/groq/tests/integration_tests/test_chat_models.py
index fd4ad486637..ec63ab96071 100644
--- a/libs/partners/groq/tests/integration_tests/test_chat_models.py
+++ b/libs/partners/groq/tests/integration_tests/test_chat_models.py
@@ -24,7 +24,8 @@ from tests.unit_tests.fake.callbacks import (
     FakeCallbackHandlerWithChatStart,
 )
 
-MODEL_NAME = "llama-3.3-70b-versatile"
+DEFAULT_MODEL_NAME = "openai/gpt-oss-20b"
+REASONING_MODEL_NAME = "deepseek-r1-distill-llama-70b"
 
 
 #
@@ -34,7 +35,7 @@ MODEL_NAME = "llama-3.3-70b-versatile"
 def test_invoke() -> None:
     """Test Chat wrapper."""
     chat = ChatGroq(
-        model=MODEL_NAME,
+        model=DEFAULT_MODEL_NAME,
         temperature=0.7,
         base_url=None,
         groq_proxy=None,
@@ -55,7 +56,7 @@ def test_invoke() -> None:
 @pytest.mark.scheduled
 async def test_ainvoke() -> None:
     """Test ainvoke tokens from ChatGroq."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)
 
     result = await chat.ainvoke("Welcome to the Groqetship!", config={"tags": ["foo"]})
     assert isinstance(result, BaseMessage)
@@ -65,7 +66,7 @@ async def test_ainvoke() -> None:
 @pytest.mark.scheduled
 def test_batch() -> None:
     """Test batch tokens from ChatGroq."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)
 
     result = chat.batch(["Hello!", "Welcome to the Groqetship!"])
     for token in result:
@@ -76,7 +77,7 @@ def test_batch() -> None:
 @pytest.mark.scheduled
 async def test_abatch() -> None:
     """Test abatch tokens from ChatGroq."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)
 
     result = await chat.abatch(["Hello!", "Welcome to the Groqetship!"])
     for token in result:
@@ -87,7 +88,7 @@ async def test_abatch() -> None:
 @pytest.mark.scheduled
 async def test_stream() -> None:
     """Test streaming tokens from Groq."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)
 
     for token in chat.stream("Welcome to the Groqetship!"):
         assert isinstance(token, BaseMessageChunk)
@@ -97,7 +98,7 @@ async def test_stream() -> None:
 @pytest.mark.scheduled
 async def test_astream() -> None:
     """Test streaming tokens from Groq."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)
 
     full: Optional[BaseMessageChunk] = None
     chunks_with_token_counts = 0
@@ -136,7 +137,7 @@ async def test_astream() -> None:
 def test_generate() -> None:
     """Test sync generate."""
     n = 1
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)
     message = HumanMessage(content="Hello", n=1)
     response = chat.generate([[message], [message]])
     assert isinstance(response, LLMResult)
@@ -155,7 +156,7 @@ def test_generate() -> None:
 async def test_agenerate() -> None:
     """Test async generation."""
     n = 1
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10, n=1)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10, n=1)
     message = HumanMessage(content="Hello")
     response = await chat.agenerate([[message], [message]])
     assert isinstance(response, LLMResult)
@@ -178,7 +179,7 @@ def test_invoke_streaming() -> None:
     """Test that streaming correctly invokes on_llm_new_token callback."""
     callback_handler = FakeCallbackHandler()
     chat = ChatGroq(
-        model=MODEL_NAME,
+        model=DEFAULT_MODEL_NAME,
         max_tokens=2,
         streaming=True,
         temperature=0,
@@ -195,7 +196,7 @@ async def test_agenerate_streaming() -> None:
     """Test that streaming correctly invokes on_llm_new_token callback."""
     callback_handler = FakeCallbackHandlerWithChatStart()
     chat = ChatGroq(
-        model=MODEL_NAME,
+        model=DEFAULT_MODEL_NAME,
         max_tokens=10,
         streaming=True,
         temperature=0,
@@ -222,7 +223,7 @@ async def test_agenerate_streaming() -> None:
 def test_reasoning_output_invoke() -> None:
     """Test reasoning output from ChatGroq with invoke."""
     chat = ChatGroq(
-        model="deepseek-r1-distill-llama-70b",
+        model=REASONING_MODEL_NAME,
         reasoning_format="parsed",
     )
     message = [
@@ -241,7 +242,7 @@ def test_reasoning_output_invoke() -> None:
 def test_reasoning_output_stream() -> None:
     """Test reasoning output from ChatGroq with stream."""
     chat = ChatGroq(
-        model="deepseek-r1-distill-llama-70b",
+        model=REASONING_MODEL_NAME,
         reasoning_format="parsed",
     )
     message = [
@@ -300,7 +301,7 @@ def test_streaming_generation_info() -> None:
 
     callback = _FakeCallback()
     chat = ChatGroq(
-        model=MODEL_NAME,
+        model="llama-3.1-8b-instant",  # Use a model that properly streams content
         max_tokens=2,
         temperature=0,
         callbacks=[callback],
@@ -314,7 +315,7 @@ def test_streaming_generation_info() -> None:
 
 def test_system_message() -> None:
     """Test ChatGroq wrapper with system message."""
-    chat = ChatGroq(model=MODEL_NAME, max_tokens=10)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)
     system_message = SystemMessage(content="You are to chat with the user.")
     human_message = HumanMessage(content="Hello")
     response = chat.invoke([system_message, human_message])
@@ -324,7 +325,7 @@ def test_system_message() -> None:
 
 def test_tool_choice() -> None:
     """Test that tool choice is respected."""
-    llm = ChatGroq(model=MODEL_NAME)
+    llm = ChatGroq(model=DEFAULT_MODEL_NAME)
 
     class MyTool(BaseModel):
         name: str
@@ -332,7 +333,7 @@ def test_tool_choice() -> None:
 
     with_tool = llm.bind_tools([MyTool], tool_choice="MyTool")
 
-    resp = with_tool.invoke("Who was the 27 year old named Erick?")
+    resp = with_tool.invoke("Who was the 27 year old named Erick? Use the tool.")
     assert isinstance(resp, AIMessage)
     assert resp.content == ""  # should just be tool call
     tool_calls = resp.additional_kwargs["tool_calls"]
@@ -354,7 +355,7 @@ def test_tool_choice() -> None:
 
 def test_tool_choice_bool() -> None:
     """Test that tool choice is respected just passing in True."""
-    llm = ChatGroq(model=MODEL_NAME)
+    llm = ChatGroq(model=DEFAULT_MODEL_NAME)
 
     class MyTool(BaseModel):
         name: str
@@ -362,7 +363,7 @@ def test_tool_choice_bool() -> None:
 
     with_tool = llm.bind_tools([MyTool], tool_choice=True)
 
-    resp = with_tool.invoke("Who was the 27 year old named Erick?")
+    resp = with_tool.invoke("Who was the 27 year old named Erick? Use the tool.")
     assert isinstance(resp, AIMessage)
     assert resp.content == ""  # should just be tool call
     tool_calls = resp.additional_kwargs["tool_calls"]
@@ -379,7 +380,7 @@ def test_tool_choice_bool() -> None:
 @pytest.mark.xfail(reason="Groq tool_choice doesn't currently force a tool call")
 def test_streaming_tool_call() -> None:
     """Test that tool choice is respected."""
-    llm = ChatGroq(model=MODEL_NAME)
+    llm = ChatGroq(model=DEFAULT_MODEL_NAME)
 
     class MyTool(BaseModel):
         name: str
@@ -417,7 +418,7 @@ def test_streaming_tool_call() -> None:
 @pytest.mark.xfail(reason="Groq tool_choice doesn't currently force a tool call")
 async def test_astreaming_tool_call() -> None:
     """Test that tool choice is respected."""
-    llm = ChatGroq(model=MODEL_NAME)
+    llm = ChatGroq(model=DEFAULT_MODEL_NAME)
 
     class MyTool(BaseModel):
         name: str
@@ -462,7 +463,9 @@ def test_json_mode_structured_output() -> None:
         setup: str = Field(description="question to set up a joke")
         punchline: str = Field(description="answer to resolve the joke")
 
-    chat = ChatGroq(model=MODEL_NAME).with_structured_output(Joke, method="json_mode")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME).with_structured_output(
+        Joke, method="json_mode"
+    )
     result = chat.invoke(
         "Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys"
     )
@@ -476,38 +479,38 @@ def test_setting_service_tier_class() -> None:
     message = HumanMessage(content="Welcome to the Groqetship")
 
     # Initialization
-    chat = ChatGroq(model=MODEL_NAME, service_tier="auto")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="auto")
     assert chat.service_tier == "auto"
     response = chat.invoke([message])
     assert isinstance(response, BaseMessage)
     assert isinstance(response.content, str)
     assert response.response_metadata.get("service_tier") == "auto"
 
-    chat = ChatGroq(model=MODEL_NAME, service_tier="flex")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="flex")
     assert chat.service_tier == "flex"
     response = chat.invoke([message])
     assert response.response_metadata.get("service_tier") == "flex"
 
-    chat = ChatGroq(model=MODEL_NAME, service_tier="on_demand")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="on_demand")
     assert chat.service_tier == "on_demand"
     response = chat.invoke([message])
     assert response.response_metadata.get("service_tier") == "on_demand"
 
-    chat = ChatGroq(model=MODEL_NAME)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME)
     assert chat.service_tier == "on_demand"
     response = chat.invoke([message])
     assert response.response_metadata.get("service_tier") == "on_demand"
 
     with pytest.raises(ValueError):
-        ChatGroq(model=MODEL_NAME, service_tier=None)  # type: ignore[arg-type]
+        ChatGroq(model=DEFAULT_MODEL_NAME, service_tier=None)  # type: ignore[arg-type]
     with pytest.raises(ValueError):
-        ChatGroq(model=MODEL_NAME, service_tier="invalid")  # type: ignore[arg-type]
+        ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="invalid")  # type: ignore[arg-type]
 
 
 def test_setting_service_tier_request() -> None:
     """Test setting service tier defined at request level."""
     message = HumanMessage(content="Welcome to the Groqetship")
 
-    chat = ChatGroq(model=MODEL_NAME)
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME)
 
     response = chat.invoke(
         [message],
@@ -537,7 +540,7 @@ def test_setting_service_tier_request() -> None:
 
     # If an `invoke` call is made with no service tier, we fall back to the class level
     # setting
-    chat = ChatGroq(model=MODEL_NAME, service_tier="auto")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="auto")
     response = chat.invoke(
         [message],
     )
@@ -564,7 +567,7 @@ def test_setting_service_tier_request() -> None:
 
 def test_setting_service_tier_streaming() -> None:
     """Test service tier settings for streaming calls."""
-    chat = ChatGroq(model=MODEL_NAME, service_tier="flex")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="flex")
     chunks = list(chat.stream("Why is the sky blue?", service_tier="auto"))
 
     assert chunks[-1].response_metadata.get("service_tier") == "auto"
@@ -572,7 +575,7 @@ def test_setting_service_tier_streaming() -> None:
 
 async def test_setting_service_tier_request_async() -> None:
     """Test async setting of service tier at the request level."""
-    chat = ChatGroq(model=MODEL_NAME, service_tier="flex")
+    chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="flex")
     response = await chat.ainvoke("Hello!", service_tier="on_demand")
 
     assert response.response_metadata.get("service_tier") == "on_demand"
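
What follows is a minimal illustrative sketch, not part of the diff above, of the
pattern this patch introduces: module-level constants for the test models, so the
whole suite can be repointed at a new model by editing one line. The `make_chat`
helper is a hypothetical name invented here for illustration; the constants,
`max_tokens`, and `reasoning_format="parsed"` are taken from the patch itself, and
instantiating `ChatGroq` assumes `langchain-groq` is installed and `GROQ_API_KEY`
is set in the environment.

    from langchain_groq import ChatGroq  # assumes langchain-groq is installed

    # Module-level test models, mirroring the top of the updated test file.
    DEFAULT_MODEL_NAME = "openai/gpt-oss-20b"
    REASONING_MODEL_NAME = "deepseek-r1-distill-llama-70b"

    def make_chat(reasoning: bool = False) -> ChatGroq:
        """Build a ChatGroq client pinned to the shared test model names."""
        if reasoning:
            # reasoning_format="parsed" surfaces the model's reasoning in
            # response.additional_kwargs["reasoning_content"], which is what
            # the reasoning tests in the patch assert against.
            return ChatGroq(model=REASONING_MODEL_NAME, reasoning_format="parsed")
        return ChatGroq(model=DEFAULT_MODEL_NAME, max_tokens=10)

On the robustness side, the patch appends "Use the tool." to the tool-test prompts,
nudging the model toward emitting a tool call rather than a plain-text answer,
since (per the xfail markers) Groq's tool_choice does not currently force one.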