From 8e8177487003026f25198068a137270fe369f2d4 Mon Sep 17 00:00:00 2001
From: Mason Daugherty
Date: Mon, 22 Jun 2026 12:00:17 -0400
Subject: [PATCH] test(groq): xfail flaky tool choice test (#38365)

Groq's standard integration suite already treats several tool-calling checks as flaky because provider behavior is inconsistent. The forced `tool_choice` check now hits the same provider-side `tool_use_failed` 400 on generic prompts, so the Groq-specific suite marks that case as expected flaky instead of failing scheduled integration runs.

## Changes

- Add a Groq-specific `test_tool_choice` override that retries and xfails the shared standard test.
- Keep the rest of the Groq tool-calling coverage unchanged, including the existing xfail/retry behavior for related standard tests.
---
 libs/partners/groq/tests/integration_tests/test_standard.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libs/partners/groq/tests/integration_tests/test_standard.py b/libs/partners/groq/tests/integration_tests/test_standard.py
index c9b75352b1f..ead21235f42 100644
--- a/libs/partners/groq/tests/integration_tests/test_standard.py
+++ b/libs/partners/groq/tests/integration_tests/test_standard.py
@@ -39,6 +39,11 @@ class TestGroq(ChatModelIntegrationTests):
     def test_tool_calling(self, model: BaseChatModel) -> None:
         super().test_tool_calling(model)
 
+    @pytest.mark.xfail(reason="Retry flaky tool choice behavior")
+    @pytest.mark.retry(count=3, delay=1)
+    def test_tool_choice(self, model: BaseChatModel) -> None:
+        super().test_tool_choice(model)
+
     @pytest.mark.xfail(reason="Retry flaky tool calling behavior")
     @pytest.mark.retry(count=3, delay=1)
     async def test_tool_calling_async(self, model: BaseChatModel) -> None: