From 9d14a5e06d98355e5c0eccd0736b961fbe419f87 Mon Sep 17 00:00:00 2001 From: Mason Daugherty Date: Sun, 21 Jun 2026 01:40:38 -0400 Subject: [PATCH] feat(groq): add `performance` service tier (#38339) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Groq's API now exposes a fourth service tier, `performance` — their highest tier, providing reliable low latency for the most critical production applications. `ChatGroq.service_tier` only accepted `on_demand`, `flex`, and `auto`, so users who wanted to route requests to the performance tier had no type-safe way to do so. This widens the `service_tier` `Literal` to include `performance` and documents it alongside the existing tiers. The value is passed straight through to the Groq SDK as a constrained enum, so no validation or mapping logic changes were needed. Reference: [Groq service tiers documentation](https://console.groq.com/docs/service-tiers). An integration test case was added to `test_setting_service_tier_class`; it only asserts that a model constructed with `service_tier="performance"` stores that value and sends no request for that tier itself. The enclosing test does issue live requests for the other tiers, so it still runs only with a Groq API key. --- libs/partners/groq/langchain_groq/chat_models.py | 6 +++++- .../groq/tests/integration_tests/test_chat_models.py | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/libs/partners/groq/langchain_groq/chat_models.py b/libs/partners/groq/langchain_groq/chat_models.py index 9cfaa31042a..d98eb734610 100644 --- a/libs/partners/groq/langchain_groq/chat_models.py +++ b/libs/partners/groq/langchain_groq/chat_models.py @@ -430,7 +430,9 @@ class ChatGroq(BaseChatModel): max_tokens: int | None = None """Maximum number of tokens to generate.""" - service_tier: Literal["on_demand", "flex", "auto"] = Field(default="on_demand") + service_tier: Literal["on_demand", "flex", "auto", "performance"] = Field( + default="on_demand" + ) """Optional parameter that you can include to specify the service tier you'd like to use for requests.
@@ -440,6 +442,8 @@ class ChatGroq(BaseChatModel): reliability for workloads that don't require guaranteed processing. - `'auto'`: Uses on-demand rate limits, then falls back to `'flex'` if those limits are exceeded + - `'performance'`: Highest tier, providing reliable low latency for the most + critical production applications. See the [Groq documentation](https://console.groq.com/docs/flex-processing) for more details and a list of service tiers and descriptions. diff --git a/libs/partners/groq/tests/integration_tests/test_chat_models.py b/libs/partners/groq/tests/integration_tests/test_chat_models.py index bd43972b1f3..e1cba270728 100644 --- a/libs/partners/groq/tests/integration_tests/test_chat_models.py +++ b/libs/partners/groq/tests/integration_tests/test_chat_models.py @@ -574,6 +574,9 @@ def test_setting_service_tier_class() -> None: response = chat.invoke([message]) assert response.response_metadata.get("service_tier") == "on_demand" + chat = ChatGroq(model=DEFAULT_MODEL_NAME, service_tier="performance") + assert chat.service_tier == "performance" + chat = ChatGroq(model=DEFAULT_MODEL_NAME) assert chat.service_tier == "on_demand" response = chat.invoke([message])