Files
langchain/libs/partners/groq/langchain_groq/chat_models.py
Mason Daugherty 24f922bb19 chore(groq): remove Kimi K2 from docstring (#36773)
No longer available
2026-04-15 17:49:48 -06:00

1621 lines
63 KiB
Python

"""Groq Chat wrapper."""
from __future__ import annotations
import json
import warnings
from collections.abc import AsyncIterator, Callable, Iterator, Mapping, Sequence
from operator import itemgetter
from typing import Any, Literal, cast
from langchain_core.callbacks import (
AsyncCallbackManagerForLLMRun,
CallbackManagerForLLMRun,
)
from langchain_core.language_models import (
LanguageModelInput,
ModelProfile,
ModelProfileRegistry,
)
from langchain_core.language_models.chat_models import (
BaseChatModel,
LangSmithParams,
agenerate_from_stream,
generate_from_stream,
)
from langchain_core.messages import (
AIMessage,
AIMessageChunk,
BaseMessage,
BaseMessageChunk,
ChatMessage,
ChatMessageChunk,
FunctionMessage,
FunctionMessageChunk,
HumanMessage,
HumanMessageChunk,
InvalidToolCall,
SystemMessage,
SystemMessageChunk,
ToolCall,
ToolMessage,
ToolMessageChunk,
is_data_content_block,
)
from langchain_core.messages.ai import (
InputTokenDetails,
OutputTokenDetails,
UsageMetadata,
)
from langchain_core.messages.block_translators.openai import (
convert_to_openai_data_block,
)
from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser
from langchain_core.output_parsers.base import OutputParserLike
from langchain_core.output_parsers.openai_tools import (
JsonOutputKeyToolsParser,
PydanticToolsParser,
make_invalid_tool_call,
parse_tool_call,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough
from langchain_core.tools import BaseTool
from langchain_core.utils import from_env, get_pydantic_field_names, secret_from_env
from langchain_core.utils.function_calling import (
convert_to_json_schema,
convert_to_openai_tool,
)
from langchain_core.utils.pydantic import is_basemodel_subclass
from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator
from typing_extensions import Self
from langchain_groq._compat import _convert_from_v1_to_groq
from langchain_groq.data._profiles import _PROFILES
from langchain_groq.version import __version__
# Packaged profile table (`langchain_groq.data._profiles._PROFILES`) viewed
# through the registry type; `cast` is a typing-only no-op at runtime.
_MODEL_PROFILES = cast("ModelProfileRegistry", _PROFILES)

# Model names for which strict structured output is supported.
_STRICT_STRUCTURED_OUTPUT_MODELS = frozenset(
    (
        "openai/gpt-oss-20b",
        "openai/gpt-oss-120b",
    )
)
def _get_default_model_profile(model_name: str) -> ModelProfile:
    """Return a shallow copy of the registered profile for `model_name`.

    An empty mapping is used when the registry has no entry (or a falsy one),
    so the caller always receives a fresh object it may modify.
    """
    profile = _MODEL_PROFILES.get(model_name)
    if not profile:
        profile = {}
    return profile.copy()
class ChatGroq(BaseChatModel):
r"""Groq Chat large language models API.
To use, you should have the
environment variable `GROQ_API_KEY` set with your API key.
Any parameters that are valid to be passed to the groq.create call
can be passed in, even if not explicitly saved on this class.
Setup:
Install `langchain-groq` and set environment variable
`GROQ_API_KEY`.
```bash
pip install -U langchain-groq
export GROQ_API_KEY="your-api-key"
```
Key init args — completion params:
model:
Name of Groq model to use, e.g. `llama-3.1-8b-instant`.
temperature:
Sampling temperature. Ranges from `0.0` to `1.0`.
max_tokens:
Max number of tokens to generate.
reasoning_format:
The format for reasoning output. Groq will default to `raw` if left
undefined.
- `'parsed'`: Separates reasoning into a dedicated field while keeping the
response concise. Reasoning will be returned in the
`additional_kwargs.reasoning_content` field of the response.
- `'raw'`: Includes reasoning within think tags (e.g.
`<think>{reasoning_content}</think>`).
- `'hidden'`: Returns only the final answer content. Note: this only
suppresses reasoning content in the response; the model will still perform
reasoning unless overridden in `reasoning_effort`.
See the [Groq documentation](https://console.groq.com/docs/reasoning#reasoning)
for more details and a list of supported models.
model_kwargs:
Holds any model parameters valid for create call not
explicitly specified.
Key init args — client params:
timeout:
Timeout for requests.
max_retries:
Max number of retries.
api_key:
Groq API key. If not passed in will be read from env var `GROQ_API_KEY`.
base_url:
Base URL path for API requests, leave blank if not using a proxy
or service emulator.
custom_get_token_ids:
Optional encoder to use for counting tokens.
See full list of supported init args and their descriptions in the params
section.
Instantiate:
```python
from langchain_groq import ChatGroq
model = ChatGroq(
model="llama-3.1-8b-instant",
temperature=0.0,
max_retries=2,
# other params...
)
```
Invoke:
```python
messages = [
("system", "You are a helpful translator. Translate the user sentence to French."),
("human", "I love programming."),
]
model.invoke(messages)
```
```python
AIMessage(content='The English sentence "I love programming" can
be translated to French as "J\'aime programmer". The word
"programming" is translated as "programmer" in French.',
response_metadata={'token_usage': {'completion_tokens': 38,
'prompt_tokens': 28, 'total_tokens': 66, 'completion_time':
0.057975474, 'prompt_time': 0.005366091, 'queue_time': None,
'total_time': 0.063341565}, 'model_name': 'llama-3.1-8b-instant',
'system_fingerprint': 'fp_c5f20b5bb1', 'finish_reason': 'stop',
'logprobs': None}, id='run-ecc71d70-e10c-4b69-8b8c-b8027d95d4b8-0')
```
Vision:
```python
from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage
model = ChatGroq(model="meta-llama/llama-4-scout-17b-16e-instruct")
message = HumanMessage(
content=[
{"type": "text", "text": "Describe this image in detail"},
{"type": "image_url", "image_url": {"url": "example_url.jpg"}},
]
)
response = model.invoke([message])
print(response.content)
```
See [Groq model docs](https://console.groq.com/docs/vision#supported-models)
for the latest available vision models.
Maximum image size: 20MB per request.
Stream:
```python
# Streaming `text` for each content chunk received
for chunk in model.stream(messages):
print(chunk.text, end="")
```
```python
content='' id='run-4e9f926b-73f5-483b-8ef5-09533d925853'
content='The' id='run-4e9f926b-73f5-483b-8ef5-09533d925853'
content=' English' id='run-4e9f926b-73f5-483b-8ef5-09533d925853'
content=' sentence' id='run-4e9f926b-73f5-483b-8ef5-09533d925853'
...
content=' program' id='run-4e9f926b-73f5-483b-8ef5-09533d925853'
content='".' id='run-4e9f926b-73f5-483b-8ef5-09533d925853'
content='' response_metadata={'finish_reason': 'stop'}
id='run-4e9f926b-73f5-483b-8ef5-09533d925853'
```
```python
# Reconstructing a full response
stream = model.stream(messages)
full = next(stream)
for chunk in stream:
full += chunk
full
```
```python
AIMessageChunk(content='The English sentence "I love programming"
can be translated to French as "J\'aime programmer". Here\'s the
breakdown of the sentence: "J\'aime" is the French equivalent of "
I love", and "programmer" is the French infinitive for "to program".
So, the literal translation is "I love to program". However, in
English we often omit the "to" when talking about activities we
love, and the same applies to French. Therefore, "J\'aime
programmer" is the correct and natural way to express "I love
programming" in French.', response_metadata={'finish_reason':
'stop'}, id='run-a3c35ac4-0750-4d08-ac55-bfc63805de76')
```
Async:
```python
await model.ainvoke(messages)
```
```python
AIMessage(content='The English sentence "I love programming" can
be translated to French as "J\'aime programmer". The word
"programming" is translated as "programmer" in French. I hope
this helps! Let me know if you have any other questions.',
response_metadata={'token_usage': {'completion_tokens': 53,
'prompt_tokens': 28, 'total_tokens': 81, 'completion_time':
0.083623752, 'prompt_time': 0.007365126, 'queue_time': None,
'total_time': 0.090988878}, 'model_name': 'llama-3.1-8b-instant',
'system_fingerprint': 'fp_c5f20b5bb1', 'finish_reason': 'stop',
'logprobs': None}, id='run-897f3391-1bea-42e2-82e0-686e2367bcf8-0')
```
Tool calling:
```python
from pydantic import BaseModel, Field
class GetWeather(BaseModel):
'''Get the current weather in a given location'''
location: str = Field(..., description="The city and state, e.g. San Francisco, CA")
class GetPopulation(BaseModel):
'''Get the current population in a given location'''
location: str = Field(..., description="The city and state, e.g. San Francisco, CA")
model_with_tools = model.bind_tools([GetWeather, GetPopulation])
ai_msg = model_with_tools.invoke("What is the population of NY?")
ai_msg.tool_calls
```
```python
[
{
"name": "GetPopulation",
"args": {"location": "NY"},
"id": "call_bb8d",
}
]
```
See `ChatGroq.bind_tools()` method for more.
Structured output:
```python
from typing import Optional
from pydantic import BaseModel, Field
class Joke(BaseModel):
'''Joke to tell user.'''
setup: str = Field(description="The setup of the joke")
punchline: str = Field(description="The punchline to the joke")
rating: int | None = Field(description="How funny the joke is, from 1 to 10")
structured_model = model.with_structured_output(Joke)
structured_model.invoke("Tell me a joke about cats")
```
```python
Joke(
setup="Why don't cats play poker in the jungle?",
punchline="Too many cheetahs!",
rating=None,
)
```
See `ChatGroq.with_structured_output()` for more.
Response metadata:
```python
ai_msg = model.invoke(messages)
ai_msg.response_metadata
```
```python
{
"token_usage": {
"completion_tokens": 70,
"prompt_tokens": 28,
"total_tokens": 98,
"completion_time": 0.111956391,
"prompt_time": 0.007518279,
"queue_time": None,
"total_time": 0.11947467,
},
"model_name": "llama-3.1-8b-instant",
"system_fingerprint": "fp_c5f20b5bb1",
"finish_reason": "stop",
"logprobs": None,
}
```
""" # noqa: E501
# Sync chat-completions client. Built in `validate_environment` when not supplied.
client: Any = Field(default=None, exclude=True)
# Async chat-completions client. Built in `validate_environment` when not supplied.
async_client: Any = Field(default=None, exclude=True)
model_name: str = Field(alias="model")
"""Model name to use."""
@property
def model(self) -> str:
    """Read-only alias that returns `model_name`."""
    return self.model_name
# NOTE: `validate_environment` rewrites an exact 0 to 1e-8.
temperature: float = 0.7
"""What sampling temperature to use."""
stop: list[str] | str | None = Field(default=None, alias="stop_sequences")
"""Default stop sequences."""
reasoning_format: Literal["parsed", "raw", "hidden"] | None = Field(default=None)
"""The format for reasoning output. Groq will default to raw if left undefined.
- `'parsed'`: Separates reasoning into a dedicated field while keeping the
response concise. Reasoning will be returned in the
`additional_kwargs.reasoning_content` field of the response.
- `'raw'`: Includes reasoning within think tags (e.g.
`<think>{reasoning_content}</think>`).
- `'hidden'`: Returns only the final answer content. Note: this only suppresses
reasoning content in the response; the model will still perform reasoning unless
overridden in `reasoning_effort`.
See the [Groq documentation](https://console.groq.com/docs/reasoning#reasoning)
for more details and a list of supported models.
"""
reasoning_effort: str | None = Field(default=None)
"""The level of effort the model will put into reasoning. Groq will default to
enabling reasoning if left undefined.
See the [Groq documentation](https://console.groq.com/docs/reasoning#options-for-reasoning-effort)
for more details and a list of options and models that support setting a reasoning
effort.
"""
# Populated by the `build_extra` validator with any unrecognised init kwargs.
model_kwargs: dict[str, Any] = Field(default_factory=dict)
"""Holds any model parameters valid for `create` call not explicitly specified."""
groq_api_key: SecretStr | None = Field(
    alias="api_key", default_factory=secret_from_env("GROQ_API_KEY", default=None)
)
"""Automatically inferred from env var `GROQ_API_KEY` if not provided."""
groq_api_base: str | None = Field(
    alias="base_url", default_factory=from_env("GROQ_API_BASE", default=None)
)
"""Base URL path for API requests. Leave blank if not using a proxy or service
emulator.
"""
# to support explicit proxy for Groq
# NOTE(review): read from env var `GROQ_PROXY`, but not included in the
# `client_params` built by `validate_environment` — confirm where it is consumed.
groq_proxy: str | None = Field(default_factory=from_env("GROQ_PROXY", default=None))
request_timeout: float | tuple[float, float] | Any | None = Field(
    default=None, alias="timeout"
)
"""Timeout for requests to Groq completion API. Can be float, `httpx.Timeout` or
`None`.
"""
max_retries: int = 2
"""Maximum number of retries to make when generating."""
streaming: bool = False
"""Whether to stream the results or not."""
# `validate_environment` rejects n < 1, and n > 1 combined with `streaming`.
n: int = 1
"""Number of chat completions to generate for each prompt."""
max_tokens: int | None = None
"""Maximum number of tokens to generate."""
service_tier: Literal["on_demand", "flex", "auto"] = Field(default="on_demand")
"""Optional parameter that you can include to specify the service tier you'd like to
use for requests.
- `'on_demand'`: Default.
- `'flex'`: On-demand processing when capacity is available, with rapid timeouts
if resources are constrained. Provides balance between performance and
reliability for workloads that don't require guaranteed processing.
- `'auto'`: Uses on-demand rate limits, then falls back to `'flex'` if those
limits are exceeded
See the [Groq documentation](https://console.groq.com/docs/flex-processing) for more
details and a list of service tiers and descriptions.
"""
# Extra headers for the Groq clients; merged over a default `User-Agent` header
# in `validate_environment`, so a caller-supplied `User-Agent` wins.
default_headers: Mapping[str, str] | None = None
# Passed unchanged to the Groq clients as `default_query`.
default_query: Mapping[str, object] | None = None
# Configure a custom httpx client. See the
# [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
http_client: Any | None = None
"""Optional `httpx.Client`."""
http_async_client: Any | None = None
"""Optional `httpx.AsyncClient`.
Only used for async invocations. Must specify `http_client` as well if you'd like a
custom client for sync invocations.
"""
# `populate_by_name=True` lets fields be set by field name as well as by alias.
model_config = ConfigDict(
    populate_by_name=True,
)
@model_validator(mode="before")
@classmethod
def build_extra(cls, values: dict[str, Any]) -> Any:
    """Move unrecognised init kwargs into `model_kwargs`.

    Args:
        values: Raw keyword arguments passed to the constructor.

    Returns:
        `values` with every key that is not a declared field (or alias)
        moved under `model_kwargs`.

    Raises:
        ValueError: If a key is supplied both directly and inside
            `model_kwargs`, or if `model_kwargs` contains a declared field.
    """
    all_required_field_names = get_pydantic_field_names(cls)
    # Work on a copy: the original appended into the caller's own
    # `model_kwargs` dict, leaking extras into any dict reused across
    # instances. `or {}` also tolerates an explicit `model_kwargs=None`.
    extra = dict(values.get("model_kwargs") or {})
    # Iterate over a snapshot of the keys because `values` is popped below.
    for field_name in list(values):
        if field_name in extra:
            msg = f"Found {field_name} supplied twice."
            raise ValueError(msg)
        if field_name not in all_required_field_names:
            warnings.warn(
                f"""WARNING! {field_name} is not default parameter.
{field_name} was transferred to model_kwargs.
Please confirm that {field_name} is what you intended.""",
                stacklevel=2,
            )
            extra[field_name] = values.pop(field_name)
    # Declared fields must be passed explicitly, never via `model_kwargs`.
    invalid_model_kwargs = all_required_field_names.intersection(extra.keys())
    if invalid_model_kwargs:
        msg = (
            f"Parameters {invalid_model_kwargs} should be specified explicitly. "
            f"Instead they were passed in as part of `model_kwargs` parameter."
        )
        raise ValueError(msg)
    values["model_kwargs"] = extra
    return values
@model_validator(mode="after")
def validate_environment(self) -> Self:
    """Check `n`/`streaming` and build any Groq client not already set.

    Returns:
        The validated instance, with `client` and `async_client` populated.

    Raises:
        ValueError: If `n < 1`, or `n > 1` while `streaming` is enabled.
        ImportError: If the `groq` package cannot be imported.
    """
    if self.n < 1:
        msg = "n must be at least 1."
        raise ValueError(msg)
    if self.streaming and self.n > 1:
        msg = "n must be 1 when streaming."
        raise ValueError(msg)
    # An exact zero temperature is replaced with a tiny positive value.
    if self.temperature == 0:
        self.temperature = 1e-8

    # Caller-supplied headers are layered over the default User-Agent.
    merged_headers = {"User-Agent": f"langchain/{__version__}"}
    merged_headers.update(self.default_headers or {})

    api_key = None
    if self.groq_api_key:
        api_key = self.groq_api_key.get_secret_value()

    shared_kwargs: dict[str, Any] = {
        "api_key": api_key,
        "base_url": self.groq_api_base,
        "timeout": self.request_timeout,
        "max_retries": self.max_retries,
        "default_headers": merged_headers,
        "default_query": self.default_query,
    }
    try:
        import groq  # noqa: PLC0415

        # Pre-supplied clients are kept as they are.
        if not self.client:
            sync_sdk = groq.Groq(**shared_kwargs, http_client=self.http_client)
            self.client = sync_sdk.chat.completions
        if not self.async_client:
            async_sdk = groq.AsyncGroq(
                **shared_kwargs, http_client=self.http_async_client
            )
            self.async_client = async_sdk.chat.completions
    except ImportError as exc:
        msg = (
            "Could not import groq python package. "
            "Please install it with `pip install groq`."
        )
        raise ImportError(msg) from exc
    return self
def _resolve_model_profile(self) -> ModelProfile | None:
    """Return the default profile for `model_name`, or `None` if it is empty."""
    profile = _get_default_model_profile(self.model_name)
    return profile if profile else None
#
# Serializable class method overrides
#
@property
def lc_secrets(self) -> dict[str, str]:
"""Mapping of secret environment variables."""
return {"groq_api_key": "GROQ_API_KEY"}
@classmethod
def is_lc_serializable(cls) -> bool:
"""Return whether this model can be serialized by LangChain."""
return True
#
# BaseChatModel method overrides
#
@property
def _llm_type(self) -> str:
"""Return type of model."""
return "groq-chat"
def _get_ls_params(
    self, stop: list[str] | None = None, **kwargs: Any
) -> LangSmithParams:
    """Build the standard tracing parameters for one invocation.

    Values come from the invocation params when present, falling back to
    the instance attributes. `ls_max_tokens` and `ls_stop` are only added
    when they are truthy; a single stop string is wrapped in a list.
    """
    invocation = self._get_invocation_params(stop=stop, **kwargs)
    tracing = LangSmithParams(
        ls_provider="groq",
        ls_model_name=invocation.get("model", self.model_name),
        ls_model_type="chat",
        ls_temperature=invocation.get("temperature", self.temperature),
    )

    max_tokens = invocation.get("max_tokens", self.max_tokens)
    if max_tokens:
        tracing["ls_max_tokens"] = max_tokens

    # Precedence: explicit argument, then invocation params, then the default.
    stop_value = stop or invocation.get("stop", None) or self.stop
    if stop_value:
        if not isinstance(stop_value, list):
            stop_value = [stop_value]
        tracing["ls_stop"] = stop_value
    return tracing
def _should_stream(
    self,
    *,
    async_api: bool,
    run_manager: CallbackManagerForLLMRun
    | AsyncCallbackManagerForLLMRun
    | None = None,
    **kwargs: Any,
) -> bool:
    """Decide whether this call should use the streaming API.

    Defers to the base class, then vetoes streaming when `response_format`
    is a dict whose `type` is `json_schema` or `json_object`.
    """
    decision = super()._should_stream(
        async_api=async_api, run_manager=run_manager, **kwargs
    )
    if not decision or "response_format" not in kwargs:
        return decision

    # Streaming not supported in JSON mode or structured outputs.
    fmt = kwargs["response_format"]
    if isinstance(fmt, dict) and fmt.get("type") in {"json_schema", "json_object"}:
        return False
    return decision
def _generate(
    self,
    messages: list[BaseMessage],
    stop: list[str] | None = None,
    run_manager: CallbackManagerForLLMRun | None = None,
    **kwargs: Any,
) -> ChatResult:
    """Run one synchronous chat completion.

    When `streaming` is enabled, the result is assembled from `_stream`.
    Otherwise a single `create` call is made, with call-time `kwargs`
    overriding the default parameters.
    """
    if not self.streaming:
        message_dicts, defaults = self._create_message_dicts(messages, stop)
        request = {**defaults, **kwargs}
        response = self.client.create(messages=message_dicts, **request)
        return self._create_chat_result(response, request)

    chunks = self._stream(messages, stop=stop, run_manager=run_manager, **kwargs)
    return generate_from_stream(chunks)
async def _agenerate(
    self,
    messages: list[BaseMessage],
    stop: list[str] | None = None,
    run_manager: AsyncCallbackManagerForLLMRun | None = None,
    **kwargs: Any,
) -> ChatResult:
    """Run one asynchronous chat completion.

    When `streaming` is enabled, the result is assembled from `_astream`.
    Otherwise a single awaited `create` call is made, with call-time
    `kwargs` overriding the default parameters.
    """
    if not self.streaming:
        message_dicts, defaults = self._create_message_dicts(messages, stop)
        request = {**defaults, **kwargs}
        response = await self.async_client.create(messages=message_dicts, **request)
        return self._create_chat_result(response, request)

    chunks = self._astream(messages, stop=stop, run_manager=run_manager, **kwargs)
    return await agenerate_from_stream(chunks)
def _stream(
    self,
    messages: list[BaseMessage],
    stop: list[str] | None = None,
    run_manager: CallbackManagerForLLMRun | None = None,
    **kwargs: Any,
) -> Iterator[ChatGenerationChunk]:
    """Stream a chat completion synchronously, yielding one chunk per event.

    Args:
        messages: Conversation to send.
        stop: Optional stop sequences overriding the instance default.
        run_manager: Optional callback manager notified of every chunk.
        **kwargs: Extra request parameters; they override the defaults.

    Yields:
        A `ChatGenerationChunk` for every streamed event that has at least
        one choice.
    """
    message_dicts, params = self._create_message_dicts(messages, stop)
    # Call-time kwargs override defaults; streaming is always forced on.
    params = {**params, **kwargs, "stream": True}
    default_chunk_class: type[BaseMessageChunk] = AIMessageChunk
    for chunk in self.client.create(messages=message_dicts, **params):
        if not isinstance(chunk, dict):
            chunk = chunk.model_dump()  # noqa: PLW2901
        # Events without any choice carry nothing to emit.
        if len(chunk["choices"]) == 0:
            continue
        # Only the first choice of each event is read.
        choice = chunk["choices"][0]
        message_chunk = _convert_chunk_to_message_chunk(chunk, default_chunk_class)
        generation_info = {}
        # Response-level metadata is attached only to a chunk that carries
        # a finish_reason.
        if finish_reason := choice.get("finish_reason"):
            generation_info["finish_reason"] = finish_reason
            generation_info["model_name"] = self.model_name
            if system_fingerprint := chunk.get("system_fingerprint"):
                generation_info["system_fingerprint"] = system_fingerprint
            service_tier = params.get("service_tier") or self.service_tier
            generation_info["service_tier"] = service_tier
            reasoning_effort = (
                params.get("reasoning_effort") or self.reasoning_effort
            )
            if reasoning_effort:
                generation_info["reasoning_effort"] = reasoning_effort
        logprobs = choice.get("logprobs")
        if logprobs:
            generation_info["logprobs"] = logprobs
        if generation_info:
            # Replaces the chunk's response_metadata with generation_info.
            message_chunk = message_chunk.model_copy(
                update={"response_metadata": generation_info}
            )
        # The next event is converted using the class of the chunk just built.
        default_chunk_class = message_chunk.__class__
        generation_chunk = ChatGenerationChunk(
            message=message_chunk, generation_info=generation_info or None
        )
        if run_manager:
            run_manager.on_llm_new_token(
                generation_chunk.text, chunk=generation_chunk, logprobs=logprobs
            )
        yield generation_chunk
async def _astream(
    self,
    messages: list[BaseMessage],
    stop: list[str] | None = None,
    run_manager: AsyncCallbackManagerForLLMRun | None = None,
    **kwargs: Any,
) -> AsyncIterator[ChatGenerationChunk]:
    """Stream a chat completion asynchronously, yielding one chunk per event.

    Args:
        messages: Conversation to send.
        stop: Optional stop sequences overriding the instance default.
        run_manager: Optional async callback manager notified of every chunk.
        **kwargs: Extra request parameters; they override the defaults.

    Yields:
        A `ChatGenerationChunk` for every streamed event that has at least
        one choice.
    """
    message_dicts, params = self._create_message_dicts(messages, stop)
    # Call-time kwargs override defaults; streaming is always forced on.
    params = {**params, **kwargs, "stream": True}
    default_chunk_class: type[BaseMessageChunk] = AIMessageChunk
    async for chunk in await self.async_client.create(
        messages=message_dicts, **params
    ):
        if not isinstance(chunk, dict):
            chunk = chunk.model_dump()  # noqa: PLW2901
        # Events without any choice carry nothing to emit.
        if len(chunk["choices"]) == 0:
            continue
        # Only the first choice of each event is read.
        choice = chunk["choices"][0]
        message_chunk = _convert_chunk_to_message_chunk(chunk, default_chunk_class)
        generation_info = {}
        # Response-level metadata is attached only to a chunk that carries
        # a finish_reason.
        if finish_reason := choice.get("finish_reason"):
            generation_info["finish_reason"] = finish_reason
            generation_info["model_name"] = self.model_name
            if system_fingerprint := chunk.get("system_fingerprint"):
                generation_info["system_fingerprint"] = system_fingerprint
            service_tier = params.get("service_tier") or self.service_tier
            generation_info["service_tier"] = service_tier
            reasoning_effort = (
                params.get("reasoning_effort") or self.reasoning_effort
            )
            if reasoning_effort:
                generation_info["reasoning_effort"] = reasoning_effort
        logprobs = choice.get("logprobs")
        if logprobs:
            generation_info["logprobs"] = logprobs
        if generation_info:
            # Replaces the chunk's response_metadata with generation_info.
            message_chunk = message_chunk.model_copy(
                update={"response_metadata": generation_info}
            )
        # The next event is converted using the class of the chunk just built.
        default_chunk_class = message_chunk.__class__
        generation_chunk = ChatGenerationChunk(
            message=message_chunk, generation_info=generation_info or None
        )
        if run_manager:
            await run_manager.on_llm_new_token(
                token=generation_chunk.text,
                chunk=generation_chunk,
                logprobs=logprobs,
            )
        yield generation_chunk
#
# Internal methods
#
@property
def _default_params(self) -> dict[str, Any]:
"""Get the default parameters for calling Groq API."""
params = {
"model": self.model_name,
"stream": self.streaming,
"n": self.n,
"temperature": self.temperature,
"stop": self.stop,
"reasoning_format": self.reasoning_format,
"reasoning_effort": self.reasoning_effort,
"service_tier": self.service_tier,
**self.model_kwargs,
}
if self.max_tokens is not None:
params["max_tokens"] = self.max_tokens
return params
def _create_chat_result(
    self, response: dict | BaseModel, params: dict
) -> ChatResult:
    """Convert a non-streaming API response into a `ChatResult`.

    Args:
        response: The API response, as a dict or an object exposing
            `model_dump()`.
        params: Request parameters used for the call; consulted for
            `service_tier` and `reasoning_effort` before the instance
            defaults.

    Returns:
        One generation per returned choice, plus `llm_output` holding
        token usage, model name, system fingerprint and service tier.
    """
    payload = response if isinstance(response, dict) else response.model_dump()
    usage = payload.get("usage", {})

    generations = []
    for choice in payload["choices"]:
        message = _convert_dict_to_message(choice["message"])
        if usage and isinstance(message, AIMessage):
            message.usage_metadata = _create_usage_metadata(usage)
        info = {"finish_reason": choice.get("finish_reason")}
        if "logprobs" in choice:
            info["logprobs"] = choice["logprobs"]
        generations.append(ChatGeneration(message=message, generation_info=info))

    llm_output = {
        "token_usage": usage,
        "model_name": self.model_name,
        "system_fingerprint": payload.get("system_fingerprint", ""),
        "service_tier": params.get("service_tier") or self.service_tier,
    }
    effort = params.get("reasoning_effort") or self.reasoning_effort
    if effort:
        llm_output["reasoning_effort"] = effort
    return ChatResult(generations=generations, llm_output=llm_output)
def _create_message_dicts(
self, messages: list[BaseMessage], stop: list[str] | None
) -> tuple[list[dict[str, Any]], dict[str, Any]]:
params = self._default_params
if stop is not None:
params["stop"] = stop
message_dicts = [_convert_message_to_dict(m) for m in messages]
return message_dicts, params
def _combine_llm_outputs(self, llm_outputs: list[dict | None]) -> dict:
overall_token_usage: dict = {}
system_fingerprint = None
for output in llm_outputs:
if output is None:
# Happens in streaming
continue
token_usage = output["token_usage"]
if token_usage is not None:
for k, v in token_usage.items():
if k in overall_token_usage and v is not None:
# Handle nested dictionaries
if isinstance(v, dict):
if k not in overall_token_usage:
overall_token_usage[k] = {}
for nested_k, nested_v in v.items():
if (
nested_k in overall_token_usage[k]
and nested_v is not None
):
overall_token_usage[k][nested_k] += nested_v
else:
overall_token_usage[k][nested_k] = nested_v
else:
overall_token_usage[k] += v
else:
overall_token_usage[k] = v
if system_fingerprint is None:
system_fingerprint = output.get("system_fingerprint")
combined = {"token_usage": overall_token_usage, "model_name": self.model_name}
if system_fingerprint:
combined["system_fingerprint"] = system_fingerprint
if self.service_tier:
combined["service_tier"] = self.service_tier
return combined
def bind_tools(
    self,
    tools: Sequence[dict[str, Any] | type[BaseModel] | Callable | BaseTool],
    *,
    tool_choice: dict | str | bool | None = None,
    **kwargs: Any,
) -> Runnable[LanguageModelInput, AIMessage]:
    """Bind tool-like objects to this chat model.

    Args:
        tools: A list of tool definitions to bind to this chat model.
            Supports any tool definition handled by [`convert_to_openai_tool`][langchain_core.utils.function_calling.convert_to_openai_tool].
        tool_choice: Which tool to require the model to call.
            Must be the name of the single provided function,
            `'auto'` to automatically determine which function to call
            with the option to not call any function, `'any'` to enforce that some
            function is called, `True` to force the single bound tool, or a
            dict of the form:
            `{"type": "function", "function": {"name": <<tool_name>>}}`.
            Falsy values (`None`, `False`, `''`) leave `tool_choice` unset.
        **kwargs: Any additional parameters to bind alongside the tools; they
            are forwarded to `bind`. A `strict` entry is discarded.

    Returns:
        A `Runnable` with the formatted tools (and `tool_choice`, if any) bound.

    Raises:
        ValueError: If `tool_choice` is `True` and the number of tools is
            not exactly one.
    """  # noqa: E501
    # strict tool-calling not supported by Groq
    kwargs.pop("strict", None)
    formatted_tools = [convert_to_openai_tool(tool) for tool in tools]
    if tool_choice:
        # 'any' is spelled 'required' in the request.
        if tool_choice == "any":
            tool_choice = "required"
        # Any other string is taken to be the name of the tool to force.
        if isinstance(tool_choice, str) and (
            tool_choice not in ("auto", "none", "required")
        ):
            tool_choice = {"type": "function", "function": {"name": tool_choice}}
        if isinstance(tool_choice, bool):
            # `True` is only unambiguous with exactly one tool. Checking
            # `!= 1` (not `> 1`) also rejects an empty tool list, which
            # would otherwise fail below with an IndexError.
            if len(formatted_tools) != 1:
                msg = (
                    "tool_choice can only be True when there is one tool. Received "
                    f"{len(tools)} tools."
                )
                raise ValueError(msg)
            tool_name = formatted_tools[0]["function"]["name"]
            tool_choice = {
                "type": "function",
                "function": {"name": tool_name},
            }
        kwargs["tool_choice"] = tool_choice
    return super().bind(tools=formatted_tools, **kwargs)
def with_structured_output(
self,
schema: dict | type[BaseModel] | None = None,
*,
method: Literal[
"function_calling", "json_mode", "json_schema"
] = "function_calling",
include_raw: bool = False,
strict: bool | None = None,
**kwargs: Any,
) -> Runnable[LanguageModelInput, dict | BaseModel]:
r"""Model wrapper that returns outputs formatted to match the given schema.
Args:
schema: The output schema. Can be passed in as:
- An OpenAI function/tool schema,
- A JSON Schema,
- A `TypedDict` class,
- Or a Pydantic class.
If `schema` is a Pydantic class then the model output will be a
Pydantic instance of that class, and the model-generated fields will be
validated by the Pydantic class. Otherwise the model output will be a
dict and will not be validated.
See `langchain_core.utils.function_calling.convert_to_openai_tool` for
more on how to properly specify types and descriptions of schema fields
when specifying a Pydantic or `TypedDict` class.
!!! warning "Behavior changed in `langchain-groq` 0.3.8"
Added support for Groq's dedicated structured output feature via
`method="json_schema"`.
method: The method for steering model generation, one of:
- `'function_calling'`:
Uses Groq's tool-calling [API](https://console.groq.com/docs/tool-use)
- `'json_schema'`:
Uses Groq's [Structured Output API](https://console.groq.com/docs/structured-outputs).
Supported for a subset of models. See [docs](https://console.groq.com/docs/structured-outputs)
for details.
- `'json_mode'`:
Uses Groq's [JSON mode](https://console.groq.com/docs/structured-outputs#json-object-mode).
Note that if using JSON mode then you must include instructions for
formatting the output into the desired schema into the model call
Learn more about the differences between the methods and which models
support which methods [here](https://console.groq.com/docs/structured-outputs).
method:
The method for steering model generation, either `'function_calling'`
or `'json_mode'`. If `'function_calling'` then the schema will be converted
to an OpenAI function and the returned model will make use of the
function-calling API. If `'json_mode'` then JSON mode will be used.
!!! note
If using `'json_mode'` then you must include instructions for formatting
the output into the desired schema into the model call. (either via the
prompt itself or in the system message/prompt/instructions).
!!! warning
`'json_mode'` does not support streaming responses or stop sequences.
include_raw:
If `False` then only the parsed structured output is returned.
If an error occurs during model output parsing it will be raised.
If `True` then both the raw model response (a `BaseMessage`) and the
parsed model response will be returned.
If an error occurs during output parsing it will be caught and returned
as well.
The final output is always a `dict` with keys `'raw'`, `'parsed'`, and
`'parsing_error'`.
strict:
Only used with `method="json_schema"`. When `True`, Groq's Structured
Output API uses constrained decoding to guarantee schema compliance.
This requires every object to set `additionalProperties: false` and
all properties to be listed in `required`. When `False`, schema
adherence is best-effort. If `None`, the argument is omitted.
Strict mode is only supported for `openai/gpt-oss-20b` and
`openai/gpt-oss-120b`. For other models, `strict=True` is ignored.
kwargs:
Any additional parameters to pass to the `langchain.runnable.Runnable`
constructor.
Returns:
A `Runnable` that takes same inputs as a
`langchain_core.language_models.chat.BaseChatModel`. If `include_raw` is
`False` and `schema` is a Pydantic class, `Runnable` outputs an instance
of `schema` (i.e., a Pydantic object). Otherwise, if `include_raw` is
`False` then `Runnable` outputs a `dict`.
If `include_raw` is `True`, then `Runnable` outputs a `dict` with keys:
- `'raw'`: `BaseMessage`
- `'parsed'`: `None` if there was a parsing error, otherwise the type
depends on the `schema` as described above.
- `'parsing_error'`: `BaseException | None`
Example: schema=Pydantic class, method="function_calling", include_raw=False:
```python
from typing import Optional
from langchain_groq import ChatGroq
from pydantic import BaseModel, Field
class AnswerWithJustification(BaseModel):
'''An answer to the user question along with justification for the answer.'''
answer: str
# If we provide default values and/or descriptions for fields, these will be passed
# to the model. This is an important part of improving a model's ability to
# correctly return structured outputs.
justification: str | None = Field(default=None, description="A justification for the answer.")
model = ChatGroq(model="openai/gpt-oss-120b", temperature=0)
structured_model = model.with_structured_output(AnswerWithJustification)
structured_model.invoke("What weighs more a pound of bricks or a pound of feathers")
# -> AnswerWithJustification(
# answer='They weigh the same',
# justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
# )
```
Example: schema=Pydantic class, method="function_calling", include_raw=True:
```python
from langchain_groq import ChatGroq
from pydantic import BaseModel
class AnswerWithJustification(BaseModel):
'''An answer to the user question along with justification for the answer.'''
answer: str
justification: str
model = ChatGroq(model="openai/gpt-oss-120b", temperature=0)
structured_model = model.with_structured_output(
AnswerWithJustification,
include_raw=True,
)
structured_model.invoke("What weighs more a pound of bricks or a pound of feathers")
# -> {
# 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}),
# 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'),
# 'parsing_error': None
# }
```
Example: schema=TypedDict class, method="function_calling", include_raw=False:
```python
from typing_extensions import Annotated, TypedDict
from langchain_groq import ChatGroq
class AnswerWithJustification(TypedDict):
'''An answer to the user question along with justification for the answer.'''
answer: str
justification: Annotated[str | None, None, "A justification for the answer."]
model = ChatGroq(model="openai/gpt-oss-120b", temperature=0)
structured_model = model.with_structured_output(AnswerWithJustification)
structured_model.invoke("What weighs more a pound of bricks or a pound of feathers")
# -> {
# 'answer': 'They weigh the same',
# 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
# }
```
Example: schema=OpenAI function schema, method="function_calling", include_raw=False:
```python
from langchain_groq import ChatGroq
oai_schema = {
'name': 'AnswerWithJustification',
'description': 'An answer to the user question along with justification for the answer.',
'parameters': {
'type': 'object',
'properties': {
'answer': {'type': 'string'},
'justification': {'description': 'A justification for the answer.', 'type': 'string'}
},
'required': ['answer']
}
}
model = ChatGroq(model="openai/gpt-oss-120b", temperature=0)
structured_model = model.with_structured_output(oai_schema)
structured_model.invoke(
"What weighs more a pound of bricks or a pound of feathers"
)
# -> {
# 'answer': 'They weigh the same',
# 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume and density of the two substances differ.'
# }
```
Example: schema=Pydantic class, method="json_schema", include_raw=False:
```python
from typing import Optional
from langchain_groq import ChatGroq
from pydantic import BaseModel, Field
class AnswerWithJustification(BaseModel):
'''An answer to the user question along with justification for the answer.'''
answer: str
# If we provide default values and/or descriptions for fields, these will be passed
# to the model. This is an important part of improving a model's ability to
# correctly return structured outputs.
justification: str | None = Field(default=None, description="A justification for the answer.")
model = ChatGroq(model="openai/gpt-oss-120b", temperature=0)
structured_model = model.with_structured_output(
AnswerWithJustification,
method="json_schema",
)
structured_model.invoke("What weighs more a pound of bricks or a pound of feathers")
# -> AnswerWithJustification(
# answer='They weigh the same',
# justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'
# )
```
Example: schema=Pydantic class, method="json_mode", include_raw=True:
```python
from langchain_groq import ChatGroq
from pydantic import BaseModel
class AnswerWithJustification(BaseModel):
answer: str
justification: str
model = ChatGroq(model="openai/gpt-oss-120b", temperature=0)
structured_model = model.with_structured_output(
AnswerWithJustification, method="json_mode", include_raw=True
)
structured_model.invoke(
"Answer the following question. "
"Make sure to return a JSON blob with keys 'answer' and 'justification'.\n\n"
"What's heavier a pound of bricks or a pound of feathers?"
)
# -> {
# 'raw': AIMessage(content='{\n "answer": "They are both the same weight.",\n "justification": "Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight." \n}'),
# 'parsed': AnswerWithJustification(answer='They are both the same weight.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The difference lies in the volume and density of the materials, not the weight.'),
# 'parsing_error': None
# }
```
""" # noqa: E501
is_pydantic_schema = _is_pydantic_class(schema)
if method == "function_calling":
if schema is None:
msg = (
"schema must be specified when method is 'function_calling'. "
"Received None."
)
raise ValueError(msg)
formatted_tool = convert_to_openai_tool(schema)
tool_name = formatted_tool["function"]["name"]
llm = self.bind_tools(
[schema],
tool_choice=tool_name,
ls_structured_output_format={
"kwargs": {"method": "function_calling"},
"schema": formatted_tool,
},
**kwargs,
)
if is_pydantic_schema:
output_parser: OutputParserLike = PydanticToolsParser(
tools=[schema], # type: ignore[list-item]
first_tool_only=True, # type: ignore[list-item]
)
else:
output_parser = JsonOutputKeyToolsParser(
key_name=tool_name, first_tool_only=True
)
elif method == "json_schema":
# Use structured outputs (json_schema) for models that support it
# Convert schema to JSON Schema format for structured outputs
if schema is None:
msg = (
"schema must be specified when method is 'json_schema'. "
"Received None."
)
raise ValueError(msg)
if (
strict is True
and self.model_name not in _STRICT_STRUCTURED_OUTPUT_MODELS
):
# Ignore unsupported strict=True to preserve backward compatibility.
strict = None
json_schema = convert_to_json_schema(schema, strict=strict)
schema_name = json_schema.get("title", "")
response_format: dict[str, Any] = {
"type": "json_schema",
"json_schema": {"name": schema_name, "schema": json_schema},
}
if strict is not None:
response_format["json_schema"]["strict"] = strict
ls_format_kwargs: dict[str, Any] = {"method": "json_schema"}
if strict is not None:
ls_format_kwargs["strict"] = strict
ls_format_info = {
"kwargs": ls_format_kwargs,
"schema": json_schema,
}
llm = self.bind(
response_format=response_format,
ls_structured_output_format=ls_format_info,
**kwargs,
)
output_parser = (
PydanticOutputParser(pydantic_object=schema) # type: ignore[type-var, arg-type]
if is_pydantic_schema
else JsonOutputParser()
)
elif method == "json_mode":
llm = self.bind(
response_format={"type": "json_object"},
ls_structured_output_format={
"kwargs": {"method": "json_mode"},
"schema": schema,
},
**kwargs,
)
output_parser = (
PydanticOutputParser(pydantic_object=schema) # type: ignore[type-var, arg-type]
if is_pydantic_schema
else JsonOutputParser()
)
else:
msg = (
"Unrecognized method argument. Expected one of "
"'function_calling', 'json_mode', or 'json_schema'. "
f"Received: '{method}'"
)
raise ValueError(msg)
if include_raw:
parser_assign = RunnablePassthrough.assign(
parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None
)
parser_none = RunnablePassthrough.assign(parsed=lambda _: None)
parser_with_fallback = parser_assign.with_fallbacks(
[parser_none], exception_key="parsing_error"
)
return RunnableMap(raw=llm) | parser_with_fallback
return llm | output_parser
def _is_pydantic_class(obj: Any) -> bool:
return isinstance(obj, type) and is_basemodel_subclass(obj)
#
# Type conversion helpers
#
def _format_message_content(content: Any) -> Any:
"""Format message content for Groq API.
Converts LangChain image content blocks to Groq's expected image_url format.
Args:
content: The message content (string or list of content blocks).
Returns:
Formatted content suitable for Groq API.
"""
if content and isinstance(content, list):
formatted: list = []
for block in content:
# Handle LangChain standard data content blocks (image, audio, file)
if isinstance(block, dict) and is_data_content_block(block):
formatted.append(convert_to_openai_data_block(block))
else:
formatted.append(block)
return formatted
return content
def _convert_message_to_dict(message: BaseMessage) -> dict:
    """Serialize a LangChain message into a role/content message dict.

    Args:
        message: The LangChain message.

    Returns:
        A dict holding `role` and `content`, plus `function_call`,
        `tool_calls`, `tool_call_id` and/or `name` when the message carries
        them.

    Raises:
        TypeError: If `message` is not one of the handled message classes.
    """

    def _is_blank(value: Any) -> bool:
        # Both an empty string and an empty block list mean "no content".
        return value == "" or (isinstance(value, list) and not value)

    payload: dict[str, Any]
    if isinstance(message, ChatMessage):
        payload = {"role": message.role, "content": message.content}
    elif isinstance(message, HumanMessage):
        payload = {
            "role": "user",
            "content": _format_message_content(message.content),
        }
    elif isinstance(message, AIMessage):
        # Messages stored in the v1 output format are translated back to
        # Groq-style content/additional_kwargs before serialization.
        if message.response_metadata.get("output_version") == "v1":
            groq_content, groq_kwargs = _convert_from_v1_to_groq(
                message.content_blocks, message.response_metadata.get("model_provider")
            )
            message = message.model_copy(
                update={
                    "content": groq_content,
                    "additional_kwargs": groq_kwargs,
                }
            )

        body = message.content
        if isinstance(body, list):
            # Only 'text' blocks are forwarded in content; tool_call and other
            # block types are dropped, and an empty result collapses to "".
            body = [
                part
                for part in body
                if isinstance(part, dict) and part.get("type") == "text"
            ] or ""
        payload = {"role": "assistant", "content": body}

        extras = message.additional_kwargs
        if "function_call" in extras:
            payload["function_call"] = extras["function_call"]
            # If function call only, content is None not empty string
            if payload["content"] == "":
                payload["content"] = None

        if message.tool_calls or message.invalid_tool_calls:
            valid_calls = [
                _lc_tool_call_to_groq_tool_call(call) for call in message.tool_calls
            ]
            invalid_calls = [
                _lc_invalid_tool_call_to_groq_tool_call(call)
                for call in message.invalid_tool_calls
            ]
            payload["tool_calls"] = valid_calls + invalid_calls
            # If tool calls only (no text blocks), content is None
            if _is_blank(payload["content"]):
                payload["content"] = None
        elif "tool_calls" in extras:
            # Fall back to the raw tool calls kept in additional_kwargs.
            payload["tool_calls"] = extras["tool_calls"]
            if _is_blank(payload["content"]):
                payload["content"] = None
    elif isinstance(message, SystemMessage):
        payload = {"role": "system", "content": message.content}
    elif isinstance(message, FunctionMessage):
        payload = {
            "role": "function",
            "content": message.content,
            "name": message.name,
        }
    elif isinstance(message, ToolMessage):
        payload = {
            "role": "tool",
            "content": message.content,
            "tool_call_id": message.tool_call_id,
        }
    else:
        msg = f"Got unknown type {message}"
        raise TypeError(msg)

    # A `name` stored in additional_kwargs takes precedence over any name set
    # above. For translated AI messages this reads the translated kwargs.
    if "name" in message.additional_kwargs:
        payload["name"] = message.additional_kwargs["name"]
    return payload
def _convert_chunk_to_message_chunk(
    chunk: Mapping[str, Any], default_class: type[BaseMessageChunk]
) -> BaseMessageChunk:
    """Build a LangChain message chunk from one streamed response chunk.

    Args:
        chunk: A streamed chunk; its first choice's `delta` is converted.
        default_class: Chunk class to use when the delta's `role` does not
            select one.

    Returns:
        A message chunk whose class is picked from the delta's `role`, or
        from `default_class` when the role does not match.
    """
    delta = chunk["choices"][0]["delta"]
    role = cast("str", delta.get("role"))
    content = cast("str", delta.get("content") or "")
    extras: dict = {}

    if raw_function_call := delta.get("function_call"):
        function_call = dict(raw_function_call)
        # A present-but-null name is normalized to an empty string.
        if "name" in function_call and function_call["name"] is None:
            function_call["name"] = ""
        extras["function_call"] = function_call

    if tool_calls := delta.get("tool_calls"):
        # Groq sends 'null' (JSON null) for tools with no arguments, but we
        # expect '{}' (empty JSON object) to represent empty arguments.
        # The tool call dicts are updated in place.
        for call in tool_calls:
            function_part = call.get("function")
            if function_part and function_part.get("arguments") == "null":
                function_part["arguments"] = "{}"
        extras["tool_calls"] = tool_calls

    if role == "user" or default_class == HumanMessageChunk:
        return HumanMessageChunk(content=content)

    if role == "assistant" or default_class == AIMessageChunk:
        if reasoning := delta.get("reasoning"):
            extras["reasoning_content"] = reasoning
        if executed_tools := delta.get("executed_tools"):
            # Tool output duplicates query and other server tool call data, so
            # entries with output keep only `index`/`output`; entries without
            # output keep everything except the `output` key.
            extras["executed_tools"] = [
                {key: tool[key] for key in ("index", "output") if key in tool}
                if tool.get("output")
                else {key: tool[key] for key in tool if key != "output"}
                for tool in executed_tools
            ]

        usage = (chunk.get("x_groq") or {}).get("usage")
        usage_metadata = _create_usage_metadata(usage) if usage else None
        return AIMessageChunk(
            content=content,
            additional_kwargs=extras,
            usage_metadata=usage_metadata,  # type: ignore[arg-type]
            response_metadata={"model_provider": "groq"},
        )

    if role == "system" or default_class == SystemMessageChunk:
        return SystemMessageChunk(content=content)
    if role == "function" or default_class == FunctionMessageChunk:
        return FunctionMessageChunk(content=content, name=delta["name"])
    if role == "tool" or default_class == ToolMessageChunk:
        return ToolMessageChunk(content=content, tool_call_id=delta["tool_call_id"])
    if role or default_class == ChatMessageChunk:
        return ChatMessageChunk(content=content, role=role)
    return default_class(content=content)  # type: ignore[call-arg]
def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage:
    """Turn a message dictionary into the matching LangChain message.

    Args:
        _dict: The message dictionary; its `role` selects the message class.

    Returns:
        The LangChain message. Roles other than `user`, `assistant`,
        `system`, `function` and `tool` produce a `ChatMessage`.
    """
    role = _dict.get("role")
    raw_content = _dict.get("content", "")

    if role == "user":
        return HumanMessage(content=raw_content)

    if role == "assistant":
        extras: dict = {}
        if reasoning := _dict.get("reasoning"):
            extras["reasoning_content"] = reasoning
        if executed_tools := _dict.get("executed_tools"):
            extras["executed_tools"] = executed_tools
        if function_call := _dict.get("function_call"):
            extras["function_call"] = dict(function_call)

        parsed_calls = []
        failed_calls = []
        if raw_tool_calls := _dict.get("tool_calls"):
            # Groq sends 'null' (JSON null) for tools with no arguments, but we
            # expect '{}' (empty JSON object) to represent empty arguments.
            # The raw tool call dicts are updated in place.
            for raw_call in raw_tool_calls:
                function_part = raw_call.get("function")
                if function_part and function_part.get("arguments") == "null":
                    function_part["arguments"] = "{}"
            extras["tool_calls"] = raw_tool_calls

            # Calls that fail to parse are kept as invalid tool calls rather
            # than aborting the whole conversion.
            for raw_call in raw_tool_calls:
                try:
                    parsed_calls.append(parse_tool_call(raw_call, return_id=True))
                except Exception as exc:  # pylint: disable=broad-except
                    failed_calls.append(make_invalid_tool_call(raw_call, str(exc)))

        return AIMessage(
            # A null content value is replaced by an empty string.
            content=raw_content or "",
            id=_dict.get("id"),
            additional_kwargs=extras,
            tool_calls=parsed_calls,
            invalid_tool_calls=failed_calls,
            response_metadata={"model_provider": "groq"},
        )

    if role == "system":
        return SystemMessage(content=raw_content)

    if role == "function":
        return FunctionMessage(content=raw_content, name=_dict.get("name"))  # type: ignore[arg-type]

    if role == "tool":
        tool_extras = {"name": _dict["name"]} if "name" in _dict else {}
        return ToolMessage(
            content=raw_content,
            tool_call_id=_dict.get("tool_call_id"),
            additional_kwargs=tool_extras,
        )

    return ChatMessage(content=raw_content, role=role)  # type: ignore[arg-type]
def _lc_tool_call_to_groq_tool_call(tool_call: ToolCall) -> dict:
return {
"type": "function",
"id": tool_call["id"],
"function": {
"name": tool_call["name"],
"arguments": json.dumps(tool_call["args"], ensure_ascii=False),
},
}
def _lc_invalid_tool_call_to_groq_tool_call(
invalid_tool_call: InvalidToolCall,
) -> dict:
return {
"type": "function",
"id": invalid_tool_call["id"],
"function": {
"name": invalid_tool_call["name"],
"arguments": invalid_tool_call["args"],
},
}
def _create_usage_metadata(groq_token_usage: dict) -> UsageMetadata:
"""Create usage metadata from Groq token usage response.
Args:
groq_token_usage: Token usage dict from Groq API response.
Returns:
Usage metadata dict with input/output token details.
"""
# Support both formats: new Responses API uses "input_tokens",
# Chat Completions API uses "prompt_tokens"
_input = groq_token_usage.get("input_tokens")
input_tokens = (
_input if _input is not None else (groq_token_usage.get("prompt_tokens") or 0)
)
_output = groq_token_usage.get("output_tokens")
output_tokens = (
_output
if _output is not None
else (groq_token_usage.get("completion_tokens") or 0)
)
_total = groq_token_usage.get("total_tokens")
total_tokens = _total if _total is not None else input_tokens + output_tokens
# Support both formats for token details:
# Responses API uses "*_tokens_details", Chat Completions API might use
# "prompt_token_details"
input_details_dict = (
groq_token_usage.get("input_tokens_details")
or groq_token_usage.get("prompt_tokens_details")
or {}
)
output_details_dict = (
groq_token_usage.get("output_tokens_details")
or groq_token_usage.get("completion_tokens_details")
or {}
)
input_token_details: dict = {
"cache_read": input_details_dict.get("cached_tokens"),
}
output_token_details: dict = {
"reasoning": output_details_dict.get("reasoning_tokens"),
}
usage_metadata: UsageMetadata = {
"input_tokens": input_tokens,
"output_tokens": output_tokens,
"total_tokens": total_tokens,
}
if filtered_input := {k: v for k, v in input_token_details.items() if v}:
usage_metadata["input_token_details"] = InputTokenDetails(**filtered_input) # type: ignore[typeddict-item]
if filtered_output := {k: v for k, v in output_token_details.items() if v}:
usage_metadata["output_token_details"] = OutputTokenDetails(**filtered_output) # type: ignore[typeddict-item]
return usage_metadata