feat(openrouter): surface cost and cost_details in response_metadata (#35461)

## Description

OpenRouter returns `cost` and `cost_details` in its API response `usage`
object, providing the actual cost of each API call. Currently,
`_create_usage_metadata()` only extracts token counts and drops these
cost fields.

This PR surfaces both `cost` and `cost_details` in `response_metadata`
for both non-streaming and streaming paths, allowing users to access
actual API costs directly from the response without manual estimation
from token counts.

**Example response from OpenRouter:**
```json
{
  "usage": {
    "prompt_tokens": 100,
    "completion_tokens": 50,
    "cost": 0.000075,
    "cost_details": {
      "upstream_inference_cost": 0.00007745,
      "upstream_inference_prompt_cost": 0.00000895,
      "upstream_inference_completions_cost": 0.0000685
    }
  }
}
```

**After this change:**
```python
result = chat.invoke("hello")
result.response_metadata["cost"]          # 0.000075
result.response_metadata["cost_details"]  # {...}
```

## Changes

- **`_create_chat_result`**: Surface `cost` and `cost_details` from
`token_usage` into `response_metadata` (non-streaming)
- **`_convert_chunk_to_message_chunk`**: Same for streaming
`AIMessageChunk`
- **`_stream` / `_astream`**: Merge `generation_info` into the chunk's
existing `response_metadata` instead of replacing it, so cost fields set
by `_convert_chunk_to_message_chunk` survive the final chunk's
`finish_reason` update
- Added `PLR0912` to `noqa` comments (new branches pushed count over
threshold)
- Added four unit tests: one verifying cost fields are present when
returned, one verifying they're absent when not in usage, and sync/async
streaming regression tests verifying cost fields are preserved on the
final chunk alongside `finish_reason`

## Issue

N/A — discovered while integrating OpenRouter in a production pipeline.
The cost data is already returned by the API but was being silently
dropped.

## Dependencies

None.

## Twitter handle

@hamza_kyamanywa

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
This commit is contained in:
Hamza Kyamanywa
2026-03-02 08:47:19 +09:00
committed by GitHub
parent 21b64e56fe
commit f84b534248
2 changed files with 165 additions and 5 deletions

View File

@@ -489,7 +489,12 @@ class ChatOpenRouter(BaseChatModel):
if generation_info:
generation_info["model_provider"] = "openrouter"
message_chunk = message_chunk.model_copy(
update={"response_metadata": generation_info}
update={
"response_metadata": {
**message_chunk.response_metadata,
**generation_info,
}
}
)
default_chunk_class = message_chunk.__class__
@@ -558,7 +563,12 @@ class ChatOpenRouter(BaseChatModel):
if generation_info:
generation_info["model_provider"] = "openrouter"
message_chunk = message_chunk.model_copy(
update={"response_metadata": generation_info}
update={
"response_metadata": {
**message_chunk.response_metadata,
**generation_info,
}
}
)
default_chunk_class = message_chunk.__class__
@@ -623,7 +633,7 @@ class ChatOpenRouter(BaseChatModel):
message_dicts = [_convert_message_to_dict(m) for m in messages]
return message_dicts, params
def _create_chat_result(self, response: Any) -> ChatResult: # noqa: C901
def _create_chat_result(self, response: Any) -> ChatResult: # noqa: C901, PLR0912
"""Create a `ChatResult` from an OpenRouter SDK response."""
if not isinstance(response, dict):
response = response.model_dump(by_alias=True)
@@ -655,6 +665,13 @@ class ChatOpenRouter(BaseChatModel):
message = _convert_dict_to_message(res["message"])
if token_usage and isinstance(message, AIMessage):
message.usage_metadata = _create_usage_metadata(token_usage)
# Surface OpenRouter cost data in response_metadata
if "cost" in token_usage:
message.response_metadata["cost"] = token_usage["cost"]
if "cost_details" in token_usage:
message.response_metadata["cost_details"] = token_usage[
"cost_details"
]
if isinstance(message, AIMessage):
if system_fingerprint:
message.response_metadata["system_fingerprint"] = system_fingerprint
@@ -1160,7 +1177,7 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage: # noqa:
return ChatMessage(content=_dict.get("content", ""), role=role)
def _convert_chunk_to_message_chunk( # noqa: C901, PLR0911
def _convert_chunk_to_message_chunk( # noqa: C901, PLR0911, PLR0912
chunk: Mapping[str, Any], default_class: type[BaseMessageChunk]
) -> BaseMessageChunk:
"""Convert a streaming chunk dict to a LangChain message chunk.
@@ -1205,14 +1222,20 @@ def _convert_chunk_to_message_chunk( # noqa: C901, PLR0911
if reasoning_details := _dict.get("reasoning_details"):
additional_kwargs["reasoning_details"] = reasoning_details
usage_metadata = None
response_metadata: dict[str, Any] = {"model_provider": "openrouter"}
if usage := chunk.get("usage"):
usage_metadata = _create_usage_metadata(usage)
# Surface OpenRouter cost data in response_metadata
if "cost" in usage:
response_metadata["cost"] = usage["cost"]
if "cost_details" in usage:
response_metadata["cost_details"] = usage["cost_details"]
return AIMessageChunk(
content=content,
additional_kwargs=additional_kwargs,
tool_call_chunks=tool_call_chunks, # type: ignore[arg-type]
usage_metadata=usage_metadata, # type: ignore[arg-type]
response_metadata={"model_provider": "openrouter"},
response_metadata=response_metadata,
)
if role == "system" or default_class == SystemMessageChunk:
return SystemMessageChunk(content=content)

View File

@@ -1261,6 +1261,143 @@ class TestCreateChatResult:
assert isinstance(msg, AIMessage)
assert msg.response_metadata["native_finish_reason"] == "end_turn"
def test_cost_in_response_metadata(self) -> None:
"""Test that OpenRouter cost data is surfaced in response_metadata."""
model = _make_model()
response: dict[str, Any] = {
**_SIMPLE_RESPONSE_DICT,
"usage": {
**_SIMPLE_RESPONSE_DICT["usage"],
"cost": 7.5e-05,
"cost_details": {
"upstream_inference_cost": 7.745e-05,
"upstream_inference_prompt_cost": 8.95e-06,
"upstream_inference_completions_cost": 6.85e-05,
},
},
}
result = model._create_chat_result(response)
msg = result.generations[0].message
assert isinstance(msg, AIMessage)
assert msg.response_metadata["cost"] == 7.5e-05
assert msg.response_metadata["cost_details"] == {
"upstream_inference_cost": 7.745e-05,
"upstream_inference_prompt_cost": 8.95e-06,
"upstream_inference_completions_cost": 6.85e-05,
}
def test_cost_absent_when_not_in_usage(self) -> None:
"""Test that cost fields are not added when not present in usage."""
model = _make_model()
result = model._create_chat_result(_SIMPLE_RESPONSE_DICT)
msg = result.generations[0].message
assert isinstance(msg, AIMessage)
assert "cost" not in msg.response_metadata
assert "cost_details" not in msg.response_metadata
def test_stream_cost_survives_final_chunk(self) -> None:
"""Test that cost fields are preserved on the final streaming chunk.
The final chunk carries both finish_reason metadata and usage/cost data.
Regression test: generation_info must merge into response_metadata, not
replace it, so cost fields set by _convert_chunk_to_message_chunk are
not lost.
"""
model = _make_model()
model.client = MagicMock()
cost_details = {
"upstream_inference_cost": 7.745e-05,
"upstream_inference_prompt_cost": 8.95e-06,
"upstream_inference_completions_cost": 6.85e-05,
}
stream_chunks: list[dict[str, Any]] = [
{
"choices": [
{"delta": {"role": "assistant", "content": "Hi"}, "index": 0}
],
},
{
"choices": [
{
"delta": {},
"finish_reason": "stop",
"index": 0,
}
],
"model": "openai/gpt-4o-mini",
"id": "gen-cost-stream",
"usage": {
"prompt_tokens": 10,
"completion_tokens": 5,
"total_tokens": 15,
"cost": 7.5e-05,
"cost_details": cost_details,
},
},
]
model.client.chat.send.return_value = _MockSyncStream(stream_chunks)
chunks = list(model.stream("Hello"))
final = [
c for c in chunks if c.response_metadata.get("finish_reason") == "stop"
]
assert len(final) == 1
meta = final[0].response_metadata
assert meta["cost"] == 7.5e-05
assert meta["cost_details"] == cost_details
assert meta["finish_reason"] == "stop"
async def test_astream_cost_survives_final_chunk(self) -> None:
"""Test that cost fields are preserved on the final async streaming chunk.
Same regression coverage as the sync test above, for the _astream path.
"""
model = _make_model()
model.client = MagicMock()
cost_details = {
"upstream_inference_cost": 7.745e-05,
"upstream_inference_prompt_cost": 8.95e-06,
"upstream_inference_completions_cost": 6.85e-05,
}
stream_chunks: list[dict[str, Any]] = [
{
"choices": [
{"delta": {"role": "assistant", "content": "Hi"}, "index": 0}
],
},
{
"choices": [
{
"delta": {},
"finish_reason": "stop",
"index": 0,
}
],
"model": "openai/gpt-4o-mini",
"id": "gen-cost-astream",
"usage": {
"prompt_tokens": 10,
"completion_tokens": 5,
"total_tokens": 15,
"cost": 7.5e-05,
"cost_details": cost_details,
},
},
]
model.client.chat.send_async = AsyncMock(
return_value=_MockAsyncStream(stream_chunks)
)
chunks = [c async for c in model.astream("Hello")]
final = [
c for c in chunks if c.response_metadata.get("finish_reason") == "stop"
]
assert len(final) == 1
meta = final[0].response_metadata
assert meta["cost"] == 7.5e-05
assert meta["cost_details"] == cost_details
assert meta["finish_reason"] == "stop"
def test_missing_optional_metadata_excluded(self) -> None:
"""Test that absent optional fields are not added to response_metadata."""
model = _make_model()