mirror of
https://github.com/hwchase17/langchain.git
synced 2026-03-18 02:53:16 +00:00
feat(openrouter): surface cost and cost_details in response_metadata (#35461)
## Description
OpenRouter returns `cost` and `cost_details` in its API response `usage`
object, providing the actual cost of each API call. Currently,
`_create_usage_metadata()` only extracts token counts and drops these
cost fields.
This PR surfaces both `cost` and `cost_details` in `response_metadata`
for both non-streaming and streaming paths, allowing users to access
actual API costs directly from the response without manual estimation
from token counts.
**Example response from OpenRouter:**
```json
{
"usage": {
"prompt_tokens": 100,
"completion_tokens": 50,
"cost": 0.000075,
"cost_details": {
"upstream_inference_cost": 0.00007745,
"upstream_inference_prompt_cost": 0.00000895,
"upstream_inference_completions_cost": 0.0000685
}
}
}
```
**After this change:**
```python
result = chat.invoke("hello")
result.response_metadata["cost"] # 0.000075
result.response_metadata["cost_details"] # {...}
```
## Changes
- **`_create_chat_result`**: Surface `cost` and `cost_details` from
`token_usage` into `response_metadata` (non-streaming)
- **`_convert_chunk_to_message_chunk`**: Same for streaming
`AIMessageChunk`
- Added `PLR0912` to the affected `noqa` comments (the new conditional
branches push those functions' branch counts over the lint threshold)
- Added two unit tests: one verifying cost fields are present when
returned, one verifying they're absent when not in usage
## Issue
N/A — discovered while integrating OpenRouter in a production pipeline.
The cost data is already returned by the API but was being silently
dropped.
## Dependencies
None.
## Twitter handle
@hamza_kyamanywa
---------
Co-authored-by: Mason Daugherty <mason@langchain.dev>
This commit is contained in:
@@ -489,7 +489,12 @@ class ChatOpenRouter(BaseChatModel):
|
||||
if generation_info:
|
||||
generation_info["model_provider"] = "openrouter"
|
||||
message_chunk = message_chunk.model_copy(
|
||||
update={"response_metadata": generation_info}
|
||||
update={
|
||||
"response_metadata": {
|
||||
**message_chunk.response_metadata,
|
||||
**generation_info,
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
default_chunk_class = message_chunk.__class__
|
||||
@@ -558,7 +563,12 @@ class ChatOpenRouter(BaseChatModel):
|
||||
if generation_info:
|
||||
generation_info["model_provider"] = "openrouter"
|
||||
message_chunk = message_chunk.model_copy(
|
||||
update={"response_metadata": generation_info}
|
||||
update={
|
||||
"response_metadata": {
|
||||
**message_chunk.response_metadata,
|
||||
**generation_info,
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
default_chunk_class = message_chunk.__class__
|
||||
@@ -623,7 +633,7 @@ class ChatOpenRouter(BaseChatModel):
|
||||
message_dicts = [_convert_message_to_dict(m) for m in messages]
|
||||
return message_dicts, params
|
||||
|
||||
def _create_chat_result(self, response: Any) -> ChatResult: # noqa: C901
|
||||
def _create_chat_result(self, response: Any) -> ChatResult: # noqa: C901, PLR0912
|
||||
"""Create a `ChatResult` from an OpenRouter SDK response."""
|
||||
if not isinstance(response, dict):
|
||||
response = response.model_dump(by_alias=True)
|
||||
@@ -655,6 +665,13 @@ class ChatOpenRouter(BaseChatModel):
|
||||
message = _convert_dict_to_message(res["message"])
|
||||
if token_usage and isinstance(message, AIMessage):
|
||||
message.usage_metadata = _create_usage_metadata(token_usage)
|
||||
# Surface OpenRouter cost data in response_metadata
|
||||
if "cost" in token_usage:
|
||||
message.response_metadata["cost"] = token_usage["cost"]
|
||||
if "cost_details" in token_usage:
|
||||
message.response_metadata["cost_details"] = token_usage[
|
||||
"cost_details"
|
||||
]
|
||||
if isinstance(message, AIMessage):
|
||||
if system_fingerprint:
|
||||
message.response_metadata["system_fingerprint"] = system_fingerprint
|
||||
@@ -1160,7 +1177,7 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage: # noqa:
|
||||
return ChatMessage(content=_dict.get("content", ""), role=role)
|
||||
|
||||
|
||||
def _convert_chunk_to_message_chunk( # noqa: C901, PLR0911
|
||||
def _convert_chunk_to_message_chunk( # noqa: C901, PLR0911, PLR0912
|
||||
chunk: Mapping[str, Any], default_class: type[BaseMessageChunk]
|
||||
) -> BaseMessageChunk:
|
||||
"""Convert a streaming chunk dict to a LangChain message chunk.
|
||||
@@ -1205,14 +1222,20 @@ def _convert_chunk_to_message_chunk( # noqa: C901, PLR0911
|
||||
if reasoning_details := _dict.get("reasoning_details"):
|
||||
additional_kwargs["reasoning_details"] = reasoning_details
|
||||
usage_metadata = None
|
||||
response_metadata: dict[str, Any] = {"model_provider": "openrouter"}
|
||||
if usage := chunk.get("usage"):
|
||||
usage_metadata = _create_usage_metadata(usage)
|
||||
# Surface OpenRouter cost data in response_metadata
|
||||
if "cost" in usage:
|
||||
response_metadata["cost"] = usage["cost"]
|
||||
if "cost_details" in usage:
|
||||
response_metadata["cost_details"] = usage["cost_details"]
|
||||
return AIMessageChunk(
|
||||
content=content,
|
||||
additional_kwargs=additional_kwargs,
|
||||
tool_call_chunks=tool_call_chunks, # type: ignore[arg-type]
|
||||
usage_metadata=usage_metadata, # type: ignore[arg-type]
|
||||
response_metadata={"model_provider": "openrouter"},
|
||||
response_metadata=response_metadata,
|
||||
)
|
||||
if role == "system" or default_class == SystemMessageChunk:
|
||||
return SystemMessageChunk(content=content)
|
||||
|
||||
@@ -1261,6 +1261,143 @@ class TestCreateChatResult:
|
||||
assert isinstance(msg, AIMessage)
|
||||
assert msg.response_metadata["native_finish_reason"] == "end_turn"
|
||||
|
||||
def test_cost_in_response_metadata(self) -> None:
    """Verify that OpenRouter cost data is surfaced in ``response_metadata``."""
    # Expected cost breakdown as OpenRouter returns it in the usage object.
    expected_details = {
        "upstream_inference_cost": 7.745e-05,
        "upstream_inference_prompt_cost": 8.95e-06,
        "upstream_inference_completions_cost": 6.85e-05,
    }
    # Augment the baseline usage payload with the cost fields under test.
    usage_with_cost = dict(_SIMPLE_RESPONSE_DICT["usage"])
    usage_with_cost["cost"] = 7.5e-05
    usage_with_cost["cost_details"] = expected_details
    response: dict[str, Any] = {**_SIMPLE_RESPONSE_DICT, "usage": usage_with_cost}

    model = _make_model()
    message = model._create_chat_result(response).generations[0].message

    assert isinstance(message, AIMessage)
    assert message.response_metadata["cost"] == 7.5e-05
    assert message.response_metadata["cost_details"] == expected_details
|
||||
|
||||
def test_cost_absent_when_not_in_usage(self) -> None:
    """Verify that cost fields are omitted when the usage payload lacks them."""
    result = _make_model()._create_chat_result(_SIMPLE_RESPONSE_DICT)
    message = result.generations[0].message
    assert isinstance(message, AIMessage)
    # Neither key should have been injected by _create_chat_result.
    assert "cost" not in message.response_metadata
    assert "cost_details" not in message.response_metadata
|
||||
|
||||
def test_stream_cost_survives_final_chunk(self) -> None:
    """Verify that cost fields are preserved on the final streaming chunk.

    The final chunk carries both finish_reason metadata and usage/cost data.
    Regression test: generation_info must merge into response_metadata, not
    replace it, so cost fields set by _convert_chunk_to_message_chunk are
    not lost.
    """
    expected_details = {
        "upstream_inference_cost": 7.745e-05,
        "upstream_inference_prompt_cost": 8.95e-06,
        "upstream_inference_completions_cost": 6.85e-05,
    }
    # First chunk carries only content; the last one carries finish_reason
    # plus the usage block including cost data.
    opening_chunk: dict[str, Any] = {
        "choices": [
            {"delta": {"role": "assistant", "content": "Hi"}, "index": 0}
        ],
    }
    closing_chunk: dict[str, Any] = {
        "choices": [
            {"delta": {}, "finish_reason": "stop", "index": 0}
        ],
        "model": "openai/gpt-4o-mini",
        "id": "gen-cost-stream",
        "usage": {
            "prompt_tokens": 10,
            "completion_tokens": 5,
            "total_tokens": 15,
            "cost": 7.5e-05,
            "cost_details": expected_details,
        },
    }

    model = _make_model()
    model.client = MagicMock()
    model.client.chat.send.return_value = _MockSyncStream(
        [opening_chunk, closing_chunk]
    )

    stop_chunks = [
        c
        for c in model.stream("Hello")
        if c.response_metadata.get("finish_reason") == "stop"
    ]
    assert len(stop_chunks) == 1
    meta = stop_chunks[0].response_metadata
    assert meta["cost"] == 7.5e-05
    assert meta["cost_details"] == expected_details
    assert meta["finish_reason"] == "stop"
|
||||
|
||||
async def test_astream_cost_survives_final_chunk(self) -> None:
    """Verify that cost fields are preserved on the final async streaming chunk.

    Same regression coverage as the sync test above, for the _astream path.
    """
    expected_details = {
        "upstream_inference_cost": 7.745e-05,
        "upstream_inference_prompt_cost": 8.95e-06,
        "upstream_inference_completions_cost": 6.85e-05,
    }
    # First chunk carries only content; the last one carries finish_reason
    # plus the usage block including cost data.
    opening_chunk: dict[str, Any] = {
        "choices": [
            {"delta": {"role": "assistant", "content": "Hi"}, "index": 0}
        ],
    }
    closing_chunk: dict[str, Any] = {
        "choices": [
            {"delta": {}, "finish_reason": "stop", "index": 0}
        ],
        "model": "openai/gpt-4o-mini",
        "id": "gen-cost-astream",
        "usage": {
            "prompt_tokens": 10,
            "completion_tokens": 5,
            "total_tokens": 15,
            "cost": 7.5e-05,
            "cost_details": expected_details,
        },
    }

    model = _make_model()
    model.client = MagicMock()
    model.client.chat.send_async = AsyncMock(
        return_value=_MockAsyncStream([opening_chunk, closing_chunk])
    )

    collected = [c async for c in model.astream("Hello")]
    stop_chunks = [
        c
        for c in collected
        if c.response_metadata.get("finish_reason") == "stop"
    ]
    assert len(stop_chunks) == 1
    meta = stop_chunks[0].response_metadata
    assert meta["cost"] == 7.5e-05
    assert meta["cost_details"] == expected_details
    assert meta["finish_reason"] == "stop"
|
||||
|
||||
def test_missing_optional_metadata_excluded(self) -> None:
|
||||
"""Test that absent optional fields are not added to response_metadata."""
|
||||
model = _make_model()
|
||||
|
||||
Reference in New Issue
Block a user