From f84b5342486526a2e8b31190eb9117961da62600 Mon Sep 17 00:00:00 2001 From: Hamza Kyamanywa Date: Mon, 2 Mar 2026 08:47:19 +0900 Subject: [PATCH] feat(openrouter): surface `cost` and `cost_details` in `response_metadata` (#35461) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Description OpenRouter returns `cost` and `cost_details` in its API response `usage` object, providing the actual cost of each API call. Currently, `_create_usage_metadata()` only extracts token counts and drops these cost fields. This PR surfaces both `cost` and `cost_details` in `response_metadata` for both non-streaming and streaming paths, allowing users to access actual API costs directly from the response without manual estimation from token counts. **Example response from OpenRouter:** ```json { "usage": { "prompt_tokens": 100, "completion_tokens": 50, "cost": 0.000075, "cost_details": { "upstream_inference_cost": 0.00007745, "upstream_inference_prompt_cost": 0.00000895, "upstream_inference_completions_cost": 0.0000685 } } } ``` **After this change:** ```python result = chat.invoke("hello") result.response_metadata["cost"] # 0.000075 result.response_metadata["cost_details"] # {...} ``` ## Changes - **`_create_chat_result`**: Surface `cost` and `cost_details` from `token_usage` into `response_metadata` (non-streaming) - **`_convert_chunk_to_message_chunk`**: Same for streaming `AIMessageChunk` - **`_stream` / `_astream`**: Merge `generation_info` into the chunk's existing `response_metadata` instead of replacing it, so cost fields set by `_convert_chunk_to_message_chunk` survive the final chunk's `finish_reason` metadata - Added `PLR0912` to `noqa` comments (new branches pushed count over threshold) - Added four unit tests: two verifying cost fields are surfaced when returned (and absent when not in usage), plus sync and async streaming regression tests verifying cost fields are preserved on the final chunk ## Issue N/A — discovered while integrating OpenRouter in a production pipeline. The cost data is already returned by the API but was being silently dropped. ## Dependencies None.
## Twitter handle @hamza_kyamanywa --------- Co-authored-by: Mason Daugherty --- .../langchain_openrouter/chat_models.py | 33 ++++- .../tests/unit_tests/test_chat_models.py | 137 ++++++++++++++++++ 2 files changed, 165 insertions(+), 5 deletions(-) diff --git a/libs/partners/openrouter/langchain_openrouter/chat_models.py b/libs/partners/openrouter/langchain_openrouter/chat_models.py index a31c4386940..833cc1a6183 100644 --- a/libs/partners/openrouter/langchain_openrouter/chat_models.py +++ b/libs/partners/openrouter/langchain_openrouter/chat_models.py @@ -489,7 +489,12 @@ class ChatOpenRouter(BaseChatModel): if generation_info: generation_info["model_provider"] = "openrouter" message_chunk = message_chunk.model_copy( - update={"response_metadata": generation_info} + update={ + "response_metadata": { + **message_chunk.response_metadata, + **generation_info, + } + } ) default_chunk_class = message_chunk.__class__ @@ -558,7 +563,12 @@ class ChatOpenRouter(BaseChatModel): if generation_info: generation_info["model_provider"] = "openrouter" message_chunk = message_chunk.model_copy( - update={"response_metadata": generation_info} + update={ + "response_metadata": { + **message_chunk.response_metadata, + **generation_info, + } + } ) default_chunk_class = message_chunk.__class__ @@ -623,7 +633,7 @@ class ChatOpenRouter(BaseChatModel): message_dicts = [_convert_message_to_dict(m) for m in messages] return message_dicts, params - def _create_chat_result(self, response: Any) -> ChatResult: # noqa: C901 + def _create_chat_result(self, response: Any) -> ChatResult: # noqa: C901, PLR0912 """Create a `ChatResult` from an OpenRouter SDK response.""" if not isinstance(response, dict): response = response.model_dump(by_alias=True) @@ -655,6 +665,13 @@ class ChatOpenRouter(BaseChatModel): message = _convert_dict_to_message(res["message"]) if token_usage and isinstance(message, AIMessage): message.usage_metadata = _create_usage_metadata(token_usage) + # Surface OpenRouter cost data in 
response_metadata + if "cost" in token_usage: + message.response_metadata["cost"] = token_usage["cost"] + if "cost_details" in token_usage: + message.response_metadata["cost_details"] = token_usage[ + "cost_details" + ] if isinstance(message, AIMessage): if system_fingerprint: message.response_metadata["system_fingerprint"] = system_fingerprint @@ -1160,7 +1177,7 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage: # noqa: return ChatMessage(content=_dict.get("content", ""), role=role) -def _convert_chunk_to_message_chunk( # noqa: C901, PLR0911 +def _convert_chunk_to_message_chunk( # noqa: C901, PLR0911, PLR0912 chunk: Mapping[str, Any], default_class: type[BaseMessageChunk] ) -> BaseMessageChunk: """Convert a streaming chunk dict to a LangChain message chunk. @@ -1205,14 +1222,20 @@ def _convert_chunk_to_message_chunk( # noqa: C901, PLR0911 if reasoning_details := _dict.get("reasoning_details"): additional_kwargs["reasoning_details"] = reasoning_details usage_metadata = None + response_metadata: dict[str, Any] = {"model_provider": "openrouter"} if usage := chunk.get("usage"): usage_metadata = _create_usage_metadata(usage) + # Surface OpenRouter cost data in response_metadata + if "cost" in usage: + response_metadata["cost"] = usage["cost"] + if "cost_details" in usage: + response_metadata["cost_details"] = usage["cost_details"] return AIMessageChunk( content=content, additional_kwargs=additional_kwargs, tool_call_chunks=tool_call_chunks, # type: ignore[arg-type] usage_metadata=usage_metadata, # type: ignore[arg-type] - response_metadata={"model_provider": "openrouter"}, + response_metadata=response_metadata, ) if role == "system" or default_class == SystemMessageChunk: return SystemMessageChunk(content=content) diff --git a/libs/partners/openrouter/tests/unit_tests/test_chat_models.py b/libs/partners/openrouter/tests/unit_tests/test_chat_models.py index 2e41c43c3e0..3d3573497f5 100644 --- 
a/libs/partners/openrouter/tests/unit_tests/test_chat_models.py +++ b/libs/partners/openrouter/tests/unit_tests/test_chat_models.py @@ -1261,6 +1261,143 @@ class TestCreateChatResult: assert isinstance(msg, AIMessage) assert msg.response_metadata["native_finish_reason"] == "end_turn" + def test_cost_in_response_metadata(self) -> None: + """Test that OpenRouter cost data is surfaced in response_metadata.""" + model = _make_model() + response: dict[str, Any] = { + **_SIMPLE_RESPONSE_DICT, + "usage": { + **_SIMPLE_RESPONSE_DICT["usage"], + "cost": 7.5e-05, + "cost_details": { + "upstream_inference_cost": 7.745e-05, + "upstream_inference_prompt_cost": 8.95e-06, + "upstream_inference_completions_cost": 6.85e-05, + }, + }, + } + result = model._create_chat_result(response) + msg = result.generations[0].message + assert isinstance(msg, AIMessage) + assert msg.response_metadata["cost"] == 7.5e-05 + assert msg.response_metadata["cost_details"] == { + "upstream_inference_cost": 7.745e-05, + "upstream_inference_prompt_cost": 8.95e-06, + "upstream_inference_completions_cost": 6.85e-05, + } + + def test_cost_absent_when_not_in_usage(self) -> None: + """Test that cost fields are not added when not present in usage.""" + model = _make_model() + result = model._create_chat_result(_SIMPLE_RESPONSE_DICT) + msg = result.generations[0].message + assert isinstance(msg, AIMessage) + assert "cost" not in msg.response_metadata + assert "cost_details" not in msg.response_metadata + + def test_stream_cost_survives_final_chunk(self) -> None: + """Test that cost fields are preserved on the final streaming chunk. + + The final chunk carries both finish_reason metadata and usage/cost data. + Regression test: generation_info must merge into response_metadata, not + replace it, so cost fields set by _convert_chunk_to_message_chunk are + not lost. 
+ """ + model = _make_model() + model.client = MagicMock() + cost_details = { + "upstream_inference_cost": 7.745e-05, + "upstream_inference_prompt_cost": 8.95e-06, + "upstream_inference_completions_cost": 6.85e-05, + } + stream_chunks: list[dict[str, Any]] = [ + { + "choices": [ + {"delta": {"role": "assistant", "content": "Hi"}, "index": 0} + ], + }, + { + "choices": [ + { + "delta": {}, + "finish_reason": "stop", + "index": 0, + } + ], + "model": "openai/gpt-4o-mini", + "id": "gen-cost-stream", + "usage": { + "prompt_tokens": 10, + "completion_tokens": 5, + "total_tokens": 15, + "cost": 7.5e-05, + "cost_details": cost_details, + }, + }, + ] + model.client.chat.send.return_value = _MockSyncStream(stream_chunks) + + chunks = list(model.stream("Hello")) + final = [ + c for c in chunks if c.response_metadata.get("finish_reason") == "stop" + ] + assert len(final) == 1 + meta = final[0].response_metadata + assert meta["cost"] == 7.5e-05 + assert meta["cost_details"] == cost_details + assert meta["finish_reason"] == "stop" + + async def test_astream_cost_survives_final_chunk(self) -> None: + """Test that cost fields are preserved on the final async streaming chunk. + + Same regression coverage as the sync test above, for the _astream path. 
+ """ + model = _make_model() + model.client = MagicMock() + cost_details = { + "upstream_inference_cost": 7.745e-05, + "upstream_inference_prompt_cost": 8.95e-06, + "upstream_inference_completions_cost": 6.85e-05, + } + stream_chunks: list[dict[str, Any]] = [ + { + "choices": [ + {"delta": {"role": "assistant", "content": "Hi"}, "index": 0} + ], + }, + { + "choices": [ + { + "delta": {}, + "finish_reason": "stop", + "index": 0, + } + ], + "model": "openai/gpt-4o-mini", + "id": "gen-cost-astream", + "usage": { + "prompt_tokens": 10, + "completion_tokens": 5, + "total_tokens": 15, + "cost": 7.5e-05, + "cost_details": cost_details, + }, + }, + ] + model.client.chat.send_async = AsyncMock( + return_value=_MockAsyncStream(stream_chunks) + ) + + chunks = [c async for c in model.astream("Hello")] + final = [ + c for c in chunks if c.response_metadata.get("finish_reason") == "stop" + ] + assert len(final) == 1 + meta = final[0].response_metadata + assert meta["cost"] == 7.5e-05 + assert meta["cost_details"] == cost_details + assert meta["finish_reason"] == "stop" + def test_missing_optional_metadata_excluded(self) -> None: """Test that absent optional fields are not added to response_metadata.""" model = _make_model()