feat(openrouter): surface cost and cost_details in response_metadata (#35461)

## Description

OpenRouter returns `cost` and `cost_details` in its API response `usage`
object, providing the actual cost of each API call. Currently,
`_create_usage_metadata()` only extracts token counts and drops these
cost fields.

This PR surfaces both `cost` and `cost_details` in `response_metadata`
for both non-streaming and streaming paths, allowing users to access
actual API costs directly from the response without manual estimation
from token counts.

**Example response from OpenRouter:**
```json
{
  "usage": {
    "prompt_tokens": 100,
    "completion_tokens": 50,
    "cost": 0.000075,
    "cost_details": {
      "upstream_inference_cost": 0.00007745,
      "upstream_inference_prompt_cost": 0.00000895,
      "upstream_inference_completions_cost": 0.0000685
    }
  }
}
```

**After this change:**
```python
result = chat.invoke("hello")
result.response_metadata["cost"]          # 0.000075
result.response_metadata["cost_details"]  # {...}
```

## Changes

- **`_create_chat_result`**: Surface `cost` and `cost_details` from
`token_usage` into `response_metadata` (non-streaming)
- **`_convert_chunk_to_message_chunk`**: Same for streaming
`AIMessageChunk`
- **`_stream` / `_astream`**: Merge `generation_info` into the chunk's
existing `response_metadata` instead of replacing it, so cost fields set
by `_convert_chunk_to_message_chunk` survive the final chunk's
`finish_reason` update
- Added `PLR0912` to `noqa` comments (new branches pushed count over
threshold)
- Added four unit tests: one verifying cost fields are present when
returned, one verifying they're absent when not in usage, and sync/async
streaming regression tests verifying cost fields are preserved on the
final chunk alongside `finish_reason`

## Issue

N/A — discovered while integrating OpenRouter in a production pipeline.
The cost data is already returned by the API but was being silently
dropped.

## Dependencies

None.

## Twitter handle

@hamza_kyamanywa

---------

Co-authored-by: Mason Daugherty <mason@langchain.dev>
This commit is contained in:
Hamza Kyamanywa
2026-03-02 08:47:19 +09:00
committed by GitHub
parent 21b64e56fe
commit f84b534248
2 changed files with 165 additions and 5 deletions

View File

@@ -489,7 +489,12 @@ class ChatOpenRouter(BaseChatModel):
if generation_info:
generation_info["model_provider"] = "openrouter"
message_chunk = message_chunk.model_copy(
update={"response_metadata": generation_info}
update={
"response_metadata": {
**message_chunk.response_metadata,
**generation_info,
}
}
)
default_chunk_class = message_chunk.__class__
@@ -558,7 +563,12 @@ class ChatOpenRouter(BaseChatModel):
if generation_info:
generation_info["model_provider"] = "openrouter"
message_chunk = message_chunk.model_copy(
update={"response_metadata": generation_info}
update={
"response_metadata": {
**message_chunk.response_metadata,
**generation_info,
}
}
)
default_chunk_class = message_chunk.__class__
@@ -623,7 +633,7 @@ class ChatOpenRouter(BaseChatModel):
message_dicts = [_convert_message_to_dict(m) for m in messages]
return message_dicts, params
def _create_chat_result(self, response: Any) -> ChatResult: # noqa: C901
def _create_chat_result(self, response: Any) -> ChatResult: # noqa: C901, PLR0912
"""Create a `ChatResult` from an OpenRouter SDK response."""
if not isinstance(response, dict):
response = response.model_dump(by_alias=True)
@@ -655,6 +665,13 @@ class ChatOpenRouter(BaseChatModel):
message = _convert_dict_to_message(res["message"])
if token_usage and isinstance(message, AIMessage):
message.usage_metadata = _create_usage_metadata(token_usage)
# Surface OpenRouter cost data in response_metadata
if "cost" in token_usage:
message.response_metadata["cost"] = token_usage["cost"]
if "cost_details" in token_usage:
message.response_metadata["cost_details"] = token_usage[
"cost_details"
]
if isinstance(message, AIMessage):
if system_fingerprint:
message.response_metadata["system_fingerprint"] = system_fingerprint
@@ -1160,7 +1177,7 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage: # noqa:
return ChatMessage(content=_dict.get("content", ""), role=role)
def _convert_chunk_to_message_chunk( # noqa: C901, PLR0911
def _convert_chunk_to_message_chunk( # noqa: C901, PLR0911, PLR0912
chunk: Mapping[str, Any], default_class: type[BaseMessageChunk]
) -> BaseMessageChunk:
"""Convert a streaming chunk dict to a LangChain message chunk.
@@ -1205,14 +1222,20 @@ def _convert_chunk_to_message_chunk( # noqa: C901, PLR0911
if reasoning_details := _dict.get("reasoning_details"):
additional_kwargs["reasoning_details"] = reasoning_details
usage_metadata = None
response_metadata: dict[str, Any] = {"model_provider": "openrouter"}
if usage := chunk.get("usage"):
usage_metadata = _create_usage_metadata(usage)
# Surface OpenRouter cost data in response_metadata
if "cost" in usage:
response_metadata["cost"] = usage["cost"]
if "cost_details" in usage:
response_metadata["cost_details"] = usage["cost_details"]
return AIMessageChunk(
content=content,
additional_kwargs=additional_kwargs,
tool_call_chunks=tool_call_chunks, # type: ignore[arg-type]
usage_metadata=usage_metadata, # type: ignore[arg-type]
response_metadata={"model_provider": "openrouter"},
response_metadata=response_metadata,
)
if role == "system" or default_class == SystemMessageChunk:
return SystemMessageChunk(content=content)

View File

@@ -1261,6 +1261,143 @@ class TestCreateChatResult:
assert isinstance(msg, AIMessage)
assert msg.response_metadata["native_finish_reason"] == "end_turn"
def test_cost_in_response_metadata(self) -> None:
"""Test that OpenRouter cost data is surfaced in response_metadata."""
model = _make_model()
response: dict[str, Any] = {
**_SIMPLE_RESPONSE_DICT,
"usage": {
**_SIMPLE_RESPONSE_DICT["usage"],
"cost": 7.5e-05,
"cost_details": {
"upstream_inference_cost": 7.745e-05,
"upstream_inference_prompt_cost": 8.95e-06,
"upstream_inference_completions_cost": 6.85e-05,
},
},
}
result = model._create_chat_result(response)
msg = result.generations[0].message
assert isinstance(msg, AIMessage)
assert msg.response_metadata["cost"] == 7.5e-05
assert msg.response_metadata["cost_details"] == {
"upstream_inference_cost": 7.745e-05,
"upstream_inference_prompt_cost": 8.95e-06,
"upstream_inference_completions_cost": 6.85e-05,
}
def test_cost_absent_when_not_in_usage(self) -> None:
"""Test that cost fields are not added when not present in usage."""
model = _make_model()
result = model._create_chat_result(_SIMPLE_RESPONSE_DICT)
msg = result.generations[0].message
assert isinstance(msg, AIMessage)
assert "cost" not in msg.response_metadata
assert "cost_details" not in msg.response_metadata
def test_stream_cost_survives_final_chunk(self) -> None:
"""Test that cost fields are preserved on the final streaming chunk.
The final chunk carries both finish_reason metadata and usage/cost data.
Regression test: generation_info must merge into response_metadata, not
replace it, so cost fields set by _convert_chunk_to_message_chunk are
not lost.
"""
model = _make_model()
model.client = MagicMock()
cost_details = {
"upstream_inference_cost": 7.745e-05,
"upstream_inference_prompt_cost": 8.95e-06,
"upstream_inference_completions_cost": 6.85e-05,
}
stream_chunks: list[dict[str, Any]] = [
{
"choices": [
{"delta": {"role": "assistant", "content": "Hi"}, "index": 0}
],
},
{
"choices": [
{
"delta": {},
"finish_reason": "stop",
"index": 0,
}
],
"model": "openai/gpt-4o-mini",
"id": "gen-cost-stream",
"usage": {
"prompt_tokens": 10,
"completion_tokens": 5,
"total_tokens": 15,
"cost": 7.5e-05,
"cost_details": cost_details,
},
},
]
model.client.chat.send.return_value = _MockSyncStream(stream_chunks)
chunks = list(model.stream("Hello"))
final = [
c for c in chunks if c.response_metadata.get("finish_reason") == "stop"
]
assert len(final) == 1
meta = final[0].response_metadata
assert meta["cost"] == 7.5e-05
assert meta["cost_details"] == cost_details
assert meta["finish_reason"] == "stop"
async def test_astream_cost_survives_final_chunk(self) -> None:
"""Test that cost fields are preserved on the final async streaming chunk.
Same regression coverage as the sync test above, for the _astream path.
"""
model = _make_model()
model.client = MagicMock()
cost_details = {
"upstream_inference_cost": 7.745e-05,
"upstream_inference_prompt_cost": 8.95e-06,
"upstream_inference_completions_cost": 6.85e-05,
}
stream_chunks: list[dict[str, Any]] = [
{
"choices": [
{"delta": {"role": "assistant", "content": "Hi"}, "index": 0}
],
},
{
"choices": [
{
"delta": {},
"finish_reason": "stop",
"index": 0,
}
],
"model": "openai/gpt-4o-mini",
"id": "gen-cost-astream",
"usage": {
"prompt_tokens": 10,
"completion_tokens": 5,
"total_tokens": 15,
"cost": 7.5e-05,
"cost_details": cost_details,
},
},
]
model.client.chat.send_async = AsyncMock(
return_value=_MockAsyncStream(stream_chunks)
)
chunks = [c async for c in model.astream("Hello")]
final = [
c for c in chunks if c.response_metadata.get("finish_reason") == "stop"
]
assert len(final) == 1
meta = final[0].response_metadata
assert meta["cost"] == 7.5e-05
assert meta["cost_details"] == cost_details
assert meta["finish_reason"] == "stop"
def test_missing_optional_metadata_excluded(self) -> None:
"""Test that absent optional fields are not added to response_metadata."""
model = _make_model()