From f84b5342486526a2e8b31190eb9117961da62600 Mon Sep 17 00:00:00 2001 From: Hamza Kyamanywa Date: Mon, 2 Mar 2026 08:47:19 +0900 Subject: [PATCH] feat(openrouter): surface `cost` and `cost_details` in `response_metadata` (#35461) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Description OpenRouter returns `cost` and `cost_details` in its API response `usage` object, providing the actual cost of each API call. Currently, `_create_usage_metadata()` only extracts token counts and drops these cost fields. This PR surfaces both `cost` and `cost_details` in `response_metadata` for both non-streaming and streaming paths, allowing users to access actual API costs directly from the response without manual estimation from token counts. **Example response from OpenRouter:** ```json { "usage": { "prompt_tokens": 100, "completion_tokens": 50, "cost": 0.000075, "cost_details": { "upstream_inference_cost": 0.00007745, "upstream_inference_prompt_cost": 0.00000895, "upstream_inference_completions_cost": 0.0000685 } } } ``` **After this change:** ```python result = chat.invoke("hello") result.response_metadata["cost"] # 0.000075 result.response_metadata["cost_details"] # {...} ``` ## Changes - **`_create_chat_result`**: Surface `cost` and `cost_details` from `token_usage` into `response_metadata` (non-streaming) - **`_convert_chunk_to_message_chunk`**: Same for streaming `AIMessageChunk` - **`_stream` / `_astream`**: Merge `generation_info` into the chunk's existing `response_metadata` instead of replacing it, so cost fields set by `_convert_chunk_to_message_chunk` survive the final chunk's `finish_reason` metadata - Added `PLR0912` to `noqa` comments (new branches pushed count over threshold) - Added four unit tests: two verifying cost fields are surfaced when returned (and absent when not in usage), plus sync and async streaming regression tests verifying cost fields are preserved on the final chunk ## Issue N/A — discovered while integrating OpenRouter in a production pipeline. The cost data is already returned by the API but was being silently dropped. ## Dependencies None.
## Twitter handle @hamza_kyamanywa --------- Co-authored-by: Mason Daugherty --- .../langchain_openrouter/chat_models.py | 33 ++++- .../tests/unit_tests/test_chat_models.py | 137 ++++++++++++++++++ 2 files changed, 165 insertions(+), 5 deletions(-) diff --git a/libs/partners/openrouter/langchain_openrouter/chat_models.py b/libs/partners/openrouter/langchain_openrouter/chat_models.py index a31c4386940..833cc1a6183 100644 --- a/libs/partners/openrouter/langchain_openrouter/chat_models.py +++ b/libs/partners/openrouter/langchain_openrouter/chat_models.py @@ -489,7 +489,12 @@ class ChatOpenRouter(BaseChatModel): if generation_info: generation_info["model_provider"] = "openrouter" message_chunk = message_chunk.model_copy( - update={"response_metadata": generation_info} + update={ + "response_metadata": { + **message_chunk.response_metadata, + **generation_info, + } + } ) default_chunk_class = message_chunk.__class__ @@ -558,7 +563,12 @@ class ChatOpenRouter(BaseChatModel): if generation_info: generation_info["model_provider"] = "openrouter" message_chunk = message_chunk.model_copy( - update={"response_metadata": generation_info} + update={ + "response_metadata": { + **message_chunk.response_metadata, + **generation_info, + } + } ) default_chunk_class = message_chunk.__class__ @@ -623,7 +633,7 @@ class ChatOpenRouter(BaseChatModel): message_dicts = [_convert_message_to_dict(m) for m in messages] return message_dicts, params - def _create_chat_result(self, response: Any) -> ChatResult: # noqa: C901 + def _create_chat_result(self, response: Any) -> ChatResult: # noqa: C901, PLR0912 """Create a `ChatResult` from an OpenRouter SDK response.""" if not isinstance(response, dict): response = response.model_dump(by_alias=True) @@ -655,6 +665,13 @@ class ChatOpenRouter(BaseChatModel): message = _convert_dict_to_message(res["message"]) if token_usage and isinstance(message, AIMessage): message.usage_metadata = _create_usage_metadata(token_usage) + # Surface OpenRouter cost data in 
response_metadata + if "cost" in token_usage: + message.response_metadata["cost"] = token_usage["cost"] + if "cost_details" in token_usage: + message.response_metadata["cost_details"] = token_usage[ + "cost_details" + ] if isinstance(message, AIMessage): if system_fingerprint: message.response_metadata["system_fingerprint"] = system_fingerprint @@ -1160,7 +1177,7 @@ def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage: # noqa: return ChatMessage(content=_dict.get("content", ""), role=role) -def _convert_chunk_to_message_chunk( # noqa: C901, PLR0911 +def _convert_chunk_to_message_chunk( # noqa: C901, PLR0911, PLR0912 chunk: Mapping[str, Any], default_class: type[BaseMessageChunk] ) -> BaseMessageChunk: """Convert a streaming chunk dict to a LangChain message chunk. @@ -1205,14 +1222,20 @@ def _convert_chunk_to_message_chunk( # noqa: C901, PLR0911 if reasoning_details := _dict.get("reasoning_details"): additional_kwargs["reasoning_details"] = reasoning_details usage_metadata = None + response_metadata: dict[str, Any] = {"model_provider": "openrouter"} if usage := chunk.get("usage"): usage_metadata = _create_usage_metadata(usage) + # Surface OpenRouter cost data in response_metadata + if "cost" in usage: + response_metadata["cost"] = usage["cost"] + if "cost_details" in usage: + response_metadata["cost_details"] = usage["cost_details"] return AIMessageChunk( content=content, additional_kwargs=additional_kwargs, tool_call_chunks=tool_call_chunks, # type: ignore[arg-type] usage_metadata=usage_metadata, # type: ignore[arg-type] - response_metadata={"model_provider": "openrouter"}, + response_metadata=response_metadata, ) if role == "system" or default_class == SystemMessageChunk: return SystemMessageChunk(content=content) diff --git a/libs/partners/openrouter/tests/unit_tests/test_chat_models.py b/libs/partners/openrouter/tests/unit_tests/test_chat_models.py index 2e41c43c3e0..3d3573497f5 100644 --- 
a/libs/partners/openrouter/tests/unit_tests/test_chat_models.py +++ b/libs/partners/openrouter/tests/unit_tests/test_chat_models.py @@ -1261,6 +1261,143 @@ class TestCreateChatResult: assert isinstance(msg, AIMessage) assert msg.response_metadata["native_finish_reason"] == "end_turn" + def test_cost_in_response_metadata(self) -> None: + """Test that OpenRouter cost data is surfaced in response_metadata.""" + model = _make_model() + response: dict[str, Any] = { + **_SIMPLE_RESPONSE_DICT, + "usage": { + **_SIMPLE_RESPONSE_DICT["usage"], + "cost": 7.5e-05, + "cost_details": { + "upstream_inference_cost": 7.745e-05, + "upstream_inference_prompt_cost": 8.95e-06, + "upstream_inference_completions_cost": 6.85e-05, + }, + }, + } + result = model._create_chat_result(response) + msg = result.generations[0].message + assert isinstance(msg, AIMessage) + assert msg.response_metadata["cost"] == 7.5e-05 + assert msg.response_metadata["cost_details"] == { + "upstream_inference_cost": 7.745e-05, + "upstream_inference_prompt_cost": 8.95e-06, + "upstream_inference_completions_cost": 6.85e-05, + } + + def test_cost_absent_when_not_in_usage(self) -> None: + """Test that cost fields are not added when not present in usage.""" + model = _make_model() + result = model._create_chat_result(_SIMPLE_RESPONSE_DICT) + msg = result.generations[0].message + assert isinstance(msg, AIMessage) + assert "cost" not in msg.response_metadata + assert "cost_details" not in msg.response_metadata + + def test_stream_cost_survives_final_chunk(self) -> None: + """Test that cost fields are preserved on the final streaming chunk. + + The final chunk carries both finish_reason metadata and usage/cost data. + Regression test: generation_info must merge into response_metadata, not + replace it, so cost fields set by _convert_chunk_to_message_chunk are + not lost. 
+ """ + model = _make_model() + model.client = MagicMock() + cost_details = { + "upstream_inference_cost": 7.745e-05, + "upstream_inference_prompt_cost": 8.95e-06, + "upstream_inference_completions_cost": 6.85e-05, + } + stream_chunks: list[dict[str, Any]] = [ + { + "choices": [ + {"delta": {"role": "assistant", "content": "Hi"}, "index": 0} + ], + }, + { + "choices": [ + { + "delta": {}, + "finish_reason": "stop", + "index": 0, + } + ], + "model": "openai/gpt-4o-mini", + "id": "gen-cost-stream", + "usage": { + "prompt_tokens": 10, + "completion_tokens": 5, + "total_tokens": 15, + "cost": 7.5e-05, + "cost_details": cost_details, + }, + }, + ] + model.client.chat.send.return_value = _MockSyncStream(stream_chunks) + + chunks = list(model.stream("Hello")) + final = [ + c for c in chunks if c.response_metadata.get("finish_reason") == "stop" + ] + assert len(final) == 1 + meta = final[0].response_metadata + assert meta["cost"] == 7.5e-05 + assert meta["cost_details"] == cost_details + assert meta["finish_reason"] == "stop" + + async def test_astream_cost_survives_final_chunk(self) -> None: + """Test that cost fields are preserved on the final async streaming chunk. + + Same regression coverage as the sync test above, for the _astream path. 
+ """ + model = _make_model() + model.client = MagicMock() + cost_details = { + "upstream_inference_cost": 7.745e-05, + "upstream_inference_prompt_cost": 8.95e-06, + "upstream_inference_completions_cost": 6.85e-05, + } + stream_chunks: list[dict[str, Any]] = [ + { + "choices": [ + {"delta": {"role": "assistant", "content": "Hi"}, "index": 0} + ], + }, + { + "choices": [ + { + "delta": {}, + "finish_reason": "stop", + "index": 0, + } + ], + "model": "openai/gpt-4o-mini", + "id": "gen-cost-astream", + "usage": { + "prompt_tokens": 10, + "completion_tokens": 5, + "total_tokens": 15, + "cost": 7.5e-05, + "cost_details": cost_details, + }, + }, + ] + model.client.chat.send_async = AsyncMock( + return_value=_MockAsyncStream(stream_chunks) + ) + + chunks = [c async for c in model.astream("Hello")] + final = [ + c for c in chunks if c.response_metadata.get("finish_reason") == "stop" + ] + assert len(final) == 1 + meta = final[0].response_metadata + assert meta["cost"] == 7.5e-05 + assert meta["cost_details"] == cost_details + assert meta["finish_reason"] == "stop" + def test_missing_optional_metadata_excluded(self) -> None: """Test that absent optional fields are not added to response_metadata.""" model = _make_model()