feat(core): zero-out token costs for cache hits (#32437)

2025-08-13 22:59:05 +00:00 · 2025-08-07 09:49:34 -03:00 · 2025-08-07 09:49:34 -03:00 · 6e108c1cb4
commit 6e108c1cb4
parent bc4251b9e0
2 changed files with 30 additions and 0 deletions
--- a/libs/core/langchain_core/language_models/chat_models.py
+++ b/libs/core/langchain_core/language_models/chat_models.py
@ -666,6 +666,16 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                converted_generations.append(chat_gen)
            else:
                # Already a ChatGeneration or other expected type
+                if hasattr(gen, "message") and isinstance(gen.message, AIMessage):
+                    # We zero out cost on cache hits
+                    gen.message = gen.message.model_copy(
+                        update={
+                            "usage_metadata": {
+                                **(gen.message.usage_metadata or {}),
+                                "total_cost": 0,
+                            }
+                        }
+                    )
                converted_generations.append(gen)
        return converted_generations

--- a/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py
+++ b/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py
@ -458,3 +458,23 @@ def test_cleanup_serialized() -> None:
        "name": "CustomChat",
        "type": "constructor",
    }
+
+
+def test_token_costs_are_zeroed_out() -> None:
+    # We zero-out token costs for cache hits
+    local_cache = InMemoryCache()
+    messages = [
+        AIMessage(
+            content="Hello, how are you?",
+            usage_metadata={"input_tokens": 5, "output_tokens": 10, "total_tokens": 15},
+        ),
+    ]
+    model = GenericFakeChatModel(messages=iter(messages), cache=local_cache)
+    first_response = model.invoke("Hello")
+    assert isinstance(first_response, AIMessage)
+    assert first_response.usage_metadata
+
+    second_response = model.invoke("Hello")
+    assert isinstance(second_response, AIMessage)
+    assert second_response.usage_metadata
+    assert second_response.usage_metadata["total_cost"] == 0  # type: ignore[typeddict-item]