From 6e108c1cb47042b4ae303e04504337dda318f648 Mon Sep 17 00:00:00 2001
From: ccurme
Date: Thu, 7 Aug 2025 09:49:34 -0300
Subject: [PATCH] feat(core): zero-out token costs for cache hits (#32437)

---
Reviewer notes (free-form text after the "---" marker; not part of the
commit message):

  * chat_models.py: in the branch for entries that are "already a
    ChatGeneration or other expected type", any `gen` whose `.message` is an
    AIMessage gets `gen.message` replaced by a `model_copy` whose
    `usage_metadata` has "total_cost" set to 0. Existing usage keys are
    carried over by the dict spread; when `usage_metadata` is None or empty
    the resulting mapping is just {"total_cost": 0}.
  * test_cache.py: the new test invokes a GenericFakeChatModel backed by an
    InMemoryCache twice with the same prompt. Only the second response
    (presumably the one served from the cache) is checked for
    total_cost == 0; the first is only checked for having usage_metadata.
  * NOTE(review): indentation of the context lines below was reconstructed;
    confirm against the target tree before applying.

 .../language_models/chat_models.py            | 10 ++++++++++
 .../language_models/chat_models/test_cache.py | 20 +++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py
index c5e7b43cebf..7873e7aaf5d 100644
--- a/libs/core/langchain_core/language_models/chat_models.py
+++ b/libs/core/langchain_core/language_models/chat_models.py
@@ -666,6 +666,16 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
                 converted_generations.append(chat_gen)
             else:
                 # Already a ChatGeneration or other expected type
+                if hasattr(gen, "message") and isinstance(gen.message, AIMessage):
+                    # Zero out cost on cache hits; other usage_metadata keys are kept
+                    gen.message = gen.message.model_copy(
+                        update={
+                            "usage_metadata": {
+                                **(gen.message.usage_metadata or {}),
+                                "total_cost": 0,
+                            }
+                        }
+                    )
                 converted_generations.append(gen)
         return converted_generations
 
diff --git a/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py b/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py
index f4f033d3d7f..39e4babc782 100644
--- a/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py
+++ b/libs/core/tests/unit_tests/language_models/chat_models/test_cache.py
@@ -458,3 +458,23 @@ def test_cleanup_serialized() -> None:
         "name": "CustomChat",
         "type": "constructor",
     }
+
+
+def test_token_costs_are_zeroed_out() -> None:
+    # Token costs are zeroed out on cache hits (second identical invoke)
+    local_cache = InMemoryCache()
+    messages = [
+        AIMessage(
+            content="Hello, how are you?",
+            usage_metadata={"input_tokens": 5, "output_tokens": 10, "total_tokens": 15},
+        ),
+    ]
+    model = GenericFakeChatModel(messages=iter(messages), cache=local_cache)
+    first_response = model.invoke("Hello")
+    assert isinstance(first_response, AIMessage)
+    assert first_response.usage_metadata
+
+    second_response = model.invoke("Hello")
+    assert isinstance(second_response, AIMessage)
+    assert second_response.usage_metadata
+    assert second_response.usage_metadata["total_cost"] == 0  # type: ignore[typeddict-item]