feat(core): zero-out token costs for cache hits (#32437)

This commit is contained in:
ccurme 2025-08-07 09:49:34 -03:00 committed by GitHub
parent bc4251b9e0
commit 6e108c1cb4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 30 additions and 0 deletions

View File

@@ -666,6 +666,16 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
converted_generations.append(chat_gen)
else:
# Already a ChatGeneration or other expected type
if hasattr(gen, "message") and isinstance(gen.message, AIMessage):
# We zero out cost on cache hits
gen.message = gen.message.model_copy(
update={
"usage_metadata": {
**(gen.message.usage_metadata or {}),
"total_cost": 0,
}
}
)
converted_generations.append(gen)
return converted_generations

View File

@@ -458,3 +458,23 @@ def test_cleanup_serialized() -> None:
"name": "CustomChat",
"type": "constructor",
}
def test_token_costs_are_zeroed_out() -> None:
    """Token costs are zeroed out on responses for cache hits.

    Invokes a cache-backed fake chat model twice with the same prompt and
    checks that the second response reports ``total_cost == 0`` in its
    usage metadata.
    """
    canned_reply = AIMessage(
        content="Hello, how are you?",
        usage_metadata={"input_tokens": 5, "output_tokens": 10, "total_tokens": 15},
    )
    model = GenericFakeChatModel(
        messages=iter([canned_reply]),
        cache=InMemoryCache(),
    )

    # First call: the model is given exactly one canned message to return.
    initial = model.invoke("Hello")
    assert isinstance(initial, AIMessage)
    assert initial.usage_metadata

    # Second call repeats the identical prompt against the same cache.
    repeated = model.invoke("Hello")
    assert isinstance(repeated, AIMessage)
    assert repeated.usage_metadata
    # "total_cost" is not a declared key of the usage-metadata TypedDict,
    # hence the type-checker suppression on the lookup below.
    assert repeated.usage_metadata["total_cost"] == 0  # type: ignore[typeddict-item]