Mirror of https://github.com/hwchase17/langchain.git (synced 2025-08-13 22:59:05 +00:00)
feat(core): zero-out token costs for cache hits (#32437)

Parent: bc4251b9e0
Commit: 6e108c1cb4
@@ -666,6 +666,16 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):

                converted_generations.append(chat_gen)
            else:
                # Already a ChatGeneration or other expected type
                if hasattr(gen, "message") and isinstance(gen.message, AIMessage):
                    # We zero out cost on cache hits
                    gen.message = gen.message.model_copy(
                        update={
                            "usage_metadata": {
                                **(gen.message.usage_metadata or {}),
                                "total_cost": 0,
                            }
                        }
                    )
                converted_generations.append(gen)
        return converted_generations
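A minimal standalone sketch of the copy-and-override pattern the hunk above relies on (an illustration, not part of the commit; it assumes a current langchain-core install, and note that "total_cost" is not a declared key of the UsageMetadata TypedDict, which is why the test below carries a type: ignore):

from langchain_core.messages import AIMessage

# A cached AIMessage carrying the token counts recorded when it was first generated.
cached = AIMessage(
    content="Hello, how are you?",
    usage_metadata={"input_tokens": 5, "output_tokens": 10, "total_tokens": 15},
)

# model_copy(update=...) returns a new message; the merge keeps the original token
# counts and adds "total_cost": 0, so downstream cost accounting treats the cache
# hit as free. The cached message itself is left untouched.
zeroed = cached.model_copy(
    update={
        "usage_metadata": {
            **(cached.usage_metadata or {}),
            "total_cost": 0,
        }
    }
)

assert zeroed.usage_metadata["total_cost"] == 0  # type: ignore[typeddict-item]
assert zeroed.usage_metadata["total_tokens"] == 15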
@@ -458,3 +458,23 @@ def test_cleanup_serialized() -> None:

        "name": "CustomChat",
        "type": "constructor",
    }


def test_token_costs_are_zeroed_out() -> None:
    # We zero-out token costs for cache hits
    local_cache = InMemoryCache()
    messages = [
        AIMessage(
            content="Hello, how are you?",
            usage_metadata={"input_tokens": 5, "output_tokens": 10, "total_tokens": 15},
        ),
    ]
    model = GenericFakeChatModel(messages=iter(messages), cache=local_cache)
    first_response = model.invoke("Hello")
    assert isinstance(first_response, AIMessage)
    assert first_response.usage_metadata

    second_response = model.invoke("Hello")
    assert isinstance(second_response, AIMessage)
    assert second_response.usage_metadata
    assert second_response.usage_metadata["total_cost"] == 0  # type: ignore[typeddict-item]