fix(anthropic): Ignore general usage cache_creation fields if more specific fields are set (#35845)

Prevent double counting, since the sum of all `input_token_details` should never exceed `input_tokens`.

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
2026-06-09 10:17:00 +00:00 · 2026-03-13 17:21:06 -07:00
parent b1f2d9c0fb
commit 6d6d7191cf
2 changed files with 136 additions and 1 deletions
--- a/libs/partners/anthropic/langchain_anthropic/chat_models.py
+++ b/libs/partners/anthropic/langchain_anthropic/chat_models.py
@@ -2181,18 +2181,27 @@ def _create_usage_metadata(anthropic_usage: BaseModel) -> UsageMetadata:
    # Currently just copying over the 5m and 1h keys, but if more are added in the
    # future we'll need to expand this tuple
    cache_creation_keys = ("ephemeral_5m_input_tokens", "ephemeral_1h_input_tokens")
+    specific_cache_creation_tokens = 0
    if cache_creation:
        if isinstance(cache_creation, BaseModel):
            cache_creation = cache_creation.model_dump()
        for k in cache_creation_keys:
+            specific_cache_creation_tokens += cache_creation.get(k, 0)
            input_token_details[k] = cache_creation.get(k)
+        if not isinstance(specific_cache_creation_tokens, int):
+            specific_cache_creation_tokens = 0
+        if specific_cache_creation_tokens > 0:
+            # Remove generic key to avoid double counting cache creation tokens
+            input_token_details["cache_creation"] = 0

    # Calculate total input tokens: Anthropic's `input_tokens` excludes cached tokens,
    # so we need to add them back to get the true total input token count
    input_tokens = (
        (getattr(anthropic_usage, "input_tokens", 0) or 0)  # Base input tokens
        + (input_token_details["cache_read"] or 0)  # Tokens read from cache
-        + (input_token_details["cache_creation"] or 0)  # Tokens used to create cache
+        + (
+            specific_cache_creation_tokens or input_token_details["cache_creation"] or 0
+        )  # Tokens used to create cache
    )
    output_tokens = getattr(anthropic_usage, "output_tokens", 0) or 0