fix(core): apply cap when scaling approximate token counts (#35005)

ccurme
2026-02-03 21:20:48 -05:00
committed by GitHub
parent 643355fa2d
commit 5981ee142c
2 changed files with 4 additions and 4 deletions


@@ -2320,7 +2320,7 @@ def count_tokens_approximately(
             and approx_at_last_ai > 0
         ):
             scale_factor = last_ai_total_tokens / approx_at_last_ai
-            token_count *= max(1.0, scale_factor)
+            token_count *= min(1.5, max(1.0, scale_factor))
     # round up one more time in case extra_tokens_per_message is a float
     return math.ceil(token_count)
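
In isolation, the new expression clamps the usage-metadata correction factor to the interval [1.0, 1.5]: an under-counting heuristic gets scaled up, but never by more than 1.5x, and a reported total below the estimate never scales it down. A minimal sketch of the clamp (the helper name and sample numbers are illustrative, not from the source):

def _clamped_scale(last_ai_total_tokens: int, approx_at_last_ai: int) -> float:
    # Illustrative only: the capped scale factor this commit introduces.
    scale_factor = last_ai_total_tokens / approx_at_last_ai
    # Cap the correction at 1.5x; never scale below 1.0x.
    return min(1.5, max(1.0, scale_factor))

assert _clamped_scale(200, 50) == 1.5   # raw factor 4.0, capped at 1.5
assert _clamped_scale(120, 100) == 1.2  # raw factor 1.2, passes through
assert _clamped_scale(80, 100) == 1.0   # raw factor 0.8, floored at 1.0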


@@ -1614,8 +1614,8 @@ def test_count_tokens_approximately_usage_metadata_scaling() -> None:
     unscaled = count_tokens_approximately(messages)
     scaled = count_tokens_approximately(messages, use_usage_metadata_scaling=True)
-    assert scaled == 200
-    assert unscaled < 100
+    ratio = scaled / unscaled
+    assert 1 <= ratio <= 1.5
     messages.extend([ToolMessage("text", tool_call_id="abc123")] * 3)
@@ -1630,7 +1630,7 @@ def test_count_tokens_approximately_usage_metadata_scaling() -> None:
     # And the scaled total should be the unscaled total multiplied by the same ratio.
     # ratio = 200 / unscaled (as of last AI message)
-    expected_scaled_extended = math.ceil(unscaled_extended * (200 / unscaled))
+    expected_scaled_extended = math.ceil(unscaled_extended * ratio)
     assert scaled_extended == expected_scaled_extended
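
End to end, a hedged usage sketch of the capped scaling (assumes a langchain-core build exposing the use_usage_metadata_scaling flag exercised above; the conversation and usage totals are invented):

import math

from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.messages.utils import count_tokens_approximately

# Invented conversation: the final AI message carries real usage totals
# that the approximate counter can calibrate against.
messages = [
    HumanMessage("Summarize the quarterly report."),
    AIMessage(
        "Revenue grew 12% quarter over quarter...",
        usage_metadata={
            "input_tokens": 150,
            "output_tokens": 50,
            "total_tokens": 200,
        },
    ),
]

unscaled = count_tokens_approximately(messages)
scaled = count_tokens_approximately(messages, use_usage_metadata_scaling=True)

# With the cap, calibration can raise the heuristic estimate, but by at
# most 1.5x (modulo the final round-up).
assert unscaled <= scaled <= math.ceil(1.5 * unscaled)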