diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py
index 9ac20f0df22..0d1c6739018 100644
--- a/libs/core/langchain_core/messages/utils.py
+++ b/libs/core/langchain_core/messages/utils.py
@@ -2320,7 +2320,7 @@ def count_tokens_approximately(
             and approx_at_last_ai > 0
         ):
             scale_factor = last_ai_total_tokens / approx_at_last_ai
-            token_count *= max(1.0, scale_factor)
+            token_count *= min(1.5, max(1.0, scale_factor))
 
     # round up once more time in case extra_tokens_per_message is a float
     return math.ceil(token_count)
diff --git a/libs/core/tests/unit_tests/messages/test_utils.py b/libs/core/tests/unit_tests/messages/test_utils.py
index 91060bb8dc9..ae528930301 100644
--- a/libs/core/tests/unit_tests/messages/test_utils.py
+++ b/libs/core/tests/unit_tests/messages/test_utils.py
@@ -1614,8 +1614,8 @@ def test_count_tokens_approximately_usage_metadata_scaling() -> None:
     unscaled = count_tokens_approximately(messages)
     scaled = count_tokens_approximately(messages, use_usage_metadata_scaling=True)
 
-    assert scaled == 200
-    assert unscaled < 100
+    ratio = scaled / unscaled
+    assert 1 <= ratio <= 1.5
 
     messages.extend([ToolMessage("text", tool_call_id="abc123")] * 3)
 
@@ -1630,7 +1630,7 @@ def test_count_tokens_approximately_usage_metadata_scaling() -> None:
 
     # And the scaled total should be the unscaled total multiplied by the same ratio.
     # ratio = 200 / unscaled (as of last AI message)
-    expected_scaled_extended = math.ceil(unscaled_extended * (200 / unscaled))
+    expected_scaled_extended = math.ceil(unscaled_extended * ratio)
 
     assert scaled_extended == expected_scaled_extended
 