Compare commits

...

5 Commits

Author           SHA1        Message      Date
Mason Daugherty  d53affa740  ss           2025-09-05 14:25:52 -04:00
Mason Daugherty  42413e5de7  ss           2025-09-05 14:22:51 -04:00
Mason Daugherty  9d868e7642  test         2025-09-05 14:07:21 -04:00
Mason Daugherty  2b7b7177c5  sync & lock  2025-09-05 14:07:18 -04:00
Mason Daugherty  4577466c60  docs         2025-09-05 14:07:11 -04:00
10 changed files with 205 additions and 40 deletions

View File

@@ -108,6 +108,12 @@ class UsageMetadata(TypedDict):
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -134,7 +140,12 @@ class UsageMetadata(TypedDict):
"""
input_tokens: int
"""Count of input (or prompt) tokens. Sum of all input token types."""
"""Count of input (or prompt) tokens. Sum of all input token types.
This includes both newly processed tokens and tokens read from cache, if
applicable. For billing purposes when prompt caching is used, check
``input_token_details`` for the breakdown between ``cache_read`` and newly
processed tokens.
"""
output_tokens: int
"""Count of output (or completion) tokens. Sum of all output token types."""
total_tokens: int
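
For context (not part of this diff), a minimal sketch of how the documented breakdown surfaces at runtime, assuming ``langchain-anthropic`` is installed and the provider reports cache details; the model name is taken from the docstring example in the usage utility below:

# Illustrative sketch only; assumes langchain-anthropic is installed and that
# the provider populates input_token_details when prompt caching is active.
from langchain_anthropic import ChatAnthropic

model = ChatAnthropic(model="claude-3-sonnet-20240229")
response = model.invoke([{"role": "user", "content": "Hello!"}])

usage = response.usage_metadata  # may be None if the provider returns no usage
if usage is not None:
    # input_tokens counts cached and non-cached input together.
    print(usage["input_tokens"])
    # input_token_details (when present) breaks out cache_read / cache_creation.
    print(usage.get("input_token_details", {}))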

View File

@@ -29,6 +29,7 @@ if TYPE_CHECKING:
stringify_dict,
stringify_value,
)
from langchain_core.utils.usage import get_billable_input_tokens
from langchain_core.utils.utils import (
build_extra_kwargs,
check_package_version,
@@ -52,6 +53,7 @@ __all__ = (
"convert_to_secret_str",
"formatter",
"from_env",
"get_billable_input_tokens",
"get_bolded_text",
"get_color_mapping",
"get_colored_text",
@@ -100,6 +102,7 @@ _dynamic_imports = {
"secret_from_env": "utils",
"xor_args": "utils",
"raise_for_status_with_text": "utils",
"get_billable_input_tokens": "usage",
}
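
A quick sanity check (a sketch, not part of the diff): with the ``__all__`` entry and the ``"get_billable_input_tokens": "usage"`` mapping above, the lazy top-level import should resolve to the same object as the direct submodule import.

# Sketch: both import paths should resolve to the same callable.
from langchain_core.utils import get_billable_input_tokens as lazy_fn
from langchain_core.utils.usage import get_billable_input_tokens as direct_fn

assert lazy_fn is direct_fn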

View File

@@ -1,6 +1,9 @@
"""Usage utilities."""
from typing import Callable
from typing import TYPE_CHECKING, Callable
if TYPE_CHECKING:
from langchain_core.messages.ai import UsageMetadata
def _dict_int_op(
@@ -37,3 +40,31 @@ def _dict_int_op(
)
raise ValueError(msg) # noqa: TRY004
return combined
def get_billable_input_tokens(usage_metadata: "UsageMetadata") -> int:
"""Calculate billable input tokens excluding cached tokens.
When using prompt caching (e.g., with Anthropic models), the ``input_tokens``
field on ``UsageMetadata`` represents the total tokens processed (cached +
non-cached), but you're only charged for non-cached tokens. This function calculates
the actual billable input tokens.
Example:
.. code-block:: python
from langchain_anthropic import ChatAnthropic
from langchain_core.utils.usage import get_billable_input_tokens
model = ChatAnthropic(model="claude-3-sonnet-20240229")
response = model.invoke([{"role": "user", "content": "Hello!"}])
# Calculate billable tokens
billable = get_billable_input_tokens(response.usage_metadata)
"""
total_input = usage_metadata["input_tokens"]
details = usage_metadata.get("input_token_details", {})
cache_read = details.get("cache_read", 0)
cache_creation = details.get("cache_creation", 0)
return total_input - cache_read - cache_creation
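
A worked check of the arithmetic above (a sketch using a hand-built dict mirroring the cached-response figures in the unit tests below, rather than a live model call):

from typing import cast

from langchain_core.messages.ai import UsageMetadata
from langchain_core.utils.usage import get_billable_input_tokens

usage = cast(
    "UsageMetadata",
    {
        "input_tokens": 151998,
        "output_tokens": 691,
        "total_tokens": 152689,
        "input_token_details": {"cache_read": 151995, "cache_creation": 0},
    },
)

# 151998 total input - 151995 cache_read - 0 cache_creation = 3 billable tokens
assert get_billable_input_tokens(usage) == 3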

View File

@@ -1322,6 +1322,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -2754,6 +2760,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python

View File

@@ -1725,6 +1725,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python

View File

@@ -3263,6 +3263,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -4758,6 +4764,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -6265,6 +6277,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -7628,6 +7646,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -9165,6 +9189,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -10573,6 +10603,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -12029,6 +12065,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -13486,6 +13528,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python

View File

@@ -28,6 +28,7 @@ EXPECTED_ALL = [
"from_env",
"secret_from_env",
"sanitize_for_postgres",
"get_billable_input_tokens",
]

View File

@@ -1,8 +1,10 @@
import operator
from typing import cast
import pytest
from langchain_core.utils.usage import _dict_int_op
from langchain_core.messages.ai import UsageMetadata
from langchain_core.utils.usage import _dict_int_op, get_billable_input_tokens
def test_dict_int_op_add() -> None:
@@ -43,3 +45,79 @@ def test_dict_int_op_invalid_types() -> None:
match="Only dict and int values are supported.",
):
_dict_int_op(left, right, operator.add)
def test_get_billable_input_tokens_basic() -> None:
"""Test basic billable token calculation."""
usage_metadata = {
"input_tokens": 1000,
"output_tokens": 500,
"total_tokens": 1500,
}
# Without input_token_details, should return full input_tokens
# Note: cast() is used to convert plain dict to UsageMetadata type for testing
# Done to avoid circular imports
result = get_billable_input_tokens(cast("UsageMetadata", usage_metadata))
assert result == 1000
# With cache usage
cache_usage_metadata = {
"input_tokens": 151998,
"output_tokens": 691,
"total_tokens": 152689,
"input_token_details": {
"cache_creation": 0,
"cache_read": 151995,
},
}
# Should subtract cached tokens from total
result = get_billable_input_tokens(cast("UsageMetadata", cache_usage_metadata))
assert result == 3 # 151998 - 0 - 151995
def test_get_billable_input_tokens_with_cache_creation() -> None:
"""Test billable token calculation with cache creation."""
usage_metadata = {
"input_tokens": 10000,
"output_tokens": 500,
"total_tokens": 10500,
"input_token_details": {
"cache_creation": 5000,
"cache_read": 2000,
},
}
# Should subtract both cache_creation and cache_read
result = get_billable_input_tokens(cast("UsageMetadata", usage_metadata))
assert result == 3000 # 10000 - 5000 - 2000
def test_get_billable_input_tokens_partial_details() -> None:
"""Test with only some cache details present."""
usage_metadata = {
"input_tokens": 5000,
"output_tokens": 300,
"total_tokens": 5300,
"input_token_details": {
"cache_read": 1000,
# cache_creation missing - should default to 0
},
}
result = get_billable_input_tokens(cast("UsageMetadata", usage_metadata))
assert result == 4000 # 5000 - 1000 - 0
def test_get_billable_input_tokens_empty_details() -> None:
"""Test with empty input_token_details."""
usage_metadata = {
"input_tokens": 2000,
"output_tokens": 400,
"total_tokens": 2400,
"input_token_details": {},
}
result = get_billable_input_tokens(cast("UsageMetadata", usage_metadata))
assert result == 2000 # No cache usage, return full amount

libs/core/uv.lock generated
View File

@@ -1134,7 +1134,7 @@ typing = [
[[package]]
name = "langchain-tests"
version = "0.3.20"
version = "0.3.21"
source = { directory = "../standard-tests" }
dependencies = [
{ name = "httpx" },
@@ -1179,18 +1179,14 @@ typing = [
[[package]]
name = "langchain-text-splitters"
version = "0.3.9"
version = "0.3.11"
source = { directory = "../text-splitters" }
dependencies = [
{ name = "langchain-core" },
{ name = "pip" },
]
[package.metadata]
requires-dist = [
{ name = "langchain-core", editable = "." },
{ name = "pip", specifier = ">=25.2" },
]
requires-dist = [{ name = "langchain-core", editable = "." }]
[package.metadata.requires-dev]
dev = [
@@ -1212,6 +1208,7 @@ test = [
{ name = "pytest-xdist", specifier = ">=3.6.1,<4.0.0" },
]
test-integration = [
{ name = "en-core-web-sm", url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" },
{ name = "nltk", specifier = ">=3.9.1,<4.0.0" },
{ name = "sentence-transformers", specifier = ">=3.0.1" },
{ name = "spacy", specifier = ">=3.8.7,<4.0.0" },
@@ -1997,15 +1994,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772, upload-time = "2023-11-25T06:56:14.81Z" },
]
[[package]]
name = "pip"
version = "25.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/20/16/650289cd3f43d5a2fadfd98c68bd1e1e7f2550a1a5326768cddfbcedb2c5/pip-25.2.tar.gz", hash = "sha256:578283f006390f85bb6282dffb876454593d637f5d1be494b5202ce4877e71f2", size = 1840021, upload-time = "2025-07-30T21:50:15.401Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/3f/945ef7ab14dc4f9d7f40288d2df998d1837ee0888ec3659c813487572faa/pip-25.2-py3-none-any.whl", hash = "sha256:6d67a2b4e7f14d8b31b8b52648866fa717f45a1eb70e83002f4331d07e953717", size = 1752557, upload-time = "2025-07-30T21:50:13.323Z" },
]
[[package]]
name = "platformdirs"
version = "4.3.8"

uv.lock generated
View File

@@ -1,5 +1,5 @@
version = 1
revision = 2
revision = 3
requires-python = ">=3.9"
resolution-markers = [
"python_full_version >= '3.13' and platform_python_implementation == 'PyPy'",
@@ -922,7 +922,6 @@ dependencies = [
{ name = "numpy", version = "2.2.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" },
{ name = "packaging" },
]
sdist = { url = "https://files.pythonhosted.org/packages/91/1b/6fe5dbe5be0240cfd82b52bd7c186655c578d935c0ce2e713c100e6f8cce/faiss_cpu-1.10.0.tar.gz", hash = "sha256:5bdca555f24bc036f4d67f8a5a4d6cc91b8d2126d4e78de496ca23ccd46e479d", size = 69159, upload-time = "2025-01-31T07:45:49.305Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8b/56/87eb506d8634f08fc7c63d1ca5631aeec7d6b9afbfabedf2cb7a2a804b13/faiss_cpu-1.10.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:6693474be296a7142ade1051ea18e7d85cedbfdee4b7eac9c52f83fed0467855", size = 7693034, upload-time = "2025-01-31T07:44:31.908Z" },
{ url = "https://files.pythonhosted.org/packages/51/46/f4d9de34ed1b06300b1a75b824d4857963216f5826de33f291af78088e39/faiss_cpu-1.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:70ebe60a560414dc8dd6cfe8fed105c8f002c0d11f765f5adfe8d63d42c0467f", size = 3234656, upload-time = "2025-01-31T07:44:34.418Z" },
@@ -2825,7 +2824,7 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "langchain-core", editable = "libs/core" },
{ name = "openai", specifier = ">=1.99.9,<2.0.0" },
{ name = "openai", specifier = ">=1.104.2,<2.0.0" },
{ name = "tiktoken", specifier = ">=0.7,<1" },
]
@@ -2880,18 +2879,14 @@ wheels = [
[[package]]
name = "langchain-text-splitters"
version = "0.3.10"
version = "0.3.11"
source = { editable = "libs/text-splitters" }
dependencies = [
{ name = "langchain-core" },
{ name = "pip" },
]
[package.metadata]
requires-dist = [
{ name = "langchain-core", editable = "libs/core" },
{ name = "pip", specifier = ">=25.2" },
]
requires-dist = [{ name = "langchain-core", editable = "libs/core" }]
[package.metadata.requires-dev]
dev = [
@@ -2913,6 +2908,7 @@ test = [
{ name = "pytest-xdist", specifier = ">=3.6.1,<4.0.0" },
]
test-integration = [
{ name = "en-core-web-sm", url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" },
{ name = "nltk", specifier = ">=3.9.1,<4.0.0" },
{ name = "sentence-transformers", specifier = ">=3.0.1" },
{ name = "spacy", specifier = ">=3.8.7,<4.0.0" },
@@ -3971,7 +3967,7 @@ wheels = [
[[package]]
name = "openai"
version = "1.99.9"
version = "1.106.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -3983,9 +3979,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/8a/d2/ef89c6f3f36b13b06e271d3cc984ddd2f62508a0972c1cbcc8485a6644ff/openai-1.99.9.tar.gz", hash = "sha256:f2082d155b1ad22e83247c3de3958eb4255b20ccf4a1de2e6681b6957b554e92", size = 506992, upload-time = "2025-08-12T02:31:10.054Z" }
sdist = { url = "https://files.pythonhosted.org/packages/79/b6/1aff7d6b8e9f0c3ac26bfbb57b9861a6711d5d60bd7dd5f7eebbf80509b7/openai-1.106.1.tar.gz", hash = "sha256:5f575967e3a05555825c43829cdcd50be6e49ab6a3e5262f0937a3f791f917f1", size = 561095, upload-time = "2025-09-04T18:17:15.303Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e8/fb/df274ca10698ee77b07bff952f302ea627cc12dac6b85289485dd77db6de/openai-1.99.9-py3-none-any.whl", hash = "sha256:9dbcdb425553bae1ac5d947147bebbd630d91bbfc7788394d4c4f3a35682ab3a", size = 786816, upload-time = "2025-08-12T02:31:08.34Z" },
{ url = "https://files.pythonhosted.org/packages/00/e1/47887212baa7bc0532880d33d5eafbdb46fcc4b53789b903282a74a85b5b/openai-1.106.1-py3-none-any.whl", hash = "sha256:bfdef37c949f80396c59f2c17e0eda35414979bc07ef3379596a93c9ed044f3a", size = 930768, upload-time = "2025-09-04T18:17:13.349Z" },
]
[[package]]
@@ -4438,15 +4434,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/41/67/936f9814bdd74b2dfd4822f1f7725ab5d8ff4103919a1664eb4874c58b2f/pillow-11.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:4637b88343166249fe8aa94e7c4a62a180c4b3898283bb5d3d2fd5fe10d8e4e0", size = 2626353, upload-time = "2025-01-02T08:13:52.725Z" },
]
[[package]]
name = "pip"
version = "25.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/20/16/650289cd3f43d5a2fadfd98c68bd1e1e7f2550a1a5326768cddfbcedb2c5/pip-25.2.tar.gz", hash = "sha256:578283f006390f85bb6282dffb876454593d637f5d1be494b5202ce4877e71f2", size = 1840021, upload-time = "2025-07-30T21:50:15.401Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/3f/945ef7ab14dc4f9d7f40288d2df998d1837ee0888ec3659c813487572faa/pip-25.2-py3-none-any.whl", hash = "sha256:6d67a2b4e7f14d8b31b8b52648866fa717f45a1eb70e83002f4331d07e953717", size = 1752557, upload-time = "2025-07-30T21:50:13.323Z" },
]
[[package]]
name = "platformdirs"
version = "4.3.6"