Compare commits

...

5 Commits

Author           SHA1        Message      Date
Mason Daugherty  d53affa740  ss           2025-09-05 14:25:52 -04:00
Mason Daugherty  42413e5de7  ss           2025-09-05 14:22:51 -04:00
Mason Daugherty  9d868e7642  test         2025-09-05 14:07:21 -04:00
Mason Daugherty  2b7b7177c5  sync & lock  2025-09-05 14:07:18 -04:00
Mason Daugherty  4577466c60  docs         2025-09-05 14:07:11 -04:00
10 changed files with 205 additions and 40 deletions

View File

@@ -108,6 +108,12 @@ class UsageMetadata(TypedDict):
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -134,7 +140,12 @@ class UsageMetadata(TypedDict):
"""
input_tokens: int
"""Count of input (or prompt) tokens. Sum of all input token types."""
"""Count of input (or prompt) tokens. Sum of all input token types.
This includes both newly processed tokens and tokens read from cache, if
applicable. For billing purposes when prompt caching is used, check
``input_token_details`` for the breakdown between ``cache_read`` and newly
processed tokens.
"""
output_tokens: int
"""Count of output (or completion) tokens. Sum of all output token types."""
total_tokens: int
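
For context (not part of this diff), a minimal sketch of how the documented breakdown surfaces at runtime, assuming ``langchain-anthropic`` is installed and the provider reports cache details; the model name is taken from the docstring example in the usage utility below:

# Illustrative sketch only; assumes langchain-anthropic is installed and that
# the provider populates input_token_details when prompt caching is active.
from langchain_anthropic import ChatAnthropic

model = ChatAnthropic(model="claude-3-sonnet-20240229")
response = model.invoke([{"role": "user", "content": "Hello!"}])

usage = response.usage_metadata  # may be None if the provider returns no usage
if usage is not None:
    # input_tokens counts cached and non-cached input together.
    print(usage["input_tokens"])
    # input_token_details (when present) breaks out cache_read / cache_creation.
    print(usage.get("input_token_details", {}))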

View File

@@ -29,6 +29,7 @@ if TYPE_CHECKING:
stringify_dict,
stringify_value,
)
from langchain_core.utils.usage import get_billable_input_tokens
from langchain_core.utils.utils import (
build_extra_kwargs,
check_package_version,
@@ -52,6 +53,7 @@ __all__ = (
"convert_to_secret_str",
"formatter",
"from_env",
"get_billable_input_tokens",
"get_bolded_text",
"get_color_mapping",
"get_colored_text",
@@ -100,6 +102,7 @@ _dynamic_imports = {
"secret_from_env": "utils",
"xor_args": "utils",
"raise_for_status_with_text": "utils",
"get_billable_input_tokens": "usage",
}
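
A quick sanity check (a sketch, not part of the diff): with the ``__all__`` entry and the ``"get_billable_input_tokens": "usage"`` mapping above, the lazy top-level import should resolve to the same object as the direct submodule import.

# Sketch: both import paths should resolve to the same callable.
from langchain_core.utils import get_billable_input_tokens as lazy_fn
from langchain_core.utils.usage import get_billable_input_tokens as direct_fn

assert lazy_fn is direct_fn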

View File

@@ -1,6 +1,9 @@
"""Usage utilities."""
from typing import Callable
from typing import TYPE_CHECKING, Callable
if TYPE_CHECKING:
from langchain_core.messages.ai import UsageMetadata
def _dict_int_op(
@@ -37,3 +40,31 @@ def _dict_int_op(
)
raise ValueError(msg) # noqa: TRY004
return combined
def get_billable_input_tokens(usage_metadata: "UsageMetadata") -> int:
"""Calculate billable input tokens excluding cached tokens.
When using prompt caching (e.g., with Anthropic models), the ``input_tokens``
field on ``UsageMetadata`` represents the total tokens processed (cached +
non-cached), but you're only charged for non-cached tokens. This function calculates
the actual billable input tokens.
Example:
.. code-block:: python
from langchain_anthropic import ChatAnthropic
from langchain_core.utils.usage import get_billable_input_tokens
model = ChatAnthropic(model="claude-3-sonnet-20240229")
response = model.invoke([{"role": "user", "content": "Hello!"}])
# Calculate billable tokens
billable = get_billable_input_tokens(response.usage_metadata)
"""
total_input = usage_metadata["input_tokens"]
details = usage_metadata.get("input_token_details", {})
cache_read = details.get("cache_read", 0)
cache_creation = details.get("cache_creation", 0)
return total_input - cache_read - cache_creation
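
A worked check of the arithmetic above (a sketch using a hand-built dict mirroring the cached-response figures in the unit tests below, rather than a live model call):

from typing import cast

from langchain_core.messages.ai import UsageMetadata
from langchain_core.utils.usage import get_billable_input_tokens

usage = cast(
    "UsageMetadata",
    {
        "input_tokens": 151998,
        "output_tokens": 691,
        "total_tokens": 152689,
        "input_token_details": {"cache_read": 151995, "cache_creation": 0},
    },
)

# 151998 total input - 151995 cache_read - 0 cache_creation = 3 billable tokens
assert get_billable_input_tokens(usage) == 3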

View File

@@ -1322,6 +1322,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -2754,6 +2760,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python

View File

@@ -1725,6 +1725,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python

View File

@@ -3263,6 +3263,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -4758,6 +4764,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -6265,6 +6277,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -7628,6 +7646,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -9165,6 +9189,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -10573,6 +10603,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -12029,6 +12065,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python
@@ -13486,6 +13528,12 @@
This is a standard representation of token usage that is consistent across models.
.. note::
When using prompt caching (e.g., with Anthropic models), ``input_tokens``
represents the total tokens processed (cached + non-cached), not just the
tokens you're charged for. Use ``input_token_details`` to get the breakdown
of cached vs non-cached tokens for billing calculations.
Example:
.. code-block:: python

View File

@@ -28,6 +28,7 @@ EXPECTED_ALL = [
"from_env",
"secret_from_env",
"sanitize_for_postgres",
"get_billable_input_tokens",
]

View File

@@ -1,8 +1,10 @@
import operator
from typing import cast
import pytest
from langchain_core.utils.usage import _dict_int_op
from langchain_core.messages.ai import UsageMetadata
from langchain_core.utils.usage import _dict_int_op, get_billable_input_tokens
def test_dict_int_op_add() -> None:
@@ -43,3 +45,79 @@ def test_dict_int_op_invalid_types() -> None:
match="Only dict and int values are supported.",
):
_dict_int_op(left, right, operator.add)
def test_get_billable_input_tokens_basic() -> None:
"""Test basic billable token calculation."""
usage_metadata = {
"input_tokens": 1000,
"output_tokens": 500,
"total_tokens": 1500,
}
# Without input_token_details, should return full input_tokens
# Note: cast() is used to convert plain dict to UsageMetadata type for testing
# Done to avoid circular imports
result = get_billable_input_tokens(cast("UsageMetadata", usage_metadata))
assert result == 1000
# With cache usage
cache_usage_metadata = {
"input_tokens": 151998,
"output_tokens": 691,
"total_tokens": 152689,
"input_token_details": {
"cache_creation": 0,
"cache_read": 151995,
},
}
# Should subtract cached tokens from total
result = get_billable_input_tokens(cast("UsageMetadata", cache_usage_metadata))
assert result == 3 # 151998 - 0 - 151995
def test_get_billable_input_tokens_with_cache_creation() -> None:
"""Test billable token calculation with cache creation."""
usage_metadata = {
"input_tokens": 10000,
"output_tokens": 500,
"total_tokens": 10500,
"input_token_details": {
"cache_creation": 5000,
"cache_read": 2000,
},
}
# Should subtract both cache_creation and cache_read
result = get_billable_input_tokens(cast("UsageMetadata", usage_metadata))
assert result == 3000 # 10000 - 5000 - 2000
def test_get_billable_input_tokens_partial_details() -> None:
"""Test with only some cache details present."""
usage_metadata = {
"input_tokens": 5000,
"output_tokens": 300,
"total_tokens": 5300,
"input_token_details": {
"cache_read": 1000,
# cache_creation missing - should default to 0
},
}
result = get_billable_input_tokens(cast("UsageMetadata", usage_metadata))
assert result == 4000 # 5000 - 1000 - 0
def test_get_billable_input_tokens_empty_details() -> None:
"""Test with empty input_token_details."""
usage_metadata = {
"input_tokens": 2000,
"output_tokens": 400,
"total_tokens": 2400,
"input_token_details": {},
}
result = get_billable_input_tokens(cast("UsageMetadata", usage_metadata))
assert result == 2000 # No cache usage, return full amount

libs/core/uv.lock generated
View File

@@ -1134,7 +1134,7 @@ typing = [
[[package]]
name = "langchain-tests"
version = "0.3.20"
version = "0.3.21"
source = { directory = "../standard-tests" }
dependencies = [
{ name = "httpx" },
@@ -1179,18 +1179,14 @@ typing = [
[[package]]
name = "langchain-text-splitters"
version = "0.3.9"
version = "0.3.11"
source = { directory = "../text-splitters" }
dependencies = [
{ name = "langchain-core" },
{ name = "pip" },
]
[package.metadata]
requires-dist = [
{ name = "langchain-core", editable = "." },
{ name = "pip", specifier = ">=25.2" },
]
requires-dist = [{ name = "langchain-core", editable = "." }]
[package.metadata.requires-dev]
dev = [
@@ -1212,6 +1208,7 @@ test = [
{ name = "pytest-xdist", specifier = ">=3.6.1,<4.0.0" },
]
test-integration = [
{ name = "en-core-web-sm", url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" },
{ name = "nltk", specifier = ">=3.9.1,<4.0.0" },
{ name = "sentence-transformers", specifier = ">=3.0.1" },
{ name = "spacy", specifier = ">=3.8.7,<4.0.0" },
@@ -1997,15 +1994,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772, upload-time = "2023-11-25T06:56:14.81Z" },
]
[[package]]
name = "pip"
version = "25.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/20/16/650289cd3f43d5a2fadfd98c68bd1e1e7f2550a1a5326768cddfbcedb2c5/pip-25.2.tar.gz", hash = "sha256:578283f006390f85bb6282dffb876454593d637f5d1be494b5202ce4877e71f2", size = 1840021, upload-time = "2025-07-30T21:50:15.401Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/3f/945ef7ab14dc4f9d7f40288d2df998d1837ee0888ec3659c813487572faa/pip-25.2-py3-none-any.whl", hash = "sha256:6d67a2b4e7f14d8b31b8b52648866fa717f45a1eb70e83002f4331d07e953717", size = 1752557, upload-time = "2025-07-30T21:50:13.323Z" },
]
[[package]]
name = "platformdirs"
version = "4.3.8"

uv.lock generated
View File

@@ -1,5 +1,5 @@
version = 1
revision = 2
revision = 3
requires-python = ">=3.9"
resolution-markers = [
"python_full_version >= '3.13' and platform_python_implementation == 'PyPy'",
@@ -922,7 +922,6 @@ dependencies = [
{ name = "numpy", version = "2.2.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" },
{ name = "packaging" },
]
sdist = { url = "https://files.pythonhosted.org/packages/91/1b/6fe5dbe5be0240cfd82b52bd7c186655c578d935c0ce2e713c100e6f8cce/faiss_cpu-1.10.0.tar.gz", hash = "sha256:5bdca555f24bc036f4d67f8a5a4d6cc91b8d2126d4e78de496ca23ccd46e479d", size = 69159, upload-time = "2025-01-31T07:45:49.305Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8b/56/87eb506d8634f08fc7c63d1ca5631aeec7d6b9afbfabedf2cb7a2a804b13/faiss_cpu-1.10.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:6693474be296a7142ade1051ea18e7d85cedbfdee4b7eac9c52f83fed0467855", size = 7693034, upload-time = "2025-01-31T07:44:31.908Z" },
{ url = "https://files.pythonhosted.org/packages/51/46/f4d9de34ed1b06300b1a75b824d4857963216f5826de33f291af78088e39/faiss_cpu-1.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:70ebe60a560414dc8dd6cfe8fed105c8f002c0d11f765f5adfe8d63d42c0467f", size = 3234656, upload-time = "2025-01-31T07:44:34.418Z" },
@@ -2825,7 +2824,7 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "langchain-core", editable = "libs/core" },
{ name = "openai", specifier = ">=1.99.9,<2.0.0" },
{ name = "openai", specifier = ">=1.104.2,<2.0.0" },
{ name = "tiktoken", specifier = ">=0.7,<1" },
]
@@ -2880,18 +2879,14 @@ wheels = [
[[package]]
name = "langchain-text-splitters"
version = "0.3.10"
version = "0.3.11"
source = { editable = "libs/text-splitters" }
dependencies = [
{ name = "langchain-core" },
{ name = "pip" },
]
[package.metadata]
requires-dist = [
{ name = "langchain-core", editable = "libs/core" },
{ name = "pip", specifier = ">=25.2" },
]
requires-dist = [{ name = "langchain-core", editable = "libs/core" }]
[package.metadata.requires-dev]
dev = [
@@ -2913,6 +2908,7 @@ test = [
{ name = "pytest-xdist", specifier = ">=3.6.1,<4.0.0" },
]
test-integration = [
{ name = "en-core-web-sm", url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" },
{ name = "nltk", specifier = ">=3.9.1,<4.0.0" },
{ name = "sentence-transformers", specifier = ">=3.0.1" },
{ name = "spacy", specifier = ">=3.8.7,<4.0.0" },
@@ -3971,7 +3967,7 @@ wheels = [
[[package]]
name = "openai"
version = "1.99.9"
version = "1.106.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -3983,9 +3979,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/8a/d2/ef89c6f3f36b13b06e271d3cc984ddd2f62508a0972c1cbcc8485a6644ff/openai-1.99.9.tar.gz", hash = "sha256:f2082d155b1ad22e83247c3de3958eb4255b20ccf4a1de2e6681b6957b554e92", size = 506992, upload-time = "2025-08-12T02:31:10.054Z" }
sdist = { url = "https://files.pythonhosted.org/packages/79/b6/1aff7d6b8e9f0c3ac26bfbb57b9861a6711d5d60bd7dd5f7eebbf80509b7/openai-1.106.1.tar.gz", hash = "sha256:5f575967e3a05555825c43829cdcd50be6e49ab6a3e5262f0937a3f791f917f1", size = 561095, upload-time = "2025-09-04T18:17:15.303Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e8/fb/df274ca10698ee77b07bff952f302ea627cc12dac6b85289485dd77db6de/openai-1.99.9-py3-none-any.whl", hash = "sha256:9dbcdb425553bae1ac5d947147bebbd630d91bbfc7788394d4c4f3a35682ab3a", size = 786816, upload-time = "2025-08-12T02:31:08.34Z" },
{ url = "https://files.pythonhosted.org/packages/00/e1/47887212baa7bc0532880d33d5eafbdb46fcc4b53789b903282a74a85b5b/openai-1.106.1-py3-none-any.whl", hash = "sha256:bfdef37c949f80396c59f2c17e0eda35414979bc07ef3379596a93c9ed044f3a", size = 930768, upload-time = "2025-09-04T18:17:13.349Z" },
]
[[package]]
@@ -4438,15 +4434,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/41/67/936f9814bdd74b2dfd4822f1f7725ab5d8ff4103919a1664eb4874c58b2f/pillow-11.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:4637b88343166249fe8aa94e7c4a62a180c4b3898283bb5d3d2fd5fe10d8e4e0", size = 2626353, upload-time = "2025-01-02T08:13:52.725Z" },
]
[[package]]
name = "pip"
version = "25.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/20/16/650289cd3f43d5a2fadfd98c68bd1e1e7f2550a1a5326768cddfbcedb2c5/pip-25.2.tar.gz", hash = "sha256:578283f006390f85bb6282dffb876454593d637f5d1be494b5202ce4877e71f2", size = 1840021, upload-time = "2025-07-30T21:50:15.401Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/3f/945ef7ab14dc4f9d7f40288d2df998d1837ee0888ec3659c813487572faa/pip-25.2-py3-none-any.whl", hash = "sha256:6d67a2b4e7f14d8b31b8b52648866fa717f45a1eb70e83002f4331d07e953717", size = 1752557, upload-time = "2025-07-30T21:50:13.323Z" },
]
[[package]]
name = "platformdirs"
version = "4.3.6"