core[patch]: add UsageMetadata details (#27072)

This commit is contained in:
Bagatur 2024-10-03 13:36:17 -07:00 committed by GitHub
parent cc1b8b3d30
commit 546dc44da5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 1127 additions and 41 deletions

View File

@@ -2,7 +2,7 @@ import json
from typing import Any, Literal, Optional, Union
from pydantic import model_validator
from typing_extensions import Self, TypedDict
from typing_extensions import NotRequired, Self, TypedDict
from langchain_core.messages.base import (
BaseMessage,
@@ -29,6 +29,66 @@ from langchain_core.utils._merge import merge_dicts, merge_lists
from langchain_core.utils.json import parse_partial_json
class InputTokenDetails(TypedDict, total=False):
"""Breakdown of input token counts.
Does *not* need to sum to full input token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
.. versionadded:: 0.3.9
"""
audio: int
"""Audio input tokens."""
cache_creation: int
"""Input tokens that were cached and there was a cache miss.
Since there was a cache miss, the cache was created from these tokens.
"""
cache_read: int
"""Input tokens that were cached and there was a cache hit.
Since there was a cache hit, the tokens were read from the cache. More precisely,
the model state given these tokens was read from the cache.
"""
class OutputTokenDetails(TypedDict, total=False):
"""Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"reasoning": 200,
}
.. versionadded:: 0.3.9
"""
audio: int
"""Audio output tokens."""
reasoning: int
"""Reasoning output tokens.
Tokens generated by the model in a chain-of-thought process (e.g. by OpenAI's o1
models) that are not returned as part of the model output.
"""
class UsageMetadata(TypedDict):
"""Usage metadata for a message, such as token counts.
@@ -39,18 +99,41 @@ class UsageMetadata(TypedDict):
.. code-block:: python
{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"input_tokens": 350,
"output_tokens": 240,
"total_tokens": 590,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
.. versionchanged:: 0.3.9
Added ``input_token_details`` and ``output_token_details``.
"""
input_tokens: int
"""Count of input (or prompt) tokens."""
"""Count of input (or prompt) tokens. Sum of all input token types."""
output_tokens: int
"""Count of output (or completion) tokens."""
"""Count of output (or completion) tokens. Sum of all output token types."""
total_tokens: int
"""Total token count."""
"""Total token count. Sum of input_tokens + output_tokens."""
input_token_details: NotRequired[InputTokenDetails]
"""Breakdown of input token counts.
Does *not* need to sum to full input token count. Does *not* need to have all keys.
"""
output_token_details: NotRequired[OutputTokenDetails]
"""Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
"""
class AIMessage(BaseMessage):
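
For reference, a minimal usage sketch (assuming langchain_core >= 0.3.9, where these types live in ``langchain_core.messages.ai``) showing how the new detail fields attach to an ``AIMessage``:

from langchain_core.messages import AIMessage
from langchain_core.messages.ai import (
    InputTokenDetails,
    OutputTokenDetails,
    UsageMetadata,
)

# Token counts as a provider integration might report them. The detail
# breakdowns do not need to sum to the top-level counts.
usage = UsageMetadata(
    input_tokens=350,
    output_tokens=240,
    total_tokens=590,
    input_token_details=InputTokenDetails(
        audio=10, cache_creation=200, cache_read=100
    ),
    output_token_details=OutputTokenDetails(audio=10, reasoning=200),
)

msg = AIMessage(content="Hello!", usage_metadata=usage)

# Both detail fields are NotRequired, so read them defensively.
details = (msg.usage_metadata or {}).get("input_token_details", {})
print(details.get("cache_read", 0))  # -> 100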

View File

@ -45,7 +45,7 @@ python = ">=3.12.4"
[tool.ruff.lint]
select = [ "B", "C4", "E", "F", "I", "N", "PIE", "SIM", "T201", "UP", "W",]
ignore = [ "UP007",]
ignore = [ "UP007", 'W293']
[tool.coverage.run]
omit = [ "tests/*",]

View File

@@ -677,6 +677,41 @@
'title': 'HumanMessageChunk',
'type': 'object',
}),
'InputTokenDetails': dict({
'description': '''
Breakdown of input token counts.
Does *not* need to sum to full input token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'cache_creation': dict({
'title': 'Cache Creation',
'type': 'integer',
}),
'cache_read': dict({
'title': 'Cache Read',
'type': 'integer',
}),
}),
'title': 'InputTokenDetails',
'type': 'object',
}),
'InvalidToolCall': dict({
'description': '''
Allowance for errors made by LLM.
@@ -743,6 +778,36 @@
'title': 'InvalidToolCall',
'type': 'object',
}),
'OutputTokenDetails': dict({
'description': '''
Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"reasoning": 200,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'reasoning': dict({
'title': 'Reasoning',
'type': 'integer',
}),
}),
'title': 'OutputTokenDetails',
'type': 'object',
}),
'SystemMessage': dict({
'additionalProperties': True,
'description': '''
@@ -1245,16 +1310,35 @@
.. code-block:: python
{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"input_tokens": 350,
"output_tokens": 240,
"total_tokens": 590,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
.. versionchanged:: 0.3.9
Added ``input_token_details`` and ``output_token_details``.
''',
'properties': dict({
'input_token_details': dict({
'$ref': '#/$defs/InputTokenDetails',
}),
'input_tokens': dict({
'title': 'Input Tokens',
'type': 'integer',
}),
'output_token_details': dict({
'$ref': '#/$defs/OutputTokenDetails',
}),
'output_tokens': dict({
'title': 'Output Tokens',
'type': 'integer',
@@ -2008,6 +2092,41 @@
'title': 'HumanMessageChunk',
'type': 'object',
}),
'InputTokenDetails': dict({
'description': '''
Breakdown of input token counts.
Does *not* need to sum to full input token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'cache_creation': dict({
'title': 'Cache Creation',
'type': 'integer',
}),
'cache_read': dict({
'title': 'Cache Read',
'type': 'integer',
}),
}),
'title': 'InputTokenDetails',
'type': 'object',
}),
'InvalidToolCall': dict({
'description': '''
Allowance for errors made by LLM.
@@ -2074,6 +2193,36 @@
'title': 'InvalidToolCall',
'type': 'object',
}),
'OutputTokenDetails': dict({
'description': '''
Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"reasoning": 200,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'reasoning': dict({
'title': 'Reasoning',
'type': 'integer',
}),
}),
'title': 'OutputTokenDetails',
'type': 'object',
}),
'SystemMessage': dict({
'additionalProperties': True,
'description': '''
@@ -2576,16 +2725,35 @@
.. code-block:: python
{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"input_tokens": 350,
"output_tokens": 240,
"total_tokens": 590,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
.. versionchanged:: 0.3.9
Added ``input_token_details`` and ``output_token_details``.
''',
'properties': dict({
'input_token_details': dict({
'$ref': '#/$defs/InputTokenDetails',
}),
'input_tokens': dict({
'title': 'Input Tokens',
'type': 'integer',
}),
'output_token_details': dict({
'$ref': '#/$defs/OutputTokenDetails',
}),
'output_tokens': dict({
'title': 'Output Tokens',
'type': 'integer',

View File

@@ -1037,6 +1037,41 @@
'title': 'HumanMessageChunk',
'type': 'object',
}),
'InputTokenDetails': dict({
'description': '''
Breakdown of input token counts.
Does *not* need to sum to full input token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'cache_creation': dict({
'title': 'Cache Creation',
'type': 'integer',
}),
'cache_read': dict({
'title': 'Cache Read',
'type': 'integer',
}),
}),
'title': 'InputTokenDetails',
'type': 'object',
}),
'InvalidToolCall': dict({
'description': '''
Allowance for errors made by LLM.
@@ -1103,6 +1138,36 @@
'title': 'InvalidToolCall',
'type': 'object',
}),
'OutputTokenDetails': dict({
'description': '''
Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"reasoning": 200,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'reasoning': dict({
'title': 'Reasoning',
'type': 'integer',
}),
}),
'title': 'OutputTokenDetails',
'type': 'object',
}),
'SystemMessage': dict({
'additionalProperties': True,
'description': '''
@@ -1605,16 +1670,35 @@
.. code-block:: python
{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"input_tokens": 350,
"output_tokens": 240,
"total_tokens": 590,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
.. versionchanged:: 0.3.9
Added ``input_token_details`` and ``output_token_details``.
''',
'properties': dict({
'input_token_details': dict({
'$ref': '#/$defs/InputTokenDetails',
}),
'input_tokens': dict({
'title': 'Input Tokens',
'type': 'integer',
}),
'output_token_details': dict({
'$ref': '#/$defs/OutputTokenDetails',
}),
'output_tokens': dict({
'title': 'Output Tokens',
'type': 'integer',

View File

@@ -2639,6 +2639,41 @@
'title': 'HumanMessageChunk',
'type': 'object',
}),
'InputTokenDetails': dict({
'description': '''
Breakdown of input token counts.
Does *not* need to sum to full input token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'cache_creation': dict({
'title': 'Cache Creation',
'type': 'integer',
}),
'cache_read': dict({
'title': 'Cache Read',
'type': 'integer',
}),
}),
'title': 'InputTokenDetails',
'type': 'object',
}),
'InvalidToolCall': dict({
'description': '''
Allowance for errors made by LLM.
@@ -2705,6 +2740,36 @@
'title': 'InvalidToolCall',
'type': 'object',
}),
'OutputTokenDetails': dict({
'description': '''
Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"reasoning": 200,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'reasoning': dict({
'title': 'Reasoning',
'type': 'integer',
}),
}),
'title': 'OutputTokenDetails',
'type': 'object',
}),
'SystemMessage': dict({
'additionalProperties': True,
'description': '''
@@ -3207,16 +3272,35 @@
.. code-block:: python
{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"input_tokens": 350,
"output_tokens": 240,
"total_tokens": 590,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
.. versionchanged:: 0.3.9
Added ``input_token_details`` and ``output_token_details``.
''',
'properties': dict({
'input_token_details': dict({
'$ref': '#/$defs/InputTokenDetails',
}),
'input_tokens': dict({
'title': 'Input Tokens',
'type': 'integer',
}),
'output_token_details': dict({
'$ref': '#/$defs/OutputTokenDetails',
}),
'output_tokens': dict({
'title': 'Output Tokens',
'type': 'integer',
@@ -4028,6 +4112,41 @@
'title': 'HumanMessageChunk',
'type': 'object',
}),
'InputTokenDetails': dict({
'description': '''
Breakdown of input token counts.
Does *not* need to sum to full input token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'cache_creation': dict({
'title': 'Cache Creation',
'type': 'integer',
}),
'cache_read': dict({
'title': 'Cache Read',
'type': 'integer',
}),
}),
'title': 'InputTokenDetails',
'type': 'object',
}),
'InvalidToolCall': dict({
'description': '''
Allowance for errors made by LLM.
@@ -4094,6 +4213,36 @@
'title': 'InvalidToolCall',
'type': 'object',
}),
'OutputTokenDetails': dict({
'description': '''
Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"reasoning": 200,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'reasoning': dict({
'title': 'Reasoning',
'type': 'integer',
}),
}),
'title': 'OutputTokenDetails',
'type': 'object',
}),
'StringPromptValue': dict({
'description': 'String prompt value.',
'properties': dict({
@@ -4615,16 +4764,35 @@
.. code-block:: python
{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"input_tokens": 350,
"output_tokens": 240,
"total_tokens": 590,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
.. versionchanged:: 0.3.9
Added ``input_token_details`` and ``output_token_details``.
''',
'properties': dict({
'input_token_details': dict({
'$ref': '#/$defs/InputTokenDetails',
}),
'input_tokens': dict({
'title': 'Input Tokens',
'type': 'integer',
}),
'output_token_details': dict({
'$ref': '#/$defs/OutputTokenDetails',
}),
'output_tokens': dict({
'title': 'Output Tokens',
'type': 'integer',
@@ -5448,6 +5616,41 @@
'title': 'HumanMessageChunk',
'type': 'object',
}),
'InputTokenDetails': dict({
'description': '''
Breakdown of input token counts.
Does *not* need to sum to full input token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'cache_creation': dict({
'title': 'Cache Creation',
'type': 'integer',
}),
'cache_read': dict({
'title': 'Cache Read',
'type': 'integer',
}),
}),
'title': 'InputTokenDetails',
'type': 'object',
}),
'InvalidToolCall': dict({
'description': '''
Allowance for errors made by LLM.
@@ -5514,6 +5717,36 @@
'title': 'InvalidToolCall',
'type': 'object',
}),
'OutputTokenDetails': dict({
'description': '''
Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"reasoning": 200,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'reasoning': dict({
'title': 'Reasoning',
'type': 'integer',
}),
}),
'title': 'OutputTokenDetails',
'type': 'object',
}),
'StringPromptValue': dict({
'description': 'String prompt value.',
'properties': dict({
@@ -6035,16 +6268,35 @@
.. code-block:: python
{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"input_tokens": 350,
"output_tokens": 240,
"total_tokens": 590,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
.. versionchanged:: 0.3.9
Added ``input_token_details`` and ``output_token_details``.
''',
'properties': dict({
'input_token_details': dict({
'$ref': '#/definitions/InputTokenDetails',
}),
'input_tokens': dict({
'title': 'Input Tokens',
'type': 'integer',
}),
'output_token_details': dict({
'$ref': '#/definitions/OutputTokenDetails',
}),
'output_tokens': dict({
'title': 'Output Tokens',
'type': 'integer',
@@ -6744,6 +6996,41 @@
'title': 'HumanMessageChunk',
'type': 'object',
}),
'InputTokenDetails': dict({
'description': '''
Breakdown of input token counts.
Does *not* need to sum to full input token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'cache_creation': dict({
'title': 'Cache Creation',
'type': 'integer',
}),
'cache_read': dict({
'title': 'Cache Read',
'type': 'integer',
}),
}),
'title': 'InputTokenDetails',
'type': 'object',
}),
'InvalidToolCall': dict({
'description': '''
Allowance for errors made by LLM.
@@ -6810,6 +7097,36 @@
'title': 'InvalidToolCall',
'type': 'object',
}),
'OutputTokenDetails': dict({
'description': '''
Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"reasoning": 200,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'reasoning': dict({
'title': 'Reasoning',
'type': 'integer',
}),
}),
'title': 'OutputTokenDetails',
'type': 'object',
}),
'SystemMessage': dict({
'additionalProperties': True,
'description': '''
@@ -7312,16 +7629,35 @@
.. code-block:: python
{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"input_tokens": 350,
"output_tokens": 240,
"total_tokens": 590,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
.. versionchanged:: 0.3.9
Added ``input_token_details`` and ``output_token_details``.
''',
'properties': dict({
'input_token_details': dict({
'$ref': '#/definitions/InputTokenDetails',
}),
'input_tokens': dict({
'title': 'Input Tokens',
'type': 'integer',
}),
'output_token_details': dict({
'$ref': '#/definitions/OutputTokenDetails',
}),
'output_tokens': dict({
'title': 'Output Tokens',
'type': 'integer',
@@ -8175,6 +8511,41 @@
'title': 'HumanMessageChunk',
'type': 'object',
}),
'InputTokenDetails': dict({
'description': '''
Breakdown of input token counts.
Does *not* need to sum to full input token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'cache_creation': dict({
'title': 'Cache Creation',
'type': 'integer',
}),
'cache_read': dict({
'title': 'Cache Read',
'type': 'integer',
}),
}),
'title': 'InputTokenDetails',
'type': 'object',
}),
'InvalidToolCall': dict({
'description': '''
Allowance for errors made by LLM.
@@ -8241,6 +8612,36 @@
'title': 'InvalidToolCall',
'type': 'object',
}),
'OutputTokenDetails': dict({
'description': '''
Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"reasoning": 200,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'reasoning': dict({
'title': 'Reasoning',
'type': 'integer',
}),
}),
'title': 'OutputTokenDetails',
'type': 'object',
}),
'StringPromptValue': dict({
'description': 'String prompt value.',
'properties': dict({
@@ -8762,16 +9163,35 @@
.. code-block:: python
{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"input_tokens": 350,
"output_tokens": 240,
"total_tokens": 590,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
.. versionchanged:: 0.3.9
Added ``input_token_details`` and ``output_token_details``.
''',
'properties': dict({
'input_token_details': dict({
'$ref': '#/definitions/InputTokenDetails',
}),
'input_tokens': dict({
'title': 'Input Tokens',
'type': 'integer',
}),
'output_token_details': dict({
'$ref': '#/definitions/OutputTokenDetails',
}),
'output_tokens': dict({
'title': 'Output Tokens',
'type': 'integer',
@@ -9516,6 +9936,41 @@
'title': 'HumanMessageChunk',
'type': 'object',
}),
'InputTokenDetails': dict({
'description': '''
Breakdown of input token counts.
Does *not* need to sum to full input token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'cache_creation': dict({
'title': 'Cache Creation',
'type': 'integer',
}),
'cache_read': dict({
'title': 'Cache Read',
'type': 'integer',
}),
}),
'title': 'InputTokenDetails',
'type': 'object',
}),
'InvalidToolCall': dict({
'description': '''
Allowance for errors made by LLM.
@@ -9582,6 +10037,36 @@
'title': 'InvalidToolCall',
'type': 'object',
}),
'OutputTokenDetails': dict({
'description': '''
Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"reasoning": 200,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'reasoning': dict({
'title': 'Reasoning',
'type': 'integer',
}),
}),
'title': 'OutputTokenDetails',
'type': 'object',
}),
'SystemMessage': dict({
'additionalProperties': True,
'description': '''
@@ -10084,16 +10569,35 @@
.. code-block:: python
{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"input_tokens": 350,
"output_tokens": 240,
"total_tokens": 590,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
.. versionchanged:: 0.3.9
Added ``input_token_details`` and ``output_token_details``.
''',
'properties': dict({
'input_token_details': dict({
'$ref': '#/definitions/InputTokenDetails',
}),
'input_tokens': dict({
'title': 'Input Tokens',
'type': 'integer',
}),
'output_token_details': dict({
'$ref': '#/definitions/OutputTokenDetails',
}),
'output_tokens': dict({
'title': 'Output Tokens',
'type': 'integer',
@@ -10855,6 +11359,41 @@
'title': 'HumanMessageChunk',
'type': 'object',
}),
'InputTokenDetails': dict({
'description': '''
Breakdown of input token counts.
Does *not* need to sum to full input token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'cache_creation': dict({
'title': 'Cache Creation',
'type': 'integer',
}),
'cache_read': dict({
'title': 'Cache Read',
'type': 'integer',
}),
}),
'title': 'InputTokenDetails',
'type': 'object',
}),
'InvalidToolCall': dict({
'description': '''
Allowance for errors made by LLM.
@@ -10921,6 +11460,36 @@
'title': 'InvalidToolCall',
'type': 'object',
}),
'OutputTokenDetails': dict({
'description': '''
Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"reasoning": 200,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'reasoning': dict({
'title': 'Reasoning',
'type': 'integer',
}),
}),
'title': 'OutputTokenDetails',
'type': 'object',
}),
'PromptTemplateOutput': dict({
'anyOf': list([
dict({
@@ -11453,16 +12022,35 @@
.. code-block:: python
{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"input_tokens": 350,
"output_tokens": 240,
"total_tokens": 590,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
.. versionchanged:: 0.3.9
Added ``input_token_details`` and ``output_token_details``.
''',
'properties': dict({
'input_token_details': dict({
'$ref': '#/definitions/InputTokenDetails',
}),
'input_tokens': dict({
'title': 'Input Tokens',
'type': 'integer',
}),
'output_token_details': dict({
'$ref': '#/definitions/OutputTokenDetails',
}),
'output_tokens': dict({
'title': 'Output Tokens',
'type': 'integer',
@@ -12236,6 +12824,41 @@
'title': 'HumanMessageChunk',
'type': 'object',
}),
'InputTokenDetails': dict({
'description': '''
Breakdown of input token counts.
Does *not* need to sum to full input token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'cache_creation': dict({
'title': 'Cache Creation',
'type': 'integer',
}),
'cache_read': dict({
'title': 'Cache Read',
'type': 'integer',
}),
}),
'title': 'InputTokenDetails',
'type': 'object',
}),
'InvalidToolCall': dict({
'description': '''
Allowance for errors made by LLM.
@@ -12302,6 +12925,36 @@
'title': 'InvalidToolCall',
'type': 'object',
}),
'OutputTokenDetails': dict({
'description': '''
Breakdown of output token counts.
Does *not* need to sum to full output token count. Does *not* need to have all keys.
Example:
.. code-block:: python
{
"audio": 10,
"reasoning": 200,
}
.. versionadded:: 0.3.9
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'reasoning': dict({
'title': 'Reasoning',
'type': 'integer',
}),
}),
'title': 'OutputTokenDetails',
'type': 'object',
}),
'StringPromptValue': dict({
'description': 'String prompt value.',
'properties': dict({
@@ -12823,16 +13476,35 @@
.. code-block:: python
{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"input_tokens": 350,
"output_tokens": 240,
"total_tokens": 590,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
.. versionchanged:: 0.3.9
Added ``input_token_details`` and ``output_token_details``.
''',
'properties': dict({
'input_token_details': dict({
'$ref': '#/definitions/InputTokenDetails',
}),
'input_tokens': dict({
'title': 'Input Tokens',
'type': 'integer',
}),
'output_token_details': dict({
'$ref': '#/definitions/OutputTokenDetails',
}),
'output_tokens': dict({
'title': 'Output Tokens',
'type': 'integer',

View File

@@ -151,6 +151,31 @@ class ChatModelIntegrationTests(ChatModelTests):
assert isinstance(result.usage_metadata["output_tokens"], int)
assert isinstance(result.usage_metadata["total_tokens"], int)
if "audio_input" in self.supported_usage_metadata_details:
msg = self.invoke_with_audio_input()
assert isinstance(msg.usage_metadata["input_token_details"]["audio"], int) # type: ignore[index]
if "audio_output" in self.supported_usage_metadata_details:
msg = self.invoke_with_audio_output()
assert isinstance(msg.usage_metadata["output_token_details"]["audio"], int) # type: ignore[index]
if "reasoning_output" in self.supported_usage_metadata_details:
msg = self.invoke_with_reasoning_output()
assert isinstance(
msg.usage_metadata["output_token_details"]["reasoning"], # type: ignore[index]
int,
)
if "cache_read_input" in self.supported_usage_metadata_details:
msg = self.invoke_with_cache_read_input()
assert isinstance(
msg.usage_metadata["input_token_details"]["cache_read"], # type: ignore[index]
int,
)
if "cache_creation_input" in self.supported_usage_metadata_details:
msg = self.invoke_with_cache_creation_input()
assert isinstance(
msg.usage_metadata["input_token_details"]["cache_creation"], # type: ignore[index]
int,
)
def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:
if not self.returns_usage_metadata:
pytest.skip("Not implemented.")
@@ -164,6 +189,31 @@ class ChatModelIntegrationTests(ChatModelTests):
assert isinstance(full.usage_metadata["output_tokens"], int)
assert isinstance(full.usage_metadata["total_tokens"], int)
if "audio_input" in self.supported_usage_metadata_details:
msg = self.invoke_with_audio_input(stream=True)
assert isinstance(msg.usage_metadata["input_token_details"]["audio"], int) # type: ignore[index]
if "audio_output" in self.supported_usage_metadata_details:
msg = self.invoke_with_audio_output(stream=True)
assert isinstance(msg.usage_metadata["output_token_details"]["audio"], int) # type: ignore[index]
if "reasoning_output" in self.supported_usage_metadata_details:
msg = self.invoke_with_reasoning_output(stream=True)
assert isinstance(
msg.usage_metadata["output_token_details"]["reasoning"], # type: ignore[index]
int,
)
if "cache_read_input" in self.supported_usage_metadata_details:
msg = self.invoke_with_cache_read_input(stream=True)
assert isinstance(
msg.usage_metadata["input_token_details"]["cache_read"], # type: ignore[index]
int,
)
if "cache_creation_input" in self.supported_usage_metadata_details:
msg = self.invoke_with_cache_creation_input(stream=True)
assert isinstance(
msg.usage_metadata["input_token_details"]["cache_creation"], # type: ignore[index]
int,
)
def test_stop_sequence(self, model: BaseChatModel) -> None:
result = model.invoke("hi", stop=["you"])
assert isinstance(result, AIMessage)
@@ -608,3 +658,18 @@ class ChatModelIntegrationTests(ChatModelTests):
assert isinstance(result, AIMessage)
assert isinstance(result.content, str)
assert len(result.content) > 0
def invoke_with_audio_input(self, *, stream: bool = False) -> AIMessage:
raise NotImplementedError()
def invoke_with_audio_output(self, *, stream: bool = False) -> AIMessage:
raise NotImplementedError()
def invoke_with_reasoning_output(self, *, stream: bool = False) -> AIMessage:
raise NotImplementedError()
def invoke_with_cache_read_input(self, *, stream: bool = False) -> AIMessage:
raise NotImplementedError()
def invoke_with_cache_creation_input(self, *, stream: bool = False) -> AIMessage:
raise NotImplementedError()

View File

@@ -138,6 +138,20 @@ class ChatModelTests(BaseStandardTests):
def supports_image_tool_message(self) -> bool:
return False
@property
def supported_usage_metadata_details(
self,
) -> List[
Literal[
"audio_input",
"audio_output",
"reasoning_output",
"cache_read_input",
"cache_creation_input",
]
]:
return []
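
The default (an empty list) keeps all of the new detail assertions skipped. A provider's test class opts in by overriding the property; the class name and returned values below are purely illustrative:

from typing import List, Literal


class ChatParrotTests(ChatModelTests):  # hypothetical provider test class
    @property
    def supported_usage_metadata_details(
        self,
    ) -> List[
        Literal[
            "audio_input",
            "audio_output",
            "reasoning_output",
            "cache_read_input",
            "cache_creation_input",
        ]
    ]:
        # Advertise only the breakdowns this provider actually reports.
        return ["cache_read_input", "cache_creation_input"]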
class ChatModelUnitTests(ChatModelTests):
@property