move get_num_tokens_from_messages to BaseChatModel and BaseChatModelV1

This commit is contained in:
Chester Curme 2025-07-28 10:58:57 -04:00
parent c409f723a2
commit b8fed06409
3 changed files with 62 additions and 30 deletions

View File

@@ -2,7 +2,6 @@
from __future__ import annotations
import warnings
from abc import ABC, abstractmethod
from collections.abc import Mapping, Sequence
from functools import cache
@@ -26,7 +25,6 @@ from langchain_core.messages import (
AnyMessage,
BaseMessage,
MessageLikeRepresentation,
get_buffer_string,
)
from langchain_core.messages.v1 import AIMessage as AIMessageV1
from langchain_core.prompt_values import PromptValue
@@ -368,33 +366,6 @@ class BaseLanguageModel(
"""
return len(self.get_token_ids(text))
def get_num_tokens_from_messages(
    self,
    messages: list[BaseMessage],
    tools: Optional[Sequence] = None,
) -> int:
    """Count the tokens contained in a list of messages.

    Handy for checking whether an input will fit inside a model's context
    window before sending it.

    **Note**: this base implementation does not account for tool schemas;
    any ``tools`` argument is ignored (with a warning).

    Args:
        messages: The message inputs to tokenize.
        tools: If provided, sequence of dict, BaseModel, function, or BaseTools
            to be converted to tool schemas.

    Returns:
        The sum of the number of tokens across the messages.
    """
    if tools is not None:
        warnings.warn(
            "Counting tokens in tool schemas is not yet supported. Ignoring tools.",
            stacklevel=2,
        )
    # Tokenize each message independently and accumulate the counts.
    total = 0
    for message in messages:
        total += self.get_num_tokens(get_buffer_string([message]))
    return total
@classmethod
def _all_required_field_names(cls) -> set:
"""DEPRECATED: Kept for backwards compatibility.

View File

@@ -55,6 +55,7 @@ from langchain_core.messages import (
HumanMessage,
convert_to_messages,
convert_to_openai_image_block,
get_buffer_string,
is_data_content_block,
message_chunk_to_message,
)
@@ -1351,6 +1352,33 @@ class BaseChatModel(BaseLanguageModel[BaseMessage], ABC):
starter_dict["_type"] = self._llm_type
return starter_dict
def get_num_tokens_from_messages(
    self,
    messages: list[BaseMessage],
    tools: Optional[Sequence] = None,
) -> int:
    """Return the total token count of the given messages.

    Useful for checking if an input fits in a model's context window.

    **Note**: the base implementation of ``get_num_tokens_from_messages``
    ignores tool schemas.

    Args:
        messages: The message inputs to tokenize.
        tools: If provided, sequence of dict, BaseModel, function, or BaseTools
            to be converted to tool schemas.

    Returns:
        The sum of the number of tokens across the messages.
    """
    if tools is not None:
        warnings.warn(
            "Counting tokens in tool schemas is not yet supported. Ignoring tools.",
            stacklevel=2,
        )
    # Render each message to its buffer-string form, then count tokens per
    # message and sum the results.
    rendered = (get_buffer_string([msg]) for msg in messages)
    return sum(map(self.get_num_tokens, rendered))
def bind_tools(
self,
tools: Sequence[

View File

@@ -4,6 +4,7 @@ from __future__ import annotations
import copy
import typing
import warnings
from abc import ABC, abstractmethod
from collections.abc import AsyncIterator, Iterator, Sequence
from operator import itemgetter
@@ -40,9 +41,13 @@ from langchain_core.messages import (
AIMessage,
BaseMessage,
convert_to_openai_image_block,
get_buffer_string,
is_data_content_block,
)
from langchain_core.messages.utils import convert_to_messages_v1
from langchain_core.messages.utils import (
_convert_from_v1_message,
convert_to_messages_v1,
)
from langchain_core.messages.v1 import AIMessage as AIMessageV1
from langchain_core.messages.v1 import AIMessageChunk as AIMessageChunkV1
from langchain_core.messages.v1 import HumanMessage as HumanMessageV1
@@ -899,6 +904,34 @@ class BaseChatModelV1(BaseLanguageModel[AIMessageV1], ABC):
return RunnableMap(raw=llm) | parser_with_fallback
return llm | output_parser
def get_num_tokens_from_messages(
    self,
    messages: list[MessageV1],
    tools: Optional[Sequence] = None,
) -> int:
    """Count the tokens across a list of v1 messages.

    Useful for checking if an input fits in a model's context window.

    **Note**: the base implementation of ``get_num_tokens_from_messages``
    ignores tool schemas.

    Args:
        messages: The message inputs to tokenize.
        tools: If provided, sequence of dict, BaseModel, function, or BaseTools
            to be converted to tool schemas.

    Returns:
        The sum of the number of tokens across the messages.
    """
    if tools is not None:
        warnings.warn(
            "Counting tokens in tool schemas is not yet supported. Ignoring tools.",
            stacklevel=2,
        )
    # Downgrade each v1 message to the legacy BaseMessage representation so
    # the shared buffer-string tokenization path can be reused.
    legacy = map(_convert_from_v1_message, messages)
    return sum(
        self.get_num_tokens(get_buffer_string([msg])) for msg in legacy
    )
def _gen_info_and_msg_metadata(
generation: Union[ChatGeneration, ChatGenerationChunk],