refactor(core): use cached_property for openai function schema and char count

Move _openai_function_dict and _openai_function_chars from manual __dict__
stashing in _format_tool_to_openai_function and count_tokens_approximately to
proper cached_property declarations on ChildTool, consistent with how
tool_call_schema and args are already cached.

Strip the instance-level cache lookup and store out of
_format_tool_to_openai_function so it is a pure computation that
ChildTool._openai_function_dict can call without a circular dependency.
convert_to_openai_function now reads tool._openai_function_dict for BaseTool
instances instead of calling _format_tool_to_openai_function directly, so the
schema is computed at most once per instance.

_openai_function_chars chains off _openai_function_dict, so the json.dumps
serialization also runs at most once per mutation cycle.
count_tokens_approximately reads tool._openai_function_chars where available
instead of managing __dict__ by hand.

Invalidation via ChildTool.__setattr__ (popping both keys on args_schema /
description / name mutation) is unchanged.
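
The pattern in miniature, as a standalone stdlib-only sketch (CachedTool and
its cache key names are hypothetical, not the real ChildTool):

    import functools
    import json

    class CachedTool:
        """Toy stand-in for ChildTool's caching/invalidation wiring."""

        def __init__(self, name: str, description: str) -> None:
            self.name = name
            self.description = description

        def __setattr__(self, key: str, value: object) -> None:
            super().__setattr__(key, value)
            if key in ("name", "description", "args_schema"):
                # cached_property stores results in the instance __dict__,
                # so popping the keys forces recomputation on next access.
                self.__dict__.pop("schema_dict", None)
                self.__dict__.pop("schema_chars", None)

        @functools.cached_property
        def schema_dict(self) -> dict:
            return {"name": self.name, "description": self.description}

        @functools.cached_property
        def schema_chars(self) -> int:
            # Chains off schema_dict: json.dumps runs at most once per mutation cycle.
            return len(json.dumps({"type": "function", "function": self.schema_dict}))

    tool = CachedTool("search", "Look things up.")
    assert tool.schema_chars == tool.schema_chars  # second access served from __dict__
    tool.description = "Updated."                  # pops both cache keys
    assert "schema_dict" not in tool.__dict__      # next access recomputes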
Sydney Runkle
2026-04-27 13:54:00 -04:00
parent 273a0374cb
commit 2fe4e2c7b0
3 changed files with 40 additions and 43 deletions

View File

@@ -2247,24 +2247,19 @@ def count_tokens_approximately(
     last_ai_total_tokens: int | None = None
     approx_at_last_ai: float | None = None
-    # Count tokens for tools if provided. For BaseTool instances we stash the
-    # JSON-serialized length on the tool under `_openai_function_chars` (paired
-    # with the `_openai_function_dict` schema cache) so successive calls don't
-    # re-run json.dumps over a dict that hasn't changed. `BaseTool.__setattr__`
-    # pops both keys when schema-affecting fields mutate, so dynamic tool
-    # re-registration or in-place edits invalidate this correctly.
+    # Count tokens for tools if provided. For ChildTool instances the char count
+    # is served from _openai_function_chars (a cached_property invalidated by
+    # ChildTool.__setattr__ on schema-affecting field mutations). For plain dicts
+    # or other tool-like objects we fall back to a fresh json.dumps.
     if tools:
         tools_chars = 0
         for tool in tools:
             if isinstance(tool, dict):
                 tools_chars += len(json.dumps(tool))
-                continue
-            cached_chars = tool.__dict__.get("_openai_function_chars")
-            if cached_chars is None:
-                tool_dict = convert_to_openai_tool(tool)
-                cached_chars = len(json.dumps(tool_dict))
-                tool.__dict__["_openai_function_chars"] = cached_chars
-            tools_chars += cached_chars
+            elif hasattr(tool, "_openai_function_chars"):
+                tools_chars += tool._openai_function_chars
+            else:
+                tools_chars += len(json.dumps(convert_to_openai_tool(tool)))
         token_count += math.ceil(tools_chars / chars_per_token)
     for message in converted_messages:
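
The three branches above, exercised in isolation. FakeTool is a hypothetical
stand-in for a tool whose cached char count has already materialized;
convert_to_openai_tool is langchain_core's public converter, and chars_per_token
mirrors the function's approximation parameter:

    import json
    import math

    from langchain_core.utils.function_calling import convert_to_openai_tool

    chars_per_token = 4.0

    class FakeTool:
        _openai_function_chars = 120  # pretend the cached_property already ran

    tools = [
        {"type": "function", "function": {"name": "raw"}},  # dict: fresh json.dumps
        FakeTool(),  # served from the cached count, no serialization
    ]

    tools_chars = 0
    for tool in tools:
        if isinstance(tool, dict):
            tools_chars += len(json.dumps(tool))
        elif hasattr(tool, "_openai_function_chars"):
            tools_chars += tool._openai_function_chars
        else:
            # anything else pays for a full conversion on each call
            tools_chars += len(json.dumps(convert_to_openai_tool(tool)))

    print(math.ceil(tools_chars / chars_per_token))  # approximate token cost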

View File

@@ -55,6 +55,7 @@ from langchain_core.runnables import (
 from langchain_core.runnables.config import set_config_context
 from langchain_core.runnables.utils import coro_with_context
 from langchain_core.utils.function_calling import (
+    _format_tool_to_openai_function,
     _parse_google_docstring,
     _py_38_safe_origin,
 )
@@ -563,6 +564,16 @@ class ChildTool(BaseTool):
             self.__dict__.pop("_openai_function_dict", None)
             self.__dict__.pop("_openai_function_chars", None)
 
+    @functools.cached_property
+    def _openai_function_dict(self) -> dict[str, Any]:
+        """OpenAI function description for this tool, cached per instance."""
+        return _format_tool_to_openai_function(self)
+
+    @functools.cached_property
+    def _openai_function_chars(self) -> int:
+        """JSON character count of the full OpenAI tool dict, cached per instance."""
+        return len(json.dumps({"type": "function", "function": self._openai_function_dict}))
+
     @property
     def is_single_input(self) -> bool:
         """Check if the tool accepts only a single input argument.

View File

@@ -337,25 +337,17 @@ def _format_tool_to_openai_function(tool: BaseTool) -> FunctionDescription:
     Returns:
         The function description.
     """
-    # The result is cached on the tool instance under `_openai_function_dict`.
-    # `BaseTool.__setattr__` pops this key when `args_schema` / `description` /
-    # `name` are mutated (alongside the existing `tool_call_schema` and `args`
-    # caches), so the invalidation path is already wired up.
-    cached = tool.__dict__.get("_openai_function_dict")
-    if cached is not None:
-        return cached
     is_simple_oai_tool = (
         isinstance(tool, langchain_core.tools.simple.Tool) and not tool.args_schema
     )
     schema = tool.tool_call_schema
     if schema and not is_simple_oai_tool:
         if isinstance(schema, dict):
-            result = _convert_json_schema_to_openai_function(
+            return _convert_json_schema_to_openai_function(
                 schema, name=tool.name, description=tool.description
             )
         elif issubclass(schema, (BaseModel, BaseModelV1)):
-            result = _convert_pydantic_to_openai_function(
+            return _convert_pydantic_to_openai_function(
                 schema, name=tool.name, description=tool.description
             )
         else:
@@ -364,26 +356,22 @@ def _format_tool_to_openai_function(tool: BaseTool) -> FunctionDescription:
                 "Tool call schema must be a JSON schema dict or a Pydantic model."
             )
             raise ValueError(error_msg)
     else:
-        result = {
+        return {
             "name": tool.name,
             "description": tool.description,
             "parameters": {
                 # This is a hack to get around the fact that some tools
                 # do not expose an args_schema, and expect an argument
                 # which is a string.
                 # And Open AI does not support an array type for the
                 # parameters.
                 "properties": {
                     "__arg1": {"title": "__arg1", "type": "string"},
                 },
                 "required": ["__arg1"],
                 "type": "object",
             },
         }
-    tool.__dict__["_openai_function_dict"] = result
-    return result
 
 
 def convert_to_openai_function(
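
For a simple string-input Tool with no args_schema, the description is now
returned directly instead of being stashed on the instance. A sketch of what
the fallback branch yields (hypothetical echo tool; Tool is langchain_core's
simple tool class):

    from langchain_core.tools import Tool

    echo = Tool(name="echo", description="Echo the input.", func=lambda s: s)
    # _format_tool_to_openai_function(echo) now returns this dict fresh on each
    # call; caching happens one level up, in ChildTool._openai_function_dict:
    # {
    #     "name": "echo",
    #     "description": "Echo the input.",
    #     "parameters": {
    #         "properties": {"__arg1": {"title": "__arg1", "type": "string"}},
    #         "required": ["__arg1"],
    #         "type": "object",
    #     },
    # }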
@@ -457,7 +445,10 @@ def convert_to_openai_function(
             "dict", _convert_typed_dict_to_openai_function(cast("type", function))
         )
     elif isinstance(function, langchain_core.tools.base.BaseTool):
-        oai_function = cast("dict", _format_tool_to_openai_function(function))
+        # _openai_function_dict is a cached_property on ChildTool that calls
+        # _format_tool_to_openai_function; going through it here ensures the
+        # result is cached on the tool for the lifetime of the instance.
+        oai_function = cast("dict", function._openai_function_dict)  # type: ignore[attr-defined]
     elif callable(function):
         oai_function = cast(
             "dict", _convert_python_function_to_openai_function(function)