refactor(core): use cached_property for openai function schema and char count

Move _openai_function_dict and _openai_function_chars from manual __dict__
stashing in _format_tool_to_openai_function and count_tokens_approximately to
proper cached_property declarations on ChildTool, consistent with how
tool_call_schema and args are already cached.

Strip the instance-level cache lookup and store out of
_format_tool_to_openai_function so it is a pure computation that
ChildTool._openai_function_dict can call without a circular dependency.
convert_to_openai_function now reads tool._openai_function_dict for BaseTool
instances instead of calling _format_tool_to_openai_function directly, so the
schema is computed at most once per instance.

_openai_function_chars chains off _openai_function_dict, so the json.dumps
serialization also runs at most once per mutation cycle.
count_tokens_approximately reads tool._openai_function_chars where available
instead of managing __dict__ by hand.

Invalidation via ChildTool.__setattr__ (popping both keys on args_schema /
description / name mutation) is unchanged.
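
The pattern in miniature, as a standalone stdlib-only sketch (CachedTool and
its cache key names are hypothetical, not the real ChildTool):

    import functools
    import json

    class CachedTool:
        """Toy stand-in for ChildTool's caching/invalidation wiring."""

        def __init__(self, name: str, description: str) -> None:
            self.name = name
            self.description = description

        def __setattr__(self, key: str, value: object) -> None:
            super().__setattr__(key, value)
            if key in ("name", "description", "args_schema"):
                # cached_property stores results in the instance __dict__,
                # so popping the keys forces recomputation on next access.
                self.__dict__.pop("schema_dict", None)
                self.__dict__.pop("schema_chars", None)

        @functools.cached_property
        def schema_dict(self) -> dict:
            return {"name": self.name, "description": self.description}

        @functools.cached_property
        def schema_chars(self) -> int:
            # Chains off schema_dict: json.dumps runs at most once per mutation cycle.
            return len(json.dumps({"type": "function", "function": self.schema_dict}))

    tool = CachedTool("search", "Look things up.")
    assert tool.schema_chars == tool.schema_chars  # second access served from __dict__
    tool.description = "Updated."                  # pops both cache keys
    assert "schema_dict" not in tool.__dict__      # next access recomputes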
Sydney Runkle
2026-04-27 13:54:00 -04:00
parent 273a0374cb
commit 2fe4e2c7b0
3 changed files with 40 additions and 43 deletions

View File

@@ -2247,24 +2247,19 @@ def count_tokens_approximately(
     last_ai_total_tokens: int | None = None
     approx_at_last_ai: float | None = None
-    # Count tokens for tools if provided. For BaseTool instances we stash the
-    # JSON-serialized length on the tool under `_openai_function_chars` (paired
-    # with the `_openai_function_dict` schema cache) so successive calls don't
-    # re-run json.dumps over a dict that hasn't changed. `BaseTool.__setattr__`
-    # pops both keys when schema-affecting fields mutate, so dynamic tool
-    # re-registration or in-place edits invalidate this correctly.
+    # Count tokens for tools if provided. For ChildTool instances the char count
+    # is served from _openai_function_chars (a cached_property invalidated by
+    # ChildTool.__setattr__ on schema-affecting field mutations). For plain dicts
+    # or other tool-like objects we fall back to a fresh json.dumps.
     if tools:
         tools_chars = 0
         for tool in tools:
             if isinstance(tool, dict):
                 tools_chars += len(json.dumps(tool))
-                continue
-            cached_chars = tool.__dict__.get("_openai_function_chars")
-            if cached_chars is None:
-                tool_dict = convert_to_openai_tool(tool)
-                cached_chars = len(json.dumps(tool_dict))
-                tool.__dict__["_openai_function_chars"] = cached_chars
-            tools_chars += cached_chars
+            elif hasattr(tool, "_openai_function_chars"):
+                tools_chars += tool._openai_function_chars
+            else:
+                tools_chars += len(json.dumps(convert_to_openai_tool(tool)))
         token_count += math.ceil(tools_chars / chars_per_token)
     for message in converted_messages:
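
The three branches above, exercised in isolation. FakeTool is a hypothetical
stand-in for a tool whose cached char count has already materialized;
convert_to_openai_tool is langchain_core's public converter, and chars_per_token
mirrors the function's approximation parameter:

    import json
    import math

    from langchain_core.utils.function_calling import convert_to_openai_tool

    chars_per_token = 4.0

    class FakeTool:
        _openai_function_chars = 120  # pretend the cached_property already ran

    tools = [
        {"type": "function", "function": {"name": "raw"}},  # dict: fresh json.dumps
        FakeTool(),  # served from the cached count, no serialization
    ]

    tools_chars = 0
    for tool in tools:
        if isinstance(tool, dict):
            tools_chars += len(json.dumps(tool))
        elif hasattr(tool, "_openai_function_chars"):
            tools_chars += tool._openai_function_chars
        else:
            # anything else pays for a full conversion on each call
            tools_chars += len(json.dumps(convert_to_openai_tool(tool)))

    print(math.ceil(tools_chars / chars_per_token))  # approximate token cost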

View File

@@ -55,6 +55,7 @@ from langchain_core.runnables import (
 from langchain_core.runnables.config import set_config_context
 from langchain_core.runnables.utils import coro_with_context
 from langchain_core.utils.function_calling import (
+    _format_tool_to_openai_function,
     _parse_google_docstring,
     _py_38_safe_origin,
 )
@@ -563,6 +564,16 @@ class ChildTool(BaseTool):
             self.__dict__.pop("_openai_function_dict", None)
             self.__dict__.pop("_openai_function_chars", None)
 
+    @functools.cached_property
+    def _openai_function_dict(self) -> dict[str, Any]:
+        """OpenAI function description for this tool, cached per instance."""
+        return _format_tool_to_openai_function(self)
+
+    @functools.cached_property
+    def _openai_function_chars(self) -> int:
+        """JSON character count of the full OpenAI tool dict, cached per instance."""
+        return len(json.dumps({"type": "function", "function": self._openai_function_dict}))
+
     @property
     def is_single_input(self) -> bool:
         """Check if the tool accepts only a single input argument.

View File

@@ -337,25 +337,17 @@ def _format_tool_to_openai_function(tool: BaseTool) -> FunctionDescription:
     Returns:
         The function description.
     """
-    # The result is cached on the tool instance under `_openai_function_dict`.
-    # `BaseTool.__setattr__` pops this key when `args_schema` / `description` /
-    # `name` are mutated (alongside the existing `tool_call_schema` and `args`
-    # caches), so the invalidation path is already wired up.
-    cached = tool.__dict__.get("_openai_function_dict")
-    if cached is not None:
-        return cached
     is_simple_oai_tool = (
         isinstance(tool, langchain_core.tools.simple.Tool) and not tool.args_schema
     )
     schema = tool.tool_call_schema
     if schema and not is_simple_oai_tool:
         if isinstance(schema, dict):
-            result = _convert_json_schema_to_openai_function(
+            return _convert_json_schema_to_openai_function(
                 schema, name=tool.name, description=tool.description
             )
         elif issubclass(schema, (BaseModel, BaseModelV1)):
-            result = _convert_pydantic_to_openai_function(
+            return _convert_pydantic_to_openai_function(
                 schema, name=tool.name, description=tool.description
             )
         else:
@@ -364,26 +356,22 @@ def _format_tool_to_openai_function(tool: BaseTool) -> FunctionDescription:
                 "Tool call schema must be a JSON schema dict or a Pydantic model."
             )
             raise ValueError(error_msg)
     else:
-        result = {
+        return {
             "name": tool.name,
             "description": tool.description,
             "parameters": {
                 # This is a hack to get around the fact that some tools
                 # do not expose an args_schema, and expect an argument
                 # which is a string.
                 # And Open AI does not support an array type for the
                 # parameters.
                 "properties": {
                     "__arg1": {"title": "__arg1", "type": "string"},
                 },
                 "required": ["__arg1"],
                 "type": "object",
             },
         }
-    tool.__dict__["_openai_function_dict"] = result
-    return result
 
 
 def convert_to_openai_function(
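
For a simple string-input Tool with no args_schema, the description is now
returned directly instead of being stashed on the instance. A sketch of what
the fallback branch yields (hypothetical echo tool; Tool is langchain_core's
simple tool class):

    from langchain_core.tools import Tool

    echo = Tool(name="echo", description="Echo the input.", func=lambda s: s)
    # _format_tool_to_openai_function(echo) now returns this dict fresh on each
    # call; caching happens one level up, in ChildTool._openai_function_dict:
    # {
    #     "name": "echo",
    #     "description": "Echo the input.",
    #     "parameters": {
    #         "properties": {"__arg1": {"title": "__arg1", "type": "string"}},
    #         "required": ["__arg1"],
    #         "type": "object",
    #     },
    # }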
@@ -457,7 +445,10 @@ def convert_to_openai_function(
             "dict", _convert_typed_dict_to_openai_function(cast("type", function))
         )
     elif isinstance(function, langchain_core.tools.base.BaseTool):
-        oai_function = cast("dict", _format_tool_to_openai_function(function))
+        # _openai_function_dict is a cached_property on ChildTool that calls
+        # _format_tool_to_openai_function; going through it here ensures the
+        # result is cached on the tool for the lifetime of the instance.
+        oai_function = cast("dict", function._openai_function_dict)  # type: ignore[attr-defined]
     elif callable(function):
         oai_function = cast(
             "dict", _convert_python_function_to_openai_function(function)