From f942d79aecc1564855dd7ff61eab4763179df186 Mon Sep 17 00:00:00 2001
From: Sydney Runkle
Date: Fri, 24 Apr 2026 09:44:22 -0400
Subject: [PATCH] perf(core): cache tool openai-function JSON-char count
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

count_tokens_approximately was calling json.dumps(tool_dict) and throwing
away everything but the length on every invocation, even though the dict
returned by convert_to_openai_tool(tool) is stable for a given tool.

Stash the char count on the tool instance under _openai_function_chars
(paired with the _openai_function_dict schema cache from the previous
commit). BaseTool.__setattr__ pops both keys on mutation of args_schema /
description / name, so dynamic tool re-registration or in-place edits
invalidate correctly.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 libs/core/langchain_core/messages/utils.py | 18 +++++++++++++++---
 libs/core/langchain_core/tools/base.py     |  1 +
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/libs/core/langchain_core/messages/utils.py b/libs/core/langchain_core/messages/utils.py
index f37d100c8f9..0817aba4e69 100644
--- a/libs/core/langchain_core/messages/utils.py
+++ b/libs/core/langchain_core/messages/utils.py
@@ -2247,12 +2247,24 @@ def count_tokens_approximately(
     last_ai_total_tokens: int | None = None
     approx_at_last_ai: float | None = None

-    # Count tokens for tools if provided
+    # Count tokens for tools if provided. For BaseTool instances we stash the
+    # JSON-serialized length on the tool under `_openai_function_chars` (paired
+    # with the `_openai_function_dict` schema cache) so successive calls don't
+    # re-run json.dumps over a dict that hasn't changed. `BaseTool.__setattr__`
+    # pops both keys when schema-affecting fields mutate, so dynamic tool
+    # re-registration or in-place edits invalidate this correctly.
     if tools:
         tools_chars = 0
         for tool in tools:
-            tool_dict = tool if isinstance(tool, dict) else convert_to_openai_tool(tool)
-            tools_chars += len(json.dumps(tool_dict))
+            if isinstance(tool, dict):
+                tools_chars += len(json.dumps(tool))
+                continue
+            cached_chars = tool.__dict__.get("_openai_function_chars")
+            if cached_chars is None:
+                tool_dict = convert_to_openai_tool(tool)
+                cached_chars = len(json.dumps(tool_dict))
+                tool.__dict__["_openai_function_chars"] = cached_chars
+            tools_chars += cached_chars
         token_count += math.ceil(tools_chars / chars_per_token)

     for message in converted_messages:
diff --git a/libs/core/langchain_core/tools/base.py b/libs/core/langchain_core/tools/base.py
index 7f57037802e..4c197d0f5f8 100644
--- a/libs/core/langchain_core/tools/base.py
+++ b/libs/core/langchain_core/tools/base.py
@@ -561,6 +561,7 @@ class ChildTool(BaseTool):
         self.__dict__.pop("tool_call_schema", None)
         self.__dict__.pop("args", None)
         self.__dict__.pop("_openai_function_dict", None)
+        self.__dict__.pop("_openai_function_chars", None)

     @property
     def is_single_input(self) -> bool:
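
Reviewer note (not part of the commit): a minimal sketch of the intended caching
behavior, assuming the patched count_tokens_approximately signature (its tools
keyword) and a hypothetical @tool-decorated function named add; the private
cache key _openai_function_chars is inspected here only to illustrate the
invalidation path described in the commit message, not as a public API.

    from langchain_core.messages import HumanMessage
    from langchain_core.messages.utils import count_tokens_approximately
    from langchain_core.tools import tool

    @tool
    def add(a: int, b: int) -> int:
        """Add two integers."""
        return a + b

    messages = [HumanMessage("hi")]

    # First call serializes the tool's OpenAI function dict once and stashes
    # the char count on the tool instance.
    count_tokens_approximately(messages, tools=[add])
    print(add.__dict__.get("_openai_function_chars"))  # some positive int

    # Subsequent calls reuse the cached count; no repeated json.dumps.
    count_tokens_approximately(messages, tools=[add])

    # Per the commit message, mutating a schema-affecting field pops the
    # cached keys via BaseTool.__setattr__, so the next call recomputes.
    add.description = "Add two numbers together."
    print("_openai_function_chars" in add.__dict__)  # expected: False
    count_tokens_approximately(messages, tools=[add])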