perf(core): cache _format_tool_to_openai_function per tool instance

Stash the OpenAI function description dict on the BaseTool instance under
`tool.__dict__["_openai_function_dict"]`. `BaseTool.__setattr__` already pops
`tool_call_schema` and `args` whenever `args_schema`, `description`, or `name`
changes; extend that invalidation set with the new key so the cache follows
the same lifecycle as the existing schema caches.

Previously, every call to `convert_to_openai_tool(tool)` re-ran
`schema.model_json_schema()` on the cached `tool_call_schema` pydantic model,
rebuilding the full JSON-schema tree on every model invocation. The
summarization middleware's `count_tokens_approximately` (called twice per
model call) plus the prompt-caching middleware's `bind_tools` meant three
fresh schema generations per model call. At 15-ish tools and 500 model calls
in a 100-turn agent run, that is roughly 3 × 15 × 500 = 22,500 schema
generations: tens of seconds of pydantic work that is identical every time.

With this cache, the first call pays the schema-generation cost once per
tool; every subsequent call is a dict lookup.
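As a rough sanity check on the magnitude, a micro-benchmark sketch (the
`SearchArgs` model is illustrative, and the 22,500 iteration count just
mirrors the arithmetic above; nothing here is measured in CI):

    import timeit

    from pydantic import BaseModel

    class SearchArgs(BaseModel):
        query: str
        max_results: int = 10

    # pydantic v2 regenerates the JSON schema on each model_json_schema() call
    regen = timeit.timeit(SearchArgs.model_json_schema, number=22_500)
    cache = {"_openai_function_dict": SearchArgs.model_json_schema()}
    lookup = timeit.timeit(lambda: cache.get("_openai_function_dict"), number=22_500)
    print(f"regenerate: {regen:.2f}s  lookup: {lookup:.4f}s")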

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
commit 645b077c30
parent 5f56a43753
Author: Sydney Runkle
Date:   2026-04-24 09:30:16 -04:00
2 changed files with 37 additions and 23 deletions

@@ -560,6 +560,7 @@ class ChildTool(BaseTool):
         if name in {"args_schema", "description", "name"}:
             self.__dict__.pop("tool_call_schema", None)
             self.__dict__.pop("args", None)
+            self.__dict__.pop("_openai_function_dict", None)
 
     @property
     def is_single_input(self) -> bool:

@@ -337,40 +337,53 @@ def _format_tool_to_openai_function(tool: BaseTool) -> FunctionDescription:
     Returns:
         The function description.
     """
+    # The result is cached on the tool instance under `_openai_function_dict`.
+    # `BaseTool.__setattr__` pops this key when `args_schema` / `description` /
+    # `name` are mutated (alongside the existing `tool_call_schema` and `args`
+    # caches), so the invalidation path is already wired up.
+    cached = tool.__dict__.get("_openai_function_dict")
+    if cached is not None:
+        return cached
     is_simple_oai_tool = (
         isinstance(tool, langchain_core.tools.simple.Tool) and not tool.args_schema
     )
     schema = tool.tool_call_schema
     if schema and not is_simple_oai_tool:
         if isinstance(schema, dict):
-            return _convert_json_schema_to_openai_function(
+            result = _convert_json_schema_to_openai_function(
                 schema, name=tool.name, description=tool.description
             )
-        if issubclass(schema, (BaseModel, BaseModelV1)):
-            return _convert_pydantic_to_openai_function(
+        elif issubclass(schema, (BaseModel, BaseModelV1)):
+            result = _convert_pydantic_to_openai_function(
                 schema, name=tool.name, description=tool.description
             )
-        error_msg = (
-            f"Unsupported tool call schema: {schema}. "
-            "Tool call schema must be a JSON schema dict or a Pydantic model."
-        )
-        raise ValueError(error_msg)
-    return {
-        "name": tool.name,
-        "description": tool.description,
-        "parameters": {
-            # This is a hack to get around the fact that some tools
-            # do not expose an args_schema, and expect an argument
-            # which is a string.
-            # And Open AI does not support an array type for the
-            # parameters.
-            "properties": {
-                "__arg1": {"title": "__arg1", "type": "string"},
-            },
-            "required": ["__arg1"],
-            "type": "object",
-        },
-    }
+        else:
+            error_msg = (
+                f"Unsupported tool call schema: {schema}. "
+                "Tool call schema must be a JSON schema dict or a Pydantic model."
+            )
+            raise ValueError(error_msg)
+    else:
+        result = {
+            "name": tool.name,
+            "description": tool.description,
+            "parameters": {
+                # This is a hack to get around the fact that some tools
+                # do not expose an args_schema, and expect an argument
+                # which is a string.
+                # And Open AI does not support an array type for the
+                # parameters.
+                "properties": {
+                    "__arg1": {"title": "__arg1", "type": "string"},
+                },
+                "required": ["__arg1"],
+                "type": "object",
+            },
+        }
+    tool.__dict__["_openai_function_dict"] = result
+    return result
 
 
 def convert_to_openai_function(
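
For reference, one way to observe the cache and its invalidation from user
code (a sketch that assumes the patched langchain_core; the `add` tool is
made up):

    from langchain_core.tools import tool
    from langchain_core.utils.function_calling import convert_to_openai_tool

    @tool
    def add(a: int, b: int) -> int:
        """Add two integers."""
        return a + b

    convert_to_openai_tool(add)
    assert "_openai_function_dict" in add.__dict__   # populated on first conversion
    add.description = "Sum two integers."            # mutation pops the cache
    assert "_openai_function_dict" not in add.__dict__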