diff --git a/libs/langchain_v1/tests/benchmarks/test_create_agent.py b/libs/langchain_v1/tests/benchmarks/test_create_agent.py index 8163e25250a..c46632ff069 100644 --- a/libs/langchain_v1/tests/benchmarks/test_create_agent.py +++ b/libs/langchain_v1/tests/benchmarks/test_create_agent.py @@ -114,6 +114,35 @@ def structured_tool_4(rows: int, cols: int, fill: float) -> list[list[float]]: return [[fill] * cols for _ in range(rows)] +class CoordinateSchema(BaseModel): + lat: float = Field(description="Latitude") + lon: float = Field(description="Longitude") + + +class LocationSchema(BaseModel): + name: str = Field(description="Location name") + coordinate: CoordinateSchema = Field(description="GPS coordinate") + altitude_m: float = Field(default=0.0, description="Altitude in meters") + + +class RouteSchema(BaseModel): + origin: LocationSchema = Field(description="Starting location") + destination: LocationSchema = Field(description="Ending location") + waypoints: list[LocationSchema] = Field(default_factory=list, description="Intermediate stops") + max_distance_km: float = Field(default=1000.0, description="Maximum route distance") + + +@tool(args_schema=RouteSchema) +def deep_nested_tool( + origin: LocationSchema, + destination: LocationSchema, + waypoints: list[LocationSchema], + max_distance_km: float, +) -> dict[str, Any]: + """Plan a route between locations with deep nested schema.""" + return {"origin": origin.name, "destination": destination.name} + + @tool def complex_tool_1( name: str, @@ -187,6 +216,7 @@ LARGE_TOOLS = [ structured_tool_2, structured_tool_3, structured_tool_4, + deep_nested_tool, complex_tool_1, complex_tool_2, complex_tool_3, @@ -224,47 +254,48 @@ def test_create_agent_medium_tools(benchmark: BenchmarkFixture) -> None: @pytest.mark.benchmark def test_create_agent_large_tools(benchmark: BenchmarkFixture) -> None: - """14 tools including complex nested schemas.""" + """15 tools including complex nested schemas.""" benchmark(lambda: 
create_agent(model=_make_model(), tools=LARGE_TOOLS)) @pytest.mark.benchmark def test_create_agent_large_tools_with_middleware(benchmark: BenchmarkFixture) -> None: - """14 tools + full middleware stack.""" - middleware: Sequence[AgentMiddleware[Any, Any]] = ( - TodoListMiddleware(), - ToolRetryMiddleware(), - ModelRetryMiddleware(), - ) - benchmark( - lambda: create_agent( + """15 tools + full middleware stack.""" + def run() -> None: + middleware: Sequence[AgentMiddleware[Any, Any]] = ( + TodoListMiddleware(), + ToolRetryMiddleware(), + ModelRetryMiddleware(), + ) + create_agent( model=_make_model(), tools=LARGE_TOOLS, middleware=middleware, ) - ) + + benchmark(run) @pytest.mark.benchmark def test_tool_call_schema_repeated_access(benchmark: BenchmarkFixture) -> None: - """Measure cost of repeated .tool_call_schema access on a complex tool.""" + """Measure cost of repeated .tool_call_schema access (10 accesses per iteration).""" t = structured_tool_1 def access_schema_10x() -> None: for _ in range(10): - _ = t.tool_call_schema + t.tool_call_schema benchmark(access_schema_10x) @pytest.mark.benchmark def test_tool_args_repeated_access(benchmark: BenchmarkFixture) -> None: - """Measure cost of repeated .args access on a complex tool.""" + """Measure cost of repeated .args access on a complex tool (10 accesses per iteration).""" t = structured_tool_1 def access_args_10x() -> None: for _ in range(10): - _ = t.args + t.args benchmark(access_args_10x) @@ -272,12 +303,15 @@ def test_tool_args_repeated_access(benchmark: BenchmarkFixture) -> None: @pytest.mark.benchmark def test_create_agent_instantiation_with_middleware(benchmark: BenchmarkFixture) -> None: """Baseline with middleware, no tools.""" - middleware: Sequence[AgentMiddleware[Any, Any]] = ( - TodoListMiddleware(), - ToolRetryMiddleware(), - ModelRetryMiddleware(), - ) - benchmark(lambda: create_agent(model=_make_model(), middleware=middleware)) + def run() -> None: + middleware: 
Sequence[AgentMiddleware[Any, Any]] = ( + TodoListMiddleware(), + ToolRetryMiddleware(), + ModelRetryMiddleware(), + ) + create_agent(model=_make_model(), middleware=middleware) + + benchmark(run) # --------------------------------------------------------------------------- @@ -285,11 +319,19 @@ def test_create_agent_instantiation_with_middleware(benchmark: BenchmarkFixture) # --------------------------------------------------------------------------- +@pytest.mark.benchmark def test_create_agent_large_tools_memory() -> None: - """Record peak memory for large-tools agent creation. Not a perf benchmark.""" + """Observe peak memory for large-tools agent creation. + + This is not a hard assertion — it records the tracemalloc peak for the + memory allocated *during* create_agent. Run before and after optimization + passes to track improvement. Update the printed baseline comment below + when the number changes significantly. + """ tracemalloc.start() create_agent(model=_make_model(), tools=LARGE_TOOLS) _, peak = tracemalloc.get_traced_memory() tracemalloc.stop() - # Soft assertion: 50 MB is a generous ceiling for a single agent instantiation. - assert peak < 50 * 1024 * 1024, f"Peak memory {peak / 1024 / 1024:.1f} MB exceeded 50 MB" + peak_kb = peak / 1024 + # Baseline (pre-optimization): record the value printed on the first run here. + print(f"\nPeak memory during create_agent (15 tools): {peak_kb:.1f} KB")