diff --git a/tools/packaging/kata-deploy/binary/Cargo.toml b/tools/packaging/kata-deploy/binary/Cargo.toml index e68ff24c58..987bdfbb6a 100644 --- a/tools/packaging/kata-deploy/binary/Cargo.toml +++ b/tools/packaging/kata-deploy/binary/Cargo.toml @@ -38,6 +38,7 @@ tokio = { workspace = true, features = [ "rt-multi-thread", "macros", "signal", + "sync", "time", "net", "io-util", diff --git a/tools/packaging/kata-deploy/binary/src/main.rs b/tools/packaging/kata-deploy/binary/src/main.rs index b078a88757..11137d9821 100644 --- a/tools/packaging/kata-deploy/binary/src/main.rs +++ b/tools/packaging/kata-deploy/binary/src/main.rs @@ -28,7 +28,21 @@ enum Action { Reset, } -#[tokio::main] +// Cap the tokio runtime to a small fixed number of worker threads. The default +// multi-thread runtime allocates `num_cpus()` workers (each with a ~2 MiB +// stack), which on a 200+ vCPU GPU node is the dominant contributor to the +// DaemonSet pod's VmData reservation (~440 MiB). Two workers is plenty: +// +// - the install path is overwhelmingly I/O-bound, +// - it shells out to `nsenter ... systemctl restart ...` (synchronous, +// blocking calls that wedge the thread they run on for tens of seconds); +// a second worker keeps the health server able to answer kubelet probes +// within timeoutSeconds while the first is blocked. +// +// `current_thread` would be tighter still, but starves the health server the +// moment a host_systemctl call runs — the kubelet then fails the readiness +// probe and the pod is restarted before install can finish. +#[tokio::main(flavor = "multi_thread", worker_threads = 2)] async fn main() -> Result<()> { // Set log level based on DEBUG environment variable // This must be done before initializing the logger