From feeb5d8ecc776d053d9bae3cfde0bfee6ff4a65d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Fri, 22 May 2026 14:48:42 +0200 Subject: [PATCH] runtime-rs: Fix vCPU pinning race with backoff retry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QEMU can report fewer vCPU threads during early startup, causing partial affinity setup. Let's retry with exponential backoff until the expected thread count is visible, then continue with best-effort pinning if the window is exhausted. Signed-off-by: Fabiano Fidêncio --- .../resource/src/cgroups/resource_inner.rs | 48 ++++++++++++++++--- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/src/runtime-rs/crates/resource/src/cgroups/resource_inner.rs b/src/runtime-rs/crates/resource/src/cgroups/resource_inner.rs index 7fbcdb2e06..d72dc73efe 100644 --- a/src/runtime-rs/crates/resource/src/cgroups/resource_inner.rs +++ b/src/runtime-rs/crates/resource/src/cgroups/resource_inner.rs @@ -7,6 +7,7 @@ use std::collections::{HashMap, HashSet}; use std::process; use std::str::FromStr; +use std::time::Duration; use anyhow::{anyhow, Context, Result}; use cgroups_rs::manager::is_systemd_cgroup; @@ -16,6 +17,7 @@ use kata_types::cpu::CpuSet; use nix::sched::{sched_setaffinity, CpuSet as NixCpuSet}; use nix::unistd::Pid; use oci_spec::runtime::{LinuxCpu, LinuxCpuBuilder, LinuxResources, LinuxResourcesBuilder}; +use tokio::time::sleep; use crate::cgroups::utils::get_tgid_from_pid; use crate::cgroups::CgroupConfig; @@ -187,12 +189,46 @@ impl CgroupsResourceInner { let needs_thread_ids = self.overhead_cgroup.is_some() || self.enable_vcpus_pinning; let thread_ids = if needs_thread_ids { - Some( - hypervisor - .get_thread_ids() - .await - .context("get vCPU thread IDs")?, - ) + let mut tids = hypervisor + .get_thread_ids() + .await + .context("get vCPU thread IDs")?; + + // QEMU may not have spawned all vCPU threads yet. 
Retry with + // exponential backoff until we see the expected count. + let expected = hypervisor.hypervisor_config().await.cpu_info.default_vcpus.ceil() as usize; + if expected > 0 && tids.vcpus.len() < expected { + const MAX_ATTEMPTS: u32 = 10; + let mut backoff = Duration::from_millis(50); + for attempt in 2..=MAX_ATTEMPTS { + if tids.vcpus.len() >= expected { + break; + } + info!( + sl!(), + "waiting for all vCPU threads: have {}, want {}, attempt {}", + tids.vcpus.len(), + expected, + attempt + ); + sleep(backoff).await; + backoff *= 2; + tids = hypervisor + .get_thread_ids() + .await + .context("get vCPU thread IDs (retry)")?; + } + if tids.vcpus.len() < expected { + warn!( + sl!(), + "not all vCPU threads available after retries: have {}, want {}; pinning available ones", + tids.vcpus.len(), + expected + ); + } + } + + Some(tids) } else { None };