From 96ff12fdabc7c4246776c065aacf6ff04aec0882 Mon Sep 17 00:00:00 2001 From: Anastassios Nanos Date: Thu, 2 Apr 2026 05:20:15 +0000 Subject: [PATCH] runtime-rs: Split FC spawn sequence To account for network hooks and the lack of device hotplugging on FC, we split the spawn process into start and boot. This way, the VMM is already up, but the VM sandbox is not -- we can add devices and then we can kick off the VM with InstanceStart. Signed-off-by: Anastassios Nanos --- .../src/firecracker/inner_hypervisor.rs | 21 ++++++++--- .../crates/hypervisor/src/firecracker/mod.rs | 5 +++ src/runtime-rs/crates/hypervisor/src/lib.rs | 17 +++++++++ .../runtimes/virt_container/src/sandbox.rs | 37 ++++++++++--------- 4 files changed, 57 insertions(+), 23 deletions(-) diff --git a/src/runtime-rs/crates/hypervisor/src/firecracker/inner_hypervisor.rs b/src/runtime-rs/crates/hypervisor/src/firecracker/inner_hypervisor.rs index aff7152005..e7b4baf237 100644 --- a/src/runtime-rs/crates/hypervisor/src/firecracker/inner_hypervisor.rs +++ b/src/runtime-rs/crates/hypervisor/src/firecracker/inner_hypervisor.rs @@ -76,16 +76,27 @@ impl FcInner { } pub(crate) async fn start_vm(&mut self, _timeout: i32) -> Result<()> { - debug!(sl(), "Starting sandbox"); + // For Firecracker, the VMM process was already started in prepare_vm. + // Network interfaces must be configured before InstanceStart, but + // OCI hooks (which create the container veth via CNI) have not run + // yet. Defer the network flush and InstanceStart to boot_vm(), which + // sandbox.rs calls after the hooks + network rescan. + debug!(sl(), "FC start_vm: VMM already running; deferring InstanceStart to boot_vm"); + Ok(()) + } - // Flush all buffered network devices before sending InstanceStart. - // FC rejects PUT /network-interfaces once the VM is running, so network - // interfaces must be configured here, immediately before the start action. 
+ pub(crate) async fn boot_vm(&mut self) -> Result<()> { + debug!(sl(), "FC boot_vm: flushing network devices and sending InstanceStart"); + + // Flush all buffered network devices. These were populated by + // add_device(Network) after the OCI hooks ran and the netns was + // rescanned by sandbox.rs. FC rejects PUT /network-interfaces once + // the VM is running, so this must happen before InstanceStart. let net_devices = std::mem::take(&mut self.pending_net_devices); for (config, device_id) in net_devices { self.add_net_device(&config, device_id) .await - .context("configure network interface before start")?; + .context("configure network interface before InstanceStart")?; } let body: String = serde_json::json!({ diff --git a/src/runtime-rs/crates/hypervisor/src/firecracker/mod.rs b/src/runtime-rs/crates/hypervisor/src/firecracker/mod.rs index 05fd0c57cb..b2a73444a8 100644 --- a/src/runtime-rs/crates/hypervisor/src/firecracker/mod.rs +++ b/src/runtime-rs/crates/hypervisor/src/firecracker/mod.rs @@ -75,6 +75,11 @@ impl Hypervisor for Firecracker { inner.start_vm(timeout).await } + async fn boot_vm(&self) -> Result<()> { + let mut inner = self.inner.write().await; + inner.boot_vm().await + } + async fn stop_vm(&self) -> Result<()> { let mut inner = self.inner.write().await; inner.stop_vm().await diff --git a/src/runtime-rs/crates/hypervisor/src/lib.rs b/src/runtime-rs/crates/hypervisor/src/lib.rs index c1bd48f590..357b2efaa6 100644 --- a/src/runtime-rs/crates/hypervisor/src/lib.rs +++ b/src/runtime-rs/crates/hypervisor/src/lib.rs @@ -106,6 +106,23 @@ pub trait Hypervisor: std::fmt::Debug + Send + Sync { selinux_label: Option, ) -> Result<()>; async fn start_vm(&self, timeout: i32) -> Result<()>; + + /// Finalize VM boot after OCI hooks and network setup have run. + /// + /// For hypervisors that require all devices (including network) to be + /// registered before the guest boots (e.g. 
Firecracker, which has no + /// hotplug), `start_vm` only starts the VMM process, while `boot_vm` + /// flushes the device queue and issues the actual boot command + /// (InstanceStart for FC). For hypervisors that start the guest + /// immediately in `start_vm` (QEMU, dragonball, cloud-hypervisor), the + /// default no-op implementation is sufficient. + /// + /// sandbox.rs calls this after OCI hooks and the post-hooks network + /// rescan, but before connecting to the kata-agent. + async fn boot_vm(&self) -> Result<()> { + Ok(()) + } + async fn stop_vm(&self) -> Result<()>; async fn wait_vm(&self) -> Result; async fn pause_vm(&self) -> Result<()>; diff --git a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs index 592a033c13..68a181384e 100644 --- a/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs +++ b/src/runtime-rs/crates/runtimes/virt_container/src/sandbox.rs @@ -611,14 +611,10 @@ impl Sandbox for VirtSandbox { .await .context("set up device before start vm")?; + // start vm + self.hypervisor.start_vm(10_000).await.context("start vm")?; + // execute pre-start hook functions, including Prestart Hooks and CreateRuntime Hooks - // - // These must run BEFORE start_vm so that: - // (a) createRuntime hooks (e.g. nerdctl's CNI hook) can create the veth pair - // in the container netns while the VMM process (already started by - // prepare_vm and placed in the netns) is still pre-InstanceStart, and - // (b) hypervisors that do not support network-interface hotplug (e.g. - // Firecracker) can configure the interface before InstanceStart. let (prestart_hooks, create_runtime_hooks) = if let Some(hooks) = sandbox_config.hooks.as_ref() { ( @@ -636,15 +632,12 @@ impl Sandbox for VirtSandbox { ) .await?; - // Rescan the netns and update the network configuration before start_vm: - // 1. 
When network_created==true the veth is set up by the createRuntime hook - // above; we must scan now so the network device lands in pending_net_devices - // before InstanceStart (required for FC which has no hotplug). - // 2. When there are pre-start hooks the network config may have changed. - // 3. Do not scan if disable_new_netns is set. + // 1. if there are pre-start hook functions, network config might have been changed. + // We need to rescan the netns to handle the change. + // 2. Do not scan the netns if we want no network for the VM. + // TODO In case of vm factory, scan the netns to hotplug interfaces after the VM is started. let config = self.resource_manager.config().await; - if (sandbox_config.network_env.network_created - || self.has_prestart_hooks(&prestart_hooks, &create_runtime_hooks)) + if self.has_prestart_hooks(&prestart_hooks, &create_runtime_hooks) && !config.runtime.disable_new_netns && !dan_config_path(&config, &self.sid).exists() { @@ -663,12 +656,20 @@ impl Sandbox for VirtSandbox { self.resource_manager .handle_network(network_resource) .await - .context("set up network before start vm")?; + .context("set up device after start vm")?; } } - // start vm - self.hypervisor.start_vm(10_000).await.context("start vm")?; + // Give the hypervisor a chance to finalize boot now that OCI hooks and + // the post-hooks network rescan have completed. For hypervisors that + // require all devices (including network) to be registered before the + // guest boots (e.g. Firecracker), start_vm defers the actual boot + // command to this call. For hypervisors that boot the guest in + // start_vm (QEMU, dragonball, cloud-hypervisor), this is a no-op. + self.hypervisor + .boot_vm() + .await + .context("boot vm")?; info!(sl!(), "start vm"); // connect agent