From 72a38457f00ece16725c7308729654bfd5b39bb1 Mon Sep 17 00:00:00 2001 From: Fupan Li Date: Thu, 3 Jul 2025 11:54:26 +0800 Subject: [PATCH 1/2] dragonball: make the resize_vcpu api support sync Let dragonball's resize_vcpu api support synchronization, and only return after the hot-plug of the CPU is successfully executed in the guest kernel. This ensures that the subsequent device hot-plug operation can also proceed smoothly. Signed-off-by: Fupan Li --- src/dragonball/src/api/v1/vmm_action.rs | 13 ++++++++----- .../src/device_manager/vfio_dev_mgr/mod.rs | 2 +- src/dragonball/src/vcpu/vcpu_manager.rs | 8 +++++--- src/dragonball/src/vm/mod.rs | 2 +- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index e687bc1dfc..a0a070dd37 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -9,8 +9,9 @@ use std::fs::File; use std::sync::{Arc, Mutex}; -use crossbeam_channel::{unbounded, Receiver, Sender, TryRecvError}; +use crossbeam_channel::{Receiver, Sender, TryRecvError}; use log::{debug, error, info, warn}; +use std::sync::mpsc; use tracing::instrument; use crate::error::{Result, StartMicroVmError, StopMicrovmError}; @@ -284,7 +285,7 @@ pub enum VmmData { /// Return vfio device's slot number in guest. VfioDeviceData(Option), /// Sync Hotplug - SyncHotplug((Sender>, Receiver>)), + SyncHotplug((mpsc::Sender>, mpsc::Receiver>)), } /// Request data type used to communicate between the API and the VMM. @@ -900,7 +901,7 @@ impl VmmService { } })?; - let (sender, receiver) = unbounded(); + let (sender, receiver) = mpsc::channel(); // It is safe because we don't expect poison lock. 
let vfio_manager = vm.device_manager.vfio_manager.lock().unwrap(); @@ -965,15 +966,17 @@ impl VmmService { )); } + let (sender, revceiver) = mpsc::channel(); + #[cfg(feature = "dbs-upcall")] - vm.resize_vcpu(config, None).map_err(|e| { + vm.resize_vcpu(config, Some(sender.clone())).map_err(|e| { if let VcpuResizeError::UpcallServerNotReady = e { return VmmActionError::UpcallServerNotReady; } VmmActionError::ResizeVcpu(e) })?; - Ok(VmmData::Empty) + Ok(VmmData::SyncHotplug((sender, revceiver))) } #[cfg(feature = "virtio-mem")] diff --git a/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs b/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs index 893788c401..c3318ebb9a 100644 --- a/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs +++ b/src/dragonball/src/device_manager/vfio_dev_mgr/mod.rs @@ -16,9 +16,9 @@ use std::collections::HashMap; use std::ops::Deref; use std::os::fd::RawFd; use std::path::Path; +use std::sync::mpsc::Sender; use std::sync::{Arc, Weak}; -use crossbeam_channel::Sender; use dbs_device::resources::Resource::LegacyIrq; use dbs_device::resources::{DeviceResources, Resource, ResourceConstraint}; use dbs_device::DeviceIo; diff --git a/src/dragonball/src/vcpu/vcpu_manager.rs b/src/dragonball/src/vcpu/vcpu_manager.rs index 2d6f915981..4072d413ee 100644 --- a/src/dragonball/src/vcpu/vcpu_manager.rs +++ b/src/dragonball/src/vcpu/vcpu_manager.rs @@ -225,7 +225,7 @@ pub struct VcpuManager { vm_as: GuestAddressSpaceImpl, pub(crate) vm_fd: Arc, - action_sycn_tx: Option>, + action_sycn_tx: Option>>, vcpus_in_action: (VcpuAction, Vec), pub(crate) reset_event_fd: Option, @@ -756,7 +756,9 @@ impl VcpuManager { fn sync_action_finish(&mut self, got_error: bool) { if let Some(tx) = self.action_sycn_tx.take() { - if let Err(e) = tx.send(got_error) { + let result = if got_error { 0 } else { -1 }; + + if let Err(e) = tx.send(Some(result)) { debug!("cpu sync action send to closed channel {}", e); } } @@ -856,7 +858,7 @@ mod hotplug { pub fn resize_vcpu( &mut 
self, vcpu_count: u8, - sync_tx: Option>, + sync_tx: Option>>, ) -> std::result::Result<(), VcpuResizeError> { if self.get_vcpus_action() != VcpuAction::None { return Err(VcpuResizeError::VcpuIsHotplugging); diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index 37e9f408fb..7de8d52fb3 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -832,7 +832,7 @@ impl Vm { pub fn resize_vcpu( &mut self, config: VcpuResizeInfo, - sync_tx: Option>, + sync_tx: Option>>, ) -> std::result::Result<(), VcpuResizeError> { if self.upcall_client().is_none() { Err(VcpuResizeError::UpcallClientMissing) From fb1c35335a1ad0ed66094f4a5125df04a719eec1 Mon Sep 17 00:00:00 2001 From: Fupan Li Date: Thu, 3 Jul 2025 11:59:11 +0800 Subject: [PATCH 2/2] runtime-rs: make the resize_vcpu sync When hot plugging vcpu in dragonball hypervisor, use the synchronization interface and wait until the hot plug cpu is executed in the guest before returning. This ensures that the subsequent device hot plug will not conflict with the previous call. 
Signed-off-by: Fupan Li --- .../crates/hypervisor/src/dragonball/inner.rs | 10 ++++++++-- .../hypervisor/src/dragonball/vmm_instance.rs | 13 +++++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs index d22b33bfd1..bc71ee3a43 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner.rs @@ -28,7 +28,7 @@ use kata_types::{ }; use nix::mount::MsFlags; use persist::sandbox_persist::Persist; -use std::cmp::Ordering; +use std::{cmp::Ordering, time::Duration}; use std::{collections::HashSet, fs::create_dir_all}; use tokio::sync::mpsc; @@ -37,6 +37,9 @@ const DRAGONBALL_INITRD: &str = "initrd"; const DRAGONBALL_ROOT_FS: &str = "rootfs"; const BALLOON_DEVICE_ID: &str = "balloon0"; const MEM_DEVICE_ID: &str = "memmr0"; +/// default hotplug timeout +const DEFAULT_HOTPLUG_TIMEOUT: u64 = 250; + #[derive(Debug)] pub struct DragonballInner { /// sandbox id @@ -391,7 +394,10 @@ impl DragonballInner { vcpu_count: Some(new_vcpus as u8), }; self.vmm_instance - .resize_vcpu(&cpu_resize_info) + .resize_vcpu( + &cpu_resize_info, + Some(Duration::from_millis(DEFAULT_HOTPLUG_TIMEOUT)), + ) .context(format!( "failed to do_resize_vcpus on new_vcpus={:?}", new_vcpus diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs index 4108c8896d..3a73f80e3b 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/vmm_instance.rs @@ -9,6 +9,7 @@ use std::{ os::unix::{io::IntoRawFd, prelude::AsRawFd}, sync::{Arc, Mutex, RwLock}, thread, + time::Duration, }; use anyhow::{anyhow, Context, Result}; @@ -292,9 +293,17 @@ impl VmmInstance { Ok(()) } - pub fn resize_vcpu(&self, cfg: &VcpuResizeInfo) -> Result<()> { - 
self.handle_request(Request::Sync(VmmAction::ResizeVcpu(cfg.clone()))) + pub fn resize_vcpu(&self, cfg: &VcpuResizeInfo, timeout: Option<Duration>) -> Result<()> { + let vmmdata = self + .handle_request(Request::Sync(VmmAction::ResizeVcpu(cfg.clone()))) .with_context(|| format!("Failed to resize_vm(hotplug vcpu), cfg: {:?}", cfg))?; + + if let Some(timeout) = timeout { + if let VmmData::SyncHotplug((_, receiver)) = vmmdata { + let _ = receiver.recv_timeout(timeout)?; + } + } + Ok(()) }