From 281b6aa61a8d7c6be3880d9ecbbbd72f7f00e2c1 Mon Sep 17 00:00:00 2001
From: Alex Lyn
Date: Sun, 14 Jun 2026 00:50:14 +0800
Subject: [PATCH] runtime-rs: Add hotunplug_block_device for block device hot removal

Implement QMP-level block device hot-unplug by issuing device_del to
remove the frontend device and blockdev_del to remove the backend
blockdev node. For virtio-blk-ccw on s390x, the CCW subchannel slot is
also released.

Since QMP device_del is asynchronous and only initiates the removal
request, introduce wait_for_device_deleted() to poll for the
DEVICE_DELETED event before tearing down the backend. This prevents
blockdev_del from failing with "Node is still in use".

If blockdev_del fails, the error is logged but CCW cleanup still
proceeds before the error is propagated, ensuring consistent subchannel
state. A failure to release the CCW subchannel slot is itself logged
rather than silently ignored.

Signed-off-by: Alex Lyn
---
 .../crates/hypervisor/src/qemu/qmp.rs | 163 ++++++++++++++++++
 1 file changed, 163 insertions(+)

diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs b/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs
index 456c73c0b1..8595d630d6 100644
--- a/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs
+++ b/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs
@@ -39,6 +39,8 @@ const DEFAULT_QMP_INIT_READ_TIMEOUT: u64 = 5000;
 const DEFAULT_QMP_CONNECT_DEADLINE_MS: u64 = 50000;
 const DEFAULT_QMP_RETRY_SLEEP_MS: u64 = 50;
 
+const DEVICE_DELETED_TIMEOUT: Duration = Duration::from_secs(10);
+
 pub struct Qmp {
     qmp: qapi::Qmp<qapi::Stream<BufReader<UnixStream>, UnixStream>>,
 
@@ -667,6 +669,76 @@ impl Qmp {
         Ok(())
     }
 
+    /// Wait for QEMU to emit `DEVICE_DELETED` for `device_id`.
+    ///
+    /// Polls roughly every 100ms: each round issues a `nop()` (presumably so
+    /// that pending events are read off the socket; TODO confirm) and then
+    /// scans `events()` for a matching `DEVICE_DELETED`. The socket read
+    /// timeout is raised to `timeout` for the duration of the wait and reset
+    /// to `DEFAULT_QMP_READ_TIMEOUT` afterwards.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the read timeout cannot be set or if no matching event is seen
+    /// before `timeout` elapses.
+    fn wait_for_device_deleted(&mut self, device_id: &str, timeout: Duration) -> Result<()> {
+        const POLL_INTERVAL: Duration = Duration::from_millis(100);
+        let deadline = Instant::now() + timeout;
+
+        self.qmp
+            .inner_mut()
+            .get_mut_write()
+            .set_read_timeout(Some(timeout))?;
+
+        let result = loop {
+            if let Err(e) = self.qmp.nop() {
+                warn!(
+                    sl!(),
+                    "The QMP nop() failed for {}: {:?}",
+                    device_id,
+                    e
+                );
+            }
+
+            let found = self.qmp.events().any(|event| {
+                matches!(event, qapi_qmp::Event::DEVICE_DELETED { ref data, .. }
+                    if data.device.as_deref() == Some(device_id))
+            });
+            if found {
+                info!(
+                    sl!(),
+                    "The QMP received DEVICE_DELETED event for {}",
+                    device_id
+                );
+                break Ok(());
+            }
+
+            let now = Instant::now();
+            if now >= deadline {
+                break Err(anyhow!(
+                    "timed out ({:?}) waiting for DEVICE_DELETED event for {}",
+                    timeout,
+                    device_id
+                ));
+            }
+            thread::sleep(POLL_INTERVAL.min(deadline - now));
+        };
+
+        // Reset the default read timeout for subsequent QMP operations.
+        // Failure here is non-fatal — a stale timeout only affects the next
+        // QMP read, not the already-completed device removal.
+        if let Err(e) = self.qmp.inner_mut().get_mut_write().set_read_timeout(Some(
+            Duration::from_millis(DEFAULT_QMP_READ_TIMEOUT),
+        )) {
+            warn!(
+                sl!(),
+                "Failed to reset read timeout: {:?}", e
+            );
+        }
+
+        result
+    }
+
     /// Hotplug block device:
     /// {
     ///     "execute": "blockdev-add",
@@ -975,6 +1047,97 @@ impl Qmp {
         }
     }
 
+    /// Hotunplug block device:
+    /// {
+    ///     "execute": "device_del",
+    ///     "arguments": { "id": NODE_NAME }
+    /// }
+    /// (then wait for the DEVICE_DELETED event)
+    /// {
+    ///     "execute": "blockdev-del",
+    ///     "arguments": { "node-name": NODE_NAME }
+    /// }
+    ///
+    /// The name derived from `index` is used both as the frontend device id and
+    /// as the backend node name. For `VIRTIO_BLK_CCW`, the CCW subchannel entry
+    /// is removed whether or not the QMP steps succeeded.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first error from `device_del`, the `DEVICE_DELETED` wait or
+    /// `blockdev_del`. A failure to remove the CCW subchannel entry is only
+    /// logged.
+    // NOTE(review): no caller is added by this change -- presumably a follow-up
+    // wires it in; drop the `allow` once it has one.
+    #[allow(dead_code)]
+    pub fn hotunplug_block_device(
+        &mut self,
+        block_driver: &str,
+        index: u64,
+    ) -> Result<()> {
+        let node_name = block_node_name(index);
+
+        let result = (|| -> Result<()> {
+            // Remove the frontend device (virtio-blk-pci / scsi-hd / virtio-blk-ccw).
+            self.qmp
+                .execute(&qmp::device_del {
+                    id: node_name.clone(),
+                })
+                .map_err(|e| anyhow!("device_del for block device {}: {:?}", node_name, e))?;
+
+            // device_del is asynchronous — wait for the guest to acknowledge removal
+            // before tearing down the backend, otherwise blockdev_del may fail with
+            // "Node is still in use".
+            self.wait_for_device_deleted(&node_name, DEVICE_DELETED_TIMEOUT)
+                .context("hotunplug_block_device(): waiting for DEVICE_DELETED")?;
+
+            // Remove the blockdev backend node.
+            self.qmp
+                .execute(&qapi_qmp::blockdev_del {
+                    node_name: node_name.clone(),
+                })
+                .map_err(|e| {
+                    anyhow!("blockdev_del for block device {}: {:?}", node_name, e)
+                })?;
+
+            Ok(())
+        })();
+
+        if let Err(ref e) = result {
+            warn!(
+                sl!(),
+                "hotunplug_block_device(): failed for {}: {:?}",
+                node_name,
+                e
+            );
+        }
+
+        // Release the CCW subchannel entry (s390x) on all paths. This is best
+        // effort: a failure is logged rather than returned so that it cannot mask
+        // the QMP error captured above.
+        if block_driver == VIRTIO_BLK_CCW {
+            if let Some(ref mut subchannel) = self.ccw_subchannel {
+                if let Err(e) = subchannel.remove_device(&node_name) {
+                    warn!(
+                        sl!(),
+                        "hotunplug_block_device(): failed to release CCW subchannel for {}: {:?}",
+                        node_name,
+                        e
+                    );
+                }
+            }
+        }
+
+        result?;
+
+        info!(
+            sl!(),
+            "hotunplug_block_device(): successfully removed {}", node_name
+        );
+
+        Ok(())
+    }
+
     pub fn hotplug_vfio_device(
         &mut self,
         hostdev_id: &str,