From dfaa3be85a61b2809a0e78c645754c38326a00d8 Mon Sep 17 00:00:00 2001 From: Alex Lyn Date: Fri, 1 Aug 2025 16:44:26 +0800 Subject: [PATCH] runtime-rs: Add idempotency to hotplug block device operations Due to the lack of atomicity in the operation, a partial failure can lead to an inconsistent QEMU state, which pollutes subsequent operations. This can easily trigger a "Duplicate nodes" error. To prevent this, we should query the state before performing the operation. We should validate the existing state and ensure idempotency: making the function idempotent allows it to be safely retried. Fixes #11649 Signed-off-by: Alex Lyn --- .../crates/hypervisor/src/qemu/qmp.rs | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs b/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs index 8683d61428..4952629641 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs @@ -553,6 +553,55 @@ impl Qmp { // `blockdev-add` let node_name = format!("drive-{index}"); + // Pre-check block drive and block device with qapi + { + let node_exists = self + .qmp + .execute(&qapi_qmp::query_named_block_nodes { flat: Some(true) })? + .into_iter() + .any(|d| d.node_name == Some(node_name.clone())); + let device_exists = self + .qmp + .execute(&qapi_qmp::query_block {})? + .into_iter() + .any(|d| match d.inserted { + Some(node) => node.node_name == Some(node_name.clone()), + None => false, + }); + + if node_exists && device_exists { + if block_driver == VIRTIO_SCSI { + // Safely convert the u64 index to u16, ensuring it does not exceed `u16::MAX` (65535). 
+ let (scsi_id, lun) = get_scsi_id_lun(u16::try_from(index)?)?; + let scsi_addr = format!("{}:{}", scsi_id, lun); + + return Ok((None, Some(scsi_addr))); + } else { + let pci_path = self + .get_device_by_qdev_id(&node_name) + .context("get device by qdev_id failed")?; + info!( + sl!(), + "hotplug block device return pci path: {:?}", &pci_path + ); + + return Ok((Some(pci_path), None)); + } + } + + if node_exists && !device_exists { + warn!( + sl!(), + "Found orphaned backend node {:?}, do cleanup before retry.", &node_name + ); + self.qmp + .execute(&qapi_qmp::blockdev_del { + node_name: node_name.clone(), + }) + .ok(); + } + } + let create_base_options = || qapi_qmp::BlockdevOptionsBase { auto_read_only: None, cache: if is_direct.is_none() {