Merge pull request #13216 from Apokleos/hotunplug-blk

runtime-rs: Add support for hot-unplugging block devices
This commit is contained in:
Fabiano Fidêncio
2026-06-22 12:14:30 +02:00
committed by GitHub
2 changed files with 253 additions and 39 deletions

View File

@@ -925,10 +925,57 @@ impl QemuInner {
pub(crate) async fn remove_device(&mut self, device: DeviceType) -> Result<()> {
info!(sl!(), "QemuInner::remove_device() {} ", device);
Err(anyhow!(
"QemuInner::remove_device({}): Not yet implemented",
device
))
self.hotunplug_device(&device).await?;
self.devices.retain(|d| match (d, &device) {
(DeviceType::Block(a), DeviceType::Block(b)) => a.config.index != b.config.index,
(DeviceType::BlockModern(a), DeviceType::BlockModern(b)) => !std::sync::Arc::ptr_eq(a, b),
_ => true,
});
Ok(())
}
async fn hotunplug_device(&mut self, device: &DeviceType) -> Result<()> {
let qmp = match self.qmp {
Some(ref mut qmp) => qmp,
None => return Err(anyhow!("QMP not initialized")),
};
match device {
DeviceType::Block(ref block_device) => {
let block_driver = &self.config.blockdev_info.block_device_driver;
qmp.hotunplug_block_device(block_driver, block_device.config.index)
.context("hotunplug block device")?;
}
DeviceType::BlockModern(ref block_device) => {
let (index, driver) = {
let cfg = &block_device.lock().await.config;
(
cfg.index,
self.config.blockdev_info.block_device_driver.clone(),
)
};
qmp.hotunplug_block_device(&driver, index)
.context("hotunplug block device")?;
}
DeviceType::Network(_)
| DeviceType::Vfio(_)
| DeviceType::VfioModern(_)
| DeviceType::VhostUserBlk(_)
| DeviceType::VhostUserNetwork(_)
| DeviceType::ShareFs(_)
| DeviceType::HybridVsock(_)
| DeviceType::Vsock(_)
| DeviceType::Protection(_)
| DeviceType::PortDevice(_) => {
return Err(anyhow!(
"hotunplug for {} is currently unsupported",
device
));
}
}
Ok(())
}
async fn hotplug_device(&mut self, device: DeviceType) -> Result<DeviceType> {

View File

@@ -39,6 +39,8 @@ const DEFAULT_QMP_INIT_READ_TIMEOUT: u64 = 5000;
const DEFAULT_QMP_CONNECT_DEADLINE_MS: u64 = 50000;
const DEFAULT_QMP_RETRY_SLEEP_MS: u64 = 50;
const DEVICE_DELETED_TIMEOUT: Duration = Duration::from_secs(10);
pub struct Qmp {
qmp: qapi::Qmp<qapi::Stream<BufReader<UnixStream>, UnixStream>>,
@@ -637,6 +639,94 @@ impl Qmp {
Err(anyhow!("no target device found"))
}
/// Execute device_add for a block device. On failure, automatically
/// rolls back the blockdev node added earlier to avoid orphaned resources.
fn device_add_with_rollback(
&mut self,
node_name: &str,
bus: Option<String>,
driver: &str,
arguments: Dictionary,
) -> Result<()> {
if let Err(e) = self.qmp.execute(&qmp::device_add {
bus,
id: Some(node_name.to_owned()),
driver: driver.to_owned(),
arguments,
}) {
if let Err(e) = self.qmp.execute(&qapi_qmp::blockdev_del {
node_name: node_name.to_owned(),
}) {
warn!(
sl!(),
"device_add_with_rollback(): blockdev_del failed for {}: {:?}",
node_name,
e
);
}
return Err(anyhow!("device_add {:?}", e));
}
Ok(())
}
fn wait_for_device_deleted(&mut self, device_id: &str, timeout: Duration) -> Result<()> {
const POLL_INTERVAL: Duration = Duration::from_millis(100);
let deadline = Instant::now() + timeout;
self.qmp
.inner_mut()
.get_mut_write()
.set_read_timeout(Some(timeout))?;
let result = loop {
if let Err(e) = self.qmp.nop() {
warn!(
sl!(),
"The QMP nop() failed for {}: {:?}",
device_id,
e
);
}
let found = self.qmp.events().any(|event| {
matches!(event, qapi_qmp::Event::DEVICE_DELETED { ref data, .. }
if data.device.as_deref() == Some(device_id))
});
if found {
info!(
sl!(),
"The QMP received DEVICE_DELETED event for {}",
device_id
);
break Ok(());
}
let now = Instant::now();
if now >= deadline {
break Err(anyhow!(
"timed out ({:?}) waiting for DEVICE_DELETED event for {}",
timeout,
device_id
));
}
thread::sleep(POLL_INTERVAL.min(deadline - now));
};
// Reset the default read timeout for subsequent QMP operations.
// Failure here is non-fatal — a stale timeout only affects the next
// QMP read, not the already-completed device removal.
if let Err(e) = self.qmp.inner_mut().get_mut_write().set_read_timeout(Some(
Duration::from_millis(DEFAULT_QMP_READ_TIMEOUT),
)) {
warn!(
sl!(),
"Failed to reset read timeout: {:?}", e
);
}
result
}
/// Hotplug block device:
/// {
/// "execute": "blockdev-add",
@@ -691,7 +781,7 @@ impl Qmp {
iothread: Option<&str>,
) -> Result<(Option<PciPath>, Option<String>)> {
// `blockdev-add`
let node_name = format!("drive-{index}");
let node_name = block_node_name(index);
let create_base_options = || qapi_qmp::BlockdevOptionsBase {
auto_read_only: None,
@@ -829,15 +919,12 @@ impl Qmp {
"scsi-hd",
blkdev_add_args
);
self.qmp
.execute(&qmp::device_add {
bus: Some("scsi0.0".to_string()),
id: Some(node_name.clone()),
driver: "scsi-hd".to_string(),
arguments: blkdev_add_args,
})
.map_err(|e| anyhow!("device_add {:?}", e))
.map(|_| ())?;
self.device_add_with_rollback(
&node_name,
Some("scsi0.0".to_string()),
"scsi-hd",
blkdev_add_args,
)?;
info!(
sl!(),
@@ -846,13 +933,29 @@ impl Qmp {
Ok((None, Some(scsi_addr)))
} else if block_driver == VIRTIO_BLK_CCW {
let subchannel = self.ccw_subchannel.as_mut().ok_or_else(|| {
anyhow!("CCW subchannel not available for virtio-blk-ccw hotplug")
})?;
let subchannel = match self.ccw_subchannel.as_mut() {
Some(sub) => sub,
None => {
self.qmp.execute(&qapi_qmp::blockdev_del {
node_name: node_name.to_owned(),
})?;
let slot = subchannel
.add_device(&node_name)
.map_err(|e| anyhow!("CCW subchannel add_device failed: {:?}", e))?;
return Err(anyhow!(
"CCW subchannel not available for virtio-blk-ccw hotplug"
));
}
};
let slot = match subchannel.add_device(&node_name) {
Ok(s) => s,
Err(e) => {
self.qmp.execute(&qapi_qmp::blockdev_del {
node_name: node_name.to_owned(),
})?;
return Err(anyhow!("CCW subchannel add_device failed: {:?}", e));
}
};
let devno = subchannel.address_format_ccw(slot);
let ccw_addr = subchannel.address_format_ccw_for_virt_server(slot);
@@ -869,16 +972,17 @@ impl Qmp {
blkdev_add_args,
ccw_addr
);
let device_add_result = self.qmp.execute(&qmp::device_add {
bus: None,
id: Some(node_name.clone()),
driver: block_driver.to_string(),
arguments: blkdev_add_args,
});
if let Err(e) = device_add_result {
// Roll back CCW subchannel state if QMP device_add fails
let _ = subchannel.remove_device(&node_name);
return Err(anyhow!("device_add {:?}", e));
if let Err(e) = self.device_add_with_rollback(
&node_name,
None,
block_driver,
blkdev_add_args,
) {
if let Some(ref mut sub) = self.ccw_subchannel {
// Roll back CCW subchannel state if QMP device_add fails
let _ = sub.remove_device(&node_name);
}
return Err(e);
}
info!(
@@ -911,15 +1015,13 @@ impl Qmp {
block_driver,
blkdev_add_args
);
self.qmp
.execute(&qmp::device_add {
bus: Some(bus),
id: Some(node_name.clone()),
driver: block_driver.to_string(),
arguments: blkdev_add_args,
})
.map_err(|e| anyhow!("device_add {:?}", e))
.map(|_| ())?;
self.device_add_with_rollback(
&node_name,
Some(bus),
block_driver,
blkdev_add_args,
)?;
let pci_path = self
.get_device_by_qdev_id(&node_name)
@@ -933,6 +1035,66 @@ impl Qmp {
}
}
/// Hotunplug block device.
pub fn hotunplug_block_device(
&mut self,
block_driver: &str,
index: u64,
) -> Result<()> {
let node_name = block_node_name(index);
let result = (|| -> Result<()> {
// Remove the frontend device (virtio-blk-pci / scsi-hd / virtio-blk-ccw).
self.qmp
.execute(&qmp::device_del {
id: node_name.clone(),
})
.map_err(|e| anyhow!("device_del for block device {}: {:?}", node_name, e))?;
// device_del is asynchronous — wait for the guest to acknowledge removal
// before tearing down the backend, otherwise blockdev_del may fail with
// "Node is still in use".
self.wait_for_device_deleted(&node_name, DEVICE_DELETED_TIMEOUT)
.context("hotunplug_block_device(): waiting for DEVICE_DELETED")?;
// Remove the blockdev backend node.
self.qmp
.execute(&qapi_qmp::blockdev_del {
node_name: node_name.clone(),
})
.map_err(|e| {
anyhow!("blockdev_del for block device {}: {:?}", node_name, e)
})?;
Ok(())
})();
if let Err(ref e) = result {
warn!(
sl!(),
"hotunplug_block_device(): failed for {}, cleaning up CCW state: {:?}",
node_name,
e
);
}
// Clean up CCW subchannel state (s390x) on all paths.
if block_driver == VIRTIO_BLK_CCW {
if let Some(ref mut subchannel) = self.ccw_subchannel {
let _ = subchannel.remove_device(&node_name);
}
}
result?;
info!(
sl!(),
"hotunplug_block_device(): successfully removed {}", node_name
);
Ok(())
}
pub fn hotplug_vfio_device(
&mut self,
hostdev_id: &str,
@@ -1128,3 +1290,8 @@ pub fn get_qmp_socket_path(sid: &str) -> String {
QMP_SOCKET_FILE.to_string()
}
}
/// Generate a blockdev node name based on the given index.
fn block_node_name(index: u64) -> String {
format!("drive-{index}")
}