mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-05-04 12:31:27 +00:00
runtime-rs: Wire VFIO cold-plug into QEMU inner
Implement add_device() and remove_device() support for DeviceType::VfioModern and DeviceType::BlockModern in the QEMU inner hypervisor layer. For cold-plug (before VM boot): VfioDeviceConfig/VfioDeviceGroup structs are constructed from the device's resolved PCI address, IOMMU group, and bus assignment, then appended to the QEMU command line via cmdline_generator. Block devices use VirtioBlkDevice with the modern config's sector size fields and are always cold-plugged onto the command line. Signed-off-by: Alex Lyn <alex.lyn@antgroup.com> Signed-off-by: Fabiano Fidêncio <ffidencio@nvidia.com>
This commit is contained in:
committed by
Fabiano Fidêncio
parent
884df2e36f
commit
37ec1b13f3
@@ -5,7 +5,10 @@
|
||||
|
||||
use super::cmdline_generator::{get_network_device, QemuCmdLine};
|
||||
use super::qmp::Qmp;
|
||||
use crate::device::driver::BlockDeviceFormat;
|
||||
use crate::device::pci_path::PciPath;
|
||||
use crate::device::topology::PCIePort;
|
||||
use crate::qemu::cmdline_generator::VfioDeviceConfig;
|
||||
use crate::qemu::qmp::get_qmp_socket_path;
|
||||
use crate::{
|
||||
device::driver::ProtectionDeviceConfig, hypervisor_persist::HypervisorState, selinux,
|
||||
@@ -32,7 +35,7 @@ use nix::unistd::{setgid, setuid, Gid, Uid};
|
||||
use persist::sandbox_persist::Persist;
|
||||
use qapi_qmp::MigrationStatus;
|
||||
use std::cmp::Ordering;
|
||||
use std::convert::TryInto;
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use std::path::Path;
|
||||
use std::process::Stdio;
|
||||
use std::time::Duration;
|
||||
@@ -190,24 +193,62 @@ impl QemuInner {
|
||||
},
|
||||
DeviceType::PortDevice(port_device) => {
|
||||
let port_type = port_device.config.port_type;
|
||||
let mem_reserve = port_device.config.memsz_reserve;
|
||||
let pref64_reserve = port_device.config.pref64_reserve;
|
||||
let devices_per_port = port_device.port_devices.clone();
|
||||
|
||||
match port_type {
|
||||
PCIePort::RootPort => cmdline.add_pcie_root_ports(
|
||||
devices_per_port,
|
||||
mem_reserve,
|
||||
pref64_reserve,
|
||||
)?,
|
||||
PCIePort::SwitchPort => cmdline.add_pcie_switch_ports(
|
||||
devices_per_port,
|
||||
mem_reserve,
|
||||
pref64_reserve,
|
||||
)?,
|
||||
PCIePort::RootPort => cmdline.add_pcie_root_ports(devices_per_port)?,
|
||||
PCIePort::SwitchPort => {
|
||||
cmdline.add_pcie_switch_ports(devices_per_port)?
|
||||
}
|
||||
_ => info!(sl!(), "no need to add {} ports", port_type),
|
||||
}
|
||||
}
|
||||
DeviceType::VfioModern(vfio_dev) => {
|
||||
// To avoid holding the lock for too long, we first snapshot the necessary VFIO parameters,
|
||||
// then release the lock before doing the coldplug via cmdline,
|
||||
// and finally re-acquire the lock to update the guest PCI path after coldplug.
|
||||
let (devices, bus_port_id) = {
|
||||
let vfio_device = vfio_dev.lock().await;
|
||||
let devices = vfio_device
|
||||
.device
|
||||
.iommu_group
|
||||
.as_ref()
|
||||
.map(|g| g.clone().devices)
|
||||
.unwrap_or_default();
|
||||
|
||||
(devices, vfio_device.config.bus_port_id.clone())
|
||||
};
|
||||
|
||||
// Cold plug devices
|
||||
for dev in devices.iter() {
|
||||
let host_bdf = dev.addr.to_string();
|
||||
|
||||
let vfio_cfg = VfioDeviceConfig::new(
|
||||
host_bdf,
|
||||
bus_port_id.1 as u16,
|
||||
bus_port_id.1 + 1,
|
||||
)
|
||||
.with_vfio_bus(bus_port_id.0.clone());
|
||||
|
||||
cmdline.add_pcie_vfio_device(vfio_cfg)?;
|
||||
}
|
||||
|
||||
// Write back with lock
|
||||
let pci_path = PciPath::try_from(format!("{:02x}/00", bus_port_id.1).as_str())?;
|
||||
|
||||
{
|
||||
let mut vfio_device = vfio_dev.lock().await;
|
||||
// Update the guest PCI path for the VFIO device after coldplug,
|
||||
// which will be used for device mapping into from Guest to Container Environment.
|
||||
vfio_device.config.guest_pci_path = Some(pci_path.clone());
|
||||
}
|
||||
|
||||
info!(
|
||||
sl!(),
|
||||
"Completed VFIOModern coldplug with returned guest pci path: {:?}",
|
||||
pci_path
|
||||
);
|
||||
}
|
||||
_ => info!(sl!(), "qemu cmdline: unsupported device: {:?}", device),
|
||||
}
|
||||
}
|
||||
@@ -818,7 +859,7 @@ impl QemuInner {
|
||||
let is_qemu_ready_to_hotplug = self.qmp.is_some();
|
||||
if is_qemu_ready_to_hotplug {
|
||||
// hypervisor is running already
|
||||
device = self.hotplug_device(device)?;
|
||||
device = self.hotplug_device(device).await?;
|
||||
} else {
|
||||
// store the device to coldplug it later, on hypervisor launch
|
||||
self.devices.push(device.clone());
|
||||
@@ -834,7 +875,7 @@ impl QemuInner {
|
||||
))
|
||||
}
|
||||
|
||||
fn hotplug_device(&mut self, device: DeviceType) -> Result<DeviceType> {
|
||||
async fn hotplug_device(&mut self, device: DeviceType) -> Result<DeviceType> {
|
||||
let qmp = match self.qmp {
|
||||
Some(ref mut qmp) => qmp,
|
||||
None => return Err(anyhow!("QMP not initialized")),
|
||||
@@ -905,7 +946,129 @@ impl QemuInner {
|
||||
|
||||
return Ok(DeviceType::Vfio(vfiodev));
|
||||
}
|
||||
_ => info!(sl!(), "hotplugging of {:#?} is unsupported", device),
|
||||
DeviceType::BlockModern(ref block_device) => {
|
||||
info!(sl!(), "Starting QMP hotplug for BlockModern device");
|
||||
|
||||
// First, snapshot parameters within the lock.
|
||||
// Do not hold the lock across the 'await' point of the hotplug operation to avoid blocking.
|
||||
let (
|
||||
index,
|
||||
path_on_host,
|
||||
aio,
|
||||
is_direct,
|
||||
is_readonly,
|
||||
no_drop,
|
||||
driver,
|
||||
logical_sector_size,
|
||||
physical_sector_size,
|
||||
) = {
|
||||
let cfg = &block_device.lock().await.config;
|
||||
(
|
||||
cfg.index,
|
||||
cfg.path_on_host.clone(),
|
||||
cfg.blkdev_aio.to_string(),
|
||||
Some(cfg.is_direct.unwrap_or(
|
||||
self.config.blockdev_info.block_device_cache_direct,
|
||||
)),
|
||||
cfg.is_readonly,
|
||||
cfg.no_drop,
|
||||
self.config.blockdev_info.block_device_driver.clone(),
|
||||
cfg.logical_sector_size,
|
||||
cfg.physical_sector_size,
|
||||
)
|
||||
};
|
||||
|
||||
// Second, execute the asynchronous hotplug without holding the lock.
|
||||
let (pci_path, scsi_addr) = qmp
|
||||
.hotplug_block_device(
|
||||
&driver,
|
||||
index,
|
||||
&path_on_host,
|
||||
&aio,
|
||||
is_direct,
|
||||
is_readonly,
|
||||
no_drop,
|
||||
logical_sector_size,
|
||||
physical_sector_size,
|
||||
&BlockDeviceFormat::default(),
|
||||
)
|
||||
.context("hotplug block device")?;
|
||||
|
||||
// Third, re-acquire the lock to write back results.
|
||||
{
|
||||
let mut dev = block_device.lock().await;
|
||||
let cfg = &mut dev.config;
|
||||
if let Some(p) = pci_path {
|
||||
cfg.pci_path = Some(p);
|
||||
}
|
||||
if let Some(s) = scsi_addr {
|
||||
cfg.scsi_addr = Some(s);
|
||||
}
|
||||
info!(sl!(), "Completed BlockModern hotplug: {:?}", &cfg);
|
||||
}
|
||||
}
|
||||
DeviceType::VfioModern(ref vfiodev) => {
|
||||
// Snapshot VFIO parameters inside the lock.
|
||||
let (hostdev_id, sysfs_path, address, driver_type, bus) = {
|
||||
let vfio_device = vfiodev.lock().await;
|
||||
let hostdev_id = vfio_device.device_id.clone();
|
||||
let device = &vfio_device.device;
|
||||
|
||||
// FIXME: The first device in the group might not be the actual device intended for passthrough.
|
||||
// Multi-function support is tracked via issue #11292.
|
||||
let primary_device = device
|
||||
.clone()
|
||||
.iommu_group
|
||||
.ok_or_else(|| anyhow!("IOMMU group missing for VFIO device"))?
|
||||
.primary;
|
||||
|
||||
info!(
|
||||
sl!(),
|
||||
"QMP hotplug VFIO primary_device address: {:?}", &primary_device.addr
|
||||
);
|
||||
|
||||
let sysfs_path = primary_device.sysfs_path.display().to_string();
|
||||
let driver_type = primary_device
|
||||
.driver
|
||||
.clone()
|
||||
.ok_or_else(|| anyhow!("Driver type missing for primary device"))?;
|
||||
let address = format!("{}", primary_device.addr);
|
||||
|
||||
(
|
||||
hostdev_id,
|
||||
sysfs_path,
|
||||
address,
|
||||
driver_type,
|
||||
vfio_device.config.bus_port_id.0.clone(),
|
||||
)
|
||||
};
|
||||
|
||||
// Execute hotplug outside the lock.
|
||||
let guest_pci_path = qmp.hotplug_vfio_device(
|
||||
&hostdev_id,
|
||||
&sysfs_path,
|
||||
&address,
|
||||
&driver_type,
|
||||
&bus,
|
||||
)?;
|
||||
|
||||
// Write the resulting Guest PCI Path back within the lock.
|
||||
{
|
||||
let mut vfio_device = vfiodev.lock().await;
|
||||
if let Some(p) = guest_pci_path {
|
||||
// Very important to write back the guest pci path for VFIO devices.
|
||||
vfio_device.config.guest_pci_path = Some(p);
|
||||
}
|
||||
info!(
|
||||
sl!(),
|
||||
"Completed VFIOModern hotplug for device ID: {}", hostdev_id
|
||||
);
|
||||
}
|
||||
}
|
||||
_ => info!(
|
||||
sl!(),
|
||||
"Hotplugging for {:#?} is currently unsupported", device
|
||||
),
|
||||
}
|
||||
Ok(device)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user