From a2bbbad711e62f86b48f9aec19d25e089417d5a0 Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Sat, 21 Oct 2023 05:37:33 -0700 Subject: [PATCH 1/3] runtime-rs: change hypervisor add_device trait to return device copy Block(virtio-blk) and vfio devices are currently not handled correctly by the agent as the agent is not provided with correct PCI paths for these devices. The PCI paths for these devices can be inferred from the PCI information provided by the hypervisor when the device is added. Hence changing the add_device trait function to return a device copy with PCI info potentially provided by the hypervisor. This can then be provided to the agent to correctly detect devices within the VM. This commit includes implementation for PCI info update for cloud-hypervisor for virtio-blk devices with stubs provided for other hypervisors. Removing Vsock from the DeviceType enum as Vsock currently does not implement the Device Trait, it has no attach and detach trait functions among others. Part of the reason is because these functions require Vsock to implement Clone trait as these functions need cloned copies to be passed down to the hypervisor. The change introduced for returning a device copy from the add_device hypervisor trait explicitly requires a device to implement Clone trait. Hence removing Vsock from the DeviceType enum for now, as its implementation is incomplete and not currently used. Note, one of the blockers for adding the Clone trait to Vsock is that it currently includes a file handle which cannot be cloned. For Clone and Device Traits to be implemented for Vsock, it requires an implementation change in the future for it to be cloneable. 
Fixes: #8283 Signed-off-by: Archana Shinde --- .../crates/hypervisor/ch-config/src/ch_api.rs | 2 +- .../crates/hypervisor/src/ch/inner_device.rs | 97 ++++++++++++------- .../crates/hypervisor/src/ch/mod.rs | 2 +- .../src/device/driver/virtio_blk.rs | 21 +++- .../crates/hypervisor/src/device/mod.rs | 5 +- .../hypervisor/src/dragonball/inner_device.rs | 3 - .../crates/hypervisor/src/dragonball/mod.rs | 7 +- src/runtime-rs/crates/hypervisor/src/lib.rs | 2 +- .../crates/hypervisor/src/qemu/inner.rs | 4 +- .../crates/hypervisor/src/qemu/mod.rs | 2 +- .../resource/src/share_fs/share_virtio_fs.rs | 11 ++- 11 files changed, 103 insertions(+), 53 deletions(-) diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs index 29438204be..d49439b825 100644 --- a/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs @@ -70,7 +70,7 @@ pub async fn cloud_hypervisor_vm_stop(mut socket: UnixStream) -> Result Result<()> { + pub(crate) async fn add_device(&mut self, device: DeviceType) -> Result { if self.state != VmmState::VmRunning { // If the VM is not running, add the device to the pending list to // be handled later. @@ -55,8 +57,8 @@ impl CloudHypervisorInner { // - Network details need to be saved for later application. 
// match device { - DeviceType::ShareFs(_) => self.pending_devices.insert(0, device), - DeviceType::Network(_) => self.pending_devices.insert(0, device), + DeviceType::ShareFs(_) => self.pending_devices.insert(0, device.clone()), + DeviceType::Network(_) => self.pending_devices.insert(0, device.clone()), _ => { debug!( sl!(), @@ -65,20 +67,18 @@ impl CloudHypervisorInner { } } - return Ok(()); + return Ok(device); } - self.handle_add_device(device).await?; - - Ok(()) + self.handle_add_device(device).await } - async fn handle_add_device(&mut self, device: DeviceType) -> Result<()> { + async fn handle_add_device(&mut self, device: DeviceType) -> Result { match device { - DeviceType::ShareFs(sharefs) => self.handle_share_fs_device(sharefs.config).await, - DeviceType::HybridVsock(hvsock) => self.handle_hvsock_device(&hvsock.config).await, + DeviceType::ShareFs(sharefs) => self.handle_share_fs_device(sharefs).await, + DeviceType::HybridVsock(hvsock) => self.handle_hvsock_device(hvsock).await, DeviceType::Block(block) => self.handle_block_device(block).await, - DeviceType::Vfio(vfiodev) => self.handle_vfio_device(&vfiodev).await, + DeviceType::Vfio(vfiodev) => self.handle_vfio_device(vfiodev).await, _ => Err(anyhow!("unhandled device: {:?}", device)), } } @@ -108,9 +108,13 @@ impl CloudHypervisorInner { } } - async fn handle_share_fs_device(&mut self, cfg: ShareFsDeviceConfig) -> Result<()> { - if cfg.fs_type != VIRTIO_FS { - return Err(anyhow!("cannot handle share fs type: {:?}", cfg.fs_type)); + async fn handle_share_fs_device(&mut self, sharefs: ShareFsDevice) -> Result { + let device: ShareFsDevice = sharefs.clone(); + if device.config.fs_type != VIRTIO_FS { + return Err(anyhow!( + "cannot handle share fs type: {:?}", + device.config.fs_type + )); } let socket = self @@ -119,26 +123,26 @@ impl CloudHypervisorInner { .ok_or("missing socket") .map_err(|e| anyhow!(e))?; - let num_queues: usize = if cfg.queue_num > 0 { - cfg.queue_num as usize + let num_queues: usize = 
if device.config.queue_num > 0 { + device.config.queue_num as usize } else { DEFAULT_FS_QUEUES }; - let queue_size: u16 = if cfg.queue_num > 0 { - u16::try_from(cfg.queue_size)? + let queue_size: u16 = if device.config.queue_num > 0 { + u16::try_from(device.config.queue_size)? } else { DEFAULT_FS_QUEUE_SIZE }; - let socket_path = if cfg.sock_path.starts_with('/') { - PathBuf::from(cfg.sock_path) + let socket_path = if device.config.sock_path.starts_with('/') { + PathBuf::from(device.config.sock_path) } else { - scoped_join(&self.vm_path, cfg.sock_path)? + scoped_join(&self.vm_path, device.config.sock_path)? }; let fs_config = FsConfig { - tag: cfg.mount_tag, + tag: device.config.mount_tag, socket: socket_path, num_queues, queue_size, @@ -157,14 +161,16 @@ impl CloudHypervisorInner { debug!(sl!(), "fs add response: {:?}", detail); } - Ok(()) + Ok(DeviceType::ShareFs(sharefs)) } - async fn handle_hvsock_device(&mut self, _cfg: &HybridVsockConfig) -> Result<()> { - Ok(()) + async fn handle_hvsock_device(&mut self, device: HybridVsockDevice) -> Result { + Ok(DeviceType::HybridVsock(device)) } - async fn handle_vfio_device(&mut self, device: &VfioDevice) -> Result<()> { + async fn handle_vfio_device(&mut self, device: VfioDevice) -> Result { + let vfio_device: VfioDevice = device.clone(); + // A device with multi-funtions, or a IOMMU group with one more // devices, the Primary device is selected to be passed to VM. // And the the first one is Primary device. @@ -206,7 +212,7 @@ impl CloudHypervisorInner { .insert(device.device_id.clone(), dev_info.id); } - Ok(()) + Ok(DeviceType::Vfio(vfio_device)) } async fn remove_vfio_device(&mut self, device: &VfioDevice) -> Result<()> { @@ -242,7 +248,31 @@ impl CloudHypervisorInner { Ok(()) } - async fn handle_block_device(&mut self, device: BlockDevice) -> Result<()> { + // Various cloud-hypervisor APIs report a PCI address in "BB:DD.F" + // form within the PciDeviceInfo struct. 
+ // eg "0000:00:DD.F" + fn clh_pci_info_to_path(bdf: &str) -> Result { + let tokens: Vec<&str> = bdf.split(':').collect(); + if tokens.len() != 3 || tokens[0] != "0000" || tokens[1] != "00" { + return Err(anyhow!( + "Unexpected PCI address {:?} for clh device add", + bdf + )); + } + + let toks: Vec<&str> = tokens[2].split('.').collect(); + if toks.len() != 2 || toks[1] != "0" || toks[0].len() != 2 { + return Err(anyhow!( + "Unexpected PCI address {:?} for clh device add", + bdf + )); + } + + PciPath::convert_from_string(toks[0]) + } + + async fn handle_block_device(&mut self, device: BlockDevice) -> Result { + let mut block_dev = device.clone(); let socket = self .api_socket .as_ref() @@ -272,9 +302,10 @@ impl CloudHypervisorInner { let dev_info: PciDeviceInfo = serde_json::from_str(detail.as_str()).map_err(|e| anyhow!(e))?; self.device_ids.insert(device.device_id, dev_info.id); + block_dev.config.pci_path = Some(Self::clh_pci_info_to_path(dev_info.bdf.as_str())?); } - Ok(()) + Ok(DeviceType::Block(block_dev)) } pub(crate) async fn get_shared_devices( diff --git a/src/runtime-rs/crates/hypervisor/src/ch/mod.rs b/src/runtime-rs/crates/hypervisor/src/ch/mod.rs index 72373978ce..37f52d11cb 100644 --- a/src/runtime-rs/crates/hypervisor/src/ch/mod.rs +++ b/src/runtime-rs/crates/hypervisor/src/ch/mod.rs @@ -79,7 +79,7 @@ impl Hypervisor for CloudHypervisor { inner.save_vm().await } - async fn add_device(&self, device: DeviceType) -> Result<()> { + async fn add_device(&self, device: DeviceType) -> Result { let mut inner = self.inner.write().await; inner.add_device(device).await } diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs index 4f6b44ef4b..1de3ff3897 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_blk.rs @@ -7,6 +7,7 @@ use crate::device::Device; use crate::device::DeviceType; 
use crate::Hypervisor as hypervisor; +use crate::PciPath; use anyhow::{anyhow, Context, Result}; use async_trait::async_trait; @@ -39,6 +40,9 @@ pub struct BlockConfig { /// device path in guest pub virt_path: String, + /// pci path is the slot at which the drive is attached + pub pci_path: Option, + /// device attach count pub attach_count: u64, @@ -78,11 +82,20 @@ impl Device for BlockDevice { { return Ok(()); } - if let Err(e) = h.add_device(DeviceType::Block(self.clone())).await { - self.decrease_attach_count().await?; - return Err(e); + + match h.add_device(DeviceType::Block(self.clone())).await { + Ok(dev) => { + // Update device info with the one received from device attach + if let DeviceType::Block(blk) = dev { + self.config = blk.config; + } + Ok(()) + } + Err(e) => { + self.decrease_attach_count().await?; + return Err(e); + } } - return Ok(()); } async fn detach(&mut self, h: &dyn hypervisor) -> Result> { diff --git a/src/runtime-rs/crates/hypervisor/src/device/mod.rs b/src/runtime-rs/crates/hypervisor/src/device/mod.rs index 8154920574..59bb7540d8 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/mod.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/mod.rs @@ -10,7 +10,7 @@ use crate::device::driver::vhost_user_blk::VhostUserBlkDevice; use crate::{ BlockConfig, BlockDevice, HybridVsockConfig, HybridVsockDevice, Hypervisor as hypervisor, NetworkConfig, NetworkDevice, ShareFsDevice, ShareFsDeviceConfig, ShareFsMountConfig, - ShareFsMountDevice, VfioConfig, VfioDevice, VhostUserConfig, VsockConfig, VsockDevice, + ShareFsMountDevice, VfioConfig, VfioDevice, VhostUserConfig, VsockConfig, }; use anyhow::Result; use async_trait::async_trait; @@ -31,7 +31,7 @@ pub enum DeviceConfig { HybridVsockCfg(HybridVsockConfig), } -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum DeviceType { Block(BlockDevice), VhostUserBlk(VhostUserBlkDevice), @@ -40,7 +40,6 @@ pub enum DeviceType { ShareFs(ShareFsDevice), ShareFsMount(ShareFsMountDevice), 
HybridVsock(HybridVsockDevice), - Vsock(VsockDevice), } impl fmt::Display for DeviceType { diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs index 3e2ea3e860..1904f2e31f 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/inner_device.rs @@ -74,9 +74,6 @@ impl DragonballInner { DeviceType::ShareFsMount(sharefs_mount) => self .add_share_fs_mount(&sharefs_mount.config) .context("add share fs mount"), - DeviceType::Vsock(_) => { - todo!() - } } } diff --git a/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs b/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs index 3b98f38a23..c94de73c87 100644 --- a/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs +++ b/src/runtime-rs/crates/hypervisor/src/dragonball/mod.rs @@ -92,9 +92,12 @@ impl Hypervisor for Dragonball { inner.resize_vcpu(old_vcpus, new_vcpus).await } - async fn add_device(&self, device: DeviceType) -> Result<()> { + async fn add_device(&self, device: DeviceType) -> Result { let mut inner = self.inner.write().await; - inner.add_device(device).await + match inner.add_device(device.clone()).await { + Ok(_) => Ok(device), + Err(err) => Err(err), + } } async fn remove_device(&self, device: DeviceType) -> Result<()> { diff --git a/src/runtime-rs/crates/hypervisor/src/lib.rs b/src/runtime-rs/crates/hypervisor/src/lib.rs index 9e6184b74b..deb7c92428 100644 --- a/src/runtime-rs/crates/hypervisor/src/lib.rs +++ b/src/runtime-rs/crates/hypervisor/src/lib.rs @@ -85,7 +85,7 @@ pub trait Hypervisor: std::fmt::Debug + Send + Sync { async fn resize_vcpu(&self, old_vcpus: u32, new_vcpus: u32) -> Result<(u32, u32)>; // returns (old_vcpus, new_vcpus) // device manager - async fn add_device(&self, device: DeviceType) -> Result<()>; + async fn add_device(&self, device: DeviceType) -> Result; async fn remove_device(&self, device: DeviceType) -> 
Result<()>; // utils diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs b/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs index 6f59d93391..80f86a56eb 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs @@ -146,9 +146,9 @@ use crate::device::DeviceType; // device manager part of Hypervisor impl QemuInner { - pub(crate) async fn add_device(&mut self, device: DeviceType) -> Result<()> { + pub(crate) async fn add_device(&mut self, device: DeviceType) -> Result { info!(sl!(), "QemuInner::add_device() {}", device); - todo!() + Ok(device) } pub(crate) async fn remove_device(&mut self, device: DeviceType) -> Result<()> { diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs b/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs index 1221af26b8..d26468632e 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs @@ -74,7 +74,7 @@ impl Hypervisor for Qemu { inner.save_vm().await } - async fn add_device(&self, device: DeviceType) -> Result<()> { + async fn add_device(&self, device: DeviceType) -> Result { let mut inner = self.inner.write().await; inner.add_device(device).await } diff --git a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs.rs b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs.rs index a1a1ba25c4..c0449fa3d0 100644 --- a/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs.rs +++ b/src/runtime-rs/crates/resource/src/share_fs/share_virtio_fs.rs @@ -89,9 +89,16 @@ pub(crate) async fn setup_inline_virtiofs(id: &str, h: &dyn Hypervisor) -> Resul prefetch_list_path: None, }, }; - h.add_device(DeviceType::ShareFsMount(virtio_fs)) + + let result = h + .add_device(DeviceType::ShareFsMount(virtio_fs)) .await - .with_context(|| format!("fail to attach passthrough fs {:?}", source)) + .with_context(|| format!("fail to attach passthrough fs {:?}", source)); + + match result { + Ok(_) => Ok(()), 
+ Err(e) => Err(e), + } } pub async fn rafs_mount( From c3ce6a1d15c42aa88ca253f293fbdc52315f09f8 Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Sat, 21 Oct 2023 05:44:30 -0700 Subject: [PATCH 2/3] runtime-rs: Provide PCI path to the agent for virtio-block If PCI path for block device is not empty for a block device, use that as identifier for agent instead of virt path which is valid only for mmio devices. Signed-off-by: Archana Shinde --- src/runtime-rs/crates/resource/src/manager_inner.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/runtime-rs/crates/resource/src/manager_inner.rs b/src/runtime-rs/crates/resource/src/manager_inner.rs index 1f51ca9c0f..53b3ac4a12 100644 --- a/src/runtime-rs/crates/resource/src/manager_inner.rs +++ b/src/runtime-rs/crates/resource/src/manager_inner.rs @@ -297,8 +297,16 @@ impl ResourceManagerInner { // create block device for kata agent, // if driver is virtio-blk-pci, the id will be pci address. if let DeviceType::Block(device) = device_info { + // The following would work for drivers virtio-blk-pci and mmio. + // Once scsi support is added, need to handle scsi identifiers. + let id = if let Some(pci_path) = device.config.pci_path { + pci_path.convert_to_string() + } else { + device.config.virt_path.clone() + }; + let agent_device = Device { - id: device.config.virt_path.clone(), + id, container_path: d.path.clone(), field_type: device.config.driver_option, vm_path: device.config.virt_path, From 036b7787dd96272832f740ffd7e448b2c112efb1 Mon Sep 17 00:00:00 2001 From: Archana Shinde Date: Sat, 21 Oct 2023 06:28:54 -0700 Subject: [PATCH 3/3] runtime-rs: Use PCI path from hypervisor for vfio devices Remove earlier functionality that tries to assign PCI path to vfio devices from the host assuming pci slots to start from 1. Get this from the hypervisor instead. 
Signed-off-by: Archana Shinde --- .../crates/hypervisor/src/ch/inner_device.rs | 8 ++- .../hypervisor/src/device/driver/vfio.rs | 53 ++++++++++++------- 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs index 482a23a9b8..f7509bd8b9 100644 --- a/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs +++ b/src/runtime-rs/crates/hypervisor/src/ch/inner_device.rs @@ -169,7 +169,7 @@ impl CloudHypervisorInner { } async fn handle_vfio_device(&mut self, device: VfioDevice) -> Result { - let vfio_device: VfioDevice = device.clone(); + let mut vfio_device: VfioDevice = device.clone(); // A device with multi-funtions, or a IOMMU group with one more // devices, the Primary device is selected to be passed to VM. @@ -210,6 +210,12 @@ impl CloudHypervisorInner { serde_json::from_str(detail.as_str()).map_err(|e| anyhow!(e))?; self.device_ids .insert(device.device_id.clone(), dev_info.id); + + // Update PCI path for the vfio host device. It is safe to directly access the slice element + // here as we have already checked if it exists. + // Todo: Handle vfio-ap mediated devices - return error for them. 
+ vfio_device.devices[0].guest_pci_path = + Some(Self::clh_pci_info_to_path(&dev_info.bdf)?); } Ok(DeviceType::Vfio(vfio_device)) diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs index 132535dbdc..a689bcb355 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/vfio.rs @@ -389,7 +389,7 @@ impl VfioDevice { .get_vfio_device_vendor(&dev_bdf) .context("get property device and vendor failed")?; - let mut vfio_dev = HostDevice { + let vfio_dev = HostDevice { bus_slot_func: dev_bdf.clone(), device_vendor: Some(dev_vendor), sysfs_path: vfio_dev_details.1, @@ -397,18 +397,6 @@ impl VfioDevice { ..Default::default() }; - // when vfio pci, kata-agent handles with device_options, and its - // format: "DDDD:BB:DD.F=" - // DDDD:BB:DD.F is the device's PCI address on host - // is the device's PCI path in the guest - if self.bus_mode == VfioBusMode::PCI { - let pci_path = - generate_guest_pci_path(dev_bdf.clone()).context("generate pci path failed")?; - vfio_dev.guest_pci_path = Some(pci_path.clone()); - self.device_options - .push(format!("0000:{}={}", dev_bdf, pci_path.convert_to_string())); - } - Ok(vfio_dev) } @@ -493,13 +481,42 @@ impl Device for VfioDevice { } // do add device for vfio deivce - if let Err(e) = h.add_device(DeviceType::Vfio(self.clone())).await { - self.decrease_attach_count().await?; + match h.add_device(DeviceType::Vfio(self.clone())).await { + Ok(dev) => { + // Update device info with the one received from device attach + if let DeviceType::Vfio(vfio) = dev { + self.config = vfio.config; + self.devices = vfio.devices; + } - return Err(e); + if self.bus_mode == VfioBusMode::PCI { + for hostdev in self.devices.iter_mut() { + if hostdev.guest_pci_path.is_none() { + // guest_pci_path may be empty for certain hypervisors such as + // dragonball + hostdev.guest_pci_path = Some( + 
generate_guest_pci_path(hostdev.bus_slot_func.clone()) + .map_err(|e| anyhow!("generate pci path failed: {:?}", e))?, + ); + } + + // Safe to call unwrap here because of previous assignment. + let pci_path = hostdev.guest_pci_path.clone().unwrap(); + self.device_options.push(format!( + "0000:{}={}", + hostdev.bus_slot_func.clone(), + pci_path.convert_to_string() + )); + } + } + + Ok(()) + } + Err(e) => { + self.decrease_attach_count().await?; + return Err(e); + } } - - Ok(()) } async fn detach(&mut self, h: &dyn hypervisor) -> Result> {