From 720265c2d861e9ba38fa774bd0ac2e653a2b9d0e Mon Sep 17 00:00:00 2001 From: Pavel Mores Date: Thu, 15 Aug 2024 11:41:36 +0200 Subject: [PATCH 1/7] runtime-rs: support adding PCI bridges to qemu VM At least one PCI bridge is necessary to hotplug PCI devices. We only support PCI (at this point at least) since that's what the go runtime does (note that looking at the code in virtcontainers it might seem that other bus types are supported, however when the bridge objects are passed to govmm, all but PCI bridges are actually ignored). The entire logic of bridge setup is lifted from runtime-go for compatibility's sake. Signed-off-by: Pavel Mores --- .../hypervisor/src/qemu/cmdline_generator.rs | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs b/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs index 7fa49b96c0..209ff2f79f 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs @@ -1290,6 +1290,74 @@ impl ToQemuParams for DeviceIntelIommu { } } +#[derive(Debug)] +struct DevicePciBridge { + driver: String, + bus: String, + id: String, + chassis_nr: u32, + shpc: bool, + addr: u32, + io_reserve: String, + mem_reserve: String, + pref64_reserve: String, +} + +impl DevicePciBridge { + fn new(config: &HypervisorConfig, bridge_idx: u32) -> DevicePciBridge { + DevicePciBridge { + // The go runtime doesn't support bridges other than PCI although + // PCIe should also be available. Stick with the legacy behaviour + // of ignoring PCIe since it's not clear to me how to decide + // between the two. + driver: "pci-bridge".to_owned(), + bus: match config.machine_info.machine_type.as_str() { + "q35" | "virt" => "pcie.0", + _ => "pci.0", + } + .to_owned(), + id: format!("pci-bridge-{}", bridge_idx), + // Each bridge is required to be assigned a unique chassis id > 0. 
+ chassis_nr: bridge_idx + 1, + shpc: false, + // 2 is documented by the go runtime as the first slot available + // for a bridge (on x86_64) + // (https://github.com/kata-containers/kata-containers/blob/99730256a2899c82d111400024621519d17ea15d/src/runtime/virtcontainers/qemu_arch_base.go#L212) + addr: 2 + bridge_idx, + // Values taken from the go runtime implementation which comments + // the choices as follows: + // Certain guest BIOS versions think !SHPC means no hotplug, and + // won't reserve the IO and memory windows that will be needed for + // devices added underneath this bridge. This will only break for + // certain combinations of exact qemu, BIOS and guest kernel + // versions, but for consistency, just hint the usual default + // windows for a bridge (as the BIOS would use with SHPC) so that + // we can do ACPI hotplug. + // (https://github.com/kata-containers/kata-containers/blob/99730256a2899c82d111400024621519d17ea15d/src/runtime/virtcontainers/qemu.go#L2474) + io_reserve: "4k".to_owned(), + mem_reserve: "1m".to_owned(), + pref64_reserve: "1m".to_owned(), + } + } +} + +#[async_trait] +impl ToQemuParams for DevicePciBridge { + async fn qemu_params(&self) -> Result> { + let mut params = Vec::new(); + params.push(self.driver.clone()); + params.push(format!("bus={}", self.bus)); + params.push(format!("id={}", self.id)); + params.push(format!("chassis_nr={}", self.chassis_nr)); + params.push(format!("shpc={}", if self.shpc { "on" } else { "off" })); + params.push(format!("addr={}", self.addr)); + params.push(format!("io-reserve={}", self.io_reserve)); + params.push(format!("mem-reserve={}", self.mem_reserve)); + params.push(format!("pref64-reserve={}", self.pref64_reserve)); + Ok(vec!["-device".to_owned(), params.join(",")]) + } +} + // Qemu provides methods and types for managing QEMU instances. 
// To manage a qemu instance after it has been launched you need // to pass the -qmp option during launch requesting the qemu instance @@ -1484,6 +1552,11 @@ impl<'a> QemuCmdLine<'a> { qemu_cmd_line.add_rng(); } + if qemu_cmd_line.bus_type() != VirtioBusType::Ccw && config.device_info.default_bridges > 0 + { + qemu_cmd_line.add_bridges(config.device_info.default_bridges); + } + Ok(qemu_cmd_line) } @@ -1526,6 +1599,13 @@ impl<'a> QemuCmdLine<'a> { self.machine.set_kernel_irqchip("split"); } + fn add_bridges(&mut self, count: u32) { + for idx in 0..count { + let bridge = DevicePciBridge::new(self.config, idx); + self.devices.push(Box::new(bridge)); + } + } + pub fn add_virtiofs_share( &mut self, virtiofsd_socket_path: &str, From efc8e93bfe40c59550138f99fe04051c411c31be Mon Sep 17 00:00:00 2001 From: Pavel Mores Date: Thu, 15 Aug 2024 11:41:36 +0200 Subject: [PATCH 2/7] runtime-rs: factor bus_type() out of QemuCmdLine The function takes a whole QemuCmdLine but only actually uses HypervisorConfig. We increase callability of the function by limiting its interface to what it needs. This will come in handy shortly. 
Signed-off-by: Pavel Mores --- .../hypervisor/src/qemu/cmdline_generator.rs | 49 +++++++++++-------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs b/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs index 209ff2f79f..3abb990095 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs @@ -49,7 +49,7 @@ trait ToQemuParams: Send + Sync { async fn qemu_params(&self) -> Result>; } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Clone, Copy)] enum VirtioBusType { Pci, Ccw, @@ -70,6 +70,14 @@ impl Display for VirtioBusType { } } +fn bus_type(config: &HypervisorConfig) -> VirtioBusType { + if config.machine_info.machine_type.contains("-ccw-") { + VirtioBusType::Ccw + } else { + VirtioBusType::Pci + } +} + // Conventions used in qemu command line generation // ================================================ // @@ -1548,12 +1556,11 @@ impl<'a> QemuCmdLine<'a> { qemu_cmd_line.add_rtc(); - if qemu_cmd_line.bus_type() != VirtioBusType::Ccw { + if bus_type(config) != VirtioBusType::Ccw { qemu_cmd_line.add_rng(); } - if qemu_cmd_line.bus_type() != VirtioBusType::Ccw && config.device_info.default_bridges > 0 - { + if bus_type(config) != VirtioBusType::Ccw && config.device_info.default_bridges > 0 { qemu_cmd_line.add_bridges(config.device_info.default_bridges); } @@ -1580,14 +1587,6 @@ impl<'a> QemuCmdLine<'a> { self.devices.push(Box::new(rng_device)); } - fn bus_type(&self) -> VirtioBusType { - if self.config.machine_info.machine_type.contains("-ccw-") { - VirtioBusType::Ccw - } else { - VirtioBusType::Pci - } - } - fn add_iommu(&mut self) { let dev_iommu = DeviceIntelIommu::new(); self.devices.push(Box::new(dev_iommu)); @@ -1622,9 +1621,11 @@ impl<'a> QemuCmdLine<'a> { self.devices.push(Box::new(virtiofsd_socket_chardev)); - let mut virtiofs_device = DeviceVhostUserFs::new(chardev_name, 
mount_tag, self.bus_type()); + let bus_type = bus_type(self.config); + + let mut virtiofs_device = DeviceVhostUserFs::new(chardev_name, mount_tag, bus_type); virtiofs_device.set_queue_size(queue_size); - if self.config.device_info.enable_iommu_platform && self.bus_type() == VirtioBusType::Ccw { + if self.config.device_info.enable_iommu_platform && bus_type == VirtioBusType::Ccw { virtiofs_device.set_iommu_platform(true); } self.devices.push(Box::new(virtiofs_device)); @@ -1638,7 +1639,7 @@ impl<'a> QemuCmdLine<'a> { //self.devices.push(Box::new(mem_file)); self.memory.set_memory_backend_file(&mem_file); - match self.bus_type() { + match bus_type { VirtioBusType::Pci => { self.machine.set_nvdimm(true); self.devices.push(Box::new(NumaNode::new(&mem_file.id))); @@ -1652,7 +1653,7 @@ impl<'a> QemuCmdLine<'a> { pub fn add_vsock(&mut self, vhostfd: tokio::fs::File, guest_cid: u32) -> Result<()> { clear_cloexec(vhostfd.as_raw_fd()).context("clearing O_CLOEXEC failed on vsock fd")?; - let mut vhost_vsock_pci = VhostVsock::new(vhostfd, guest_cid, self.bus_type()); + let mut vhost_vsock_pci = VhostVsock::new(vhostfd, guest_cid, bus_type(self.config)); if !self.config.disable_nesting_checks && should_disable_modern() { vhost_vsock_pci.set_disable_modern(true); @@ -1699,8 +1700,10 @@ impl<'a> QemuCmdLine<'a> { pub fn add_block_device(&mut self, device_id: &str, path: &str) -> Result<()> { self.devices .push(Box::new(BlockBackend::new(device_id, path))); - self.devices - .push(Box::new(DeviceVirtioBlk::new(device_id, self.bus_type()))); + self.devices.push(Box::new(DeviceVirtioBlk::new( + device_id, + bus_type(self.config), + ))); Ok(()) } @@ -1734,7 +1737,9 @@ impl<'a> QemuCmdLine<'a> { if should_disable_modern() { virtio_net_device.set_disable_modern(true); } - if self.config.device_info.enable_iommu_platform && self.bus_type() == VirtioBusType::Ccw { + if self.config.device_info.enable_iommu_platform + && bus_type(self.config) == VirtioBusType::Ccw + { 
virtio_net_device.set_iommu_platform(true); } if self.config.network_info.network_queues > 1 { @@ -1747,8 +1752,10 @@ impl<'a> QemuCmdLine<'a> { } pub fn add_console(&mut self, console_socket_path: &str) { - let mut serial_dev = DeviceVirtioSerial::new("serial0", self.bus_type()); - if self.config.device_info.enable_iommu_platform && self.bus_type() == VirtioBusType::Ccw { + let mut serial_dev = DeviceVirtioSerial::new("serial0", bus_type(self.config)); + if self.config.device_info.enable_iommu_platform + && bus_type(self.config) == VirtioBusType::Ccw + { serial_dev.set_iommu_platform(true); } self.devices.push(Box::new(serial_dev)); From cda04fa5392730aef446f7edc9427f9ae2a6a347 Mon Sep 17 00:00:00 2001 From: Pavel Mores Date: Thu, 15 Aug 2024 11:41:36 +0200 Subject: [PATCH 3/7] runtime-rs: factor setup of network device out of QemuCmdLine Network device hotplugging will use the same infrastructure (Netdev, DeviceVirtioNet) as coldplugging, i.e. QemuCmdLine. To make the code of network device setup visible outside of QemuCmdLine we factor it out to a non-member function `get_network_device()` and make QemuCmdLine just delegate to it. Signed-off-by: Pavel Mores --- .../hypervisor/src/qemu/cmdline_generator.rs | 56 +++++++++++-------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs b/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs index 3abb990095..484c0630b2 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs @@ -983,7 +983,7 @@ fn format_fds(files: &[File]) -> String { } #[derive(Debug)] -struct Netdev { +pub struct Netdev { id: String, // File descriptors for vhost multi-queue support. 
@@ -1723,28 +1723,8 @@ impl<'a> QemuCmdLine<'a> { host_dev_name: &str, guest_mac: Address, ) -> Result<()> { - let mut netdev = Netdev::new( - &format!("network-{}", dev_index), - host_dev_name, - self.config.network_info.network_queues, - )?; - if self.config.network_info.disable_vhost_net { - netdev.set_disable_vhost_net(true); - } - - let mut virtio_net_device = DeviceVirtioNet::new(&netdev.id, guest_mac); - - if should_disable_modern() { - virtio_net_device.set_disable_modern(true); - } - if self.config.device_info.enable_iommu_platform - && bus_type(self.config) == VirtioBusType::Ccw - { - virtio_net_device.set_iommu_platform(true); - } - if self.config.network_info.network_queues > 1 { - virtio_net_device.set_num_queues(self.config.network_info.network_queues); - } + let (netdev, virtio_net_device) = + get_network_device(self.config, dev_index, host_dev_name, guest_mac)?; self.devices.push(Box::new(netdev)); self.devices.push(Box::new(virtio_net_device)); @@ -1796,3 +1776,33 @@ impl<'a> QemuCmdLine<'a> { Ok(result) } } + +pub fn get_network_device( + config: &HypervisorConfig, + dev_index: u64, + host_dev_name: &str, + guest_mac: Address, +) -> Result<(Netdev, DeviceVirtioNet)> { + let mut netdev = Netdev::new( + &format!("network-{}", dev_index), + host_dev_name, + config.network_info.network_queues, + )?; + if config.network_info.disable_vhost_net { + netdev.set_disable_vhost_net(true); + } + + let mut virtio_net_device = DeviceVirtioNet::new(&netdev.id, guest_mac); + + if should_disable_modern() { + virtio_net_device.set_disable_modern(true); + } + if config.device_info.enable_iommu_platform && bus_type(config) == VirtioBusType::Ccw { + virtio_net_device.set_iommu_platform(true); + } + if config.network_info.network_queues > 1 { + virtio_net_device.set_num_queues(config.network_info.network_queues); + } + + Ok((netdev, virtio_net_device)) +} From 3f46dfcf2fca6cb1798745b9934415de00b0c559 Mon Sep 17 00:00:00 2001 From: Pavel Mores Date: Thu, 15 Aug 2024 
11:41:36 +0200 Subject: [PATCH 4/7] runtime-rs: don't treat NetworkConfig::index as unique in qemu-rs NetworkConfig::index has been used to generate an id for a network device backend. However, it turns out that it's not unique (it's always zero as confirmed by a comment at its definition) so it's not suitable to generate an id that needs to be unique. Use the host device name instead. Signed-off-by: Pavel Mores --- .../crates/hypervisor/src/qemu/cmdline_generator.rs | 12 +++--------- src/runtime-rs/crates/hypervisor/src/qemu/inner.rs | 1 - 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs b/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs index 484c0630b2..9daedb23d7 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs @@ -1717,14 +1717,9 @@ impl<'a> QemuCmdLine<'a> { )); } - pub fn add_network_device( - &mut self, - dev_index: u64, - host_dev_name: &str, - guest_mac: Address, - ) -> Result<()> { + pub fn add_network_device(&mut self, host_dev_name: &str, guest_mac: Address) -> Result<()> { let (netdev, virtio_net_device) = - get_network_device(self.config, dev_index, host_dev_name, guest_mac)?; + get_network_device(self.config, host_dev_name, guest_mac)?; self.devices.push(Box::new(netdev)); self.devices.push(Box::new(virtio_net_device)); @@ -1779,12 +1774,11 @@ impl<'a> QemuCmdLine<'a> { pub fn get_network_device( config: &HypervisorConfig, - dev_index: u64, host_dev_name: &str, guest_mac: Address, ) -> Result<(Netdev, DeviceVirtioNet)> { let mut netdev = Netdev::new( - &format!("network-{}", dev_index), + &format!("network-{}", host_dev_name), host_dev_name, config.network_info.network_queues, )?; diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs b/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs index e292662a4b..e691045faf 100644 --- 
a/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs @@ -120,7 +120,6 @@ impl QemuInner { let _netns_guard = NetnsGuard::new(&netns).context("new netns guard")?; cmdline.add_network_device( - network.config.index, &network.config.host_dev_name, network.config.guest_mac.clone().unwrap(), )?; From 4eb7e2966c5cdd3c3d531ff7eb37792cced164c5 Mon Sep 17 00:00:00 2001 From: Pavel Mores Date: Thu, 15 Aug 2024 11:41:36 +0200 Subject: [PATCH 5/7] runtime-rs: add netdev hotplugging helpers to qemu-rs Before adding network device hotplugging functionality itself we add a couple of helpers in a separate commit since their functionality is non-trivial. To hotplug a device we need a free PCI slot. We add find_free_slot() which can be called to obtain one. It looks for PCI bridges connected to the root bridge and looks for an unoccupied slot on each of them. The first found is returned to the caller. The algorithm explicitly doesn't support any more complex bridge hierarchies since those are never produced when coldplugging PCI bridges. Sending netdev queue and vhost file descriptors to QEMU is slightly involved and implemented in pass_fd(). The actual socket has to be passed in an SCM_RIGHTS socket control message (also called ancillary data, see man 3 cmsg) so we have to use the msghdr structure and sendmsg() call (see man 2 sendmsg) to send the message. Since qapi-rs doesn't support sending messages with ancillary data we have to do the sending sort of "under it", manually, by retrieving qapi-rs's socket and using it directly. 
Signed-off-by: Pavel Mores --- .../crates/hypervisor/src/qemu/qmp.rs | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs b/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs index 7339efdf8b..4bd296ae63 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs @@ -4,8 +4,10 @@ // use anyhow::{anyhow, Result}; +use nix::sys::socket::{sendmsg, ControlMessage, MsgFlags}; use std::fmt::{Debug, Error, Formatter}; use std::io::BufReader; +use std::os::fd::{AsRawFd, RawFd}; use std::os::unix::net::UnixStream; use std::time::Duration; @@ -291,6 +293,85 @@ impl Qmp { } Ok(()) } + + #[allow(dead_code)] + fn find_free_slot(&mut self) -> Result<(String, i64)> { + let pci = self.qmp.execute(&qapi_qmp::query_pci {})?; + for pci_info in &pci { + for pci_dev in &pci_info.devices { + let pci_bridge = match &pci_dev.pci_bridge { + Some(bridge) => bridge, + None => continue, + }; + + info!(sl!(), "found PCI bridge: {}", pci_dev.qdev_id); + + if let Some(bridge_devices) = &pci_bridge.devices { + let occupied_slots = bridge_devices + .iter() + .map(|pci_dev| pci_dev.slot) + .collect::>(); + + info!( + sl!(), + "already occupied slots on bridge {}: {:#?}", + pci_dev.qdev_id, + occupied_slots + ); + + // from virtcontainers' bridges.go + let pci_bridge_max_capacity = 30; + for slot in 0..pci_bridge_max_capacity { + if !occupied_slots.iter().any(|elem| *elem == slot) { + info!( + sl!(), + "found free slot on bridge {}: {}", pci_dev.qdev_id, slot + ); + return Ok((pci_dev.qdev_id.clone(), slot)); + } + } + } + } + } + Err(anyhow!("no free slots on PCI bridges")) + } + + #[allow(dead_code)] + fn pass_fd(&mut self, fd: RawFd, fdname: &str) -> Result<()> { + info!(sl!(), "passing fd {:?} as {}", fd, fdname); + + // Put the QMP 'getfd' command itself into the message payload. 
+ let getfd_cmd = format!( + "{{ \"execute\": \"getfd\", \"arguments\": {{ \"fdname\": \"{}\" }} }}", + fdname + ); + let buf = getfd_cmd.as_bytes(); + let bufs = &mut [std::io::IoSlice::new(buf)][..]; + + debug!(sl!(), "bufs: {:?}", bufs); + + let fds = [fd]; + let cmsg = [ControlMessage::ScmRights(&fds)]; + + let result = sendmsg::<()>( + self.qmp.inner_mut().get_mut_write().as_raw_fd(), + bufs, + &cmsg, + MsgFlags::empty(), + None, + ); + info!(sl!(), "sendmsg() result: {:#?}", result); + + let result = self.qmp.read_response::<&qmp::getfd>(); + + match result { + Ok(_) => { + info!(sl!(), "successfully passed {} ({})", fdname, fd); + Ok(()) + } + Err(err) => Err(anyhow!("failed to pass {} ({}): {}", fdname, fd, err)), + } + } } fn vcpu_id_from_core_id(core_id: i64) -> String { From ac393f6316b5da69fd2277ebc04fe154ee5cfeeb Mon Sep 17 00:00:00 2001 From: Pavel Mores Date: Thu, 15 Aug 2024 11:41:36 +0200 Subject: [PATCH 6/7] runtime-rs: implement netdev hotplugging for qemu-rs With the helpers from the previous commit, the actual hotplugging implementation, though lengthy, is mostly just assembling a QMP command to hotplug the network device backend and then doing the same for the corresponding frontend. Note that hotplug_network_device() takes cmdline_generator types Netdev and DeviceVirtioNet. This is intentional and aims to take advantage of the similarity between parameter sets needed to coldplug and hotplug devices to reuse and simplify our code. To enable using the types from qmp, accessors were added as needed. 
Signed-off-by: Pavel Mores --- .../hypervisor/src/qemu/cmdline_generator.rs | 32 ++++++ .../crates/hypervisor/src/qemu/qmp.rs | 100 +++++++++++++++++- 2 files changed, 130 insertions(+), 2 deletions(-) diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs b/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs index 9daedb23d7..b8d0ea6e84 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs @@ -1021,6 +1021,18 @@ impl Netdev { self.disable_vhost_net = disable_vhost_net; self } + + pub fn get_id(&self) -> &String { + &self.id + } + + pub fn get_fds(&self) -> &Vec { + &self.fds["fds"] + } + + pub fn get_vhostfds(&self) -> &Vec { + &self.fds["vhostfds"] + } } #[async_trait] @@ -1089,6 +1101,26 @@ impl DeviceVirtioNet { self.iommu_platform = iommu_platform; self } + + pub fn get_netdev_id(&self) -> &String { + &self.netdev_id + } + + pub fn get_device_driver(&self) -> &String { + &self.device_driver + } + + pub fn get_mac_addr(&self) -> String { + format!("{:?}", self.mac_address) + } + + pub fn get_num_queues(&self) -> u32 { + self.num_queues + } + + pub fn get_disable_modern(&self) -> bool { + self.disable_modern + } } #[async_trait] diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs b/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs index 4bd296ae63..ba35fd61ae 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs @@ -3,6 +3,8 @@ // SPDX-License-Identifier: Apache-2.0 // +use crate::qemu::cmdline_generator::{DeviceVirtioNet, Netdev}; + use anyhow::{anyhow, Result}; use nix::sys::socket::{sendmsg, ControlMessage, MsgFlags}; use std::fmt::{Debug, Error, Formatter}; @@ -294,7 +296,6 @@ impl Qmp { Ok(()) } - #[allow(dead_code)] fn find_free_slot(&mut self) -> Result<(String, i64)> { let pci = self.qmp.execute(&qapi_qmp::query_pci {})?; for pci_info in &pci { @@ -336,7 +337,6 
@@ impl Qmp { Err(anyhow!("no free slots on PCI bridges")) } - #[allow(dead_code)] fn pass_fd(&mut self, fd: RawFd, fdname: &str) -> Result<()> { info!(sl!(), "passing fd {:?} as {}", fd, fdname); @@ -372,6 +372,102 @@ impl Qmp { Err(err) => Err(anyhow!("failed to pass {} ({}): {}", fdname, fd, err)), } } + + #[allow(dead_code)] + pub fn hotplug_network_device( + &mut self, + netdev: &Netdev, + virtio_net_device: &DeviceVirtioNet, + ) -> Result<()> { + debug!( + sl!(), + "hotplug_network_device(): PCI before {}: {:#?}", + virtio_net_device.get_netdev_id(), + self.qmp.execute(&qapi_qmp::query_pci {})? + ); + + let (bus, slot) = self.find_free_slot()?; + + let mut fd_names = vec![]; + for (idx, fd) in netdev.get_fds().iter().enumerate() { + let fdname = format!("fd{}", idx); + self.pass_fd(fd.as_raw_fd(), fdname.as_ref())?; + fd_names.push(fdname); + } + + let mut vhostfd_names = vec![]; + for (idx, fd) in netdev.get_vhostfds().iter().enumerate() { + let vhostfdname = format!("vhostfd{}", idx); + self.pass_fd(fd.as_raw_fd(), vhostfdname.as_ref())?; + vhostfd_names.push(vhostfdname); + } + + self.qmp + .execute(&qapi_qmp::netdev_add(qapi_qmp::Netdev::tap { + id: netdev.get_id().clone(), + tap: qapi_qmp::NetdevTapOptions { + br: None, + downscript: None, + fd: None, + // Logic in cmdline_generator::Netdev::new() seems to + // guarantee that there will always be at least one fd. 
+ fds: Some(fd_names.join(",")), + helper: None, + ifname: None, + poll_us: None, + queues: None, + script: None, + sndbuf: None, + vhost: if vhostfd_names.is_empty() { + None + } else { + Some(true) + }, + vhostfd: None, + vhostfds: if vhostfd_names.is_empty() { + None + } else { + Some(vhostfd_names.join(",")) + }, + vhostforce: None, + vnet_hdr: None, + }, + }))?; + + let mut netdev_frontend_args = Dictionary::new(); + netdev_frontend_args.insert( + "netdev".to_owned(), + virtio_net_device.get_netdev_id().clone().into(), + ); + netdev_frontend_args.insert("addr".to_owned(), format!("{:02}", slot).into()); + netdev_frontend_args.insert("mac".to_owned(), virtio_net_device.get_mac_addr().into()); + netdev_frontend_args.insert("mq".to_owned(), "on".into()); + // As the golang runtime documents the vectors computation, it's + // 2N+2 vectors, N for tx queues, N for rx queues, 1 for config, and one for possible control vq + netdev_frontend_args.insert( + "vectors".to_owned(), + (2 * virtio_net_device.get_num_queues() + 2).into(), + ); + if virtio_net_device.get_disable_modern() { + netdev_frontend_args.insert("disable-modern".to_owned(), true.into()); + } + + self.qmp.execute(&qmp::device_add { + bus: Some(bus), + id: Some(format!("frontend-{}", virtio_net_device.get_netdev_id())), + driver: virtio_net_device.get_device_driver().clone(), + arguments: netdev_frontend_args, + })?; + + debug!( + sl!(), + "hotplug_network_device(): PCI after {}: {:#?}", + virtio_net_device.get_netdev_id(), + self.qmp.execute(&qapi_qmp::query_pci {})? + ); + + Ok(()) + } } fn vcpu_id_from_core_id(core_id: i64) -> String { From 23927d8a94ff1c0f385956970fede43dfc696d84 Mon Sep 17 00:00:00 2001 From: Pavel Mores Date: Thu, 15 Aug 2024 11:41:36 +0200 Subject: [PATCH 7/7] runtime-rs: plug in netdev hotplugging functionality and actually call it add_device() now checks if QEMU is running already by checking if we have a QMP connection. 
If we do, a new function hotplug_device() is called, which hotplugs the device if it's a network one. Signed-off-by: Pavel Mores --- .../crates/hypervisor/src/qemu/inner.rs | 33 +++++++++++++++++-- .../crates/hypervisor/src/qemu/qmp.rs | 1 - 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs b/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs index e691045faf..1d3380ca40 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/inner.rs @@ -3,7 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 // -use super::cmdline_generator::{QemuCmdLine, QMP_SOCKET_FILE}; +use super::cmdline_generator::{get_network_device, QemuCmdLine, QMP_SOCKET_FILE}; use super::qmp::Qmp; use crate::{ hypervisor_persist::HypervisorState, utils::enter_netns, HypervisorConfig, MemoryConfig, @@ -539,9 +539,16 @@ use crate::device::DeviceType; // device manager part of Hypervisor impl QemuInner { - pub(crate) async fn add_device(&mut self, device: DeviceType) -> Result { + pub(crate) async fn add_device(&mut self, mut device: DeviceType) -> Result { info!(sl!(), "QemuInner::add_device() {}", device); - self.devices.push(device.clone()); + let is_qemu_ready_to_hotplug = self.qmp.is_some(); + if is_qemu_ready_to_hotplug { + // hypervisor is running already + device = self.hotplug_device(device)?; + } else { + // store the device to coldplug it later, on hypervisor launch + self.devices.push(device.clone()); + } Ok(device) } @@ -552,6 +559,26 @@ impl QemuInner { device )) } + + fn hotplug_device(&mut self, device: DeviceType) -> Result { + let qmp = match self.qmp { + Some(ref mut qmp) => qmp, + None => return Err(anyhow!("QMP not initialized")), + }; + + match device { + DeviceType::Network(ref network_device) => { + let (netdev, virtio_net_device) = get_network_device( + &self.config, + &network_device.config.host_dev_name, + network_device.config.guest_mac.clone().unwrap(), + )?; + 
qmp.hotplug_network_device(&netdev, &virtio_net_device)? + } + _ => info!(sl!(), "hotplugging of {:#?} is unsupported", device), + } + Ok(device) + } } // private helpers diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs b/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs index ba35fd61ae..32efa5c43e 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/qmp.rs @@ -373,7 +373,6 @@ impl Qmp { } } - #[allow(dead_code)] pub fn hotplug_network_device( &mut self, netdev: &Netdev,