From 005b33397606d00d47a08b14bb72021586d438dc Mon Sep 17 00:00:00 2001 From: Alex Lyn Date: Tue, 12 Mar 2024 22:25:39 +0800 Subject: [PATCH] runtime-rs: add network helpers and impl ToQemuParams Add network helpers and impl ToQemuParams trait to build netdev params which are put into cmdline for Qemu VM running. Fixes: #8865 Signed-off-by: Alex Lyn --- .../src/device/driver/virtio_net.rs | 2 +- .../hypervisor/src/qemu/cmdline_generator.rs | 20 ++ .../crates/hypervisor/src/qemu/mod.rs | 1 + .../crates/hypervisor/src/qemu/network.rs | 275 ++++++++++++++++++ 4 files changed, 297 insertions(+), 1 deletion(-) create mode 100644 src/runtime-rs/crates/hypervisor/src/qemu/network.rs diff --git a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_net.rs b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_net.rs index 4462761bed..f9c1a91a2a 100644 --- a/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_net.rs +++ b/src/runtime-rs/crates/hypervisor/src/device/driver/virtio_net.rs @@ -13,7 +13,7 @@ use crate::device::topology::PCIeTopology; use crate::device::{Device, DeviceType}; use crate::Hypervisor as hypervisor; -#[derive(Clone)] +#[derive(Clone, Default)] pub struct Address(pub [u8; 6]); impl fmt::Debug for Address { diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs b/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs index 9f79a096e4..8044e5dc69 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/cmdline_generator.rs @@ -3,6 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 // +use super::network::NetDevice; use crate::utils::clear_fd_flags; use crate::{kernel_param::KernelParams, HypervisorConfig, NetworkConfig}; @@ -704,6 +705,25 @@ impl ToQemuParams for Serial { } } +#[async_trait] +impl ToQemuParams for NetDevice { + // qemu_params returns the qemu parameters built out of this network device. 
+ async fn qemu_params(&self) -> Result> { + let mut qemu_params: Vec = Vec::new(); + + let netdev_params = self.qemu_netdev_params()?; + let device_params = self.qemu_device_params()?; + + qemu_params.push("-netdev".to_owned()); + qemu_params.push(netdev_params.join(",")); + + qemu_params.push("-device".to_owned()); + qemu_params.push(device_params.join(",")); + + Ok(qemu_params) + } +} + fn is_running_in_vm() -> Result { let res = read_to_string("/proc/cpuinfo")? .lines() diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs b/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs index 57afe0b216..d1399def25 100644 --- a/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs +++ b/src/runtime-rs/crates/hypervisor/src/qemu/mod.rs @@ -5,6 +5,7 @@ mod cmdline_generator; mod inner; +mod network; use crate::device::DeviceType; use crate::hypervisor_persist::HypervisorState; diff --git a/src/runtime-rs/crates/hypervisor/src/qemu/network.rs b/src/runtime-rs/crates/hypervisor/src/qemu/network.rs new file mode 100644 index 0000000000..37addf4890 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/src/qemu/network.rs @@ -0,0 +1,275 @@ +// Copyright (c) 2024 Ant Group +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::collections::HashMap; +use std::convert::TryFrom; +use std::os::fd::RawFd; + +use crate::utils::clear_fd_flags; +use crate::{Address, NetworkConfig}; +use anyhow::{anyhow, Context, Result}; + +// VirtioTransport is the transport in use for a virtio device. 
+#[derive(Debug, Default, PartialEq)]
+enum VirtioTransport {
+    #[default]
+    Pci,
+}
+
+// Display yields the transport name; `to_string()` keeps working through the
+// blanket `ToString` impl.
+impl std::fmt::Display for VirtioTransport {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            VirtioTransport::Pci => "pci",
+        })
+    }
+}
+
+impl TryFrom<&str> for VirtioTransport {
+    type Error = anyhow::Error;
+
+    // Only "pci" is supported for now; any other name is rejected instead of
+    // being silently mapped to PCI.
+    fn try_from(transport: &str) -> Result<Self> {
+        match transport {
+            "pci" => Ok(VirtioTransport::Pci),
+            _ => Err(anyhow!("unsupported transport: {}", transport)),
+        }
+    }
+}
+
+// DeviceDriver is set in "-device driver=<DeviceDriver>"
+#[derive(Debug, Default, PartialEq)]
+enum DeviceDriver {
+    // VirtioNetPci("virtio-net-pci") is a virtio-net device using PCI transport.
+    #[default]
+    VirtioNetPci,
+
+    // VfioPci("vfio-pci") is an attached host device using PCI transport.
+    VfioPci,
+}
+
+// Display yields the driver name exactly as passed to "-device driver=".
+impl std::fmt::Display for DeviceDriver {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            DeviceDriver::VirtioNetPci => "virtio-net-pci",
+            DeviceDriver::VfioPci => "vfio-pci",
+        })
+    }
+}
+
+impl TryFrom<&str> for DeviceDriver {
+    type Error = anyhow::Error;
+
+    // Parses the driver names produced by the Display impl; anything else is
+    // an error.
+    fn try_from(device_driver: &str) -> Result<Self> {
+        match device_driver {
+            "virtio-net-pci" => Ok(DeviceDriver::VirtioNetPci),
+            "vfio-pci" => Ok(DeviceDriver::VfioPci),
+            _ => Err(anyhow!("unsupported device driver: {}", device_driver)),
+        }
+    }
+}
+
+#[derive(Debug, Default, PartialEq)]
+enum NetDev {
+    /// Tap("tap") is a tap networking device type.
+    #[default]
+    Tap,
+
+    /// MacvTap("macvtap") is a macvtap networking device type.
+    #[allow(dead_code)]
+    MacvTap,
+}
+
+// Display yields the "-netdev" backend type. Both variants map to "tap".
+impl std::fmt::Display for NetDev {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            NetDev::Tap | NetDev::MacvTap => "tap",
+            // VhostUser is to be added in future.
+            // NetDev::VhostUser => "vhost-user",
+        })
+    }
+}
+
+// NetDevice represents a guest networking device
+// -netdev tap,id=hostnet0,vhost=on,vhostfds=x:y:z,fds=a:b:c
+// -device virtio-net-pci,netdev=hostnet0,id=net0,mac=24:42:54:20:50:46,bus=pci.0,addr=0x7
+#[derive(Debug, Default)]
+pub struct NetDevice {
+    // device_type is the netdev type (e.g. tap).
+    device_type: NetDev,
+
+    // driver is the qemu device driver
+    device_driver: DeviceDriver,
+
+    // id is the net device identifier.
+    id: String,
+
+    // if_name is the interface name,
+    if_name: String,
+
+    // bus is the bus path name of a PCI device.
+    bus: String,
+
+    // pci_addr is the address offset of a PCI device.
+    pci_addr: String,
+
+    // fds represents the list of already existing file descriptors to be used.
+    // This is mostly useful for mq support.
+    // {
+    //      fds:      Vec<RawFd>,
+    //      vhostfds: Vec<RawFd>,
+    // }
+    fds: HashMap<String, Vec<RawFd>>,
+
+    // disable_vhost_net disables virtio device emulation from the host kernel instead of from qemu.
+    disable_vhost_net: bool,
+
+    // mac_address is the networking device interface MAC address.
+    mac_address: Address,
+
+    // disable_modern prevents qemu from relying on fast MMIO.
+    disable_modern: bool,
+
+    // transport is the virtio transport for this device.
+    transport: VirtioTransport,
+}
+
+impl NetDevice {
+    // Keys of the `fds` map; they double as the qemu "-netdev" option names.
+    const TAP_FDS_KEY: &'static str = "fds";
+    const VHOST_FDS_KEY: &'static str = "vhostfds";
+
+    // new builds a tap-backed virtio-net-pci device out of `config`.
+    //
+    // `tun_fds` and `vhost_fds` are the already opened descriptors handed over
+    // to qemu; either list may be empty.
+    //
+    // Panics if `config.guest_mac` is not set.
+    #[allow(dead_code)]
+    pub fn new(
+        config: &NetworkConfig,
+        disable_vhost_net: bool,
+        tun_fds: Vec<RawFd>,
+        vhost_fds: Vec<RawFd>,
+    ) -> Self {
+        // we have only two <Key, Value>s:
+        // {
+        //     "fds": vec![fd1, fd2,...],
+        //     "vhostfds": vec![fd3, fd4,...],
+        // }
+        let mut fds: HashMap<String, Vec<RawFd>> = HashMap::with_capacity(2);
+        fds.insert(Self::TAP_FDS_KEY.to_owned(), tun_fds);
+        fds.insert(Self::VHOST_FDS_KEY.to_owned(), vhost_fds);
+
+        NetDevice {
+            device_type: NetDev::Tap,
+            // FIXME(Hard Code): ideally device_driver should be derived from
+            // transport. Only PCI is handled for the initial implementation;
+            // other transports (e.g. s390x's CCW) are to be supported later.
+            device_driver: DeviceDriver::VirtioNetPci,
+            id: format!("network-{}", config.index),
+            if_name: config.virt_iface_name.clone(),
+            mac_address: config
+                .guest_mac
+                .clone()
+                .expect("NetworkConfig::guest_mac must be set to build a NetDevice"),
+            disable_vhost_net,
+            disable_modern: false,
+            fds,
+            transport: VirtioTransport::Pci,
+            ..Default::default()
+        }
+    }
+
+    // fd_list returns the descriptors stored under `key`, or an empty slice
+    // when the key is absent.
+    fn fd_list(&self, key: &str) -> &[RawFd] {
+        self.fds.get(key).map(Vec::as_slice).unwrap_or_default()
+    }
+
+    // prepare_fd_list runs clear_fd_flags() on every descriptor and returns
+    // them joined with ':' as expected by "fds=" / "vhostfds=".
+    // NOTE(review): clear_fd_flags lives in crate::utils; presumably it makes
+    // the fds inheritable by the qemu process - confirm.
+    fn prepare_fd_list(fds: &[RawFd]) -> Result<String> {
+        let mut parts = Vec::with_capacity(fds.len());
+        for &fd in fds {
+            clear_fd_flags(fd).with_context(|| format!("clear flags of fd {} failed", fd))?;
+            parts.push(fd.to_string());
+        }
+
+        Ok(parts.join(":"))
+    }
+
+    // mq_param returns the multiqueue options appended to "-device".
+    fn mq_param(&self) -> String {
+        let mut params = vec!["mq=on".to_owned()];
+        if self.transport == VirtioTransport::Pci {
+            // https://www.linux-kvm.org/page/Multiqueue
+            // -netdev tap,vhost=on,queues=N
+            // enable mq and specify msix vectors in qemu cmdline
+            // (2N+2 vectors, N for tx queues, N for rx queues, 1 for config, and one for possible control vq)
+            // -device virtio-net-pci,mq=on,vectors=2N+2...
+            // enable mq in guest by 'ethtool -L eth0 combined $queue_num'
+            // Clearlinux automatically sets up the queues properly
+            // The agent implementation should do this to ensure that it is
+            // always set
+
+            // vectors = len(netdev.FDs) * 2 + 2
+            let queues = self.fd_list(Self::TAP_FDS_KEY).len();
+            params.push(format!("vectors={}", 2 * queues + 2));
+        }
+
+        params.join(",")
+    }
+
+    // qemu_device_params returns the comma-separated pieces of the "-device"
+    // argument for this network device.
+    pub fn qemu_device_params(&self) -> Result<Vec<String>> {
+        let mut device_params = vec![
+            format!("driver={}", self.device_driver),
+            format!("netdev={}", self.id),
+            format!("mac={}", self.mac_address),
+        ];
+
+        if !self.bus.is_empty() {
+            device_params.push(format!("bus={}", self.bus));
+        }
+
+        if !self.pci_addr.is_empty() {
+            // FIXME: pci_addr: PciPath
+            device_params.push(format!("addr={}", self.pci_addr));
+        }
+
+        device_params.push(format!("disable-modern={}", self.disable_modern));
+
+        // Multiqueue only applies when tap fds were handed over. The map itself
+        // always holds both keys after `new`, so the fd list is what is tested.
+        if !self.fd_list(Self::TAP_FDS_KEY).is_empty() {
+            device_params.push(self.mq_param());
+        }
+
+        Ok(device_params)
+    }
+
+    // qemu_netdev_params returns the comma-separated pieces of the "-netdev"
+    // argument for this network device.
+    //
+    // Fails if clear_fd_flags() fails on any descriptor.
+    pub fn qemu_netdev_params(&self) -> Result<Vec<String>> {
+        let mut netdev_params = vec![self.device_type.to_string(), format!("id={}", self.id)];
+
+        if !self.disable_vhost_net {
+            netdev_params.push("vhost=on".to_owned());
+
+            // An empty list must not be emitted as "vhostfds=".
+            let vhost_fds = self.fd_list(Self::VHOST_FDS_KEY);
+            if !vhost_fds.is_empty() {
+                netdev_params.push(format!("vhostfds={}", Self::prepare_fd_list(vhost_fds)?));
+            }
+        }
+
+        // Without pre-opened tap fds fall back to naming the interface.
+        let tap_fds = self.fd_list(Self::TAP_FDS_KEY);
+        if tap_fds.is_empty() {
+            netdev_params.push(format!("ifname={}", self.if_name));
+            netdev_params.push("script=no".to_owned());
+            netdev_params.push("downscript=no".to_owned());
+        } else {
+            netdev_params.push(format!("fds={}", Self::prepare_fd_list(tap_fds)?));
+        }
+
+        Ok(netdev_params)
+    }
+}
+
+// Display formats the MAC address as six colon-separated, zero-padded,
+// lowercase hex octets, e.g. "24:42:54:20:50:46".
+impl std::fmt::Display for Address {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let b: [u8; 6] = self.0;
+
+        write!(
+            f,
+            "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}",
+            b[0], b[1], b[2], b[3], b[4], b[5]
+        )
+    }
+}