diff --git a/src/libs/kata-types/src/config/default.rs b/src/libs/kata-types/src/config/default.rs index d2d922715b..7108c4add1 100644 --- a/src/libs/kata-types/src/config/default.rs +++ b/src/libs/kata-types/src/config/default.rs @@ -71,6 +71,7 @@ pub const MIN_QEMU_MEMORY_SIZE_MB: u32 = 64; // Default configuration for Cloud Hypervisor (CH) pub const DEFAULT_CH_BINARY_PATH: &str = "/usr/bin/cloud-hypervisor"; +pub const DEFAULT_CH_ROOTFS_TYPE: &str = "ext4"; pub const DEFAULT_CH_CONTROL_PATH: &str = ""; pub const DEFAULT_CH_ENTROPY_SOURCE: &str = "/dev/urandom"; pub const DEFAULT_CH_GUEST_KERNEL_IMAGE: &str = "vmlinuz"; diff --git a/src/runtime-rs/Cargo.lock b/src/runtime-rs/Cargo.lock index 598d1940a9..fb58de5491 100644 --- a/src/runtime-rs/Cargo.lock +++ b/src/runtime-rs/Cargo.lock @@ -426,6 +426,8 @@ version = "0.1.0" dependencies = [ "anyhow", "api_client", + "kata-types", + "nix 0.26.2", "serde", "serde_json", "tokio", @@ -614,7 +616,7 @@ dependencies = [ "kvm-bindings", "kvm-ioctls", "libc", - "memoffset", + "memoffset 0.6.5", "vm-memory", "vmm-sys-util 0.11.0", ] @@ -1562,6 +1564,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +dependencies = [ + "autocfg", +] + [[package]] name = "miniz_oxide" version = "0.5.3" @@ -1665,7 +1676,7 @@ dependencies = [ "cc", "cfg-if 1.0.0", "libc", - "memoffset", + "memoffset 0.6.5", ] [[package]] @@ -1677,7 +1688,7 @@ dependencies = [ "bitflags", "cfg-if 1.0.0", "libc", - "memoffset", + "memoffset 0.6.5", ] [[package]] @@ -1692,6 +1703,20 @@ dependencies = [ "libc", ] +[[package]] +name = "nix" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" +dependencies = [ + "bitflags", + "cfg-if 1.0.0", + "libc", + "memoffset 0.7.1", + "pin-utils", + "static_assertions", +] + [[package]] name = "no-std-compat" version = "0.4.1" @@ -2720,6 +2745,12 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02a8428da277a8e3a15271d79943e80ccc2ef254e78813a166a08d65e4c3ece5" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strum" version = "0.24.0" diff --git a/src/runtime-rs/crates/hypervisor/ch-config/Cargo.toml b/src/runtime-rs/crates/hypervisor/ch-config/Cargo.toml index 2fd58f9f3d..a513709994 100644 --- a/src/runtime-rs/crates/hypervisor/ch-config/Cargo.toml +++ b/src/runtime-rs/crates/hypervisor/ch-config/Cargo.toml @@ -20,3 +20,6 @@ tokio = { version = "1.25.0", features = ["sync", "rt"] } # being used. This version is used to pin the CH config structure # which is relatively static. api_client = { git = "https://github.com/cloud-hypervisor/cloud-hypervisor", crate = "api_client", tag = "v27.0" } + +kata-types = { path = "../../../../libs/kata-types"} +nix = "0.26.2" diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs index fe812c7ca8..d332a154f6 100644 --- a/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/ch_api.rs @@ -2,18 +2,11 @@ // // SPDX-License-Identifier: Apache-2.0 -use crate::net_util::MAC_ADDR_LEN; -use crate::{ - ConsoleConfig, ConsoleOutputMode, CpuTopology, CpusConfig, DeviceConfig, FsConfig, MacAddr, - MemoryConfig, NetConfig, PayloadConfig, PmemConfig, RngConfig, VmConfig, VsockConfig, -}; -use anyhow::{anyhow, Context, Result}; +use crate::{DeviceConfig, FsConfig, VmConfig}; +use anyhow::{anyhow, Result}; use api_client::simple_api_full_command_and_response; -use std::fmt::Display; -use std::net::Ipv4Addr; use std::os::unix::net::UnixStream; -use std::path::PathBuf; use tokio::task; pub async fn cloud_hypervisor_vmm_ping(mut socket: UnixStream) -> Result> { @@ -38,20 +31,9 @@ pub async fn cloud_hypervisor_vmm_shutdown(mut socket: UnixStream) -> Result>, - pmem_devices: Option>, + cfg: VmConfig, ) -> Result> { - let cfg = cloud_hypervisor_vm_create_cfg( - sandbox_path, - vsock_socket_path, - shared_fs_devices, - pmem_devices, - ) - .await?; - let serialised = serde_json::to_string_pretty(&cfg)?; task::spawn_blocking(move || -> Result> { @@ -124,151 +106,3 @@ pub async fn cloud_hypervisor_vm_fs_add( result } - -pub async fn cloud_hypervisor_vm_create_cfg( - // FIXME: - _sandbox_path: String, - vsock_socket_path: String, - shared_fs_devices: Option>, - pmem_devices: Option>, -) -> Result { - let topology = CpuTopology { - threads_per_core: 1, - cores_per_die: 12, - dies_per_package: 1, - packages: 1, - }; - - let cpus = CpusConfig { - boot_vcpus: 1, - max_vcpus: 12, - max_phys_bits: 46, - topology: Some(topology), - ..Default::default() - }; - - let rng = RngConfig { - src: PathBuf::from("/dev/urandom"), - ..Default::default() - }; - - let kernel_args = vec![ - "root=/dev/pmem0p1", - "rootflags=dax,data=ordered,errors=remount-ro", - "ro", - "rootfstype=ext4", - "panic=1", - "no_timer_check", - "noreplace-smp", - "console=ttyS0,115200n8", - "systemd.log_target=console", - "systemd.unit=kata-containers", - "systemd.mask=systemd-networkd.service", - "systemd.mask=systemd-networkd.socket", - "agent.log=debug", - ]; - - let cmdline = kernel_args.join(" "); - - let kernel = PathBuf::from("/opt/kata/share/kata-containers/vmlinux.container"); - - // Note that PmemConfig replaces the PayloadConfig.initrd. - let payload = PayloadConfig { - kernel: Some(kernel), - cmdline: Some(cmdline), - ..Default::default() - }; - - let serial = ConsoleConfig { - mode: ConsoleOutputMode::Tty, - ..Default::default() - }; - - let ip = Ipv4Addr::new(192, 168, 10, 10); - let mask = Ipv4Addr::new(255, 255, 255, 0); - - let mac_str = "12:34:56:78:90:01"; - - let mac = parse_mac(mac_str)?; - - let network = NetConfig { - ip, - mask, - mac, - ..Default::default() - }; - - let memory = MemoryConfig { - size: (1024 * 1024 * 2048), - - // Required - shared: true, - - prefault: false, - hugepages: false, - mergeable: false, - - // FIXME: - hotplug_size: Some(16475226112), - - ..Default::default() - }; - - let fs = shared_fs_devices; - let pmem = pmem_devices; - - let vsock = VsockConfig { - cid: 3, - socket: PathBuf::from(vsock_socket_path), - ..Default::default() - }; - - let cfg = VmConfig { - cpus, - memory, - fs, - serial, - pmem, - payload: Some(payload), - vsock: Some(vsock), - rng, - net: Some(vec![network]), - ..Default::default() - }; - - Ok(cfg) -} - -fn parse_mac(s: &S) -> Result -where - S: AsRef + ?Sized + Display, -{ - let v: Vec<&str> = s.as_ref().split(':').collect(); - let mut bytes = [0u8; MAC_ADDR_LEN]; - - if v.len() != MAC_ADDR_LEN { - return Err(anyhow!( - "invalid MAC {} (length {}, expected {})", - s, - v.len(), - MAC_ADDR_LEN - )); - } - - for i in 0..MAC_ADDR_LEN { - if v[i].len() != 2 { - return Err(anyhow!( - "invalid MAC {} (segment {} length {}, expected {})", - s, - i, - v.len(), - 2 - )); - } - - bytes[i] = - u8::from_str_radix(v[i], 16).context(format!("failed to parse MAC address: {}", s))?; - } - - Ok(MacAddr { bytes }) -} diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/convert.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/convert.rs new file mode 100644 index 0000000000..f1dc8574c0 --- /dev/null +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/convert.rs @@ -0,0 +1,251 @@ +// Copyright (c) 2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 + +use crate::net_util::MAC_ADDR_LEN; +use crate::NamedHypervisorConfig; +use crate::VmConfig; +use crate::{ + ConsoleConfig, ConsoleOutputMode, CpuFeatures, CpuTopology, CpusConfig, MacAddr, MemoryConfig, + PayloadConfig, RngConfig, VsockConfig, +}; +use anyhow::{anyhow, Context, Result}; +use kata_types::config::default::DEFAULT_CH_ENTROPY_SOURCE; +use kata_types::config::hypervisor::{CpuInfo, MachineInfo, MemoryInfo}; +use kata_types::config::BootInfo; +use std::convert::TryFrom; +use std::fmt::Display; +use std::path::PathBuf; + +// 1 MiB +const MIB: u64 = 1024 * 1024; + +const DEFAULT_CH_MAX_PHYS_BITS: u8 = 46; + +impl TryFrom for VmConfig { + type Error = anyhow::Error; + + fn try_from(n: NamedHypervisorConfig) -> Result { + let kernel_params = n.kernel_params; + let cfg = n.cfg; + let vsock_socket_path = n.vsock_socket_path; + let sandbox_path = n.sandbox_path; + let fs = n.shared_fs_devices; + let pmem = n.pmem_devices; + + let cpus = CpusConfig::try_from(cfg.cpu_info)?; + + let rng = RngConfig::try_from(cfg.machine_info)?; + + // Note that PmemConfig replaces the PayloadConfig.initrd. + let payload = PayloadConfig::try_from((cfg.boot_info, kernel_params))?; + + let serial = get_serial_cfg()?; + let console = get_console_cfg()?; + + let memory = MemoryConfig::try_from(cfg.memory_info)?; + + std::fs::create_dir_all(sandbox_path).context("failed to create sandbox path")?; + + let vsock = VsockConfig { + cid: 3, + socket: PathBuf::from(vsock_socket_path), + ..Default::default() + }; + + let cfg = VmConfig { + cpus, + memory, + serial, + console, + payload: Some(payload), + fs, + pmem, + vsock: Some(vsock), + rng, + ..Default::default() + }; + + Ok(cfg) + } +} + +impl TryFrom for MemoryConfig { + type Error = anyhow::Error; + + fn try_from(mem: MemoryInfo) -> Result { + let sysinfo = nix::sys::sysinfo::sysinfo()?; + + let max_mem_bytes = sysinfo.ram_total(); + + let mem_bytes: u64 = MIB + .checked_mul(mem.default_memory as u64) + .ok_or("cannot convert default memory to bytes") + .map_err(|e| anyhow!(e))?; + + // The amount of memory that can be hot-plugged is the total less the + // amount allocated at VM start. + let hotplug_size_bytes = max_mem_bytes + .checked_sub(mem_bytes) + .ok_or("failed to calculate max hotplug size for CH") + .map_err(|e| anyhow!(e))?; + + let cfg = MemoryConfig { + size: mem_bytes, + + // Required + shared: true, + + hotplug_size: Some(hotplug_size_bytes), + + ..Default::default() + }; + + Ok(cfg) + } +} + +impl TryFrom for CpusConfig { + type Error = anyhow::Error; + + fn try_from(cpu: CpuInfo) -> Result { + let boot_vcpus = u8::try_from(cpu.default_vcpus)?; + let max_vcpus = u8::try_from(cpu.default_maxvcpus)?; + + let topology = CpuTopology { + threads_per_core: 1, + cores_per_die: max_vcpus, + dies_per_package: 1, + packages: 1, + }; + + let max_phys_bits = DEFAULT_CH_MAX_PHYS_BITS; + + let cfg = CpusConfig { + boot_vcpus, + max_vcpus, + max_phys_bits, + topology: Some(topology), + + ..Default::default() + }; + + Ok(cfg) + } +} + +impl TryFrom for CpuFeatures { + type Error = anyhow::Error; + + #[cfg(target_arch = "x86_64")] + fn try_from(s: String) -> Result { + let amx = s.split(',').any(|x| x == "amx"); + + let cpu_features = CpuFeatures { amx }; + + Ok(cpu_features) + } + + #[cfg(not(target_arch = "x86_64"))] + fn try_from(_s: String) -> Result { + Ok(CpuFeatures::default()) + } +} + +// The 2nd tuple element is the space separated kernel parameters list. +// This cannot be created only from BootInfo since that contains the +// user-specified kernel parameters only. +impl TryFrom<(BootInfo, String)> for PayloadConfig { + type Error = anyhow::Error; + + fn try_from(args: (BootInfo, String)) -> Result { + let b = args.0; + let cmdline = args.1; + + let kernel = PathBuf::from(b.kernel); + + let payload = PayloadConfig { + kernel: Some(kernel), + cmdline: Some(cmdline), + + ..Default::default() + }; + + Ok(payload) + } +} + +impl TryFrom for RngConfig { + type Error = anyhow::Error; + + fn try_from(m: MachineInfo) -> Result { + let entropy_source = if !m.entropy_source.is_empty() { + m.entropy_source + } else { + DEFAULT_CH_ENTROPY_SOURCE.to_string() + }; + + let rng = RngConfig { + src: PathBuf::from(entropy_source), + + ..Default::default() + }; + + Ok(rng) + } +} + +fn get_serial_cfg() -> Result { + let cfg = ConsoleConfig { + file: None, + mode: ConsoleOutputMode::Tty, + iommu: false, + }; + + Ok(cfg) +} + +fn get_console_cfg() -> Result { + let cfg = ConsoleConfig { + file: None, + mode: ConsoleOutputMode::Off, + iommu: false, + }; + + Ok(cfg) +} + +#[allow(dead_code)] +fn parse_mac(s: &S) -> Result +where + S: AsRef + ?Sized + Display, +{ + let v: Vec<&str> = s.as_ref().split(':').collect(); + let mut bytes = [0u8; MAC_ADDR_LEN]; + + if v.len() != MAC_ADDR_LEN { + return Err(anyhow!( + "invalid MAC {} (length {}, expected {})", + s, + v.len(), + MAC_ADDR_LEN + )); + } + + for i in 0..MAC_ADDR_LEN { + if v[i].len() != 2 { + return Err(anyhow!( + "invalid MAC {} (segment {} length {}, expected {})", + s, + i, + v.len(), + 2 + )); + } + + bytes[i] = + u8::from_str_radix(v[i], 16).context(format!("failed to parse MAC address: {}", s))?; + } + + Ok(MacAddr { bytes }) +} diff --git a/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs b/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs index 3e3fb3412a..157ab89706 100644 --- a/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs +++ b/src/runtime-rs/crates/hypervisor/ch-config/src/lib.rs @@ -7,10 +7,12 @@ use std::net::Ipv4Addr; use std::path::PathBuf; pub mod ch_api; +pub mod convert; pub mod net_util; mod virtio_devices; use crate::virtio_devices::RateLimiterConfig; +use kata_types::config::hypervisor::Hypervisor as HypervisorConfig; pub use net_util::MacAddr; pub const MAX_NUM_PCI_SEGMENTS: u16 = 16; @@ -479,3 +481,16 @@ fn usize_is_zero(v: &usize) -> bool { fn u16_is_zero(v: &u16) -> bool { *v == 0 } + +// Type used to simplify conversion from a generic Hypervisor config +// to a CH specific VmConfig. +#[derive(Debug, Clone)] +pub struct NamedHypervisorConfig { + pub kernel_params: String, + pub sandbox_path: String, + pub vsock_socket_path: String, + pub cfg: HypervisorConfig, + + pub shared_fs_devices: Option>, + pub pmem_devices: Option>, +} diff --git a/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs b/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs index b3271ee791..2ecc328bad 100644 --- a/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs +++ b/src/runtime-rs/crates/hypervisor/src/ch/inner_hypervisor.rs @@ -6,18 +6,23 @@ use super::inner::CloudHypervisorInner; use crate::ch::utils::get_api_socket_path; use crate::ch::utils::{get_jailer_root, get_sandbox_path, get_vsock_path}; +use crate::kernel_param::KernelParams; use crate::Device; use crate::VsockConfig; +use crate::VM_ROOTFS_DRIVER_PMEM; use crate::{VcpuThreadIds, VmmState}; use anyhow::{anyhow, Context, Result}; use ch_config::ch_api::{ cloud_hypervisor_vm_create, cloud_hypervisor_vm_start, cloud_hypervisor_vmm_ping, cloud_hypervisor_vmm_shutdown, }; +use ch_config::{NamedHypervisorConfig, VmConfig}; use core::future::poll_fn; use futures::executor::block_on; use futures::future::join_all; use kata_types::capabilities::{Capabilities, CapabilityBits}; +use kata_types::config::default::DEFAULT_CH_ROOTFS_TYPE; +use std::convert::TryFrom; use std::fs::create_dir_all; use std::os::unix::net::UnixStream; use std::path::Path; @@ -54,6 +59,40 @@ impl CloudHypervisorInner { Ok(()) } + async fn get_kernel_params(&self) -> Result { + let cfg = self + .config + .as_ref() + .ok_or("no hypervisor config for CH") + .map_err(|e| anyhow!(e))?; + + let enable_debug = cfg.debug_info.enable_debug; + + // Note that the configuration option hypervisor.block_device_driver is not used. + let rootfs_driver = VM_ROOTFS_DRIVER_PMEM; + + let rootfs_type = match cfg.boot_info.rootfs_type.is_empty() { + true => DEFAULT_CH_ROOTFS_TYPE, + false => &cfg.boot_info.rootfs_type, + }; + + // Start by adding the default set of kernel parameters. + let mut params = KernelParams::new(enable_debug); + + let mut rootfs_param = KernelParams::new_rootfs_kernel_params(rootfs_driver, rootfs_type)?; + + // Add the rootfs device + params.append(&mut rootfs_param); + + // Finally, add the user-specified options at the end + // (so they will take priority). + params.append(&mut KernelParams::from_string(&cfg.boot_info.kernel_params)); + + let kernel_params = params.to_string()?; + + Ok(kernel_params) + } + async fn boot_vm(&mut self) -> Result<()> { let shared_fs_devices = self.get_shared_fs_devices().await?; @@ -71,14 +110,35 @@ impl CloudHypervisorInner { let vsock_socket_path = get_vsock_path(&self.id)?; - let response = cloud_hypervisor_vm_create( + let hypervisor_config = self + .config + .as_ref() + .ok_or("no hypervisor config for CH") + .map_err(|e| anyhow!(e))?; + + debug!( + sl!(), + "generic Hypervisor configuration: {:?}", hypervisor_config + ); + + let kernel_params = self.get_kernel_params().await?; + + let named_cfg = NamedHypervisorConfig { + kernel_params, sandbox_path, vsock_socket_path, - socket.try_clone().context("failed to clone socket")?, + cfg: hypervisor_config.clone(), shared_fs_devices, pmem_devices, - ) - .await?; + }; + + let cfg = VmConfig::try_from(named_cfg)?; + + debug!(sl!(), "CH specific VmConfig configuration: {:?}", cfg); + + let response = + cloud_hypervisor_vm_create(socket.try_clone().context("failed to clone socket")?, cfg) + .await?; if let Some(detail) = response { debug!(sl!(), "vm boot response: {:?}", detail);