diff --git a/src/dragonball/Cargo.lock b/src/dragonball/Cargo.lock index c5be35bfb9..f2e087213b 100644 --- a/src/dragonball/Cargo.lock +++ b/src/dragonball/Cargo.lock @@ -247,9 +247,9 @@ dependencies = [ [[package]] name = "dbs-boot" -version = "0.3.1" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a74a8c05a1674d3032e610b4f201c7440c345559bad3dfe6b455ce195785108" +checksum = "5466a92f75aa928a9103dcb2088f6d1638ef9da8945fad7389a73864dfa0182c" dependencies = [ "dbs-arch", "kvm-bindings", diff --git a/src/dragonball/Cargo.toml b/src/dragonball/Cargo.toml index 5036d72728..f70463266e 100644 --- a/src/dragonball/Cargo.toml +++ b/src/dragonball/Cargo.toml @@ -15,7 +15,7 @@ bytes = "1.1.0" dbs-address-space = "0.2.0" dbs-allocator = "0.1.0" dbs-arch = "0.2.0" -dbs-boot = "0.3.0" +dbs-boot = "0.4.0" dbs-device = "0.2.0" dbs-interrupt = { version = "0.2.0", features = ["kvm-irq"] } dbs-legacy-devices = "0.1.0" diff --git a/src/dragonball/src/api/v1/vmm_action.rs b/src/dragonball/src/api/v1/vmm_action.rs index a271d04cbf..6ad7bfcb72 100644 --- a/src/dragonball/src/api/v1/vmm_action.rs +++ b/src/dragonball/src/api/v1/vmm_action.rs @@ -626,12 +626,6 @@ impl VmmService { #[cfg(feature = "hotplug")] fn resize_vcpu(&mut self, vmm: &mut Vmm, config: VcpuResizeInfo) -> VmmRequestResult { - if !cfg!(target_arch = "x86_64") { - // TODO: Arm need to support vcpu hotplug. 
issue: #6010 - warn!("This arch do not support vm resize!"); - return Ok(VmmData::Empty); - } - if !cfg!(feature = "dbs-upcall") { warn!("We only support cpu resize through upcall server in the guest kernel now, please enable dbs-upcall feature."); return Ok(VmmData::Empty); diff --git a/src/dragonball/src/device_manager/mod.rs b/src/dragonball/src/device_manager/mod.rs index 766c5eef93..49e9666ac4 100644 --- a/src/dragonball/src/device_manager/mod.rs +++ b/src/dragonball/src/device_manager/mod.rs @@ -1019,6 +1019,7 @@ mod tests { use vm_memory::{GuestAddress, MmapRegion}; use super::*; + #[cfg(target_arch = "x86_64")] use crate::vm::CpuTopology; impl DeviceManager { diff --git a/src/dragonball/src/vcpu/vcpu_manager.rs b/src/dragonball/src/vcpu/vcpu_manager.rs index 8f39af5194..41200d0107 100644 --- a/src/dragonball/src/vcpu/vcpu_manager.rs +++ b/src/dragonball/src/vcpu/vcpu_manager.rs @@ -374,11 +374,17 @@ impl VcpuManager { entry_addr: GuestAddress, ) -> Result<()> { info!("create boot vcpus"); - self.create_vcpus( - self.vcpu_config.boot_vcpu_count, - Some(request_ts), - Some(entry_addr), - )?; + let boot_vcpu_count = if cfg!(target_arch = "aarch64") { + // On aarch64, kvm doesn't allow to call KVM_CREATE_VCPU ioctl after vm has been booted + // because of vgic check. To support vcpu hotplug/hotunplug feature, we should create + // all the vcpufd at booting procedure. + // SetVmConfiguration API will ensure max_vcpu_count >= boot_vcpu_count, so it is safe + // to directly use max_vcpu_count here. 
+ self.vcpu_config.max_vcpu_count + } else { + self.vcpu_config.boot_vcpu_count + }; + self.create_vcpus(boot_vcpu_count, Some(request_ts), Some(entry_addr))?; Ok(()) } @@ -1213,7 +1219,10 @@ mod tests { assert!(vcpu_manager .create_boot_vcpus(TimestampUs::default(), GuestAddress(0)) .is_ok()); + #[cfg(target_arch = "x86_64")] assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3); // test start boot vcpus assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok()); @@ -1267,8 +1276,14 @@ mod tests { assert!(vcpu_manager .create_boot_vcpus(TimestampUs::default(), GuestAddress(0)) .is_ok()); + #[cfg(target_arch = "x86_64")] assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3); + assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok()); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2); // invalid cpuid for pause let cpu_indexes = vec![2]; @@ -1304,9 +1319,14 @@ mod tests { assert!(vcpu_manager .create_boot_vcpus(TimestampUs::default(), GuestAddress(0)) .is_ok()); + #[cfg(target_arch = "x86_64")] assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3); assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok()); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2); // invalid cpuid for exit let cpu_indexes = vec![2]; @@ -1330,9 +1350,14 @@ mod tests { assert!(vcpu_manager .create_boot_vcpus(TimestampUs::default(), GuestAddress(0)) .is_ok()); + #[cfg(target_arch = "x86_64")] assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3); 
assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok()); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2); // exit all success assert!(vcpu_manager.exit_all_vcpus().is_ok()); @@ -1351,9 +1376,14 @@ mod tests { assert!(vcpu_manager .create_boot_vcpus(TimestampUs::default(), GuestAddress(0)) .is_ok()); + #[cfg(target_arch = "x86_64")] assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3); assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok()); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2); // invalid cpuid for exit let cpu_indexes = vec![2]; @@ -1377,9 +1407,14 @@ mod tests { assert!(vcpu_manager .create_boot_vcpus(TimestampUs::default(), GuestAddress(0)) .is_ok()); + #[cfg(target_arch = "x86_64")] assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3); assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok()); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2); // revalidate all success assert!(vcpu_manager.revalidate_all_vcpus_cache().is_ok()); @@ -1395,9 +1430,14 @@ mod tests { assert!(vcpu_manager .create_boot_vcpus(TimestampUs::default(), GuestAddress(0)) .is_ok()); + #[cfg(target_arch = "x86_64")] assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3); assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok()); + #[cfg(target_arch = "aarch64")] + assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2); // set vcpus in hotplug action let cpu_ids = vec![0]; diff --git a/src/dragonball/src/vm/aarch64.rs b/src/dragonball/src/vm/aarch64.rs index edc7532457..fe8f23207c 100644 --- 
a/src/dragonball/src/vm/aarch64.rs +++ b/src/dragonball/src/vm/aarch64.rs @@ -6,70 +6,35 @@ // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. -use std::collections::HashMap; -use std::fmt::Debug; use std::ops::Deref; +use std::sync::MutexGuard; use dbs_arch::gic::GICDevice; use dbs_arch::pmu::initialize_pmu; -use dbs_arch::{DeviceInfoForFDT, DeviceType, VpmuFeatureLevel}; +use dbs_arch::{MMIODeviceInfo, VpmuFeatureLevel}; +use dbs_boot::fdt_utils::*; use dbs_boot::InitrdConfig; use dbs_utils::epoll_manager::EpollManager; use dbs_utils::time::TimestampUs; -use linux_loader::loader::Cmdline; -use vm_memory::{GuestAddressSpace, GuestMemory}; +use linux_loader::cmdline::{Cmdline, Error as CmdlineError}; +use vm_memory::GuestAddressSpace; use vmm_sys_util::eventfd::EventFd; use super::{Vm, VmError}; use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl}; use crate::error::{Error, StartMicroVmError}; use crate::event_manager::EventManager; +use crate::vcpu::VcpuManager; -/// Configures the system and should be called once per vm before starting vcpu threads. -/// For aarch64, we only setup the FDT. -/// -/// # Arguments -/// -/// * `guest_mem` - The memory to be used by the guest. -/// * `cmdline` - The kernel commandline. -/// * `vcpu_mpidr` - Array of MPIDR register values per vcpu. -/// * `device_info` - A hashmap containing the attached devices for building FDT device nodes. -/// * `gic_device` - The GIC device. -/// * `initrd` - Information about an optional initrd. 
-#[allow(clippy::borrowed_box)] -fn configure_system( - guest_mem: &M, - cmdline: &str, - vcpu_mpidr: Vec, - device_info: Option<&HashMap<(DeviceType, String), T>>, - gic_device: &Box, - initrd: &Option, - vpmu_feature: &VpmuFeatureLevel, -) -> super::Result<()> { - dbs_boot::fdt::create_fdt( - guest_mem, - vcpu_mpidr, - cmdline, - device_info, - gic_device, - initrd, - vpmu_feature, - ) - .map_err(Error::BootSystem)?; - Ok(()) -} - -#[cfg(target_arch = "aarch64")] impl Vm { /// Gets a reference to the irqchip of the VM - #[allow(clippy::borrowed_box)] - pub fn get_irqchip(&self) -> &Box { - self.irqchip_handle.as_ref().unwrap() + pub fn get_irqchip(&self) -> &dyn GICDevice { + self.irqchip_handle.as_ref().unwrap().as_ref() } /// Creates the irq chip in-kernel device model. pub fn setup_interrupt_controller(&mut self) -> std::result::Result<(), StartMicroVmError> { - let vcpu_count = self.vm_config.vcpu_count; + let vcpu_count = self.vm_config.max_vcpu_count; self.irqchip_handle = Some( dbs_arch::gic::create_gic(&self.vm_fd, vcpu_count.into()) @@ -99,12 +64,11 @@ impl Vm { /// Initialize the virtual machine instance. /// /// It initialize the virtual machine instance by: - /// 1) initialize virtual machine global state and configuration. - /// 2) create system devices, such as interrupt controller. - /// 3) create and start IO devices, such as serial, console, block, net, vsock etc. - /// 4) create and initialize vCPUs. - /// 5) configure CPU power management features. - /// 6) load guest kernel image. + /// 1) Initialize virtual machine reset event fd. + /// 2) Create and initialize vCPUs. + /// 3) Create and initialize interrupt controller. + /// 4) Create and initialize vPMU device. + /// 5) Create and initialize devices, such as virtio, block, net, vsock, vfio etc. pub fn init_microvm( &mut self, epoll_mgr: EpollManager, @@ -139,6 +103,52 @@ impl Vm { Ok(()) } + /// Generate fdt information about VM. 
+ fn get_fdt_vm_info<'a>( + &'a self, + vm_memory: &'a GuestMemoryImpl, + cmdline: &'a str, + initrd_config: Option<&'a InitrdConfig>, + vcpu_manager: &'a MutexGuard<VcpuManager>, + ) -> FdtVmInfo { + let guest_memory = vm_memory.memory(); + let vcpu_mpidr = vcpu_manager + .vcpus() + .into_iter() + .map(|cpu| cpu.get_mpidr()) + .collect(); + let vm_config = self.vm_config(); + let mut vcpu_boot_onlined = vec![1; vm_config.vcpu_count as usize]; + vcpu_boot_onlined.resize(vm_config.max_vcpu_count as usize, 0); + let vpmu_feature = vcpu_manager.vpmu_feature(); + // This configuration is used for passing cache information into guest. + // TODO: dragonball-sandbox #274; kata-containers #6969 + let cache_passthrough_enabled = false; + let fdt_vcpu_info = FdtVcpuInfo::new( + vcpu_mpidr, + vcpu_boot_onlined, + vpmu_feature, + cache_passthrough_enabled, + ); + + FdtVmInfo::new(guest_memory, cmdline, initrd_config, fdt_vcpu_info) + } + + // This method is used for passing cache/numa information into guest + // TODO: dragonball-sandbox #274,#275; kata-containers #6969 + /// Generate fdt information about cache/numa + fn get_fdt_numa_info(&self) -> FdtNumaInfo { + FdtNumaInfo::default() + } + + /// Generate fdt information about devices + fn get_fdt_device_info(&self) -> FdtDeviceInfo<MMIODeviceInfo> { + FdtDeviceInfo::new( + self.device_manager().get_mmio_device_info(), + self.get_irqchip(), + ) + } + /// Execute system architecture specific configurations. 
/// /// 1) set guest kernel boot parameters @@ -150,24 +160,23 @@ impl Vm { initrd: Option<InitrdConfig>, ) -> std::result::Result<(), StartMicroVmError> { let vcpu_manager = self.vcpu_manager().map_err(StartMicroVmError::Vcpu)?; - let vpmu_feature = vcpu_manager.vpmu_feature(); - let vcpu_mpidr = vcpu_manager - .vcpus() - .into_iter() - .map(|cpu| cpu.get_mpidr()) - .collect(); - let guest_memory = vm_memory.memory(); + let cmdline_cstring = cmdline + .as_cstring() + .map_err(StartMicroVmError::ProcessCommandlne)?; + let fdt_vm_info = self.get_fdt_vm_info( + vm_memory, + cmdline_cstring + .to_str() + .map_err(|_| StartMicroVmError::ProcessCommandlne(CmdlineError::InvalidAscii))?, + initrd.as_ref(), + &vcpu_manager, + ); + let fdt_numa_info = self.get_fdt_numa_info(); + let fdt_device_info = self.get_fdt_device_info(); - configure_system( - guest_memory, - cmdline.as_cstring().unwrap().to_str().unwrap(), - vcpu_mpidr, - self.device_manager.get_mmio_device_info(), - self.get_irqchip(), - &initrd, - &vpmu_feature, - ) - .map_err(StartMicroVmError::ConfigureSystem) + dbs_boot::fdt::create_fdt(fdt_vm_info, fdt_numa_info, fdt_device_info) + .map(|_| ()) + .map_err(|e| StartMicroVmError::ConfigureSystem(Error::BootSystem(e))) } pub(crate) fn register_events( diff --git a/src/dragonball/src/vm/mod.rs b/src/dragonball/src/vm/mod.rs index a9ccb02ea3..852d78c2b4 100644 --- a/src/dragonball/src/vm/mod.rs +++ b/src/dragonball/src/vm/mod.rs @@ -860,6 +860,7 @@ impl Vm { #[cfg(test)] pub mod tests { + #[cfg(target_arch = "x86_64")] use kvm_ioctls::VcpuExit; use linux_loader::cmdline::Cmdline; use test_utils::skip_if_not_root; diff --git a/tools/packaging/kernel/kata_config_version b/tools/packaging/kernel/kata_config_version index f96ac06721..fe4afb0df8 100644 --- a/tools/packaging/kernel/kata_config_version +++ b/tools/packaging/kernel/kata_config_version @@ -1 +1 @@ -105 +106 diff --git 
a/tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0005-upcall-dragonball-devmgr-suppots-cpu-hotplug-on-arm6.patch b/tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0005-upcall-dragonball-devmgr-suppots-cpu-hotplug-on-arm6.patch new file mode 100644 index 0000000000..74dcc732e5 --- /dev/null +++ b/tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0005-upcall-dragonball-devmgr-suppots-cpu-hotplug-on-arm6.patch @@ -0,0 +1,163 @@ +From 16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72 Mon Sep 17 00:00:00 2001 +Message-Id: <16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72.1685428663.git.jiyunxue@linux.alibaba.com> +From: xuejun-xj +Date: Wed, 10 May 2023 13:55:43 +0800 +Subject: [PATCH 1/3] upcall: dragonball-devmgr suppots cpu hotplug on arm64 + +Enable vcpuhotplug feature on aarch64 in guest kernel. It communicates +with dragonball by using upcall. This commit does these changes: + +1. Wraps x86 related fields with CONFIG_X86_64. +2. Add "cpu_event_notification" for arm64. +3. Add "add_cpu_dev" and "del_cpu_dev" for arm64. 
+ +Signed-off-by: xuejun-xj +Reviewed-by : Chao Wu +Reviewed-by: Zizheng Bian +Reviewed-by: Baolin Wang +--- + .../upcall_srv/dragonball_device_manager.c | 84 ++++++++++++++++++- + 1 file changed, 81 insertions(+), 3 deletions(-) + +diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c +index 5a95b2ba63e8..088d38623b8d 100644 +--- a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c ++++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c +@@ -85,15 +85,21 @@ struct devmgr_req { + #if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) + struct { + uint8_t count; ++#ifdef CONFIG_X86_64 + uint8_t apic_ver; + uint8_t apic_ids[256]; ++#endif + } cpu_dev_info; + #endif + } msg_load; + }; + + struct cpu_dev_reply_info { ++#if defined(CONFIG_X86_64) + uint32_t apic_index; ++#elif defined(CONFIG_ARM64) ++ uint32_t cpu_id; ++#endif + }; + + struct devmgr_reply { +@@ -190,7 +196,8 @@ static void _fill_msg_header(struct devmgr_msg_header *msg, uint32_t msg_size, + msg->msg_flags = msg_flags; + } + +-#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64) ++#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) ++#if defined(CONFIG_X86_64) + static int get_cpu_id(int apic_id) + { + int i; +@@ -219,6 +226,24 @@ static void cpu_event_notification( + _fill_msg_header(&rep->msg_header, + sizeof(struct cpu_dev_reply_info), action_type, 0); + } ++#elif defined(CONFIG_ARM64) ++/** ++ * Return the first failed hotplug index of the cpu_id to dragonball. ++ * If hotplug/hotunplug succeeds, it will equals to the expected cpu count. 
++ */ ++static void cpu_event_notification( ++ uint8_t cpu_id, ++ int ret, ++ uint32_t action_type, ++ struct devmgr_reply *rep) ++{ ++ pr_info("cpu event notification: cpu_id %d\n", cpu_id); ++ rep->msg_load.cpu_dev_info.cpu_id = cpu_id; ++ rep->ret = ret; ++ _fill_msg_header(&rep->msg_header, ++ sizeof(struct cpu_dev_reply_info), action_type, 0); ++} ++#endif + #endif + + #if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO) +@@ -262,7 +287,8 @@ static int del_mmio_dev(struct devmgr_req *req, + #endif + + +-#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64) ++#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) ++#if defined(CONFIG_X86_64) + static int add_cpu_upcall(int apic_id, uint8_t apic_ver) + { + int cpu_id, node_id; +@@ -430,6 +456,58 @@ static int del_cpu_dev(struct devmgr_req *req, + cpu_event_notification(i, ret, DEL_CPU, rep); + return ret; + } ++#elif defined(CONFIG_ARM64) ++static int add_cpu_dev(struct devmgr_req *req, struct devmgr_reply *rep) ++{ ++ int i, ret = 0; ++ unsigned int cpu_id, nr_online_cpus; ++ uint8_t count = req->msg_load.cpu_dev_info.count; ++ ++ nr_online_cpus = num_online_cpus(); ++ ++ pr_info("Current vcpu number: %d, Add vcpu number: %d\n", ++ nr_online_cpus, count); ++ ++ for (i = 0; i < count; ++i) { ++ cpu_id = nr_online_cpus + i; ++ ret = add_cpu(cpu_id); ++ if (ret != 0) ++ break; ++ } ++ ++ cpu_event_notification(nr_online_cpus + i, ret, ADD_CPU, rep); ++ return ret; ++} ++ ++static int del_cpu_dev(struct devmgr_req *req, struct devmgr_reply *rep) ++{ ++ int i, ret = 0; ++ unsigned int cpu_id, nr_online_cpus; ++ uint8_t count = req->msg_load.cpu_dev_info.count; ++ ++ nr_online_cpus = num_online_cpus(); ++ ++ pr_info("Current vcpu number: %d, Delete vcpu number: %d\n", ++ nr_online_cpus, count); ++ ++ if (count >= nr_online_cpus) { ++ pr_err("cpu del parameter check error: cannot remove all vcpus\n"); ++ ret = -EINVAL; ++ cpu_event_notification(0, ret, DEL_CPU, rep); ++ return ret; ++ } ++ ++ for (i = 0; i < count; 
++i) { ++ cpu_id = nr_online_cpus - i - 1; ++ ret = remove_cpu(cpu_id); ++ if (ret != 0) ++ break; ++ } ++ ++ cpu_event_notification(nr_online_cpus - i, ret, DEL_CPU, rep); ++ return ret; ++} ++#endif + #endif + + static struct { +@@ -440,7 +518,7 @@ static struct { + {ADD_MMIO, add_mmio_dev}, + {DEL_MMIO, del_mmio_dev}, + #endif +-#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64) ++#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) + {ADD_CPU, add_cpu_dev}, + {DEL_CPU, del_cpu_dev}, + #endif +-- +2.28.0 + diff --git a/tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0006-msi-control-msi-irq-number-activated.patch b/tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0006-msi-control-msi-irq-number-activated.patch new file mode 100644 index 0000000000..b40cf6666e --- /dev/null +++ b/tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0006-msi-control-msi-irq-number-activated.patch @@ -0,0 +1,67 @@ +From 6e07ca77fe7b5c15e0e98d9e86294c7dd2553a5a Mon Sep 17 00:00:00 2001 +Message-Id: <6e07ca77fe7b5c15e0e98d9e86294c7dd2553a5a.1685428663.git.jiyunxue@linux.alibaba.com> +In-Reply-To: <16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72.1685428663.git.jiyunxue@linux.alibaba.com> +References: <16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72.1685428663.git.jiyunxue@linux.alibaba.com> +From: xuejun-xj +Date: Wed, 10 May 2023 14:51:40 +0800 +Subject: [PATCH 2/3] msi: control msi irq number activated + +When passthroughing pci device, kernel will initialize and activate +(max_cpu_count+1) msi irq. However, in vcpu hotplugging situation, +because of vgic, max_cpu_count may be greater than online_cpu_count. +Those offline cpus will also be activated by kernel, which cause failure +of passthroughing pci device. + +To solve this problem, this patch add a function +"check_affinity_mask_online" to check if msi_desc->affinity contains +online cpus. If current cpu is offline, it will continue the for loop to +skip activating related irq. 
+ +Signed-off-by: xuejun-xj +Reviewed-by: Shuo Tan +Reviewed-by: Baolin Wang +--- + kernel/irq/msi.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c +index d924676c8781..d60a3fc654e6 100644 +--- a/kernel/irq/msi.c ++++ b/kernel/irq/msi.c +@@ -395,6 +395,23 @@ static bool msi_check_reservation_mode(struct irq_domain *domain, + return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit; + } + ++/* This function is used for check whether the cpu affinity belongs to the ++ * online cpus. When we passthrough the nvme devices, the kernel will allocate ++ * maxcpus+1 MSI irqs and then activate them. In vcpu hotplug situations, it ++ * may happen that kernel activates the offline cpus when bootcpus < maxcpus. ++ * To avoid this conflict, this function check the affinities. ++ */ ++static inline bool check_affinity_mask_online(struct irq_affinity_desc *affinity) ++{ ++ int cpu; ++ ++ for_each_cpu(cpu, &affinity->mask) ++ if (cpu_online(cpu)) ++ return true; ++ ++ return false; ++} ++ + int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, + int nvec) + { +@@ -445,6 +462,9 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, + goto skip_activate; + + for_each_msi_vector(desc, i, dev) { ++ if (desc->affinity ++ && !check_affinity_mask_online(desc->affinity)) ++ continue; + if (desc->irq == i) { + virq = desc->irq; + dev_dbg(dev, "irq [%d-%d] for MSI\n", +-- +2.28.0 + diff --git a/tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0007-smp-update-bringup_nonboot_cpus-parameters.patch b/tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0007-smp-update-bringup_nonboot_cpus-parameters.patch new file mode 100644 index 0000000000..71d4fb977f --- /dev/null +++ b/tools/packaging/kernel/patches/5.10.x/dragonball-experimental/0007-smp-update-bringup_nonboot_cpus-parameters.patch @@ -0,0 +1,139 @@ +From a05086142be13d43c7fc92500bcb870a2f37e485 Mon Sep 17 
00:00:00 2001 +Message-Id: <a05086142be13d43c7fc92500bcb870a2f37e485.1685428663.git.jiyunxue@linux.alibaba.com> +In-Reply-To: <16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72.1685428663.git.jiyunxue@linux.alibaba.com> +References: <16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72.1685428663.git.jiyunxue@linux.alibaba.com> +From: xuejun-xj +Date: Tue, 23 May 2023 09:43:02 +0800 +Subject: [PATCH 3/3] smp: update bringup_nonboot_cpus parameters + +On aarch64, kvm doesn't allow vmm to call KVM_CREATE_VCPU ioctls after +vm has already started, which is caused by vgic_initialized check in +kvm_arch_vcpu_precreate() function. Therefore, to support vcpu hotplug +feature on aarch64, all the vcpus should be created and configured ready +for start at booting procedure. + +To solve the problem, dragonball will add a property in each cpu node, +called "boot-onlined". This property indicates whether this cpu should +be onlined at first boot. It has two values: 0 and 1. 0 means offline, +while 1 means online. + +This commit also add a helper function called "of_get_cpu_boot_onlined", +which parse the cpu node and get the value of boot-onlined property. +Then update the global variable "boot_onlined_cpu". + +When kernel calling smp_init(), bringup_nonboot_cpus will start all the +other cpus except cpu0. The activated cpu number equals setup_max_cpus. +In vcpu hotplug scenario, vmm will create all the vcpufd before vm is +initialized, while activating only a few vcpus at first boot. The +setup_max_cpus variable will be initialized as all vcpu count. This +cause that the other cpus cannot find enough cpu threads, and they will +wait for 5 seconds each cpu. + +Therefore, we use boot_onlined_cpu instead of setup_max_cpus to give +"bringup_nonboot_cpus" correct cpu number it needs. 
+ +Signed-off-by: xuejun-xj +--- + .../devicetree/bindings/arm/cpus.yaml | 11 +++++++++ + arch/arm64/kernel/smp.c | 24 +++++++++++++++++++ + kernel/smp.c | 10 +++++++- + 3 files changed, 44 insertions(+), 1 deletion(-) + +diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml +index 14cd727d3c4b..691bb352d842 100644 +--- a/Documentation/devicetree/bindings/arm/cpus.yaml ++++ b/Documentation/devicetree/bindings/arm/cpus.yaml +@@ -316,6 +316,17 @@ properties: + formed by encoding the target CPU id into the low bits of the + physical start address it should jump to. + ++ boot-onlined: ++ $ref: '/schemas/types.yaml#/definitions/uint32' ++ description: | ++ The boot-onlined property is an optional u32 value that indicates ++ whether the cpu device should be activated at first boot. This is ++ useful in vcpu hotplug scenario to pass correct value of activated ++ cpu number. ++ ++ This property has two values: 0 and 1. 1 means the cpu should be ++ activated while 0 means it shouldn't. ++ + if: + # If the enable-method property contains one of those values + properties: +diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c +index 18e9727d3f64..5db8041929a6 100644 +--- a/arch/arm64/kernel/smp.c ++++ b/arch/arm64/kernel/smp.c +@@ -464,6 +464,27 @@ void __init smp_prepare_boot_cpu(void) + init_gic_priority_masking(); + } + ++#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_ARM64) ++extern unsigned int boot_onlined_cpu; ++static void __init of_get_cpu_boot_onlined(struct device_node *dn) ++{ ++ unsigned int boot_onlined; ++ int r; ++ ++ r = of_property_read_u32(dn, "boot-onlined", &boot_onlined); ++ if (r) { ++ pr_err("%pOF: missing boot-onlined property\n", dn); ++ return; ++ } ++ /* ++ * Property boot-onlined has two values: 0 and 1. ++ * 0 means offline, and 1 means online. ++ * Here just count the number of boot_onlined_cpu. 
++ */ ++ boot_onlined_cpu += boot_onlined; ++} ++#endif ++ + static u64 __init of_get_cpu_mpidr(struct device_node *dn) + { + const __be32 *cell; +@@ -654,6 +675,9 @@ static void __init of_parse_and_init_cpus(void) + struct device_node *dn; + + for_each_of_cpu_node(dn) { ++#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_ARM64) ++ of_get_cpu_boot_onlined(dn); ++#endif + u64 hwid = of_get_cpu_mpidr(dn); + + if (hwid == INVALID_HWID) +diff --git a/kernel/smp.c b/kernel/smp.c +index 25240fb2df94..567615b9a008 100644 +--- a/kernel/smp.c ++++ b/kernel/smp.c +@@ -801,17 +801,25 @@ void __init setup_nr_cpu_ids(void) + nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; + } + ++/* Setup number of CPUs to activate */ ++unsigned int boot_onlined_cpu = 0; ++ + /* Called by boot processor to activate the rest. */ + void __init smp_init(void) + { + int num_nodes, num_cpus; ++ int num_onlined_cpu = setup_max_cpus; + + idle_threads_init(); + cpuhp_threads_init(); + + pr_info("Bringing up secondary CPUs ...\n"); + +- bringup_nonboot_cpus(setup_max_cpus); ++#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_ARM64) ++ if (boot_onlined_cpu != 0) ++ num_onlined_cpu = boot_onlined_cpu; ++#endif ++ bringup_nonboot_cpus(num_onlined_cpu); + + num_nodes = num_online_nodes(); + num_cpus = num_online_cpus(); +-- +2.28.0 +