Merge pull request #6806 from xuejun-xj/xuejun/vcpuhotplug

Dragonball: support vcpu hotplug on aarch64
This commit is contained in:
Zhongtao Hu 2023-05-30 18:47:50 +08:00 committed by GitHub
commit 8b6cb2cd75
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 496 additions and 82 deletions

View File

@ -247,9 +247,9 @@ dependencies = [
[[package]]
name = "dbs-boot"
version = "0.3.1"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a74a8c05a1674d3032e610b4f201c7440c345559bad3dfe6b455ce195785108"
checksum = "5466a92f75aa928a9103dcb2088f6d1638ef9da8945fad7389a73864dfa0182c"
dependencies = [
"dbs-arch",
"kvm-bindings",

View File

@ -15,7 +15,7 @@ bytes = "1.1.0"
dbs-address-space = "0.2.0"
dbs-allocator = "0.1.0"
dbs-arch = "0.2.0"
dbs-boot = "0.3.0"
dbs-boot = "0.4.0"
dbs-device = "0.2.0"
dbs-interrupt = { version = "0.2.0", features = ["kvm-irq"] }
dbs-legacy-devices = "0.1.0"

View File

@ -626,12 +626,6 @@ impl VmmService {
#[cfg(feature = "hotplug")]
fn resize_vcpu(&mut self, vmm: &mut Vmm, config: VcpuResizeInfo) -> VmmRequestResult {
if !cfg!(target_arch = "x86_64") {
// TODO: Arm need to support vcpu hotplug. issue: #6010
warn!("This arch do not support vm resize!");
return Ok(VmmData::Empty);
}
if !cfg!(feature = "dbs-upcall") {
warn!("We only support cpu resize through upcall server in the guest kernel now, please enable dbs-upcall feature.");
return Ok(VmmData::Empty);

View File

@ -1019,6 +1019,7 @@ mod tests {
use vm_memory::{GuestAddress, MmapRegion};
use super::*;
#[cfg(target_arch = "x86_64")]
use crate::vm::CpuTopology;
impl DeviceManager {

View File

@ -374,11 +374,17 @@ impl VcpuManager {
entry_addr: GuestAddress,
) -> Result<()> {
info!("create boot vcpus");
self.create_vcpus(
self.vcpu_config.boot_vcpu_count,
Some(request_ts),
Some(entry_addr),
)?;
let boot_vcpu_count = if cfg!(target_arch = "aarch64") {
// On aarch64, kvm doesn't allow calling the KVM_CREATE_VCPU ioctl after the vm has booted
// because of the vgic check. To support the vcpu hotplug/hotunplug feature, we must create
// all the vcpufds during the boot procedure.
// The SetVmConfiguration API ensures max_vcpu_count >= boot_vcpu_count, so it is safe
// to use max_vcpu_count directly here.
self.vcpu_config.max_vcpu_count
} else {
self.vcpu_config.boot_vcpu_count
};
self.create_vcpus(boot_vcpu_count, Some(request_ts), Some(entry_addr))?;
Ok(())
}
@ -1213,7 +1219,10 @@ mod tests {
assert!(vcpu_manager
.create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
.is_ok());
#[cfg(target_arch = "x86_64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
#[cfg(target_arch = "aarch64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3);
// test start boot vcpus
assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
@ -1267,8 +1276,14 @@ mod tests {
assert!(vcpu_manager
.create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
.is_ok());
#[cfg(target_arch = "x86_64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
#[cfg(target_arch = "aarch64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3);
assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
#[cfg(target_arch = "aarch64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2);
// invalid cpuid for pause
let cpu_indexes = vec![2];
@ -1304,9 +1319,14 @@ mod tests {
assert!(vcpu_manager
.create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
.is_ok());
#[cfg(target_arch = "x86_64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
#[cfg(target_arch = "aarch64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3);
assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
#[cfg(target_arch = "aarch64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2);
// invalid cpuid for exit
let cpu_indexes = vec![2];
@ -1330,9 +1350,14 @@ mod tests {
assert!(vcpu_manager
.create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
.is_ok());
#[cfg(target_arch = "x86_64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
#[cfg(target_arch = "aarch64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3);
assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
#[cfg(target_arch = "aarch64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2);
// exit all success
assert!(vcpu_manager.exit_all_vcpus().is_ok());
@ -1351,9 +1376,14 @@ mod tests {
assert!(vcpu_manager
.create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
.is_ok());
#[cfg(target_arch = "x86_64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
#[cfg(target_arch = "aarch64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3);
assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
#[cfg(target_arch = "aarch64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2);
// invalid cpuid for exit
let cpu_indexes = vec![2];
@ -1377,9 +1407,14 @@ mod tests {
assert!(vcpu_manager
.create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
.is_ok());
#[cfg(target_arch = "x86_64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
#[cfg(target_arch = "aarch64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3);
assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
#[cfg(target_arch = "aarch64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2);
// revalidate all success
assert!(vcpu_manager.revalidate_all_vcpus_cache().is_ok());
@ -1395,9 +1430,14 @@ mod tests {
assert!(vcpu_manager
.create_boot_vcpus(TimestampUs::default(), GuestAddress(0))
.is_ok());
#[cfg(target_arch = "x86_64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 1);
#[cfg(target_arch = "aarch64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 3);
assert!(vcpu_manager.start_boot_vcpus(BpfProgram::default()).is_ok());
#[cfg(target_arch = "aarch64")]
assert_eq!(get_present_unstart_vcpus(&vcpu_manager), 2);
// set vcpus in hotplug action
let cpu_ids = vec![0];

View File

@ -6,70 +6,35 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.
use std::collections::HashMap;
use std::fmt::Debug;
use std::ops::Deref;
use std::sync::MutexGuard;
use dbs_arch::gic::GICDevice;
use dbs_arch::pmu::initialize_pmu;
use dbs_arch::{DeviceInfoForFDT, DeviceType, VpmuFeatureLevel};
use dbs_arch::{MMIODeviceInfo, VpmuFeatureLevel};
use dbs_boot::fdt_utils::*;
use dbs_boot::InitrdConfig;
use dbs_utils::epoll_manager::EpollManager;
use dbs_utils::time::TimestampUs;
use linux_loader::loader::Cmdline;
use vm_memory::{GuestAddressSpace, GuestMemory};
use linux_loader::cmdline::{Cmdline, Error as CmdlineError};
use vm_memory::GuestAddressSpace;
use vmm_sys_util::eventfd::EventFd;
use super::{Vm, VmError};
use crate::address_space_manager::{GuestAddressSpaceImpl, GuestMemoryImpl};
use crate::error::{Error, StartMicroVmError};
use crate::event_manager::EventManager;
use crate::vcpu::VcpuManager;
/// Configures the system and should be called once per vm before starting vcpu threads.
/// For aarch64, we only setup the FDT.
///
/// This is a thin wrapper that forwards every argument to `dbs_boot::fdt::create_fdt`.
///
/// # Arguments
///
/// * `guest_mem` - The memory to be used by the guest.
/// * `cmdline` - The kernel commandline.
/// * `vcpu_mpidr` - Array of MPIDR register values per vcpu.
/// * `device_info` - A hashmap containing the attached devices for building FDT device nodes.
/// * `gic_device` - The GIC device.
/// * `initrd` - Information about an optional initrd.
/// * `vpmu_feature` - The vPMU feature level, forwarded unchanged to the FDT builder.
///
/// # Errors
///
/// Any error returned by `dbs_boot::fdt::create_fdt` is wrapped in `Error::BootSystem`.
// `gic_device` is taken as `&Box<dyn GICDevice>`, hence the clippy allowance below.
#[allow(clippy::borrowed_box)]
fn configure_system<T: DeviceInfoForFDT + Clone + Debug, M: GuestMemory>(
guest_mem: &M,
cmdline: &str,
vcpu_mpidr: Vec<u64>,
device_info: Option<&HashMap<(DeviceType, String), T>>,
gic_device: &Box<dyn GICDevice>,
initrd: &Option<super::InitrdConfig>,
vpmu_feature: &VpmuFeatureLevel,
) -> super::Result<()> {
dbs_boot::fdt::create_fdt(
guest_mem,
vcpu_mpidr,
cmdline,
device_info,
gic_device,
initrd,
vpmu_feature,
)
.map_err(Error::BootSystem)?;
Ok(())
}
#[cfg(target_arch = "aarch64")]
impl Vm {
/// Gets a reference to the irqchip of the VM
#[allow(clippy::borrowed_box)]
pub fn get_irqchip(&self) -> &Box<dyn GICDevice> {
self.irqchip_handle.as_ref().unwrap()
pub fn get_irqchip(&self) -> &dyn GICDevice {
self.irqchip_handle.as_ref().unwrap().as_ref()
}
/// Creates the irq chip in-kernel device model.
pub fn setup_interrupt_controller(&mut self) -> std::result::Result<(), StartMicroVmError> {
let vcpu_count = self.vm_config.vcpu_count;
let vcpu_count = self.vm_config.max_vcpu_count;
self.irqchip_handle = Some(
dbs_arch::gic::create_gic(&self.vm_fd, vcpu_count.into())
@ -99,12 +64,11 @@ impl Vm {
/// Initialize the virtual machine instance.
///
/// It initialize the virtual machine instance by:
/// 1) initialize virtual machine global state and configuration.
/// 2) create system devices, such as interrupt controller.
/// 3) create and start IO devices, such as serial, console, block, net, vsock etc.
/// 4) create and initialize vCPUs.
/// 5) configure CPU power management features.
/// 6) load guest kernel image.
/// 1) Initialize virtual machine reset event fd.
/// 2) Create and initialize vCPUs.
/// 3) Create and initialize interrupt controller.
/// 4) Create and initialize vPMU device.
/// 5) Create and initialize devices, such as virtio, block, net, vsock, vfio etc.
pub fn init_microvm(
&mut self,
epoll_mgr: EpollManager,
@ -139,6 +103,52 @@ impl Vm {
Ok(())
}
/// Assemble the VM-level inputs for FDT generation.
///
/// The result bundles the guest memory handle, the kernel command line, the optional
/// initrd configuration and the per-vCPU description. The vCPU description contains:
/// * the MPIDR value of every vCPU returned by `vcpu_manager.vcpus()`;
/// * a "boot onlined" list with one entry per possible vCPU (`max_vcpu_count` entries),
///   where the first `vcpu_count` entries are `1` and all remaining entries are `0`;
/// * the vPMU feature level reported by `vcpu_manager`;
/// * the cache passthrough switch, which is currently always `false`.
fn get_fdt_vm_info<'a>(
    &'a self,
    vm_memory: &'a GuestMemoryImpl,
    cmdline: &'a str,
    initrd_config: Option<&'a InitrdConfig>,
    vcpu_manager: &'a MutexGuard<VcpuManager>,
) -> FdtVmInfo {
    let memory = vm_memory.memory();

    let mpidrs = vcpu_manager
        .vcpus()
        .into_iter()
        .map(|vcpu| vcpu.get_mpidr())
        .collect();

    // One flag per possible vCPU: 1 for those onlined at boot, 0 for the hot-pluggable rest.
    let config = self.vm_config();
    let boot_count = config.vcpu_count as usize;
    let max_count = config.max_vcpu_count as usize;
    let boot_onlined: Vec<_> = (0..max_count)
        .map(|idx| if idx < boot_count { 1 } else { 0 })
        .collect();

    let vpmu = vcpu_manager.vpmu_feature();

    // Controls whether cache information is passed into the guest; not supported yet.
    // TODO: dragonball-sandbox #274; kata-containers #6969
    let cache_passthrough = false;

    FdtVmInfo::new(
        memory,
        cmdline,
        initrd_config,
        FdtVcpuInfo::new(mpidrs, boot_onlined, vpmu, cache_passthrough),
    )
}
/// Generate fdt information about cache/numa.
///
/// At present this only returns `FdtNumaInfo::default()`; no VM state is consulted.
// TODO: pass real cache/numa information into the guest
// (dragonball-sandbox #274, #275; kata-containers #6969).
fn get_fdt_numa_info(&self) -> FdtNumaInfo {
FdtNumaInfo::default()
}
/// Assemble the device-level inputs for FDT generation.
///
/// Pairs the MMIO device information reported by the device manager with the VM's
/// interrupt controller, as returned by `get_irqchip()`.
fn get_fdt_device_info(&self) -> FdtDeviceInfo<MMIODeviceInfo> {
    let mmio_devices = self.device_manager().get_mmio_device_info();
    let irqchip = self.get_irqchip();
    FdtDeviceInfo::new(mmio_devices, irqchip)
}
/// Execute system architecture specific configurations.
///
/// 1) set guest kernel boot parameters
@ -150,24 +160,23 @@ impl Vm {
initrd: Option<InitrdConfig>,
) -> std::result::Result<(), StartMicroVmError> {
let vcpu_manager = self.vcpu_manager().map_err(StartMicroVmError::Vcpu)?;
let vpmu_feature = vcpu_manager.vpmu_feature();
let vcpu_mpidr = vcpu_manager
.vcpus()
.into_iter()
.map(|cpu| cpu.get_mpidr())
.collect();
let guest_memory = vm_memory.memory();
let cmdline_cstring = cmdline
.as_cstring()
.map_err(StartMicroVmError::ProcessCommandlne)?;
let fdt_vm_info = self.get_fdt_vm_info(
vm_memory,
cmdline_cstring
.to_str()
.map_err(|_| StartMicroVmError::ProcessCommandlne(CmdlineError::InvalidAscii))?,
initrd.as_ref(),
&vcpu_manager,
);
let fdt_numa_info = self.get_fdt_numa_info();
let fdt_device_info = self.get_fdt_device_info();
configure_system(
guest_memory,
cmdline.as_cstring().unwrap().to_str().unwrap(),
vcpu_mpidr,
self.device_manager.get_mmio_device_info(),
self.get_irqchip(),
&initrd,
&vpmu_feature,
)
.map_err(StartMicroVmError::ConfigureSystem)
dbs_boot::fdt::create_fdt(fdt_vm_info, fdt_numa_info, fdt_device_info)
.map(|_| ())
.map_err(|e| StartMicroVmError::ConfigureSystem(Error::BootSystem(e)))
}
pub(crate) fn register_events(

View File

@ -860,6 +860,7 @@ impl Vm {
#[cfg(test)]
pub mod tests {
#[cfg(target_arch = "x86_64")]
use kvm_ioctls::VcpuExit;
use linux_loader::cmdline::Cmdline;
use test_utils::skip_if_not_root;

View File

@ -1 +1 @@
105
106

View File

@ -0,0 +1,163 @@
From 16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72 Mon Sep 17 00:00:00 2001
Message-Id: <16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72.1685428663.git.jiyunxue@linux.alibaba.com>
From: xuejun-xj <jiyunxue@linux.alibaba.com>
Date: Wed, 10 May 2023 13:55:43 +0800
Subject: [PATCH 1/3] upcall: dragonball-devmgr supports cpu hotplug on arm64
Enable the vcpu hotplug feature on aarch64 in the guest kernel. It
communicates with dragonball by using upcall. This commit makes these changes:
1. Wrap x86 related fields with CONFIG_X86_64.
2. Add "cpu_event_notification" for arm64.
3. Add "add_cpu_dev" and "del_cpu_dev" for arm64.
Signed-off-by: xuejun-xj <jiyunxue@linux.alibaba.com>
Reviewed-by: Chao Wu <chaowu@linux.alibaba.com>
Reviewed-by: Zizheng Bian <zizheng.bian@linux.alibaba.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
.../upcall_srv/dragonball_device_manager.c | 84 ++++++++++++++++++-
1 file changed, 81 insertions(+), 3 deletions(-)
diff --git a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
index 5a95b2ba63e8..088d38623b8d 100644
--- a/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
+++ b/drivers/misc/dragonball/upcall_srv/dragonball_device_manager.c
@@ -85,15 +85,21 @@ struct devmgr_req {
#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
struct {
uint8_t count;
+#ifdef CONFIG_X86_64
uint8_t apic_ver;
uint8_t apic_ids[256];
+#endif
} cpu_dev_info;
#endif
} msg_load;
};
struct cpu_dev_reply_info {
+#if defined(CONFIG_X86_64)
uint32_t apic_index;
+#elif defined(CONFIG_ARM64)
+ uint32_t cpu_id;
+#endif
};
struct devmgr_reply {
@@ -190,7 +196,8 @@ static void _fill_msg_header(struct devmgr_msg_header *msg, uint32_t msg_size,
msg->msg_flags = msg_flags;
}
-#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+#if defined(CONFIG_X86_64)
static int get_cpu_id(int apic_id)
{
int i;
@@ -219,6 +226,24 @@ static void cpu_event_notification(
_fill_msg_header(&rep->msg_header,
sizeof(struct cpu_dev_reply_info), action_type, 0);
}
+#elif defined(CONFIG_ARM64)
+/**
+ * Report to dragonball the cpu_id at which hotplug/hotunplug first failed.
+ * If hotplug/hotunplug succeeds, it equals the expected cpu count.
+ */
+static void cpu_event_notification(
+ uint8_t cpu_id,
+ int ret,
+ uint32_t action_type,
+ struct devmgr_reply *rep)
+{
+ pr_info("cpu event notification: cpu_id %d\n", cpu_id);
+ rep->msg_load.cpu_dev_info.cpu_id = cpu_id;
+ rep->ret = ret;
+ _fill_msg_header(&rep->msg_header,
+ sizeof(struct cpu_dev_reply_info), action_type, 0);
+}
+#endif
#endif
#if defined(CONFIG_DRAGONBALL_HOTPLUG_VIRTIO_MMIO)
@@ -262,7 +287,8 @@ static int del_mmio_dev(struct devmgr_req *req,
#endif
-#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
+#if defined(CONFIG_X86_64)
static int add_cpu_upcall(int apic_id, uint8_t apic_ver)
{
int cpu_id, node_id;
@@ -430,6 +456,58 @@ static int del_cpu_dev(struct devmgr_req *req,
cpu_event_notification(i, ret, DEL_CPU, rep);
return ret;
}
+#elif defined(CONFIG_ARM64)
+static int add_cpu_dev(struct devmgr_req *req, struct devmgr_reply *rep)
+{
+ int i, ret = 0;
+ unsigned int cpu_id, nr_online_cpus;
+ uint8_t count = req->msg_load.cpu_dev_info.count;
+
+ nr_online_cpus = num_online_cpus();
+
+ pr_info("Current vcpu number: %d, Add vcpu number: %d\n",
+ nr_online_cpus, count);
+
+ for (i = 0; i < count; ++i) {
+ cpu_id = nr_online_cpus + i;
+ ret = add_cpu(cpu_id);
+ if (ret != 0)
+ break;
+ }
+
+ cpu_event_notification(nr_online_cpus + i, ret, ADD_CPU, rep);
+ return ret;
+}
+
+static int del_cpu_dev(struct devmgr_req *req, struct devmgr_reply *rep)
+{
+ int i, ret = 0;
+ unsigned int cpu_id, nr_online_cpus;
+ uint8_t count = req->msg_load.cpu_dev_info.count;
+
+ nr_online_cpus = num_online_cpus();
+
+ pr_info("Current vcpu number: %d, Delete vcpu number: %d\n",
+ nr_online_cpus, count);
+
+ if (count >= nr_online_cpus) {
+ pr_err("cpu del parameter check error: cannot remove all vcpus\n");
+ ret = -EINVAL;
+ cpu_event_notification(0, ret, DEL_CPU, rep);
+ return ret;
+ }
+
+ for (i = 0; i < count; ++i) {
+ cpu_id = nr_online_cpus - i - 1;
+ ret = remove_cpu(cpu_id);
+ if (ret != 0)
+ break;
+ }
+
+ cpu_event_notification(nr_online_cpus - i, ret, DEL_CPU, rep);
+ return ret;
+}
+#endif
#endif
static struct {
@@ -440,7 +518,7 @@ static struct {
{ADD_MMIO, add_mmio_dev},
{DEL_MMIO, del_mmio_dev},
#endif
-#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_X86_64)
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU)
{ADD_CPU, add_cpu_dev},
{DEL_CPU, del_cpu_dev},
#endif
--
2.28.0

View File

@ -0,0 +1,67 @@
From 6e07ca77fe7b5c15e0e98d9e86294c7dd2553a5a Mon Sep 17 00:00:00 2001
Message-Id: <6e07ca77fe7b5c15e0e98d9e86294c7dd2553a5a.1685428663.git.jiyunxue@linux.alibaba.com>
In-Reply-To: <16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72.1685428663.git.jiyunxue@linux.alibaba.com>
References: <16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72.1685428663.git.jiyunxue@linux.alibaba.com>
From: xuejun-xj <jiyunxue@linux.alibaba.com>
Date: Wed, 10 May 2023 14:51:40 +0800
Subject: [PATCH 2/3] msi: control msi irq number activated
When passing through a pci device, the kernel initializes and activates
(max_cpu_count+1) msi irqs. However, in the vcpu hotplug scenario,
because of vgic, max_cpu_count may be greater than online_cpu_count.
The kernel then also activates the irqs that target offline cpus, which
causes the pci device passthrough to fail.
To solve this problem, this patch adds a function
"check_affinity_mask_online" to check whether msi_desc->affinity contains
any online cpu. If it contains none, the for loop continues and skips
activating the related irq.
Signed-off-by: xuejun-xj <jiyunxue@linux.alibaba.com>
Reviewed-by: Shuo Tan <shuo.tan@linux.alibaba.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
kernel/irq/msi.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index d924676c8781..d60a3fc654e6 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -395,6 +395,23 @@ static bool msi_check_reservation_mode(struct irq_domain *domain,
return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
}
+/* This function checks whether the cpu affinity mask contains any online
+ * cpu. When we pass through nvme devices, the kernel will allocate
+ * maxcpus+1 MSI irqs and then activate them. In vcpu hotplug situations, the
+ * kernel may activate irqs for offline cpus when bootcpus < maxcpus.
+ * To avoid this conflict, this function checks the affinities.
+ */
+static inline bool check_affinity_mask_online(struct irq_affinity_desc *affinity)
+{
+ int cpu;
+
+ for_each_cpu(cpu, &affinity->mask)
+ if (cpu_online(cpu))
+ return true;
+
+ return false;
+}
+
int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
int nvec)
{
@@ -445,6 +462,9 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
goto skip_activate;
for_each_msi_vector(desc, i, dev) {
+ if (desc->affinity
+ && !check_affinity_mask_online(desc->affinity))
+ continue;
if (desc->irq == i) {
virq = desc->irq;
dev_dbg(dev, "irq [%d-%d] for MSI\n",
--
2.28.0

View File

@ -0,0 +1,139 @@
From a05086142be13d43c7fc92500bcb870a2f37e485 Mon Sep 17 00:00:00 2001
Message-Id: <a05086142be13d43c7fc92500bcb870a2f37e485.1685428663.git.jiyunxue@linux.alibaba.com>
In-Reply-To: <16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72.1685428663.git.jiyunxue@linux.alibaba.com>
References: <16e3b3da9fb8b79b006d8c9d1f68b2dec9980d72.1685428663.git.jiyunxue@linux.alibaba.com>
From: xuejun-xj <jiyunxue@linux.alibaba.com>
Date: Tue, 23 May 2023 09:43:02 +0800
Subject: [PATCH 3/3] smp: update bringup_nonboot_cpus parameters
On aarch64, kvm doesn't allow the vmm to call the KVM_CREATE_VCPU ioctl after
the vm has already started, because of the vgic_initialized check in the
kvm_arch_vcpu_precreate() function. Therefore, to support the vcpu hotplug
feature on aarch64, all the vcpus should be created and configured, ready
to start, during the boot procedure.
To solve the problem, dragonball will add a property in each cpu node,
called "boot-onlined". This property indicates whether this cpu should
be onlined at first boot. It has two values: 0 and 1. 0 means offline,
while 1 means online.
This commit also adds a helper function called "of_get_cpu_boot_onlined",
which parses the cpu node and gets the value of the boot-onlined property,
then updates the global variable "boot_onlined_cpu".
When the kernel calls smp_init(), bringup_nonboot_cpus will start all the
other cpus except cpu0. The number of activated cpus equals setup_max_cpus.
In the vcpu hotplug scenario, the vmm creates all the vcpufds before the vm
is initialized, while activating only a few vcpus at first boot. The
setup_max_cpus variable will be initialized to the total vcpu count. As a
result, the remaining cpus cannot find a vcpu thread to run on, and the
kernel waits 5 seconds for each of them.
Therefore, we use boot_onlined_cpu instead of setup_max_cpus to give
"bringup_nonboot_cpus" correct cpu number it needs.
Signed-off-by: xuejun-xj <jiyunxue@linux.alibaba.com>
---
.../devicetree/bindings/arm/cpus.yaml | 11 +++++++++
arch/arm64/kernel/smp.c | 24 +++++++++++++++++++
kernel/smp.c | 10 +++++++-
3 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml
index 14cd727d3c4b..691bb352d842 100644
--- a/Documentation/devicetree/bindings/arm/cpus.yaml
+++ b/Documentation/devicetree/bindings/arm/cpus.yaml
@@ -316,6 +316,17 @@ properties:
formed by encoding the target CPU id into the low bits of the
physical start address it should jump to.
+ boot-onlined:
+ $ref: '/schemas/types.yaml#/definitions/uint32'
+ description: |
+ The boot-onlined property is an optional u32 value that indicates
+ whether the cpu device should be activated at first boot. This is
+ useful in vcpu hotplug scenario to pass correct value of activated
+ cpu number.
+
+ This property has two values: 0 and 1. 1 means the cpu should be
+ activated while 0 means it shouldn't.
+
if:
# If the enable-method property contains one of those values
properties:
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 18e9727d3f64..5db8041929a6 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -464,6 +464,27 @@ void __init smp_prepare_boot_cpu(void)
init_gic_priority_masking();
}
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_ARM64)
+extern unsigned int boot_onlined_cpu;
+static void __init of_get_cpu_boot_onlined(struct device_node *dn)
+{
+ unsigned int boot_onlined;
+ int r;
+
+ r = of_property_read_u32(dn, "boot-onlined", &boot_onlined);
+ if (r) {
+ pr_err("%pOF: missing boot-onlined property\n", dn);
+ return;
+ }
+ /*
+ * Property boot-onlined has two values: 0 and 1.
+ * 0 means offline, and 1 means online.
+ * Here just count the number of boot_onlined_cpu.
+ */
+ boot_onlined_cpu += boot_onlined;
+}
+#endif
+
static u64 __init of_get_cpu_mpidr(struct device_node *dn)
{
const __be32 *cell;
@@ -654,6 +675,9 @@ static void __init of_parse_and_init_cpus(void)
struct device_node *dn;
for_each_of_cpu_node(dn) {
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_ARM64)
+ of_get_cpu_boot_onlined(dn);
+#endif
u64 hwid = of_get_cpu_mpidr(dn);
if (hwid == INVALID_HWID)
diff --git a/kernel/smp.c b/kernel/smp.c
index 25240fb2df94..567615b9a008 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -801,17 +801,25 @@ void __init setup_nr_cpu_ids(void)
nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
}
+/* Setup number of CPUs to activate */
+unsigned int boot_onlined_cpu = 0;
+
/* Called by boot processor to activate the rest. */
void __init smp_init(void)
{
int num_nodes, num_cpus;
+ int num_onlined_cpu = setup_max_cpus;
idle_threads_init();
cpuhp_threads_init();
pr_info("Bringing up secondary CPUs ...\n");
- bringup_nonboot_cpus(setup_max_cpus);
+#if defined(CONFIG_DRAGONBALL_HOTPLUG_CPU) && defined(CONFIG_ARM64)
+ if (boot_onlined_cpu != 0)
+ num_onlined_cpu = boot_onlined_cpu;
+#endif
+ bringup_nonboot_cpus(num_onlined_cpu);
num_nodes = num_online_nodes();
num_cpus = num_online_cpus();
--
2.28.0