Mirror of https://github.com/kata-containers/kata-containers.git, synced 2025-04-27 11:31:05 +00:00
kernel: add arm experimental patches to support vcpu hotplug and virtio-mem
As support for vcpu hotplug is still on its way upstream, pick these patches up here as experimental so that users can try cpu hotplug and virtio-mem on arm64.

Fixes: #3280

Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
parent 934788eb53
commit 1b6f7401e0
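These patches are meant to be consumed by the Kata Containers guest kernel build tooling. A rough sketch of how that usually looks (the paths, version directory and flags are assumptions about the packaging layout, not taken from this commit):

    # assumed layout: patches live under tools/packaging/kernel/patches/<kernel-version>.x/
    cd tools/packaging/kernel
    ./build-kernel.sh setup   # fetch the kernel sources and apply the patch series
    ./build-kernel.sh build   # build the patched guest kernel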
@@ -0,0 +1,145 @@
From 790af0565140c9df7394c195c22960d92f117c30 Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Wed, 1 Dec 2021 14:58:33 +0800
Subject: [PATCH 1/7] arm64: kernel: Handle disabled[(+)present] cpus in
 MADT/GICC during init

With ACPI enabled, cpus get identified by the presence of the GICC
entry in the MADT Table. Each GICC entry part of MADT presents a cpu as
enabled or disabled. As of now, the disabled cpus are skipped as
physical cpu hotplug is not supported. These remain disabled even after
the kernel has booted.

To support virtual cpu hotplug (in which case disabled vcpus could be
hotplugged even after the kernel has booted), QEMU will populate the MADT
Table with appropriate details of the GICC entry for each possible
(present+disabled) vcpu. Now, during init time, vcpus will be identified as
present or disabled. To achieve this, the below changes have been made with
respect to the present/possible vcpu handling, along with the reasoning:

1. Identify all possible (present+disabled) vcpus at boot/init time
   and set their present mask and possible mask. In the existing code,
   cpus are being marked present quite late, within the smp_prepare_cpus()
   function, which gets called in the context of the kernel thread. Since
   cpu hotplug is not supported, present cpus are always equal to
   the possible cpus. But with cpu hotplug enabled, this assumption is
   not true. Hence, present cpus should be marked while MADT GICC entries
   are being parsed for each vcpu.
2. Set possible cpus to include disabled. This needs to be done now,
   while parsing MADT GICC entries corresponding to each vcpu, as the
   disabled vcpu info is available only at this point; for the hotplug
   case, possible vcpus are not equal to present vcpus.
3. We will store the parsed madt/gicc entry even for the disabled vcpus
   during init time. This is needed as some modules, like the PMU, register
   IRQs for each possible vcpu during init time. Therefore, a valid
   entry of the MADT GICC should be present for all possible vcpus.
4. Refactoring related to DT/OF is also done to align it with the init
   changes to support vcpu hotplug.

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
---
 arch/arm64/kernel/smp.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6f6ff072acbd..4b317e71b1c4 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -524,13 +524,12 @@ static int __init smp_cpu_setup(int cpu)
if (ops->cpu_init(cpu))
return -ENODEV;

- set_cpu_possible(cpu, true);
-
return 0;
}

static bool bootcpu_valid __initdata;
static unsigned int cpu_count = 1;
+static unsigned int disabled_cpu_count;

#ifdef CONFIG_ACPI
static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS];
@@ -549,10 +548,17 @@ struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu)
static void __init
acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
{
+ unsigned int total_cpu_count = disabled_cpu_count + cpu_count;
u64 hwid = processor->arm_mpidr;

if (!(processor->flags & ACPI_MADT_ENABLED)) {
+#ifndef CONFIG_ACPI_HOTPLUG_CPU
pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
+#else
+ cpu_madt_gicc[total_cpu_count] = *processor;
+ set_cpu_possible(total_cpu_count, true);
+ disabled_cpu_count++;
+#endif
return;
}

@@ -561,7 +567,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
return;
}

- if (is_mpidr_duplicate(cpu_count, hwid)) {
+ if (is_mpidr_duplicate(total_cpu_count, hwid)) {
pr_err("duplicate CPU MPIDR 0x%llx in MADT\n", hwid);
return;
}
@@ -582,9 +588,9 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
return;

/* map the logical cpu id to cpu MPIDR */
- set_cpu_logical_map(cpu_count, hwid);
+ set_cpu_logical_map(total_cpu_count, hwid);

- cpu_madt_gicc[cpu_count] = *processor;
+ cpu_madt_gicc[total_cpu_count] = *processor;

/*
* Set-up the ACPI parking protocol cpu entries
@@ -595,7 +601,10 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
* initialize the cpu if the parking protocol is
* the only available enable method).
*/
- acpi_set_mailbox_entry(cpu_count, processor);
+ acpi_set_mailbox_entry(total_cpu_count, processor);
+
+ set_cpu_possible(total_cpu_count, true);
+ set_cpu_present(total_cpu_count, true);

cpu_count++;
}
@@ -629,6 +638,9 @@ static void __init acpi_parse_and_init_cpus(void)
acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
acpi_parse_gic_cpu_interface, 0);

+ pr_debug("possible cpus(%u) present cpus(%u) disabled cpus(%u)\n",
+ cpu_count+disabled_cpu_count, cpu_count, disabled_cpu_count);
+
/*
* In ACPI, SMP and CPU NUMA information is provided in separate
* static tables, namely the MADT and the SRAT.
@@ -699,6 +711,9 @@ static void __init of_parse_and_init_cpus(void)
set_cpu_logical_map(cpu_count, hwid);

early_map_cpu_to_node(cpu_count, of_node_to_nid(dn));
+
+ set_cpu_possible(cpu_count, true);
+ set_cpu_present(cpu_count, true);
next:
cpu_count++;
}
@@ -783,7 +798,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
if (err)
continue;

- set_cpu_present(cpu, true);
numa_store_cpu_info(cpu);
}
}
--
2.17.1
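With patch 1 applied, the effect on the cpu masks can be observed from inside a guest booted with additional disabled (hotpluggable) vcpus described in the MADT; a minimal check, with illustrative cpu ranges:

    # possible should now cover present + disabled vcpus
    cat /sys/devices/system/cpu/possible   # e.g. 0-3
    cat /sys/devices/system/cpu/present    # e.g. 0
    cat /sys/devices/system/cpu/online     # e.g. 0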
@@ -0,0 +1,86 @@
From 2bd0439913fde8598113cc3959764a877c0bd1ad Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Wed, 1 Dec 2021 16:01:17 +0800
Subject: [PATCH 2/7] arm64: kernel: Bound the total(present+disabled) cpus
 with nr_cpu_ids

Bound the total number of identified cpus (including disabled cpus) by
the maximum limit allowed by the kernel. The max value is either specified
as part of the kernel parameter 'nr_cpus' or specified at compile
time using CONFIG_NR_CPUS.

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
---
 arch/arm64/kernel/smp.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 4b317e71b1c4..18a0576f2721 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -528,6 +528,7 @@ static int __init smp_cpu_setup(int cpu)
}

static bool bootcpu_valid __initdata;
+static bool cpus_clipped __initdata = false;
static unsigned int cpu_count = 1;
static unsigned int disabled_cpu_count;

@@ -551,6 +552,11 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
unsigned int total_cpu_count = disabled_cpu_count + cpu_count;
u64 hwid = processor->arm_mpidr;

+ if (total_cpu_count > nr_cpu_ids) {
+ cpus_clipped = true;
+ return;
+ }
+
if (!(processor->flags & ACPI_MADT_ENABLED)) {
#ifndef CONFIG_ACPI_HOTPLUG_CPU
pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
@@ -584,9 +590,6 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
return;
}

- if (cpu_count >= NR_CPUS)
- return;
-
/* map the logical cpu id to cpu MPIDR */
set_cpu_logical_map(total_cpu_count, hwid);

@@ -704,8 +707,10 @@ static void __init of_parse_and_init_cpus(void)
continue;
}

- if (cpu_count >= NR_CPUS)
+ if (cpu_count >= NR_CPUS) {
+ cpus_clipped = true;
goto next;
+ }

pr_debug("cpu logical map 0x%llx\n", hwid);
set_cpu_logical_map(cpu_count, hwid);
@@ -726,6 +731,7 @@ static void __init of_parse_and_init_cpus(void)
*/
void __init smp_init_cpus(void)
{
+ unsigned int total_cpu_count = disabled_cpu_count + cpu_count;
int i;

if (acpi_disabled)
@@ -733,9 +739,9 @@ void __init smp_init_cpus(void)
else
acpi_parse_and_init_cpus();

- if (cpu_count > nr_cpu_ids)
+ if (cpus_clipped)
pr_warn("Number of cores (%d) exceeds configured maximum of %u - clipping\n",
- cpu_count, nr_cpu_ids);
+ total_cpu_count, nr_cpu_ids);

if (!bootcpu_valid) {
pr_err("missing boot CPU MPIDR, not enabling secondaries\n");
--
2.17.1
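Since the clipping above is bounded by nr_cpu_ids, the total (present + disabled) vcpu count can also be capped from the guest kernel command line; the value below is only an example:

    # cap nr_cpu_ids at 4; any further MADT GICC entries are clipped with a warning
    nr_cpus=4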
@@ -0,0 +1,116 @@
From 58ceaa003bab7d2613f01ec58925a75e1f731240 Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Thu, 2 Dec 2021 13:57:51 +0800
Subject: [PATCH 3/7] arm64: kernel: Init cpu operations for all possible vcpus

Currently, cpu-operations are only initialized for the cpus which
already have a logical cpuid to hwid association established, and this
only happens for the cpus which are present during boot time.

To support virtual cpu hotplug, we shall initialize the cpu-operations
for all possible (present+disabled) vcpus. This means the logical cpuid to
hwid/mpidr association might not exist (i.e. might be INVALID_HWID)
during init. Later, when the vcpu is actually hotplugged, a logical cpuid
is allocated and associated with the hwid/mpidr.

This patch does some refactoring to support the above change.

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
---
 arch/arm64/kernel/smp.c | 39 +++++++++++++++------------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 18a0576f2721..fed4415e8cfe 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -518,13 +518,16 @@ static int __init smp_cpu_setup(int cpu)
const struct cpu_operations *ops;

if (init_cpu_ops(cpu))
- return -ENODEV;
+ goto out;

ops = get_cpu_ops(cpu);
if (ops->cpu_init(cpu))
- return -ENODEV;
+ goto out;

return 0;
+out:
+ __cpu_logical_map[cpu] = INVALID_HWID;
+ return -ENODEV;
}

static bool bootcpu_valid __initdata;
@@ -562,7 +565,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
#else
cpu_madt_gicc[total_cpu_count] = *processor;
- set_cpu_possible(total_cpu_count, true);
+ if (!smp_cpu_setup(total_cpu_count))
+ set_cpu_possible(total_cpu_count, true);
disabled_cpu_count++;
#endif
return;
@@ -606,9 +610,10 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
*/
acpi_set_mailbox_entry(total_cpu_count, processor);

- set_cpu_possible(total_cpu_count, true);
- set_cpu_present(total_cpu_count, true);
-
+ if (!smp_cpu_setup(total_cpu_count)) {
+ set_cpu_possible(total_cpu_count, true);
+ set_cpu_present(total_cpu_count, true);
+ }
cpu_count++;
}

@@ -716,9 +721,10 @@ static void __init of_parse_and_init_cpus(void)
set_cpu_logical_map(cpu_count, hwid);

early_map_cpu_to_node(cpu_count, of_node_to_nid(dn));
-
- set_cpu_possible(cpu_count, true);
- set_cpu_present(cpu_count, true);
+ if (!smp_cpu_setup(cpu_count)) {
+ set_cpu_possible(cpu_count, true);
+ set_cpu_present(cpu_count, true);
+ }
next:
cpu_count++;
}
@@ -732,7 +738,6 @@ static void __init of_parse_and_init_cpus(void)
void __init smp_init_cpus(void)
{
unsigned int total_cpu_count = disabled_cpu_count + cpu_count;
- int i;

if (acpi_disabled)
of_parse_and_init_cpus();
@@ -747,20 +752,6 @@ void __init smp_init_cpus(void)
pr_err("missing boot CPU MPIDR, not enabling secondaries\n");
return;
}
-
- /*
- * We need to set the cpu_logical_map entries before enabling
- * the cpus so that cpu processor description entries (DT cpu nodes
- * and ACPI MADT entries) can be retrieved by matching the cpu hwid
- * with entries in cpu_logical_map while initializing the cpus.
- * If the cpu set-up fails, invalidate the cpu_logical_map entry.
- */
- for (i = 1; i < nr_cpu_ids; i++) {
- if (cpu_logical_map(i) != INVALID_HWID) {
- if (smp_cpu_setup(i))
- set_cpu_logical_map(i, INVALID_HWID);
- }
- }
}

void __init smp_prepare_cpus(unsigned int max_cpus)
--
2.17.1
@@ -0,0 +1,125 @@
From 6b7b492fc89e97e5ee51f9d033000fb6483a5298 Mon Sep 17 00:00:00 2001
From: Salil Mehta <salil.mehta@huawei.com>
Date: Wed, 1 Dec 2021 16:21:50 +0800
Subject: [PATCH 4/7] arm64: kernel: Arch specific ACPI hooks(like logical
 cpuid<->hwid etc.)

To support virtual cpu hotplug, some arch specific hooks must be
facilitated. These hooks are called by the generic ACPI cpu hotplug
framework during vcpu hot-(un)plug event handling. The changes
required involve:

1. Allocation of the logical cpuid corresponding to the hwid/mpidr
2. Mapping of logical cpuid to hwid/mpidr and marking present
3. Removing the vcpu from the present mask during hot-unplug
4. For arm64, all possible cpus are registered within topology_init().
   Hence, we need to override the weak ACPI call of arch_register_cpu()
   (which returns -ENODEV) and return success.
5. The NUMA node mapping set for this vcpu using SRAT Table info during init
   time will be discarded, as the logical cpu-ids used at that time
   might not be correct. This mapping will be set again using the
   proximity/node info obtained by evaluating the _PXM ACPI method.

Note, during hot unplug of a vcpu, we do not unmap the association between
the logical cpuid and hwid/mpidr. This remains persistent.

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
---
 arch/arm64/kernel/smp.c | 80 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index fed4415e8cfe..8ab68ec01090 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -543,6 +543,86 @@ struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu)
return &cpu_madt_gicc[cpu];
}

+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+int arch_register_cpu(int num)
+{
+ return 0;
+}
+
+static int set_numa_node_for_cpu(acpi_handle handle, int cpu)
+{
+#ifdef CONFIG_ACPI_NUMA
+ int node_id;
+
+ /* will evaluate _PXM */
+ node_id = acpi_get_node(handle);
+ if (node_id != NUMA_NO_NODE)
+ set_cpu_numa_node(cpu, node_id);
+#endif
+ return 0;
+}
+
+static void unset_numa_node_for_cpu(int cpu)
+{
+#ifdef CONFIG_ACPI_NUMA
+ set_cpu_numa_node(cpu, NUMA_NO_NODE);
+#endif
+}
+
+static int allocate_logical_cpuid(u64 physid)
+{
+ int first_invalid_idx = -1;
+ bool first = true;
+ int i;
+
+ for_each_possible_cpu(i) {
+ /*
+ * logical cpuid<->hwid association remains persistent once
+ * established
+ */
+ if (cpu_logical_map(i) == physid)
+ return i;
+
+ if ((cpu_logical_map(i) == INVALID_HWID) && first) {
+ first_invalid_idx = i;
+ first = false;
+ }
+ }
+
+ return first_invalid_idx;
+}
+
+int acpi_unmap_cpu(int cpu)
+{
+ set_cpu_present(cpu, false);
+ unset_numa_node_for_cpu(cpu);
+
+ return 0;
+}
+
+int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
+ int *cpuid)
+{
+ int cpu;
+
+ cpu = allocate_logical_cpuid(physid);
+ if (cpu < 0) {
+ pr_warn("Unable to map logical cpuid to physid 0x%llx\n",
+ physid);
+ return -ENOSPC;
+ }
+
+ /* map the logical cpu id to cpu MPIDR */
+ __cpu_logical_map[cpu] = physid;
+ set_numa_node_for_cpu(handle, cpu);
+
+ set_cpu_present(cpu, true);
+ *cpuid = cpu;
+
+ return 0;
+}
+#endif
+
/*
* acpi_map_gic_cpu_interface - parse processor MADT entry
*
--
2.17.1
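When the VMM hot-adds a vcpu, the generic ACPI cpu hotplug core ends up calling acpi_map_cpu() above and the vcpu becomes present, but it still has to be brought online from inside the guest; a minimal sketch with an illustrative cpu number:

    # after hot-add, the new vcpu is present but offline
    echo 1 > /sys/devices/system/cpu/cpu2/online
    cat /sys/devices/system/cpu/online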
@@ -0,0 +1,82 @@
From 5c979f026c1319c712e7fa4882ec3a4ef3e2101b Mon Sep 17 00:00:00 2001
From: Jianyong Wu <jianyong.wu@arm.com>
Date: Fri, 3 Dec 2021 17:11:39 +0800
Subject: [PATCH 5/7] cpu/numa: fix failure when hot-remove cpu

When hot-removing a cpu, the map from cpu to numa node is set to
NUMA_NO_NODE, which will lead to failure as the map is still used by
others. Thus we need a specific map to describe the unplugged cpu.
Here we introduce a new map to describe the unplugged cpus.

Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
---
 arch/arm64/include/asm/smp.h | 2 ++
 arch/arm64/kernel/setup.c | 14 ++++++++++++++
 arch/arm64/kernel/smp.c | 5 ++++-
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index fc55f5a57a06..7949f6090eed 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -47,6 +47,8 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
*/
extern u64 __cpu_logical_map[NR_CPUS];
extern u64 cpu_logical_map(unsigned int cpu);
+extern u64 get_acpicpu_numa_node(unsigned int cpu);
+extern int set_acpicpu_numa_node(unsigned int cpu, unsigned int node);

static inline void set_cpu_logical_map(unsigned int cpu, u64 hwid)
{
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index be5f85b0a24d..68d7a7894e10 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -284,6 +284,20 @@ static int __init reserve_memblock_reserved_regions(void)
}
arch_initcall(reserve_memblock_reserved_regions);

+u64 __acpicpu_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };
+
+u64 get_acpicpu_numa_node(unsigned int cpu)
+{
+ return __acpicpu_node_map[cpu];
+}
+
+int set_acpicpu_numa_node(unsigned int cpu, unsigned int node)
+{
+ __acpicpu_node_map[cpu] = node;
+
+ return 0;
+}
+
u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };

u64 cpu_logical_map(unsigned int cpu)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 8ab68ec01090..0c07921b0b61 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -557,7 +557,10 @@ static int set_numa_node_for_cpu(acpi_handle handle, int cpu)
/* will evaluate _PXM */
node_id = acpi_get_node(handle);
if (node_id != NUMA_NO_NODE)
+ {
+ set_acpicpu_numa_node(cpu, node_id);
set_cpu_numa_node(cpu, node_id);
+ }
#endif
return 0;
}
@@ -565,7 +568,7 @@ static int set_numa_node_for_cpu(acpi_handle handle, int cpu)
static void unset_numa_node_for_cpu(int cpu)
{
#ifdef CONFIG_ACPI_NUMA
- set_cpu_numa_node(cpu, NUMA_NO_NODE);
+ set_acpicpu_numa_node(cpu, NUMA_NO_NODE);
#endif
}

--
2.17.1
@@ -0,0 +1,69 @@
From e3a11f2f7ccb0dbbb8cf95944e89b34fd928107a Mon Sep 17 00:00:00 2001
From: Jianyong Wu <jianyong.wu@arm.com>
Date: Mon, 6 Dec 2021 10:52:37 +0800
Subject: [PATCH 6/7] arm64/mm: avoid fixmap race condition when create pud
 mapping

The 'fixmap' is a global resource and is used recursively by
create pud mapping(), leading to a potential race condition in the
presence of a concurrent call to alloc_init_pud():

kernel_init thread                  virtio-mem workqueue thread
==================                  ===========================

alloc_init_pud(...)                 alloc_init_pud(...)
pudp = pud_set_fixmap_offset(...)   pudp = pud_set_fixmap_offset(...)
READ_ONCE(*pudp)
pud_clear_fixmap(...)
                                    READ_ONCE(*pudp) // CRASH!

As kernel may sleep during creating pud mapping, introduce a mutex lock to
serialise use of the fixmap entries by alloc_init_pud(). However, there is
no need for locking in early boot stage and it doesn't work well with
KASLR enabled when early boot. So, enable lock when system_state doesn't
equal to "SYSTEM_BOOTING".

Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Fixes: f4710445458c ("arm64: mm: use fixmap when creating page tables")
Link: https://lore.kernel.org/r/20220201114400.56885-1-jianyong.wu@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/mmu.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index cfd9deb347c3..432fab4ce2b4 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -63,6 +63,7 @@ static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;

static DEFINE_SPINLOCK(swapper_pgdir_lock);
+static DEFINE_SPINLOCK(fixmap_lock);

void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
{
@@ -328,6 +329,11 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
}
BUG_ON(p4d_bad(p4d));

+ /*
+ * We only have one fixmap entry per page-table level, so take
+ * the fixmap lock until we're done.
+ */
+ spin_lock(&fixmap_lock);
pudp = pud_set_fixmap_offset(p4dp, addr);
do {
pud_t old_pud = READ_ONCE(*pudp);
@@ -358,6 +364,7 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
} while (pudp++, addr = next, addr != end);

pud_clear_fixmap();
+ spin_unlock(&fixmap_lock);
}

static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
--
2.17.1
@@ -0,0 +1,67 @@
From b1a3d86afbccb5485d2a53cc7e4e097a40f9d443 Mon Sep 17 00:00:00 2001
From: Jianyong Wu <jianyong.wu@arm.com>
Date: Tue, 14 Dec 2021 14:18:39 +0800
Subject: [PATCH 7/7] virtio-mem: enable virtio-mem on arm64

It seems that virtio-mem works on arm64 now and can be enabled.

Signed-off-by: Jianyong Wu <jianyong.wu@arm.com>
---
 arch/arm64/mm/mmu.c | 12 +++++++-----
 drivers/virtio/Kconfig | 2 +-
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 432fab4ce2b4..809fe52d3035 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -63,7 +63,7 @@ static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;

static DEFINE_SPINLOCK(swapper_pgdir_lock);
-static DEFINE_SPINLOCK(fixmap_lock);
+static DEFINE_MUTEX(fixmap_lock);

void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
{
@@ -330,10 +330,11 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
BUG_ON(p4d_bad(p4d));

/*
- * We only have one fixmap entry per page-table level, so take
- * the fixmap lock until we're done.
+ * No need for locking during early boot. And it doesn't work as
+ * expected with KASLR enabled.
*/
- spin_lock(&fixmap_lock);
+ if (system_state != SYSTEM_BOOTING)
+ mutex_lock(&fixmap_lock);
pudp = pud_set_fixmap_offset(p4dp, addr);
do {
pud_t old_pud = READ_ONCE(*pudp);
@@ -364,7 +365,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
} while (pudp++, addr = next, addr != end);

pud_clear_fixmap();
- spin_unlock(&fixmap_lock);
+ if (system_state != SYSTEM_BOOTING)
+ mutex_unlock(&fixmap_lock);
}

static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index ce1b3f6ec325..ebabff45935c 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -96,7 +96,7 @@ config VIRTIO_BALLOON
config VIRTIO_MEM
tristate "Virtio mem driver"
default m
- depends on X86_64
+ depends on X86_64 || ARM64
depends on VIRTIO
depends on MEMORY_HOTPLUG_SPARSE
depends on MEMORY_HOTREMOVE
--
2.17.1
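With CONFIG_VIRTIO_MEM selectable on arm64, virtio-mem can be tried against this kernel. A sketch of a QEMU invocation and a runtime resize, following the generic virtio-mem usage; the sizes, IDs and the elided machine options are illustrative only:

    qemu-system-aarch64 ... \
        -m 2G,maxmem=8G \
        -object memory-backend-ram,id=vmem0,size=4G \
        -device virtio-mem-pci,id=vm0,memdev=vmem0,requested-size=0

    # grow the plugged memory later from the QEMU monitor
    (qemu) qom-set vm0 requested-size 1G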