From e654dbd8367371c1b34776445a402d3c90f0dc66 Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Fri, 18 Jan 2019 02:14:43 -0500 Subject: [PATCH 1/2] kernel: Add memory hotplug(add) support for arm64 As memory hotplug for arm64 via ACPI is not yet ready in qemu, we use "probe" instead. You can refer to [1] to get more information about "probe". The process of memory hotplug by "probe" in kata is as follows: first, add memory through qemu qmp; second, echo the start physical address of that memory to /sys/devices/system/memory/probe, which is done through kata-agent; third, execute the online op, after which the newly added memory can be used. All functions in this patch are called after the "echo" op. They can be divided into two parts: 1. create the page table for that memory; 2. add that memory to memblock. In this patch, NUMA must be turned off because not all arm64 machines support NUMA. As the newly added memory is placed between 2T and 6T, which is decided in qemu, and the physical address and virtual address will be one-to-one mapped when creating the pgd for that memory, we must configure ARM64_VA_BITS as 48. Some other configs must also be turned on, especially "ARCH_MEMORY_PROBE". We have tested this patch together with another patch which performs that echo op. It works well when using "-m" on the command line to start a kata-container on an aarch64 machine. This patch is derived from the work of Maciej Bielski. You can refer to [2] to get full information about it. 
[1] https://www.kernel.org/doc/Documentation/memory-hotplug.txt [2] https://lkml.org/lkml/2017/11/23/183 Fixes: #309 Signed-off-by: Jianyong Wu Signed-off-by: Jia He Signed-off-by: Penny Zheng --- kernel/configs/arm64_kata_kvm_4.14.x | 19 +- .../0003-memory-hotplug-by-probe.patch | 219 ++++++++++++++++++ 2 files changed, 231 insertions(+), 7 deletions(-) create mode 100644 kernel/patches/0003-memory-hotplug-by-probe.patch diff --git a/kernel/configs/arm64_kata_kvm_4.14.x b/kernel/configs/arm64_kata_kvm_4.14.x index c1f1e602d7..8178ddb0b7 100644 --- a/kernel/configs/arm64_kata_kvm_4.14.x +++ b/kernel/configs/arm64_kata_kvm_4.14.x @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/arm64 4.14.72 Kernel Configuration +# Linux/arm64 4.14.67 Kernel Configuration # CONFIG_ARM64=y CONFIG_64BIT=y @@ -9,7 +9,7 @@ CONFIG_MMU=y CONFIG_ARM64_PAGE_SHIFT=12 CONFIG_ARM64_CONT_SHIFT=4 CONFIG_ARCH_MMAP_RND_BITS_MIN=18 -CONFIG_ARCH_MMAP_RND_BITS_MAX=24 +CONFIG_ARCH_MMAP_RND_BITS_MAX=33 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=11 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 CONFIG_STACKTRACE_SUPPORT=y @@ -32,7 +32,7 @@ CONFIG_SWIOTLB=y CONFIG_IOMMU_HELPER=y CONFIG_KERNEL_MODE_NEON=y CONFIG_FIX_EARLYCON_MEM=y -CONFIG_PGTABLE_LEVELS=3 +CONFIG_PGTABLE_LEVELS=4 CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_ARCH_PROC_KCORE_TEXT=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" @@ -467,16 +467,18 @@ CONFIG_QCOM_FALKOR_ERRATUM_E1041=y CONFIG_ARM64_4K_PAGES=y # CONFIG_ARM64_16K_PAGES is not set # CONFIG_ARM64_64K_PAGES is not set -CONFIG_ARM64_VA_BITS_39=y -# CONFIG_ARM64_VA_BITS_48 is not set -CONFIG_ARM64_VA_BITS=39 +# CONFIG_ARM64_VA_BITS_39 is not set +CONFIG_ARM64_VA_BITS_48=y +CONFIG_ARM64_VA_BITS=48 # CONFIG_CPU_BIG_ENDIAN is not set CONFIG_SCHED_MC=y CONFIG_SCHED_SMT=y CONFIG_NR_CPUS=255 CONFIG_HOTPLUG_CPU=y +CONFIG_ARCH_HAS_ADD_PAGES=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y # CONFIG_NUMA is not set -CONFIG_HOLES_IN_ZONE=y +CONFIG_ARCH_MEMORY_PROBE=y 
CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set @@ -506,6 +508,9 @@ CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y CONFIG_HAVE_MEMBLOCK=y CONFIG_NO_BOOTMEM=y # CONFIG_HAVE_BOOTMEM_INFO_NODE is not set +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTPLUG_SPARSE=y +# CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE is not set CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MEMORY_BALLOON=y # CONFIG_COMPACTION is not set diff --git a/kernel/patches/0003-memory-hotplug-by-probe.patch b/kernel/patches/0003-memory-hotplug-by-probe.patch new file mode 100644 index 0000000000..6eb58a1839 --- /dev/null +++ b/kernel/patches/0003-memory-hotplug-by-probe.patch @@ -0,0 +1,219 @@ +From 20ecfb98b99dafcd74d44c065de15adcc366ca5d Mon Sep 17 00:00:00 2001 +From: Jianyong Wu +Date: Wed, 2 Jan 2019 03:55:49 -0500 +Subject: [PATCH] memory hotplug + +Signed-off-by: Jianyong Wu +--- + arch/arm64/Kconfig | 13 ++++++ + arch/arm64/include/asm/mmu.h | 3 ++ + arch/arm64/mm/init.c | 86 ++++++++++++++++++++++++++++++++++++ + arch/arm64/mm/mmu.c | 40 +++++++++++++++++ + 4 files changed, 142 insertions(+) + +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index 0df64a6a56d4..99a0c77a39f3 100644 +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -641,6 +641,15 @@ config HOTPLUG_CPU + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu. + ++config ARCH_HAS_ADD_PAGES ++ def_bool y ++ depends on ARCH_ENABLE_MEMORY_HOTPLUG ++ ++config ARCH_ENABLE_MEMORY_HOTPLUG ++ def_bool y ++ depends on !NUMA ++ ++ + # Common NUMA Features + config NUMA + bool "Numa Memory Allocation and Scheduler Support" +@@ -653,6 +662,10 @@ config NUMA + local memory of the CPU and add some more + NUMA awareness to the kernel. 
+ ++config ARCH_MEMORY_PROBE ++ def_bool y ++ depends on MEMORY_HOTPLUG ++ + config NODES_SHIFT + int "Maximum NUMA Nodes (as a power of 2)" + range 1 10 +diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h +index 0d34bf0a89c7..2b3fa4d12db0 100644 +--- a/arch/arm64/include/asm/mmu.h ++++ b/arch/arm64/include/asm/mmu.h +@@ -40,5 +40,8 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, + pgprot_t prot, bool page_mappings_only); + extern void *fixmap_remap_fdt(phys_addr_t dt_phys); + extern void mark_linear_text_alias_ro(void); ++#ifdef CONFIG_MEMORY_HOTPLUG ++extern void hotplug_paging(phys_addr_t start, phys_addr_t size); ++#endif + + #endif +diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c +index 5960bef0170d..141576031a78 100644 +--- a/arch/arm64/mm/init.c ++++ b/arch/arm64/mm/init.c +@@ -722,3 +722,89 @@ static int __init register_mem_limit_dumper(void) + return 0; + } + __initcall(register_mem_limit_dumper); ++ ++#ifdef CONFIG_MEMORY_HOTPLUG ++int add_pages(int nid, unsigned long start_pfn, ++ unsigned long nr_pages, bool want_memblock) ++{ ++ int ret; ++ u64 start_addr = start_pfn << PAGE_SHIFT; ++ /* +++ * Mark the first page in the range as unusable. This is needed +++ * because __add_section (within __add_pages) wants pfn_valid +++ * of it to be false, and in arm64 pfn_valid is implemented by +++ * just checking at the nomap flag for existing blocks. +++ * +++ * A small trick here is that __add_section() requires only +++ * phys_start_pfn (that is the first pfn of a section) to be +++ * invalid. Regardless of whether it was assumed (by the function +++ * author) that all pfns within a section are either all valid +++ * or all invalid, it allows to avoid looping twice (once here, +++ * second when memblock_clear_nomap() is called) through all +++ * pfns of the section and modify only one pfn. 
Thanks to that, +++ * further, in __add_zone() only this very first pfn is skipped +++ * and corresponding page is not flagged reserved. Therefore it +++ * is enough to correct this setup only for it. +++ * +++ * When arch_add_memory() returns the walk_memory_range() function +++ * is called and passed with online_memory_block() callback, +++ * which execution finally reaches the memory_block_action() +++ * function, where also only the first pfn of a memory block is +++ * checked to be reserved. Above, it was first pfn of a section, +++ * here it is a block but +++ * (drivers/base/memory.c): +++ * sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; +++ * (include/linux/memory.h): +++ * #define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS) +++ * so we can consider block and section equivalently +++ */ ++ memblock_mark_nomap(start_addr, 1<> PAGE_SHIFT; ++ unsigned long nr_pages = size >> PAGE_SHIFT; ++ unsigned long end_pfn = start_pfn + nr_pages; ++ unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT); ++ ++ if (end_pfn > max_sparsemem_pfn) { ++ pr_err("end_pfn too big"); ++ return -1; ++ } ++ hotplug_paging(start, size); ++ ++ ret = add_pages(nid, start_pfn, nr_pages, want_memblock); ++ ++ if (ret) ++ pr_warn("%s: Problem encountered in __add_pages() ret=%d\n", ++ __func__, ret); ++ ++ return ret; ++} ++#endif +diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c +index f1eb15e0e864..667a0de4cbaf 100644 +--- a/arch/arm64/mm/mmu.c ++++ b/arch/arm64/mm/mmu.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -40,6 +41,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -615,6 +617,44 @@ void __init paging_init(void) + SWAPPER_DIR_SIZE - PAGE_SIZE); + } + ++#ifdef CONFIG_MEMORY_HOTPLUG ++ ++/* +++ * hotplug_paging() is used by memory hotplug to build new page tables +++ * for hot added memory. 
+++ */ ++ ++struct mem_range { ++ phys_addr_t base; ++ phys_addr_t size; ++}; ++ ++static int __hotplug_paging(void *data) ++{ ++ int flags = 0; ++ struct mem_range *section = data; ++ ++ if (debug_pagealloc_enabled()) ++ flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; ++ ++ __create_pgd_mapping(swapper_pg_dir, section->base, ++ __phys_to_virt(section->base), section->size, ++ PAGE_KERNEL, pgd_pgtable_alloc, flags); ++ ++ return 0; ++} ++ ++inline void hotplug_paging(phys_addr_t start, phys_addr_t size) ++{ ++ struct mem_range section = { ++ .base = start, ++ .size = size, ++ }; ++ ++ stop_machine(__hotplug_paging, &section, NULL); ++} ++#endif /* CONFIG_MEMORY_HOTPLUG */ ++ + /* + * Check whether a kernel address is valid (derived from arch/x86/). + */ +-- +2.17.1 + From d2a42cb6d52dc2fc67dae766c7569da150b8ac30 Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Fri, 18 Jan 2019 02:35:30 -0500 Subject: [PATCH 2/2] kernel: config: bump config Fixes: #309 Signed-off-by: Jianyong Wu --- kernel/kata_config_version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kata_config_version b/kernel/kata_config_version index 2bd5a0a98a..409940768f 100644 --- a/kernel/kata_config_version +++ b/kernel/kata_config_version @@ -1 +1 @@ -22 +23