diff --git a/kernel/patches/5.4.x/0001-arm-arm64-Provide-a-wrapper-for-SMCCC-1.1-calls.patch b/kernel/patches/5.4.x/0003-arm-arm64-Provide-a-wrapper-for-SMCCC-1.1-calls.patch
similarity index 100%
rename from kernel/patches/5.4.x/0001-arm-arm64-Provide-a-wrapper-for-SMCCC-1.1-calls.patch
rename to kernel/patches/5.4.x/0003-arm-arm64-Provide-a-wrapper-for-SMCCC-1.1-calls.patch
diff --git a/kernel/patches/5.4.x/0002-arm-arm64-smccc-psci-add-arm_smccc_1_1_get_conduit.patch b/kernel/patches/5.4.x/0004-arm-arm64-smccc-psci-add-arm_smccc_1_1_get_conduit.patch
similarity index 100%
rename from kernel/patches/5.4.x/0002-arm-arm64-smccc-psci-add-arm_smccc_1_1_get_conduit.patch
rename to kernel/patches/5.4.x/0004-arm-arm64-smccc-psci-add-arm_smccc_1_1_get_conduit.patch
diff --git a/kernel/patches/5.4.x/0003-ptp-arm64-Enable-ptp_kvm-for-arm64.patch b/kernel/patches/5.4.x/0005-ptp-arm64-Enable-ptp_kvm-for-arm64.patch
similarity index 100%
rename from kernel/patches/5.4.x/0003-ptp-arm64-Enable-ptp_kvm-for-arm64.patch
rename to kernel/patches/5.4.x/0005-ptp-arm64-Enable-ptp_kvm-for-arm64.patch
diff --git a/kernel/patches/5.4.x/0006-arm64-mm-Enable-memory-hot-remove.patch b/kernel/patches/5.4.x/0006-arm64-mm-Enable-memory-hot-remove.patch
new file mode 100644
index 0000000000..3c0b035bf7
--- /dev/null
+++ b/kernel/patches/5.4.x/0006-arm64-mm-Enable-memory-hot-remove.patch
@@ -0,0 +1,498 @@
+From ba91422b18892bceacf3b4aa60354cf36fcabf9b Mon Sep 17 00:00:00 2001
+From: Penny Zheng
+Date: Wed, 8 Apr 2020 10:26:52 +0800
+Subject: [PATCH] arm64/mm: Enable memory hot remove
+
+Backport Anshuman Khandual's patch series enabling memory hot
+remove on aarch64 (https://patchwork.kernel.org/cover/11419305/)
+to v5.4.x.
+The series has already been merged upstream and is queued for 5.7.
+
+Signed-off-by: Penny Zheng
+---
+ arch/arm64/Kconfig              |   3 +
+ arch/arm64/include/asm/memory.h |   1 +
+ arch/arm64/mm/mmu.c             | 379 +++++++++++++++++++++++++++++++-
+ arch/arm64/mm/ptdump_debugfs.c  |   4 +
+ 4 files changed, 378 insertions(+), 9 deletions(-)
+
+diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
+index 6ccd2ed30963..d18b716fa569 100644
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -274,6 +274,9 @@ config ZONE_DMA32
+ config ARCH_ENABLE_MEMORY_HOTPLUG
+ 	def_bool y
+ 
++config ARCH_ENABLE_MEMORY_HOTREMOVE
++	def_bool y
++
+ config SMP
+ 	def_bool y
+ 
+diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
+index c23c47360664..dbba06e258f5 100644
+--- a/arch/arm64/include/asm/memory.h
++++ b/arch/arm64/include/asm/memory.h
+@@ -54,6 +54,7 @@
+ #define MODULES_VADDR		(BPF_JIT_REGION_END)
+ #define MODULES_VSIZE		(SZ_128M)
+ #define VMEMMAP_START		(-VMEMMAP_SIZE - SZ_2M)
++#define VMEMMAP_END		(VMEMMAP_START + VMEMMAP_SIZE)
+ #define PCI_IO_END		(VMEMMAP_START - SZ_2M)
+ #define PCI_IO_START		(PCI_IO_END - PCI_IO_SIZE)
+ #define FIXADDR_TOP		(PCI_IO_START - SZ_2M)
+diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
+index d10247fab0fd..99fec235144e 100644
+--- a/arch/arm64/mm/mmu.c
++++ b/arch/arm64/mm/mmu.c
+@@ -17,6 +17,7 @@
+ #include <linux/mman.h>
+ #include <linux/nodemask.h>
+ #include <linux/memblock.h>
++#include <linux/memory.h>
+ #include <linux/fs.h>
+ #include <linux/io.h>
+ #include <linux/mm.h>
+@@ -725,6 +726,312 @@ int kern_addr_valid(unsigned long addr)
+ 
+ 	return pfn_valid(pte_pfn(pte));
+ }
++
++#ifdef CONFIG_MEMORY_HOTPLUG
++static void free_hotplug_page_range(struct page *page, size_t size)
++{
++	WARN_ON(PageReserved(page));
++	free_pages((unsigned long)page_address(page), get_order(size));
++}
++
++static void free_hotplug_pgtable_page(struct page *page)
++{
++	free_hotplug_page_range(page, PAGE_SIZE);
++}
++
++static bool pgtable_range_aligned(unsigned long start, unsigned long end,
++				  unsigned long floor, unsigned long ceiling,
++				  unsigned long mask)
++{
++	start &= mask;
++	if (start < floor)
++		return false;
++
++	if (ceiling) {
++		ceiling &= mask;
++		if (!ceiling)
++			return false;
++	}
++
++	if (end - 1 > ceiling - 1)
++		return false;
++	return true;
++}
++
++static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr,
++				    unsigned long end, bool free_mapped)
++{
++	pte_t *ptep, pte;
++
++	do {
++		ptep = pte_offset_kernel(pmdp, addr);
++		pte = READ_ONCE(*ptep);
++		if (pte_none(pte))
++			continue;
++
++		WARN_ON(!pte_present(pte));
++		pte_clear(&init_mm, addr, ptep);
++		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
++		if (free_mapped)
++			free_hotplug_page_range(pte_page(pte), PAGE_SIZE);
++	} while (addr += PAGE_SIZE, addr < end);
++}
++
++static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr,
++				    unsigned long end, bool free_mapped)
++{
++	unsigned long next;
++	pmd_t *pmdp, pmd;
++
++	do {
++		next = pmd_addr_end(addr, end);
++		pmdp = pmd_offset(pudp, addr);
++		pmd = READ_ONCE(*pmdp);
++		if (pmd_none(pmd))
++			continue;
++
++		WARN_ON(!pmd_present(pmd));
++		if (pmd_sect(pmd)) {
++			pmd_clear(pmdp);
++
++			/*
++			 * One TLBI should be sufficient here as the PMD_SIZE
++			 * range is mapped with a single block entry.
++			 */
++			flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
++			if (free_mapped)
++				free_hotplug_page_range(pmd_page(pmd),
++							PMD_SIZE);
++			continue;
++		}
++		WARN_ON(!pmd_table(pmd));
++		unmap_hotplug_pte_range(pmdp, addr, next, free_mapped);
++	} while (addr = next, addr < end);
++}
++
++static void unmap_hotplug_pud_range(p4d_t *p4dp, unsigned long addr,
++				    unsigned long end, bool free_mapped)
++{
++	unsigned long next;
++	pud_t *pudp, pud;
++
++	do {
++		next = pud_addr_end(addr, end);
++		pudp = pud_offset(p4dp, addr);
++		pud = READ_ONCE(*pudp);
++		if (pud_none(pud))
++			continue;
++
++		WARN_ON(!pud_present(pud));
++		if (pud_sect(pud)) {
++			pud_clear(pudp);
++
++			/*
++			 * One TLBI should be sufficient here as the PUD_SIZE
++			 * range is mapped with a single block entry.
++			 */
++			flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
++			if (free_mapped)
++				free_hotplug_page_range(pud_page(pud),
++							PUD_SIZE);
++			continue;
++		}
++		WARN_ON(!pud_table(pud));
++		unmap_hotplug_pmd_range(pudp, addr, next, free_mapped);
++	} while (addr = next, addr < end);
++}
++
++static void unmap_hotplug_p4d_range(pgd_t *pgdp, unsigned long addr,
++				    unsigned long end, bool free_mapped)
++{
++	unsigned long next;
++	p4d_t *p4dp, p4d;
++
++	do {
++		next = p4d_addr_end(addr, end);
++		p4dp = p4d_offset(pgdp, addr);
++		p4d = READ_ONCE(*p4dp);
++		if (p4d_none(p4d))
++			continue;
++
++		WARN_ON(!p4d_present(p4d));
++		unmap_hotplug_pud_range(p4dp, addr, next, free_mapped);
++	} while (addr = next, addr < end);
++}
++
++static void unmap_hotplug_range(unsigned long addr, unsigned long end,
++				bool free_mapped)
++{
++	unsigned long next;
++	pgd_t *pgdp, pgd;
++
++	do {
++		next = pgd_addr_end(addr, end);
++		pgdp = pgd_offset_k(addr);
++		pgd = READ_ONCE(*pgdp);
++		if (pgd_none(pgd))
++			continue;
++
++		WARN_ON(!pgd_present(pgd));
++		unmap_hotplug_p4d_range(pgdp, addr, next, free_mapped);
++	} while (addr = next, addr < end);
++}
++
++static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr,
++				 unsigned long end, unsigned long floor,
++				 unsigned long ceiling)
++{
++	pte_t *ptep, pte;
++	unsigned long i, start = addr;
++
++	do {
++		ptep = pte_offset_kernel(pmdp, addr);
++		pte = READ_ONCE(*ptep);
++
++		/*
++		 * This is just a sanity check here which verifies that
++		 * pte clearing has been done by earlier unmap loops.
++		 */
++		WARN_ON(!pte_none(pte));
++	} while (addr += PAGE_SIZE, addr < end);
++
++	if (!pgtable_range_aligned(start, end, floor, ceiling, PMD_MASK))
++		return;
++
++	/*
++	 * Check whether we can free the pte page if the rest of the
++	 * entries are empty. Overlap with other regions have been
++	 * handled by the floor/ceiling check.
++	 */
++	ptep = pte_offset_kernel(pmdp, 0UL);
++	for (i = 0; i < PTRS_PER_PTE; i++) {
++		if (!pte_none(READ_ONCE(ptep[i])))
++			return;
++	}
++
++	pmd_clear(pmdp);
++	__flush_tlb_kernel_pgtable(start);
++	free_hotplug_pgtable_page(virt_to_page(ptep));
++}
++
++static void free_empty_pmd_table(pud_t *pudp, unsigned long addr,
++				 unsigned long end, unsigned long floor,
++				 unsigned long ceiling)
++{
++	pmd_t *pmdp, pmd;
++	unsigned long i, next, start = addr;
++
++	do {
++		next = pmd_addr_end(addr, end);
++		pmdp = pmd_offset(pudp, addr);
++		pmd = READ_ONCE(*pmdp);
++		if (pmd_none(pmd))
++			continue;
++
++		WARN_ON(!pmd_present(pmd) || !pmd_table(pmd) || pmd_sect(pmd));
++		free_empty_pte_table(pmdp, addr, next, floor, ceiling);
++	} while (addr = next, addr < end);
++
++	if (CONFIG_PGTABLE_LEVELS <= 2)
++		return;
++
++	if (!pgtable_range_aligned(start, end, floor, ceiling, PUD_MASK))
++		return;
++
++	/*
++	 * Check whether we can free the pmd page if the rest of the
++	 * entries are empty. Overlap with other regions have been
++	 * handled by the floor/ceiling check.
++	 */
++	pmdp = pmd_offset(pudp, 0UL);
++	for (i = 0; i < PTRS_PER_PMD; i++) {
++		if (!pmd_none(READ_ONCE(pmdp[i])))
++			return;
++	}
++
++	pud_clear(pudp);
++	__flush_tlb_kernel_pgtable(start);
++	free_hotplug_pgtable_page(virt_to_page(pmdp));
++}
++
++static void free_empty_pud_table(p4d_t *p4dp, unsigned long addr,
++				 unsigned long end, unsigned long floor,
++				 unsigned long ceiling)
++{
++	pud_t *pudp, pud;
++	unsigned long i, next, start = addr;
++
++	do {
++		next = pud_addr_end(addr, end);
++		pudp = pud_offset(p4dp, addr);
++		pud = READ_ONCE(*pudp);
++		if (pud_none(pud))
++			continue;
++
++		WARN_ON(!pud_present(pud) || !pud_table(pud) || pud_sect(pud));
++		free_empty_pmd_table(pudp, addr, next, floor, ceiling);
++	} while (addr = next, addr < end);
++
++	if (CONFIG_PGTABLE_LEVELS <= 3)
++		return;
++
++	if (!pgtable_range_aligned(start, end, floor, ceiling, PGDIR_MASK))
++		return;
++
++	/*
++	 * Check whether we can free the pud page if the rest of the
++	 * entries are empty. Overlap with other regions have been
++	 * handled by the floor/ceiling check.
++	 */
++	pudp = pud_offset(p4dp, 0UL);
++	for (i = 0; i < PTRS_PER_PUD; i++) {
++		if (!pud_none(READ_ONCE(pudp[i])))
++			return;
++	}
++
++	p4d_clear(p4dp);
++	__flush_tlb_kernel_pgtable(start);
++	free_hotplug_pgtable_page(virt_to_page(pudp));
++}
++
++static void free_empty_p4d_table(pgd_t *pgdp, unsigned long addr,
++				 unsigned long end, unsigned long floor,
++				 unsigned long ceiling)
++{
++	unsigned long next;
++	p4d_t *p4dp, p4d;
++
++	do {
++		next = p4d_addr_end(addr, end);
++		p4dp = p4d_offset(pgdp, addr);
++		p4d = READ_ONCE(*p4dp);
++		if (p4d_none(p4d))
++			continue;
++
++		WARN_ON(!p4d_present(p4d));
++		free_empty_pud_table(p4dp, addr, next, floor, ceiling);
++	} while (addr = next, addr < end);
++}
++
++static void free_empty_tables(unsigned long addr, unsigned long end,
++			      unsigned long floor, unsigned long ceiling)
++{
++	unsigned long next;
++	pgd_t *pgdp, pgd;
++
++	do {
++		next = pgd_addr_end(addr, end);
++		pgdp = pgd_offset_k(addr);
++		pgd = READ_ONCE(*pgdp);
++		if (pgd_none(pgd))
++			continue;
++
++		WARN_ON(!pgd_present(pgd));
++		free_empty_p4d_table(pgdp, addr, next, floor, ceiling);
++	} while (addr = next, addr < end);
++}
++#endif
++
+ #ifdef CONFIG_SPARSEMEM_VMEMMAP
+ #if !ARM64_SWAPPER_USES_SECTION_MAPS
+ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+@@ -772,6 +1079,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ void vmemmap_free(unsigned long start, unsigned long end,
+ 		struct vmem_altmap *altmap)
+ {
++#ifdef CONFIG_MEMORY_HOTPLUG
++	WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
++
++	unmap_hotplug_range(start, end, true);
++	free_empty_tables(start, end, VMEMMAP_START, VMEMMAP_END);
++#endif
+ }
+ #endif /* CONFIG_SPARSEMEM_VMEMMAP */
+ 
+@@ -1050,10 +1363,21 @@ int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
+ }
+ 
+ #ifdef CONFIG_MEMORY_HOTPLUG
++static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size)
++{
++	unsigned long end = start + size;
++
++	WARN_ON(pgdir != init_mm.pgd);
++	WARN_ON((start < PAGE_OFFSET) || (end > PAGE_END));
++
++	unmap_hotplug_range(start, end, false);
++	free_empty_tables(start, end, PAGE_OFFSET, PAGE_END);
++}
++
+ int arch_add_memory(int nid, u64 start, u64 size,
+ 			struct mhp_restrictions *restrictions)
+ {
+-	int flags = 0;
++	int ret, flags = 0;
+ 
+ 	if (rodata_full || debug_pagealloc_enabled())
+ 		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+@@ -1061,22 +1385,59 @@ int arch_add_memory(int nid, u64 start, u64 size,
+ 	__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
+ 			     size, PAGE_KERNEL, __pgd_pgtable_alloc, flags);
+ 
+-	return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT,
++	ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT,
+ 			   restrictions);
++	if (ret)
++		__remove_pgd_mapping(swapper_pg_dir,
++				     __phys_to_virt(start), size);
++	return ret;
+ }
++
+ void arch_remove_memory(int nid, u64 start, u64 size,
+ 			struct vmem_altmap *altmap)
+ {
+ 	unsigned long start_pfn = start >> PAGE_SHIFT;
+ 	unsigned long nr_pages = size >> PAGE_SHIFT;
+ 
+-	/*
+-	 * FIXME: Cleanup page tables (also in arch_add_memory() in case
+-	 * adding fails). Until then, this function should only be used
+-	 * during memory hotplug (adding memory), not for memory
+-	 * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be
+-	 * unlocked yet.
+-	 */
+ 	__remove_pages(start_pfn, nr_pages, altmap);
++	__remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size);
++}
++
++/*
++ * This memory hotplug notifier helps prevent boot memory from being
++ * inadvertently removed as it blocks pfn range offlining process in
++ * __offline_pages(). Hence this prevents both offlining as well as
++ * removal process for boot memory which is initially always online.
++ * In future if and when boot memory could be removed, this notifier
++ * should be dropped and free_hotplug_page_range() should handle any
++ * reserved pages allocated during boot.
++ */
++static int prevent_bootmem_remove_notifier(struct notifier_block *nb,
++					   unsigned long action, void *data)
++{
++	struct mem_section *ms;
++	struct memory_notify *arg = data;
++	unsigned long end_pfn = arg->start_pfn + arg->nr_pages;
++	unsigned long pfn = arg->start_pfn;
++
++	if (action != MEM_GOING_OFFLINE)
++		return NOTIFY_OK;
++
++	for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
++		ms = __pfn_to_section(pfn);
++		if (early_section(ms))
++			return NOTIFY_BAD;
++	}
++	return NOTIFY_OK;
++}
++
++static struct notifier_block prevent_bootmem_remove_nb = {
++	.notifier_call = prevent_bootmem_remove_notifier,
++};
++
++static int __init prevent_bootmem_remove_init(void)
++{
++	return register_memory_notifier(&prevent_bootmem_remove_nb);
+ }
++device_initcall(prevent_bootmem_remove_init);
+ #endif
+diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
+index 064163f25592..b5eebc8c4924 100644
+--- a/arch/arm64/mm/ptdump_debugfs.c
++++ b/arch/arm64/mm/ptdump_debugfs.c
+@@ -1,5 +1,6 @@
+ // SPDX-License-Identifier: GPL-2.0
+ #include <linux/debugfs.h>
++#include <linux/memory_hotplug.h>
+ #include <linux/seq_file.h>
+ 
+ #include <asm/ptdump.h>
+@@ -7,7 +8,10 @@
+ static int ptdump_show(struct seq_file *m, void *v)
+ {
+ 	struct ptdump_info *info = m->private;
++
++	get_online_mems();
+ 	ptdump_walk_pgd(m, info);
++	put_online_mems();
+ 	return 0;
+ }
+ DEFINE_SHOW_ATTRIBUTE(ptdump);
+-- 
+2.17.1
+