From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
|
|
Date: Sat, 30 Nov 2019 17:54:33 -0800
|
|
Subject: [PATCH] mm/vmalloc: remove preempt_disable/enable when doing
|
|
preloading
|
|
|
|
Some background. Preemption was previously disabled to guarantee that a
preloaded object is available to the CPU it was stored for. That was
achieved by disabling preemption and taking the spin lock while
ne_fit_preload_node is checked.

The aim was to avoid allocating in atomic context when the spinlock is
taken later, for regular vmap allocations. But that approach conflicts
with the CONFIG_PREEMPT_RT philosophy: on a CONFIG_PREEMPT_RT kernel
spinlocks become sleeping locks, so calling spin_lock() with preemption
disabled is forbidden.

Therefore, get rid of preempt_disable() and preempt_enable() when the
preload is done for the splitting purpose. As a result we no longer
guarantee that a CPU is preloaded; instead we minimize the cases when
it is not, by populating the per-CPU preload pointer under the
vmap_area_lock.

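To make the new scheme easier to follow outside the kernel tree, here is a
minimal userspace sketch of the same pattern: allocate opportunistically
outside the lock, publish into a per-CPU-like slot with a compare-and-swap
once the lock is held, and drop the extra object if the slot was already
filled. All names (preload_slot, alloc_object, preload_then_lock, SLOTS)
are invented for illustration; this is not the kernel code, which is shown
in the diff below.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define SLOTS 4                                /* stand-in for per-CPU slots */

struct object { int payload; };

/* Analogue of the per-CPU ne_fit_preload_node pointer. */
static _Atomic(struct object *) preload_slot[SLOTS];

/* Analogue of vmap_area_lock. */
static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

static struct object *alloc_object(void)
{
	/* Stands in for kmem_cache_alloc_node(..., GFP_KERNEL, node). */
	return calloc(1, sizeof(struct object));
}

static void preload_then_lock(int slot)
{
	struct object *pva = NULL;

	/* Allocate outside the lock, only if the slot looks empty. */
	if (!atomic_load(&preload_slot[slot]))
		pva = alloc_object();

	pthread_mutex_lock(&big_lock);

	/* Publish under the lock; if someone raced us, drop the extra copy. */
	if (pva) {
		struct object *expected = NULL;

		if (!atomic_compare_exchange_strong(&preload_slot[slot],
						    &expected, pva))
			free(pva);
	}

	/* ... critical section that may consume preload_slot[slot] ... */

	pthread_mutex_unlock(&big_lock);
}

int main(void)
{
	preload_then_lock(0);
	printf("slot 0 preloaded: %s\n",
	       atomic_load(&preload_slot[0]) ? "yes" : "no");
	free(atomic_exchange(&preload_slot[0], (struct object *)NULL));
	return 0;
}

The point of publishing under the lock is the same as in the patch: a slot
may still end up unpopulated after a migration-like race, but the common
case is covered without ever allocating in atomic context.
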
This implies that at least each caller that has done the preallocation
will not fall back to an atomic allocation later. It is possible that
the preallocation turns out to be pointless, or that no preallocation is
done because of the race, but the data shows that this is really rare.

For example, I ran a special test case that follows the preload pattern
and path. 20 "unbind" threads ran it, each doing 1000000 allocations.
On average, only 3.5 times out of 1000000 a CPU was not preloaded. So it
can happen, but the number is negligible.

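The measurement above is kernel-specific, but the contention pattern it
exercises can be approximated in userspace: many threads repeatedly preload
a shared slot, take the lock, and count how often a consumer still finds
the slot empty because of the pre-lock race. The harness below is only a
hypothetical analogue (NTHREADS, ITERS, the single slot and the miss
counter are all invented), not the test that produced the numbers quoted
here.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define NTHREADS 20
#define ITERS    100000L

static _Atomic(void *) slot;                   /* shared "preload" slot    */
static _Atomic long misses;                    /* consumer found it empty  */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void *worker(void *arg)
{
	(void)arg;

	for (long i = 0; i < ITERS; i++) {
		void *pva = NULL, *expected = NULL;

		/* Preload outside the lock, as the patch does. */
		if (!atomic_load(&slot))
			pva = malloc(16);

		pthread_mutex_lock(&lock);

		/* Publish; if we raced with another preloader, drop ours. */
		if (pva && !atomic_compare_exchange_strong(&slot, &expected, pva))
			free(pva);

		/* Consume the preloaded object, noting when the race left none. */
		void *got = atomic_exchange(&slot, (void *)NULL);
		if (got)
			free(got);
		else
			atomic_fetch_add(&misses, 1);

		pthread_mutex_unlock(&lock);
	}

	return NULL;
}

int main(void)
{
	pthread_t tid[NTHREADS];

	for (int i = 0; i < NTHREADS; i++)
		pthread_create(&tid[i], NULL, worker, NULL);
	for (int i = 0; i < NTHREADS; i++)
		pthread_join(tid[i], NULL);

	printf("misses: %ld out of %ld iterations\n",
	       atomic_load(&misses), NTHREADS * ITERS);
	return 0;
}
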
[mhocko@suse.com: changelog additions]
Link: http://lkml.kernel.org/r/20191016095438.12391-1-urezki@gmail.com
Fixes: 82dd23e84be3 ("mm/vmalloc.c: preload a CPU with one object for split purpose")
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Daniel Wagner <dwagner@suse.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sonymobile.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 mm/vmalloc.c | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1077,31 +1077,34 @@ static struct vmap_area *alloc_vmap_area
 
 retry:
 	/*
-	 * Preload this CPU with one extra vmap_area object to ensure
-	 * that we have it available when fit type of free area is
-	 * NE_FIT_TYPE.
+	 * Preload this CPU with one extra vmap_area object. It is used
+	 * when fit type of free area is NE_FIT_TYPE. Please note, it
+	 * does not guarantee that an allocation occurs on a CPU that
+	 * is preloaded, instead we minimize the case when it is not.
+	 * It can happen because of cpu migration, because there is a
+	 * race until the below spinlock is taken.
 	 *
 	 * The preload is done in non-atomic context, thus it allows us
 	 * to use more permissive allocation masks to be more stable under
-	 * low memory condition and high memory pressure.
+	 * low memory condition and high memory pressure. In rare case,
+	 * if not preloaded, GFP_NOWAIT is used.
 	 *
-	 * Even if it fails we do not really care about that. Just proceed
-	 * as it is. "overflow" path will refill the cache we allocate from.
+	 * Set "pva" to NULL here, because of "retry" path.
 	 */
-	preempt_disable();
-	if (!__this_cpu_read(ne_fit_preload_node)) {
-		preempt_enable();
-		pva = kmem_cache_alloc_node(vmap_area_cachep, GFP_KERNEL, node);
-		preempt_disable();
+	pva = NULL;
 
-		if (__this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva)) {
-			if (pva)
-				kmem_cache_free(vmap_area_cachep, pva);
-		}
-	}
+	if (!this_cpu_read(ne_fit_preload_node))
+		/*
+		 * Even if it fails we do not really care about that.
+		 * Just proceed as it is. If needed "overflow" path
+		 * will refill the cache we allocate from.
+		 */
+		pva = kmem_cache_alloc_node(vmap_area_cachep, GFP_KERNEL, node);
 
 	spin_lock(&vmap_area_lock);
-	preempt_enable();
+
+	if (pva && __this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva))
+		kmem_cache_free(vmap_area_cachep, pva);
 
 	/*
 	 * If an allocation fails, the "vend" address is